/** * @file DTBObliqueLS.h * @brief oblique decision tree * @author Sven Sickert * @date 10/15/2014 */ #ifndef DTBOBLIQUELSINCLUDE #define DTBOBLIQUELSINCLUDE #include "core/vector/VectorT.h" #include "core/vector/MatrixT.h" #include "core/basics/Config.h" #include "DecisionTreeBuilder.h" #include "vislearning/cbaselib/CachedExample.h" namespace OBJREC { struct SplitInfo { double threshold; double informationGain; double entropyLeft; double entropyRight; double *distLeft; double *distRight; NICE::Vector params; }; /** random oblique decision tree */ class DTBObliqueLS : public DecisionTreeBuilder { protected: ///////////////////////// ///////////////////////// // PROTECTED VARIABLES // ///////////////////////// ///////////////////////// /** Whether to use shannon entropy or not */ bool useShannonEntropy; /** Whether to save indices in leaves or not */ bool saveIndices; /** Whether to use one-vs-one or one-vs-all for multiclass scenarios */ bool useOneVsOne; /** Whether to increase the influence of regularization over time or not */ bool useDynamicRegularization; /** Amount of steps for complete search for best threshold */ int splitSteps; /** Maximum allowed depth of a tree */ int maxDepth; /* Minimum amount of features in a leaf node */ int minExamples; /** Regularization type */ int regularizationType; /** Minimum entropy to continue with splitting */ double minimumEntropy; /** Minimum information gain to continue with splitting */ double minimumInformationGain; /** Regularization parameter */ double lambdaInit; ///////////////////////// ///////////////////////// // PROTECTED METHODS // ///////////////////////// ///////////////////////// /** * @brief adaptDataAndLabelForMultiClass * @param posClass positive class number * @param negClass negative class number * @param matX adapted data matrix * @param vecY adapted label vector * @param weights example weights * @return whether positive and negative classes have examples or not */ bool adaptDataAndLabelForMultiClass ( const int posClass, const int negClass, NICE::Matrix & matX, NICE::Vector & vecY ); /** * @brief get data matrix X and label vector y * @param fp feature pool * @param examples all examples of the training * @param examples_selection indeces of selected example subset * @param matX data matrix (amountExamples x amountParameters) * @param vecY label vector (amountExamples) */ void getDataAndLabel( const FeaturePool &fp, const Examples &examples, const std::vector & examples_selection, NICE::Matrix &X, NICE::Vector &y, NICE::Vector &w ); /** * @brief return a regularization matrix of size (dimParams)x(dimParams) * @param X data matrix * @param XTXreg return regularized X'*X * @param regOption which kind of regularization * @param lambda regularization parameter (weigthing) */ void regularizeDataMatrix ( const NICE::Matrix & X, NICE::Matrix &XTXreg, const int regOption, const double lambda ); /** * @brief find best threshold for current splitting * @param values feature values * @param bestSplitInfo struct including best split information * @param e entropy before split * @param maxClassNo maximum class number */ void findBestSplitThreshold ( FeatureValuesUnsorted & values, SplitInfo & bestSplitInfo, const NICE::Vector & params, const double & e, const int & maxClassNo ); /** * @brief recursive building method * @param fp feature pool * @param examples all examples of the training * @param examples_selection indeces of selected example subset * @param distribution class distribution in current node * @param entropy current entropy * @param maxClassNo maximum class number * @param depth current depth * @return Pointer to root/parent node */ DecisionNode *buildRecursive ( const FeaturePool & fp, const Examples & examples, std::vector & examples_selection, FullVector & distribution, double entropy, int maxClassNo, int depth, double curLambda ); /** * @brief compute entropy for left and right child * @param values feature values * @param threshold threshold for split * @param stat_left statistics for left child * @param stat_right statistics for right child * @param entropy_left entropy for left child * @param entropy_right entropy for right child * @param count_left amount of features in left child * @param count_right amount of features in right child * @param maxClassNo maximum class number * @return whether another split is possible or not */ bool entropyLeftRight ( const FeatureValuesUnsorted & values, double threshold, double* stat_left, double* stat_right, double & entropy_left, double & entropy_right, double & count_left, double & count_right, int maxClassNo ); public: /** simple constructor */ DTBObliqueLS ( const NICE::Config *conf, std::string section = "DTBObliqueLS" ); /** simple destructor */ virtual ~DTBObliqueLS(); /** * @brief initial building method * @param fp feature pool * @param examples all examples of the training * @param maxClassNo maximum class number * @return Pointer to root/parent node */ DecisionNode *build ( const FeaturePool &fp, const Examples &examples, int maxClassNo ); }; } //namespace #endif