소스 검색

modified DTBObliqueLS for the use of SplittingCriterion objects

Sven Sickert 8 년 전
부모
커밋
45f6a7288c
2개의 변경된 파일에서 103개의 추가작업 그리고 181개의 삭제
  1. 92 138
      classifier/fpclassifier/randomforest/DTBObliqueLS.cpp
  2. 11 43
      classifier/fpclassifier/randomforest/DTBObliqueLS.h

+ 92 - 138
classifier/fpclassifier/randomforest/DTBObliqueLS.cpp

@@ -9,6 +9,9 @@
 #include <time.h>
 
 #include "DTBObliqueLS.h"
+#include "SCInformationGain.h"
+#include "SCGiniIndex.h"
+
 #include "vislearning/features/fpfeatures/ConvolutionFeature.h"
 
 #include "core/vector/Algorithms.h"
@@ -17,88 +20,37 @@ using namespace OBJREC;
 
 //#define DEBUGTREE
 
-
-using namespace std;
-using namespace NICE;
-
-DTBObliqueLS::DTBObliqueLS ( const Config *conf, string section )
+DTBObliqueLS::DTBObliqueLS ( const NICE::Config *conf, std::string section )
 {
     saveIndices = conf->gB( section, "save_indices", false);
-    useShannonEntropy = conf->gB( section, "use_shannon_entropy", false );
-    useOneVsOne = conf->gB( section, "use_one_vs_one", false );
     useDynamicRegularization = conf->gB( section, "use_dynamic_regularization", true );
+    multiClassMode = conf->gB( section, "multi_class_mode", 0 );
 
     splitSteps = conf->gI( section, "split_steps", 20 );
     maxDepth = conf->gI( section, "max_depth", 10 );
-    minExamples = conf->gI( section, "min_examples", 50);
     regularizationType = conf->gI( section, "regularization_type", 1 );
 
-    minimumEntropy = conf->gD( section, "minimum_entropy", 10e-5 );
-    minimumInformationGain = conf->gD( section, "minimum_information_gain", 10e-7 );
     lambdaInit = conf->gD( section, "lambda_init", 0.5 );
 
+    std::string splitCrit = conf->gS( section, "split_criterion", "information_gain" );
+    if (splitCrit == "information_gain")
+        splitCriterion = new SCInformationGain( conf );
+    else if (splitCrit == "gini_index")
+        splitCriterion = new SCGiniIndex( conf );
+    else
+    {
+        std::cerr << "DTBObliqueLS::DTBObliqueLS: No valid splitting criterion defined!" << std::endl;
+        splitCriterion = NULL;
+    }
+
     if ( conf->gB(section, "start_random_generator", true ) )
         srand(time(NULL));
 }
 
 DTBObliqueLS::~DTBObliqueLS()
 {
-
-}
-
-bool DTBObliqueLS::entropyLeftRight (
-        const FeatureValuesUnsorted & values,
-        double threshold,
-        double* stat_left,
-        double* stat_right,
-        double & entropy_left,
-        double & entropy_right,
-        double & count_left,
-        double & count_right,
-        int maxClassNo )
-{
-    count_left = 0;
-    count_right = 0;
-    int count_unweighted_left = 0;
-    int count_unweighted_right = 0;
-    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
-          i != values.end();
-          i++ )
-    {
-        int classno = i->second;
-        double value = i->first;
-        if ( value < threshold ) {
-            stat_left[classno] += i->fourth;
-            count_left+=i->fourth;
-            count_unweighted_left++;
-        }
-        else
-        {
-            stat_right[classno] += i->fourth;
-            count_right+=i->fourth;
-            count_unweighted_right++;
-        }
-    }
-
-    if (  (count_unweighted_left < minExamples)
-       || (count_unweighted_right < minExamples) )
-        return false;
-
-    entropy_left = 0.0;
-    for ( int j = 0 ; j <= maxClassNo ; j++ )
-        if ( stat_left[j] != 0 )
-            entropy_left -= stat_left[j] * log(stat_left[j]);
-    entropy_left /= count_left;
-    entropy_left += log(count_left);
-
-    entropy_right = 0.0;
-    for ( int j = 0 ; j <= maxClassNo ; j++ )
-        if ( stat_right[j] != 0 )
-            entropy_right -= stat_right[j] * log(stat_right[j]);
-    entropy_right /= count_right;
-    entropy_right += log (count_right);
-
-    return true;
+    if (splitCriterion != NULL)
+        delete splitCriterion;
 }
 
 bool DTBObliqueLS::adaptDataAndLabelForMultiClass (
@@ -107,25 +59,21 @@ bool DTBObliqueLS::adaptDataAndLabelForMultiClass (
         NICE::Matrix & X,
         NICE::Vector & y )
 {
-    bool posHasExamples = false;
-    bool negHasExamples = false;
     int posCount = 0;
     int negCount = 0;
 
     // One-vs-one: Transforming into {-1,0,+1} problem
-    if ( useOneVsOne )
+    if ( multiClassMode == 0 )
         for ( int i = 0; i < y.size(); i++ )
         {
             if ( y[i] == posClass )
             {
                 y[i] = 1.0;
-                posHasExamples = true;
                 posCount++;
             }
             else if ( y[i] == negClass )
             {
                 y[i] = -1.0;
-                negHasExamples = true;
                 negCount++;
             }
             else
@@ -135,27 +83,58 @@ bool DTBObliqueLS::adaptDataAndLabelForMultiClass (
             }
         }
     // One-vs-all: Transforming into {-1,+1} problem
-    else
+    else if ( multiClassMode == 1 )
         for ( int i = 0; i < y.size(); i++ )
         {
             if ( y[i] == posClass )
             {
                 y[i] = 1.0;
-                posHasExamples = true;
                 posCount++;
             }
             else
             {
                 y[i] = -1.0;
-                negHasExamples = true;
                 negCount++;
             }
         }
-
-    if ( posHasExamples && negHasExamples )
-        return true;
+    // Many-vs-many: Transforming into {-1,+1}
     else
-        return false;
+    {
+        // get existing classes
+        std::vector<double> unClass = y.std_vector();
+        std::sort( unClass.begin(), unClass.end() );
+        unClass.erase( std::unique( unClass.begin(), unClass.end() ), unClass.end() );
+
+        // randomly split set of classes into two buckets
+        std::random_shuffle ( unClass.begin(), unClass.end() );
+        int firstHalf = std::ceil(unClass.size()/2.0);
+        for ( int i = 0; i < y.size(); i++ )
+        {
+           bool wasFound = false;
+           int c = 0;
+           //assign new labels
+           while ( (!wasFound) && (c<firstHalf) )
+           {
+               if ( y[i] == unClass[c] )
+               {
+                   wasFound = true;
+               }
+               c++;
+           }
+           if (wasFound)
+           {
+               y[i] = 1.0;
+               posCount++;
+           }
+           else
+           {
+               y[i] = -1.0;
+               negCount++;
+           }
+        }
+    }
+
+    return ( (posCount>0) && (negCount>0));
 }
 
 /** refresh data matrix X and label vector y */
@@ -176,11 +155,11 @@ void DTBObliqueLS::getDataAndLabel(
     w = NICE::Vector(amountExamples, 1.0);
 
     int matIndex = 0;
-    for ( vector<int>::const_iterator si = examples_selection.begin();
+    for ( std::vector<int>::const_iterator si = examples_selection.begin();
           si != examples_selection.end();
           si++ )
     {
-        const pair<int, Example> & p = examples[*si];
+        const std::pair<int, Example> & p = examples[*si];
         const Example & ex = p.second;
 
         NICE::Vector pixelRepr (amountParams, 1.0);
@@ -279,7 +258,6 @@ void DTBObliqueLS::findBestSplitThreshold (
         FeatureValuesUnsorted &values,
         SplitInfo &bestSplitInfo,
         const NICE::Vector &params,
-        const double &e,
         const int &maxClassNo )
 {
     double *distribution_left = new double [maxClassNo+1];
@@ -297,7 +275,6 @@ void DTBObliqueLS::findBestSplitThreshold (
         double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
                             + minValue;
         // preparations
-        double el, er;
         for ( int k = 0 ; k <= maxClassNo ; k++ )
         {
             distribution_left[k] = 0.0;
@@ -305,27 +282,20 @@ void DTBObliqueLS::findBestSplitThreshold (
         }
 
         /** Test the current split */
-        // Does another split make sense?
-        double count_left;
-        double count_right;
-        if ( ! entropyLeftRight ( values, threshold,
-                                  distribution_left, distribution_right,
-                                  el, er, count_left, count_right, maxClassNo ) )
-            continue;
+        SplittingCriterion *curSplit = splitCriterion->clone();
 
-        // information gain and entropy
-        double pl = (count_left) / (count_left + count_right);
-        double ig = e - pl*el - (1-pl)*er;
+        if ( ! curSplit->evaluateSplit ( values, threshold,
+                 distribution_left, distribution_right, maxClassNo ) )
+            continue;
 
-        if ( useShannonEntropy )
-        {
-            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-            ig = 2*ig / ( e + esplit );
-        }
+        // get value for impurity
+        double purity = curSplit->computePurity();
+        double entropy = curSplit->getEntropy();
 
-        if ( ig > bestSplitInfo.informationGain )
+        if ( purity > bestSplitInfo.purity )
         {
-            bestSplitInfo.informationGain = ig;
+            bestSplitInfo.purity = purity;
+            bestSplitInfo.entropy = entropy;
             bestSplitInfo.threshold = threshold;
             bestSplitInfo.params = params;
 
@@ -334,9 +304,9 @@ void DTBObliqueLS::findBestSplitThreshold (
                 bestSplitInfo.distLeft[k] = distribution_left[k];
                 bestSplitInfo.distRight[k] = distribution_right[k];
             }
-            bestSplitInfo.entropyLeft = el;
-            bestSplitInfo.entropyRight = er;
         }
+
+        delete curSplit;
     }
 
     //cleaning up
@@ -350,22 +320,22 @@ DecisionNode *DTBObliqueLS::buildRecursive(
         const Examples & examples,
         std::vector<int> & examples_selection,
         FullVector & distribution,
-        double e,
+        double entropy,
         int maxClassNo,
         int depth,
         double lambdaCurrent )
 {
 
     std::cerr << "DTBObliqueLS: Examples: " << (int)examples_selection.size()
-              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
+              << ", Depth: " << (int)depth << ", Entropy: " << entropy << std::endl;
 
     // initialize new node
     DecisionNode *node = new DecisionNode ();
     node->distribution = distribution;
 
-    // stop criteria: maxDepth, minExamples, min_entropy
-    if (    ( e <= minimumEntropy )
-//         || ( (int)examples_selection.size() < minExamples )
+    // stopping criteria
+    if (    ( entropy <= splitCriterion->getMinimumEntropy() )
+         || ( (int)examples_selection.size() < splitCriterion->getMinimumExamples() )
          || ( depth > maxDepth ) )
 
     {
@@ -380,11 +350,10 @@ DecisionNode *DTBObliqueLS::buildRecursive(
     FeatureValuesUnsorted values;
     SplitInfo bestSplitInfo;
     bestSplitInfo.threshold = 0.0;
-    bestSplitInfo.informationGain = -1.0;
+    bestSplitInfo.purity = -1.0;
+    bestSplitInfo.entropy = 0.0;
     bestSplitInfo.distLeft = new double [maxClassNo+1];
     bestSplitInfo.distRight = new double [maxClassNo+1];
-    bestSplitInfo.entropyLeft = 0.0;
-    bestSplitInfo.entropyRight = 0.0;
 
     ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
     bestSplitInfo.params = f->getParameterVector();
@@ -404,10 +373,7 @@ DecisionNode *DTBObliqueLS::buildRecursive(
         int posClass, negClass;
 
         posClass = rand() % (maxClassNo+1);
-        negClass = posClass;
-
-        while ( posClass == negClass )
-            negClass = rand() % (maxClassNo+1);
+        negClass = (posClass + (rand() % maxClassNo)) % (maxClassNo+1);
 
         yCur = y;
         XCur = X;
@@ -436,22 +402,13 @@ DecisionNode *DTBObliqueLS::buildRecursive(
     f->calcFeatureValues( examples, examples_selection, values);
 
     // complete search for threshold
-    findBestSplitThreshold ( values, bestSplitInfo, params, e, maxClassNo );
+    findBestSplitThreshold ( values, bestSplitInfo, params, maxClassNo );
 
-//    f->setRandomParameterVector();
-//    params = f->getParameterVector();
-//    f->calcFeatureValues( examples, examples_selection, values);
-//    findBestSplitThreshold ( values, bestSplitInfo, params, e, maxClassNo );
-
-    // supress strange behaviour for values near zero (8.88178e-16)
-    if (bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
-    if (bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;
-
-    // stop criteria: minimum information gain
-    if ( bestSplitInfo.informationGain < minimumInformationGain )
+    // stop criteria: minimum purity reached?
+    if ( bestSplitInfo.purity < splitCriterion->getMinimumPurity() )
     {
 #ifdef DEBUGTREE
-        std::cerr << "DTBObliqueLS: Minimum information gain reached!" << std::endl;
+        std::cerr << "DTBObliqueLS: Minimum purity reached!" << std::endl;
 #endif
         delete [] bestSplitInfo.distLeft;
         delete [] bestSplitInfo.distRight;
@@ -467,8 +424,8 @@ DecisionNode *DTBObliqueLS::buildRecursive(
     node->threshold = bestSplitInfo.threshold;
 
     /** Split examples according to best split function */
-    vector<int> examples_left;
-    vector<int> examples_right;
+    std::vector<int> examples_left;
+    std::vector<int> examples_right;
 
     examples_left.reserve ( values.size() / 2 );
     examples_right.reserve ( values.size() / 2 );
@@ -484,9 +441,6 @@ DecisionNode *DTBObliqueLS::buildRecursive(
 #ifdef DEBUGTREE
 //    node->f->store( std::cerr );
 //    std::cerr << std::endl;
-    std::cerr << "DTBObliqueLS: Information Gain: " << bestSplitInfo.informationGain
-              << ", Left Entropy: " <<  bestSplitInfo.entropyLeft << ", Right Entropy: "
-              << bestSplitInfo.entropyRight << std::endl;
 #endif
 
     FullVector distribution_left_sparse ( distribution.size() );
@@ -499,10 +453,10 @@ DecisionNode *DTBObliqueLS::buildRecursive(
             distribution_left_sparse[k] = l;
         if ( r != 0 )
             distribution_right_sparse[k] = r;
-//#ifdef DEBUGTREE
-//        std::cerr << "DTBObliqueLS: Split of Class " << k << " ("
-//                  << l << " <-> " << r << ") " << std::endl;
-//#endif
+#ifdef DEBUGTREE
+        std::cerr << "DTBObliqueLS: Split of Class " << k << " ("
+                  << l << " <-> " << r << ") " << std::endl;
+#endif
     }
 
     delete [] bestSplitInfo.distLeft;
@@ -528,11 +482,11 @@ DecisionNode *DTBObliqueLS::buildRecursive(
     /** Recursion */
     // left child
     node->left  = buildRecursive ( fp, examples, examples_left,
-                                   distribution_left_sparse, bestSplitInfo.entropyLeft,
+                                   distribution_left_sparse, bestSplitInfo.entropy,
                                    maxClassNo, depth+1, lambdaLeft );
     // right child
     node->right = buildRecursive ( fp, examples, examples_right,
-                                   distribution_right_sparse, bestSplitInfo.entropyRight,
+                                   distribution_right_sparse, bestSplitInfo.entropy,
                                    maxClassNo, depth+1, lambdaRight );
 
     return node;
@@ -546,7 +500,7 @@ DecisionNode *DTBObliqueLS::build ( const FeaturePool & fp,
     int index = 0;
 
     FullVector distribution ( maxClassNo+1 );
-    vector<int> all;
+    std::vector<int> all;
 
     all.reserve ( examples.size() );
     for ( Examples::const_iterator j = examples.begin();

+ 11 - 43
classifier/fpclassifier/randomforest/DTBObliqueLS.h

@@ -10,9 +10,11 @@
 
 #include "core/vector/VectorT.h"
 #include "core/vector/MatrixT.h"
-
 #include "core/basics/Config.h"
+
 #include "DecisionTreeBuilder.h"
+#include "SplittingCriterion.h"
+
 #include "vislearning/cbaselib/CachedExample.h"
 
 
@@ -20,9 +22,8 @@ namespace OBJREC {
 
 struct SplitInfo {
     double threshold;
-    double informationGain;
-    double entropyLeft;
-    double entropyRight;
+    double purity;
+    double entropy;
     double *distLeft;
     double *distRight;
     NICE::Vector params;
@@ -39,14 +40,14 @@ class DTBObliqueLS : public DecisionTreeBuilder
     /////////////////////////
     /////////////////////////
 
-    /** Whether to use shannon entropy or not */
-    bool useShannonEntropy;
-
+    /** Splitting criterion */
+    SplittingCriterion *splitCriterion;
+    
     /** Whether to save indices in leaves or not */
     bool saveIndices;
 
-    /** Whether to use one-vs-one or one-vs-all for multiclass scenarios */
-    bool useOneVsOne;
+    /** Whether to use one-vs-one (0), one-vs-all (1) or many-vs-many (2) for multiclass scenarios */
+    int multiClassMode;
 
     /** Whether to increase the influence of regularization over time or not */
     bool useDynamicRegularization;
@@ -57,18 +58,9 @@ class DTBObliqueLS : public DecisionTreeBuilder
     /** Maximum allowed depth of a tree */
     int maxDepth;
 
-    /* Minimum amount of features in a leaf node */
-    int minExamples;
-
     /** Regularization type */
     int regularizationType;
 
-    /** Minimum entropy to continue with splitting */
-    double minimumEntropy;
-
-    /** Minimum information gain to continue with splitting */
-    double minimumInformationGain;
-
     /** Regularization parameter */
     double lambdaInit;
 
@@ -126,14 +118,13 @@ class DTBObliqueLS : public DecisionTreeBuilder
      * @brief find best threshold for current splitting
      * @param values feature values
      * @param bestSplitInfo struct including best split information
-     * @param e entropy before split
+     * @param params parameter vector for oblique decision
      * @param maxClassNo maximum class number
      */
     void findBestSplitThreshold (
             FeatureValuesUnsorted & values,
             SplitInfo & bestSplitInfo,
             const NICE::Vector & params,
-            const double & e,
             const int & maxClassNo );
 
     /**
@@ -157,29 +148,6 @@ class DTBObliqueLS : public DecisionTreeBuilder
            int depth,
            double curLambda );
 
-    /**
-     * @brief compute entropy for left and right child
-     * @param values feature values
-     * @param threshold threshold for split
-     * @param stat_left statistics for left child
-     * @param stat_right statistics for right child
-     * @param entropy_left entropy for left child
-     * @param entropy_right entropy for right child
-     * @param count_left amount of features in left child
-     * @param count_right amount of features in right child
-     * @param maxClassNo maximum class number
-     * @return whether another split is possible or not
-     */
-    bool entropyLeftRight ( const FeatureValuesUnsorted & values,
-           double threshold,
-           double* stat_left,
-           double* stat_right,
-           double & entropy_left,
-           double & entropy_right,
-           double & count_left,
-           double & count_right,
-           int maxClassNo );
-
   public:
 
     /** simple constructor */