
DTBOblique: add option to use one-vs-one instead of one-vs-all

Sven Sickert, 10 years ago
commit be662b9160

+ 172 - 110
classifier/fpclassifier/randomforest/DTBOblique.cpp
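For context, this change switches between two target encodings for the per-node
least-squares fit. A minimal sketch of both labelings (a hypothetical helper for
illustration only; the name and signature are not part of the commit):

    #include <vector>

    // One-vs-all maps the current class to +1 and every other example to -1.
    // One-vs-one maps the current class to +1, one opposing class to -1, and
    // leaves all remaining examples at 0 so they drop out of the fit.
    std::vector<double> makeTargets ( const std::vector<int> & labels,
                                      int curClass,
                                      int opClass = -1 /* -1 => one-vs-all */ )
    {
        std::vector<double> y ( labels.size(), opClass < 0 ? -1.0 : 0.0 );
        for ( size_t i = 0; i < labels.size(); i++ )
        {
            if ( labels[i] == curClass )
                y[i] = 1.0;
            else if ( opClass >= 0 && labels[i] == opClass )
                y[i] = -1.0;
        }
        return y;
    }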

@@ -202,6 +202,74 @@ void DTBOblique::regularizeDataMatrix(
     }
 }
 
+void DTBOblique::findBestSplitThreshold (
+        FeatureValuesUnsorted &values,
+        SplitInfo &bestSplitInfo,
+        const NICE::Vector &beta,
+        const double &e,
+        const int &maxClassNo )
+{
+    double *distribution_left = new double [maxClassNo+1];
+    double *distribution_right = new double [maxClassNo+1];
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+    if ( maxValue - minValue < 1e-7 )
+        std::cerr << "DTBOblique: Difference between min and max of feature values is too small!" << std::endl;
+
+    // get best thresholds using complete search
+    for ( int i = 0; i < splitSteps; i++ )
+    {
+        double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
+                            + minValue;
+        // preparations
+        double el, er;
+        for ( int k = 0 ; k <= maxClassNo ; k++ )
+        {
+            distribution_left[k] = 0.0;
+            distribution_right[k] = 0.0;
+        }
+
+        // Test the current split: does another split make sense?
+        double count_left;
+        double count_right;
+        if ( ! entropyLeftRight ( values, threshold,
+                                  distribution_left, distribution_right,
+                                  el, er, count_left, count_right, maxClassNo ) )
+            continue;
+
+        // information gain and entropy
+        double pl = (count_left) / (count_left + count_right);
+        double ig = e - pl*el - (1-pl)*er;
+
+        if ( useShannonEntropy )
+        {
+            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+            ig = 2*ig / ( e + esplit );
+        }
+
+        if ( ig > bestSplitInfo.informationGain )
+        {
+            bestSplitInfo.informationGain = ig;
+            bestSplitInfo.threshold = threshold;
+            bestSplitInfo.params = beta;
+
+            for ( int k = 0 ; k <= maxClassNo ; k++ )
+            {
+                bestSplitInfo.distLeft[k] = distribution_left[k];
+                bestSplitInfo.distRight[k] = distribution_right[k];
+            }
+            bestSplitInfo.entropyLeft = el;
+            bestSplitInfo.entropyRight = er;
+        }
+    }
+
+    // cleaning up
+    delete [] distribution_left;
+    delete [] distribution_right;
+}
+
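For reference, the score maximized in findBestSplitThreshold above is the plain
information gain

    ig = e - p_l * e_l - (1 - p_l) * e_r,   p_l = count_left / (count_left + count_right)

and, when useShannonEntropy is set, its normalized variant

    ig' = 2 * ig / (e + e_split),   e_split = -( p_l * log(p_l) + (1 - p_l) * log(1 - p_l) ).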
 /** recursive building method */
 DecisionNode *DTBOblique::buildRecursive(
         const FeaturePool & fp,
@@ -237,18 +305,24 @@ DecisionNode *DTBOblique::buildRecursive(
     }
 
     // variables
-    double best_threshold = 0.0;
-    double best_ig = -1.0;
     FeatureValuesUnsorted values;
-    double *best_distribution_left = new double [maxClassNo+1];
-    double *best_distribution_right = new double [maxClassNo+1];
-    double *distribution_left = new double [maxClassNo+1];
-    double *distribution_right = new double [maxClassNo+1];
-    double best_entropy_left = 0.0;
-    double best_entropy_right = 0.0;
+    SplitInfo bestSplitInfo;
+    bestSplitInfo.threshold = 0.0;
+    bestSplitInfo.informationGain = -1.0;
+    bestSplitInfo.distLeft = new double [maxClassNo+1];
+    bestSplitInfo.distRight = new double [maxClassNo+1];
+    bestSplitInfo.entropyLeft = 0.0;
+    bestSplitInfo.entropyRight = 0.0;
+
 
     ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
-    NICE::Vector best_beta = f->getParameterVector();
+    bestSplitInfo.params = f->getParameterVector();
 
     // Creating data matrix X and label vector y
     NICE::Matrix X, XTXr, G, temp;
@@ -257,134 +331,121 @@ DecisionNode *DTBOblique::buildRecursive(
 
     // Preparing system of linear equations
     regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
-
-    if (regularizationType == 3)
-    {
-        G = NICE::invert(XTXr);
-        temp = G * X.transpose();
-    }
-    else
-    {
-        choleskyDecomp(XTXr, G);
-        choleskyInvert(G, XTXr);
-        temp = XTXr * X.transpose();
-    }
+    choleskyDecomp(XTXr, G);
+    choleskyInvert(G, XTXr);
+    temp = XTXr * X.transpose();
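Assuming regularizeDataMatrix assembles XTXr = X^T X + lambda * R (with R chosen
by regularizationType), the lines above precompute the least-squares operator
once per node,

    temp = (X^T X + lambda * R)^{-1} * X^T

so every class (or class pair) below costs only a single matrix-vector product,
beta = temp * yCur.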
 
 
-    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
+    if ( useOneVsOne )
     {
-        // One-vs-all: Transforming into {-1,+1} problem
-        NICE::Vector yCur ( y.size(), -1.0 );
-        int idx = 0;
-        bool hasExamples = false;
-        for ( vector<int>::const_iterator si = examples_selection.begin();
-              si != examples_selection.end();
-              si++, idx++ )
-        {
-            const pair<int, Example> & p = examples[*si];
-            if (p.first == curClass)
+        // One-vs-one: Transforming into {-1,0,+1} problem
+        for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
+            for ( int opClass = 0; opClass <= maxClassNo; opClass++ )
             {
-                yCur.set( idx, 1.0 );
-                hasExamples = true;
-            }
-        }
+                if ( curClass == opClass ) continue;
 
-        // TODO: One-vs-one: Transforming into {-1,0,+1} problem
+                NICE::Vector yCur ( y.size(), 0.0 );
+                int idx = 0;
+                bool curHasExamples = false;
+                bool opHasExamples = false;
 
-        // is there a positive example for current class in current set?
-        if (!hasExamples) continue;
-
-        // Solve system of linear equations in a least squares manner
-        beta.multiply(temp,yCur,false);
+                for ( vector<int>::const_iterator si = examples_selection.begin();
+                      si != examples_selection.end();
+                      si++, idx++ )
+                {
+                    const pair<int, Example> & p = examples[*si];
+                    if ( p.first == curClass )
+                    {
+                        yCur.set( idx, 1.0 );
+                        curHasExamples = true;
+                    }
+                    else if ( p.first == opClass )
+                    {
+                        yCur.set( idx, -1.0 );
+                        opHasExamples = true;
+                    }
+                }
 
-        // Updating parameter vector in convolutional feature
-        f->setParameterVector( beta );
+                // are there examples of both the current and the opposing class in this selection?
+                if ( !curHasExamples || !opHasExamples ) continue;
 
-        // Feature Values
-        values.clear();
-        f->calcFeatureValues( examples, examples_selection, values);
+                // Solve system of linear equations in a least squares manner
+                beta.multiply(temp,yCur,false);
 
-        double minValue = (min_element ( values.begin(), values.end() ))->first;
-        double maxValue = (max_element ( values.begin(), values.end() ))->first;
+                // Updating parameter vector in convolutional feature
+                f->setParameterVector( beta );
 
-        if ( maxValue - minValue < 1e-7 )
-            std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
+                // Feature Values
+                values.clear();
+                f->calcFeatureValues( examples, examples_selection, values);
 
-        // get best thresholds using complete search
-        for ( int i = 0; i < splitSteps; i++ )
+                // complete search for threshold
+                findBestSplitThreshold ( values, bestSplitInfo, beta, e,
+                                         maxClassNo );
+            }
+    }
+    else
+    {
+        // One-vs-all: Transforming into {-1,+1} problem
+        for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
         {
-            double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
-                                + minValue;
-            // preparations
-            double el, er;
-            for ( int k = 0 ; k <= maxClassNo ; k++ )
+            NICE::Vector yCur ( y.size(), -1.0 );
+            int idx = 0;
+            bool hasExamples = false;
+            for ( vector<int>::const_iterator si = examples_selection.begin();
+                  si != examples_selection.end();
+                  si++, idx++ )
             {
-                distribution_left[k] = 0.0;
-                distribution_right[k] = 0.0;
+                const pair<int, Example> & p = examples[*si];
+                if ( p.first == curClass )
+                {
+                    yCur.set( idx, 1.0 );
+                    hasExamples = true;
+                }
             }
 
-            /** Test the current split */
-            // Does another split make sense?
-            double count_left;
-            double count_right;
-            if ( ! entropyLeftRight ( values, threshold,
-                                      distribution_left, distribution_right,
-                                      el, er, count_left, count_right, maxClassNo ) )
-                continue;
+            // is there a positive example for current class in current set?
+            if (!hasExamples) continue;
 
-            // information gain and entropy
-            double pl = (count_left) / (count_left + count_right);
-            double ig = e - pl*el - (1-pl)*er;
+            // Solve system of linear equations in a least squares manner
+            beta.multiply(temp,yCur,false);
 
-            if ( useShannonEntropy )
-            {
-                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-                ig = 2*ig / ( e + esplit );
-            }
+            // Updating parameter vector in convolutional feature
+            f->setParameterVector( beta );
 
-            if ( ig > best_ig )
-            {
-                best_ig = ig;
-                best_threshold = threshold;
-                best_beta = beta;
+            // Feature Values
+            values.clear();
+            f->calcFeatureValues( examples, examples_selection, values);
+
+            // complete search for threshold
+            findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );
 
-                for ( int k = 0 ; k <= maxClassNo ; k++ )
-                {
-                    best_distribution_left[k] = distribution_left[k];
-                    best_distribution_right[k] = distribution_right[k];
-                }
-                best_entropy_left = el;
-                best_entropy_right = er;
-            }
         }
     }
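A note on the loop over ordered class pairs above: the least-squares solution is
linear in the targets, so swapping a pair merely negates it,

    beta(op, cur) = temp * (-yCur) = -beta(cur, op),

which negates all feature values. Because the split test is the one-sided
"value < threshold", the two orientations can still produce different
partitions, so iterating over ordered rather than unordered pairs is presumably
intentional and not redundant.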
 
-    // supress strange behaviour for values near zero (8.88178e-16)
-    if (best_entropy_left < 1.0e-10 ) best_entropy_left = 0.0;
-    if (best_entropy_right < 1.0e-10 ) best_entropy_right = 0.0;
 
-    //cleaning up
-    delete [] distribution_left;
-    delete [] distribution_right;
+    // suppress floating-point noise in entropy values near zero (e.g. 8.88178e-16)
+    if (bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
+    if (bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;
 
     // stop criteria: minimum information gain
-    if ( best_ig < minimumInformationGain )
+    if ( bestSplitInfo.informationGain < minimumInformationGain )
     {
 #ifdef DEBUGTREE
         std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
 #endif
-        delete [] best_distribution_left;
-        delete [] best_distribution_right;
+        delete [] bestSplitInfo.distLeft;
+        delete [] bestSplitInfo.distRight;
         node->trainExamplesIndices = examples_selection;
         return node;
     }
 
     /** Save the best split to current node */
-    f->setParameterVector( best_beta );
+    f->setParameterVector( bestSplitInfo.params );
     values.clear();
     f->calcFeatureValues( examples, examples_selection, values);
     node->f = f->clone();
-    node->threshold = best_threshold;
+    node->threshold = bestSplitInfo.threshold;
 
     /** Split examples according to best split function */
     vector<int> examples_left;
@@ -396,7 +457,7 @@ DecisionNode *DTBOblique::buildRecursive(
           i != values.end(); i++ )
     {
         double value = i->first;
-        if ( value < best_threshold )
+        if ( value < bestSplitInfo.threshold )
             examples_left.push_back ( i->third );
         else
             examples_right.push_back ( i->third );
@@ -405,17 +466,17 @@ DecisionNode *DTBOblique::buildRecursive(
 #ifdef DEBUGTREE
     node->f->store( std::cerr );
     std::cerr << std::endl;
-    std::cerr << "DTBOblique: Information Gain: " << best_ig
-              << ", Left Entropy: " <<  best_entropy_left << ", Right Entropy: "
-              << best_entropy_right << std::endl;
+    std::cerr << "DTBOblique: Information Gain: " << bestSplitInfo.informationGain
+              << ", Left Entropy: " <<  bestSplitInfo.entropyLeft << ", Right Entropy: "
+              << bestSplitInfo.entropyRight << std::endl;
 #endif
 
     FullVector distribution_left_sparse ( distribution.size() );
     FullVector distribution_right_sparse ( distribution.size() );
     for ( int k = 0 ; k <= maxClassNo ; k++ )
     {
-        double l = best_distribution_left[k];
-        double r = best_distribution_right[k];
+        double l = bestSplitInfo.distLeft[k];
+        double r = bestSplitInfo.distRight[k];
         if ( l != 0 )
             distribution_left_sparse[k] = l;
         if ( r != 0 )
@@ -426,8 +487,9 @@ DecisionNode *DTBOblique::buildRecursive(
 #endif
     }
 
-    delete [] best_distribution_left;
-    delete [] best_distribution_right;
+    // cleaning up
+    delete [] bestSplitInfo.distLeft;
+    delete [] bestSplitInfo.distRight;
 
     // update lambda by heuristic [Laptev/Buhmann, 2014]
     double lambdaLeft = lambdaCurrent *
@@ -444,11 +506,11 @@ DecisionNode *DTBOblique::buildRecursive(
     /** Recursion */
     // left child
     node->left  = buildRecursive ( fp, examples, examples_left,
-                                   distribution_left_sparse, best_entropy_left,
+                                   distribution_left_sparse, bestSplitInfo.entropyLeft,
                                    maxClassNo, depth+1, lambdaLeft );
     // right child
     node->right = buildRecursive ( fp, examples, examples_right,
-                                   distribution_right_sparse, best_entropy_right,
+                                   distribution_right_sparse, bestSplitInfo.entropyRight,
                                    maxClassNo, depth+1, lambdaRight );
 
     return node;

+ 24 - 0
classifier/fpclassifier/randomforest/DTBOblique.h

@@ -18,6 +18,16 @@
 
 namespace OBJREC {
 
+/** information about the best split found so far */
+struct SplitInfo {
+    double threshold;        //!< threshold on the feature value
+    double informationGain;  //!< information gain achieved by the split
+    double entropyLeft;      //!< entropy of the left branch
+    double entropyRight;     //!< entropy of the right branch
+    double *distLeft;        //!< class distribution of the left branch (allocated by the caller)
+    double *distRight;       //!< class distribution of the right branch (allocated by the caller)
+    NICE::Vector params;     //!< parameter vector of the linear split function
+};
+
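Since distLeft and distRight are raw new[] buffers that buildRecursive has to
release on every exit path, an alternative sketch (an assumption for
illustration, not part of this commit; requires C++11) would let the struct own
its storage:

    #include <vector>
    #include "core/vector/VectorT.h" // NICE::Vector; header path assumed from the NICE core library

    // RAII variant: the vectors free themselves, no delete [] needed.
    struct OwningSplitInfo {
        double threshold = 0.0;
        double informationGain = -1.0;
        double entropyLeft = 0.0;
        double entropyRight = 0.0;
        std::vector<double> distLeft;   // resized to maxClassNo+1 by the caller
        std::vector<double> distRight;  // resized to maxClassNo+1 by the caller
        NICE::Vector params;
    };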
 /** random oblique decision tree */
 class DTBOblique : public DecisionTreeBuilder
 {
@@ -93,6 +103,20 @@ class DTBOblique : public DecisionTreeBuilder
             const int regOption,
             const double lambda );
 
+    /**
+     * @brief find the best threshold for the current split
+     * @param values feature values
+     * @param bestSplitInfo struct holding the best split found so far
+     * @param beta parameter vector of the current linear split function
+     * @param e entropy before the split
+     * @param maxClassNo maximum class number
+     */
+    void findBestSplitThreshold (
+            FeatureValuesUnsorted & values,
+            SplitInfo & bestSplitInfo,
+            const NICE::Vector & beta,
+            const double & e,
+            const int & maxClassNo );
+
     /**
      * @brief recursive building method
      * @param fp feature pool