فهرست منبع

DTBRandomOblique: bugfix + example weighting

Sven Sickert 10 سال پیش
والد
کامیت
72367e8295

+ 19 - 16
classifier/fpclassifier/randomforest/DTBRandomOblique.cpp

@@ -102,7 +102,7 @@ void DTBRandomOblique::getDataAndLabel(
     int amountParams = f->getParameterLength();
     int amountExamples = examples_selection.size();
 
-    NICE::Matrix X(amountExamples, amountParams, 0.0);
+    NICE::Matrix X(amountExamples, amountParams, 0.0 );
     NICE::Vector y(amountExamples, 0.0);
 
     int matIndex = 0;
@@ -116,13 +116,17 @@ void DTBRandomOblique::getDataAndLabel(
 
         NICE::Vector pixelRepr = f->getFeatureVector( &ce );
         pixelRepr /= pixelRepr.Max();
-        X.setRow(matIndex,pixelRepr);
 
         // TODO for multiclass scenarios we need ONEvsALL!
-        if ( classno == 0 )
-            y.set(matIndex,-1.0);
-        else
-            y.set(matIndex, 1.0);
+
+        // {0,1} -> {-1,+1}
+        double label = 2*classno-1;
+
+        label *= ce.weight;
+        pixelRepr *= ce.weight;
+
+        y.set( matIndex, label );
+        X.setRow(matIndex,pixelRepr);
 
         matIndex++;
     }
@@ -167,18 +171,17 @@ DecisionNode *DTBRandomOblique::buildRecursive(
     // refresh/set X and y
     NICE::Matrix X;
     NICE::Vector y;
-    getDataAndLabel(fp, examples, examples_selection, X, y);
+    getDataAndLabel( fp, examples, examples_selection, X, y );
     NICE::Matrix XTX = X.transpose()*X;
     XTX.addDiagonal ( NICE::Vector( XTX.rows(), lambda) );
 
-    //TODO: incorporate weighting according class distribution
-
     NICE::Matrix G;
+    NICE::Vector beta;
     choleskyDecomp(XTX, G);
     choleskyInvert(G, XTX);
     NICE::Matrix temp = XTX * X.transpose();
-    NICE::Vector beta;
     beta.multiply(temp,y,false);
+//    choleskySolve(G, y, beta );
 
     // variables
     double best_threshold = 0.0;
@@ -208,13 +211,8 @@ DecisionNode *DTBRandomOblique::buildRecursive(
     // randomly chosen thresholds
     for ( int i = 0; i < random_split_tests; i++ )
     {
-        double threshold = ((maxValue - minValue ) / (double)random_split_tests)
+        double threshold = (i * (maxValue - minValue ) / (double)random_split_tests)
                             + minValue;
-#ifdef DETAILTREE
-        std::cerr << "Testing split #" << i << " for vector #" << k
-                   << ": t=" << threshold <<  std::endl;
-#endif
-
         // preparations
         double el, er;
         for ( int k = 0 ; k <= maxClassNo ; k++ )
@@ -242,6 +240,11 @@ DecisionNode *DTBRandomOblique::buildRecursive(
             ig = 2*ig / ( e + esplit );
         }
 
+#ifdef DETAILTREE
+        std::cerr << "Testing split #" << i << ": t=" << threshold
+                  << " ig=" << ig << std::endl;
+#endif
+
         if ( ig > best_ig )
         {
             best_ig = ig;

+ 1 - 1
classifier/fpclassifier/randomforest/DTBRandomOblique.h

@@ -61,9 +61,9 @@ class DTBRandomOblique : public DecisionTreeBuilder
 
     /**
     * @brief get data matrix X and label vector y
+    * @param fp feature pool
     * @param examples all examples of the training
     * @param examples_selection indices of selected example subset
-    * @param fp feature pool
     * @param matX data matrix (amountExamples x amountParameters)
     * @param vecY label vector (amountExamples)
     */