Browse Source

added some regularization methods

Sven Sickert 10 years ago
parent
commit
2e8c73490b

+ 170 - 72
classifier/fpclassifier/randomforest/DTBOblique.cpp

@@ -23,14 +23,15 @@ using namespace NICE;
 
 DTBOblique::DTBOblique ( const Config *conf, string section )
 {
-    random_split_tests = conf->gI(section, "random_split_tests", 10 );
+    split_steps = conf->gI(section, "split_steps", 20 );
     max_depth = conf->gI(section, "max_depth", 10 );
     minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
     minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
     use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
     min_examples = conf->gI(section, "min_examples", 50);
     save_indices = conf->gB(section, "save_indices", false);
-    lambdaInit = conf->gD(section, "lambdaInit", 0.5 );
+    lambdaInit = conf->gD(section, "lambda_init", 0.5 );
+    regularizationType = conf->gI(section, "regularization_type", 1 );
 }
 
 DTBOblique::~DTBOblique()
@@ -51,7 +52,9 @@ bool DTBOblique::entropyLeftRight (
 {
     count_left = 0;
     count_right = 0;
-    for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
+    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
+          i != values.end();
+          i++ )
     {
         int classno = i->second;
         double value = i->first;
@@ -107,18 +110,12 @@ void DTBOblique::getDataAndLabel(
           si++ )
     {
         const pair<int, Example> & p = examples[*si];
-        int classno = p.first;
         const Example & ce = p.second;
 
         NICE::Vector pixelRepr = f->getFeatureVector( &ce );
         pixelRepr /= pixelRepr.Max();
 
-        // TODO for multiclass scenarios we need ONEvsALL!
-
-        // {0,1} -> {-1,+1}
-        double label = 2*classno-1;
-
-        label *= ce.weight;
+        double label = p.first * ce.weight;
         pixelRepr *= ce.weight;
 
         y.set( matIndex, label );
@@ -131,6 +128,77 @@ void DTBOblique::getDataAndLabel(
     vecY = y;
 }
 
+void DTBOblique::regularizeDataMatrix(
+        const NICE::Matrix &X,
+        NICE::Matrix &XTXreg,
+        const int regOption,
+        const double lambda )
+{
+    XTXreg = X.transpose()*X;
+    NICE::Matrix R;
+    const int dim = X.cols();
+
+    switch (regOption)
+    {
+        // identity matrix
+        case 0:
+            R.resize(dim,dim);
+            R.setIdentity();
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // differences operator, k=1
+        case 1:
+            R.resize(dim-1,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-1; r++ )
+            {
+                R(r,r)   =  1.0;
+                R(r,r+1) = -1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // difference operator, k=2
+        case 2:
+            R.resize(dim-2,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-2; r++ )
+            {
+                R(r,r)   =  1.0;
+                R(r,r+1) = -2.0;
+                R(r,r+2) =  1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // as in [Chen et al., 2012]
+        case 3:
+        {
+            NICE::Vector q ( dim, (1.0-lambda) );
+            q[0] = 1;
+            NICE::Matrix Q;
+            Q.tensorProduct(q,q);
+            R.multiply(XTXreg,Q);
+            for ( int r = 0; r < dim; r++ )
+                R(r,r) = q[r] * XTXreg(r,r);
+            XTXreg = R;
+            break;
+        }
+
+        // no regularization
+        default:
+            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
+                      << std::endl;
+            break;
+    }
+}
+
 /** recursive building method */
 DecisionNode *DTBOblique::buildRecursive(
         const FeaturePool & fp,
@@ -165,19 +233,6 @@ DecisionNode *DTBOblique::buildRecursive(
         return node;
     }
 
-    // refresh/set X and y
-    NICE::Matrix X, G;
-    NICE::Vector y, beta;
-    getDataAndLabel( fp, examples, examples_selection, X, y );
-
-    // least squares solution
-    NICE::Matrix XTX = X.transpose()*X;
-    XTX.addDiagonal ( NICE::Vector( XTX.rows(), lambdaCurrent) );
-    choleskyDecomp(XTX, G);
-    choleskyInvert(G, XTX);
-    NICE::Matrix temp = XTX * X.transpose();
-    beta.multiply(temp,y,false);
-
     // variables
     double best_threshold = 0.0;
     double best_ig = -1.0;
@@ -189,64 +244,107 @@ DecisionNode *DTBOblique::buildRecursive(
     double best_entropy_left = 0.0;
     double best_entropy_right = 0.0;
 
-    // Setting Convolutional Feature
     ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
-    f->setParameterVector( beta );
+    NICE::Vector best_beta = f->getParameterVector();
 
-    // Feature Values
-    values.clear();
-    f->calcFeatureValues( examples, examples_selection, values);
+    // Creating data matrix X and label vector y
+    NICE::Matrix X, XTXr, G;
+    NICE::Vector y, beta;
+    getDataAndLabel( fp, examples, examples_selection, X, y );
 
-    double minValue = (min_element ( values.begin(), values.end() ))->first;
-    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    // Preparing system of linear equations
+    //NICE::Matrix XTX = X.transpose()*X;
+    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
+//    R *= lambdaCurrent;
 
-    if ( maxValue - minValue < 1e-7 )
-        std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
+    //choleskyDecomp(XTXr, G);
+    //choleskyInvert(G, XTXr);
+    G = NICE::invert(XTXr);
+    NICE::Matrix temp = G * X.transpose();
 
-    // get best thresholds by complete search
-    for ( int i = 0; i < random_split_tests; i++ )
+    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
     {
-        double threshold = (i * (maxValue - minValue ) / (double)random_split_tests)
-                            + minValue;
-        // preparations
-        double el, er;
-        for ( int k = 0 ; k <= maxClassNo ; k++ )
+        // One-vs-all: Transforming into {-1,+1} problem
+        NICE::Vector yCur ( y.size(), -1.0 );
+        int idx = 0;
+        bool hasExamples = false;
+        for ( vector<int>::const_iterator si = examples_selection.begin();
+              si != examples_selection.end();
+              si++, idx++ )
         {
-            distribution_left[k] = 0.0;
-            distribution_right[k] = 0.0;
+            const pair<int, Example> & p = examples[*si];
+            if (p.first == curClass)
+            {
+                yCur.set( idx, 1.0 );
+                hasExamples = true;
+            }
         }
 
-        /** Test the current split */
-        // Does another split make sense?
-        double count_left;
-        double count_right;
-        if ( ! entropyLeftRight ( values, threshold,
-                                  distribution_left, distribution_right,
-                                  el, er, count_left, count_right, maxClassNo ) )
-            continue;
+        // is there a positive example for current class in current set?
+        if (!hasExamples) continue;
 
-        // information gain and entropy
-        double pl = (count_left) / (count_left + count_right);
-        double ig = e - pl*el - (1-pl)*er;
+        // Solve system of linear equations in a least squares manner
+        beta.multiply(temp,yCur,false);
 
-        if ( use_shannon_entropy )
-        {
-            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-            ig = 2*ig / ( e + esplit );
-        }
+        // Updating parameter vector in convolutional feature
+        f->setParameterVector( beta );
 
-        if ( ig > best_ig )
-        {
-            best_ig = ig;
-            best_threshold = threshold;
+        // Feature Values
+        values.clear();
+        f->calcFeatureValues( examples, examples_selection, values);
+
+        double minValue = (min_element ( values.begin(), values.end() ))->first;
+        double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+        if ( maxValue - minValue < 1e-7 )
+            std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
 
+        // get best thresholds by complete search
+        for ( int i = 0; i < split_steps; i++ )
+        {
+            double threshold = (i * (maxValue - minValue ) / (double)split_steps)
+                                + minValue;
+            // preparations
+            double el, er;
             for ( int k = 0 ; k <= maxClassNo ; k++ )
             {
-                best_distribution_left[k] = distribution_left[k];
-                best_distribution_right[k] = distribution_right[k];
+                distribution_left[k] = 0.0;
+                distribution_right[k] = 0.0;
+            }
+
+            /** Test the current split */
+            // Does another split make sense?
+            double count_left;
+            double count_right;
+            if ( ! entropyLeftRight ( values, threshold,
+                                      distribution_left, distribution_right,
+                                      el, er, count_left, count_right, maxClassNo ) )
+                continue;
+
+            // information gain and entropy
+            double pl = (count_left) / (count_left + count_right);
+            double ig = e - pl*el - (1-pl)*er;
+
+            if ( use_shannon_entropy )
+            {
+                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+                ig = 2*ig / ( e + esplit );
+            }
+
+            if ( ig > best_ig )
+            {
+                best_ig = ig;
+                best_threshold = threshold;
+                best_beta = beta;
+
+                for ( int k = 0 ; k <= maxClassNo ; k++ )
+                {
+                    best_distribution_left[k] = distribution_left[k];
+                    best_distribution_right[k] = distribution_right[k];
+                }
+                best_entropy_left = el;
+                best_entropy_right = er;
             }
-            best_entropy_left = el;
-            best_entropy_right = er;
         }
     }
 
@@ -267,10 +365,13 @@ DecisionNode *DTBOblique::buildRecursive(
     }
 
     /** Save the best split to current node */
+    f->setParameterVector( best_beta );
+    values.clear();
+    f->calcFeatureValues( examples, examples_selection, values);
     node->f = f->clone();
     node->threshold = best_threshold;
 
-    /** Split examples according to split function */
+    /** Split examples according to best split function */
     vector<int> examples_left;
     vector<int> examples_right;
 
@@ -305,11 +406,8 @@ DecisionNode *DTBOblique::buildRecursive(
         if ( r != 0 )
             distribution_right_sparse[k] = r;
 #ifdef DEBUGTREE
-        if ( (l>0)||(r>0) )
-        {
-            std::cerr << "DTBOblique: split of class " << k << " ("
-                      << l << " <-> " << r << ") " << std::endl;
-        }
+        std::cerr << "DTBOblique: split of class " << k << " ("
+                  << l << " <-> " << r << ") " << std::endl;
 #endif
     }
 
@@ -323,7 +421,7 @@ DecisionNode *DTBOblique::buildRecursive(
             pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));
 
 #ifdef DEBUGTREE
-    std::cerr << "regularization parameter lambda: left " << lambdaLeft
+    std::cerr << "regularization parameter lambda left " << lambdaLeft
               << " right " << lambdaRight << std::endl;
 
 #endif

+ 18 - 2
classifier/fpclassifier/randomforest/DTBOblique.h

@@ -29,8 +29,8 @@ class DTBOblique : public DecisionTreeBuilder
     /////////////////////////
     /////////////////////////
 
-    /** Amount of randomly chosen thresholds */
-    int random_split_tests;
+    /** Amount of steps for complete search for best threshold */
+    int split_steps;
 
     /** Maximum allowed depth of a tree */
     int max_depth;
@@ -53,6 +53,9 @@ class DTBOblique : public DecisionTreeBuilder
     /** Regularization parameter */
     double lambdaInit;
 
+    /** Regularization type */
+    int regularizationType;
+
     /////////////////////////
     /////////////////////////
     //  PROTECTED METHODS  //
@@ -74,6 +77,19 @@ class DTBOblique : public DecisionTreeBuilder
             NICE::Matrix &matX,
             NICE::Vector &vecY );
 
+    /**
+     * @brief compute the regularized matrix X'*X of size (dim)x(dim)
+     * @param X data matrix
+     * @param XTXreg return regularized X'*X
+     * @param regOption which kind of regularization
+     * @param lambda regularization parameter (weighting)
+     */
+    void regularizeDataMatrix (
+            const NICE::Matrix & X,
+            NICE::Matrix &XTXreg,
+            const int regOption,
+            const double lambda );
+
     /**
      * @brief recursive building method
      * @param fp feature pool