Browse Source

added some regularization methods

Sven Sickert 10 years ago
parent
commit
2e8c73490b

+ 170 - 72
classifier/fpclassifier/randomforest/DTBOblique.cpp

@@ -23,14 +23,15 @@ using namespace NICE;
 
 DTBOblique::DTBOblique ( const Config *conf, string section )
 {
-    random_split_tests = conf->gI(section, "random_split_tests", 10 );
+    split_steps = conf->gI(section, "split_steps", 20 );
     max_depth = conf->gI(section, "max_depth", 10 );
     minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
     minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
     use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
     min_examples = conf->gI(section, "min_examples", 50);
     save_indices = conf->gB(section, "save_indices", false);
-    lambdaInit = conf->gD(section, "lambdaInit", 0.5 );
+    lambdaInit = conf->gD(section, "lambda_init", 0.5 );
+    regularizationType = conf->gI(section, "regularization_type", 1 );
 }
 
 DTBOblique::~DTBOblique()
@@ -51,7 +52,9 @@ bool DTBOblique::entropyLeftRight (
 {
     count_left = 0;
     count_right = 0;
-    for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
+    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
+          i != values.end();
+          i++ )
     {
         int classno = i->second;
         double value = i->first;
@@ -107,18 +110,12 @@ void DTBOblique::getDataAndLabel(
           si++ )
     {
         const pair<int, Example> & p = examples[*si];
-        int classno = p.first;
         const Example & ce = p.second;
 
         NICE::Vector pixelRepr = f->getFeatureVector( &ce );
         pixelRepr /= pixelRepr.Max();
 
-        // TODO for multiclass scenarios we need ONEvsALL!
-
-        // {0,1} -> {-1,+1}
-        double label = 2*classno-1;
-
-        label *= ce.weight;
+        double label = p.first * ce.weight;
         pixelRepr *= ce.weight;
 
         y.set( matIndex, label );
@@ -131,6 +128,77 @@ void DTBOblique::getDataAndLabel(
     vecY = y;
 }
 
+void DTBOblique::regularizeDataMatrix(
+        const NICE::Matrix &X,
+        NICE::Matrix &XTXreg,
+        const int regOption,
+        const double lambda )
+{
+    XTXreg = X.transpose()*X;
+    NICE::Matrix R;
+    const int dim = X.cols();
+
+    switch (regOption)
+    {
+        // identity matrix
+        case 0:
+            R.resize(dim,dim);
+            R.setIdentity();
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // differences operator, k=1
+        case 1:
+            R.resize(dim-1,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-1; r++ )
+            {
+                R(r,r)   =  1.0;
+                R(r,r+1) = -1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // difference operator, k=2
+        case 2:
+            R.resize(dim-2,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-2; r++ )
+            {
+                R(r,r)   =  1.0;
+                R(r,r+1) = -2.0;
+                R(r,r+2) =  1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // as in [Chen et al., 2012]
+        case 3:
+        {
+            NICE::Vector q ( dim, (1.0-lambda) );
+            q[0] = 1;
+            NICE::Matrix Q;
+            Q.tensorProduct(q,q);
+            R.multiply(XTXreg,Q);
+            for ( int r = 0; r < dim; r++ )
+                R(r,r) = q[r] * XTXreg(r,r);
+            XTXreg = R;
+            break;
+        }
+
+        // no regularization
+        default:
+            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
+                      << std::endl;
+            break;
+    }
+}
+
 /** recursive building method */
 DecisionNode *DTBOblique::buildRecursive(
         const FeaturePool & fp,
@@ -165,19 +233,6 @@ DecisionNode *DTBOblique::buildRecursive(
         return node;
     }
 
-    // refresh/set X and y
-    NICE::Matrix X, G;
-    NICE::Vector y, beta;
-    getDataAndLabel( fp, examples, examples_selection, X, y );
-
-    // least squares solution
-    NICE::Matrix XTX = X.transpose()*X;
-    XTX.addDiagonal ( NICE::Vector( XTX.rows(), lambdaCurrent) );
-    choleskyDecomp(XTX, G);
-    choleskyInvert(G, XTX);
-    NICE::Matrix temp = XTX * X.transpose();
-    beta.multiply(temp,y,false);
-
     // variables
     double best_threshold = 0.0;
     double best_ig = -1.0;
@@ -189,64 +244,107 @@ DecisionNode *DTBOblique::buildRecursive(
     double best_entropy_left = 0.0;
     double best_entropy_right = 0.0;
 
-    // Setting Convolutional Feature
     ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
-    f->setParameterVector( beta );
+    NICE::Vector best_beta = f->getParameterVector();
 
-    // Feature Values
-    values.clear();
-    f->calcFeatureValues( examples, examples_selection, values);
+    // Creating data matrix X and label vector y
+    NICE::Matrix X, XTXr, G;
+    NICE::Vector y, beta;
+    getDataAndLabel( fp, examples, examples_selection, X, y );
 
-    double minValue = (min_element ( values.begin(), values.end() ))->first;
-    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    // Preparing system of linear equations
+    //NICE::Matrix XTX = X.transpose()*X;
+    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
+//    R *= lambdaCurrent;
 
-    if ( maxValue - minValue < 1e-7 )
-        std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
+    //choleskyDecomp(XTXr, G);
+    //choleskyInvert(G, XTXr);
+    G = NICE::invert(XTXr);
+    NICE::Matrix temp = G * X.transpose();
 
-    // get best thresholds by complete search
-    for ( int i = 0; i < random_split_tests; i++ )
+    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
     {
-        double threshold = (i * (maxValue - minValue ) / (double)random_split_tests)
-                            + minValue;
-        // preparations
-        double el, er;
-        for ( int k = 0 ; k <= maxClassNo ; k++ )
+        // One-vs-all: Transforming into {-1,+1} problem
+        NICE::Vector yCur ( y.size(), -1.0 );
+        int idx = 0;
+        bool hasExamples = false;
+        for ( vector<int>::const_iterator si = examples_selection.begin();
+              si != examples_selection.end();
+              si++, idx++ )
         {
-            distribution_left[k] = 0.0;
-            distribution_right[k] = 0.0;
+            const pair<int, Example> & p = examples[*si];
+            if (p.first == curClass)
+            {
+                yCur.set( idx, 1.0 );
+                hasExamples = true;
+            }
         }
 
-        /** Test the current split */
-        // Does another split make sense?
-        double count_left;
-        double count_right;
-        if ( ! entropyLeftRight ( values, threshold,
-                                  distribution_left, distribution_right,
-                                  el, er, count_left, count_right, maxClassNo ) )
-            continue;
+        // is there a positive example for current class in current set?
+        if (!hasExamples) continue;
 
-        // information gain and entropy
-        double pl = (count_left) / (count_left + count_right);
-        double ig = e - pl*el - (1-pl)*er;
+        // Solve system of linear equations in a least squares manner
+        beta.multiply(temp,yCur,false);
 
-        if ( use_shannon_entropy )
-        {
-            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-            ig = 2*ig / ( e + esplit );
-        }
+        // Updating parameter vector in convolutional feature
+        f->setParameterVector( beta );
 
-        if ( ig > best_ig )
-        {
-            best_ig = ig;
-            best_threshold = threshold;
+        // Feature Values
+        values.clear();
+        f->calcFeatureValues( examples, examples_selection, values);
+
+        double minValue = (min_element ( values.begin(), values.end() ))->first;
+        double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+        if ( maxValue - minValue < 1e-7 )
+            std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
 
+        // get best thresholds by complete search
+        for ( int i = 0; i < split_steps; i++ )
+        {
+            double threshold = (i * (maxValue - minValue ) / (double)split_steps)
+                                + minValue;
+            // preparations
+            double el, er;
             for ( int k = 0 ; k <= maxClassNo ; k++ )
             {
-                best_distribution_left[k] = distribution_left[k];
-                best_distribution_right[k] = distribution_right[k];
+                distribution_left[k] = 0.0;
+                distribution_right[k] = 0.0;
+            }
+
+            /** Test the current split */
+            // Does another split make sense?
+            double count_left;
+            double count_right;
+            if ( ! entropyLeftRight ( values, threshold,
+                                      distribution_left, distribution_right,
+                                      el, er, count_left, count_right, maxClassNo ) )
+                continue;
+
+            // information gain and entropy
+            double pl = (count_left) / (count_left + count_right);
+            double ig = e - pl*el - (1-pl)*er;
+
+            if ( use_shannon_entropy )
+            {
+                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+                ig = 2*ig / ( e + esplit );
+            }
+
+            if ( ig > best_ig )
+            {
+                best_ig = ig;
+                best_threshold = threshold;
+                best_beta = beta;
+
+                for ( int k = 0 ; k <= maxClassNo ; k++ )
+                {
+                    best_distribution_left[k] = distribution_left[k];
+                    best_distribution_right[k] = distribution_right[k];
+                }
+                best_entropy_left = el;
+                best_entropy_right = er;
             }
-            best_entropy_left = el;
-            best_entropy_right = er;
         }
     }
 
@@ -267,10 +365,13 @@ DecisionNode *DTBOblique::buildRecursive(
     }
 
     /** Save the best split to current node */
+    f->setParameterVector( best_beta );
+    values.clear();
+    f->calcFeatureValues( examples, examples_selection, values);
     node->f = f->clone();
     node->threshold = best_threshold;
 
-    /** Split examples according to split function */
+    /** Split examples according to best split function */
     vector<int> examples_left;
     vector<int> examples_right;
 
@@ -305,11 +406,8 @@ DecisionNode *DTBOblique::buildRecursive(
         if ( r != 0 )
             distribution_right_sparse[k] = r;
 #ifdef DEBUGTREE
-        if ( (l>0)||(r>0) )
-        {
-            std::cerr << "DTBOblique: split of class " << k << " ("
-                      << l << " <-> " << r << ") " << std::endl;
-        }
+        std::cerr << "DTBOblique: split of class " << k << " ("
+                  << l << " <-> " << r << ") " << std::endl;
 #endif
     }
 
@@ -323,7 +421,7 @@ DecisionNode *DTBOblique::buildRecursive(
             pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));
 
 #ifdef DEBUGTREE
-    std::cerr << "regularization parameter lambda: left " << lambdaLeft
+    std::cerr << "regularization parameter lambda left " << lambdaLeft
               << " right " << lambdaRight << std::endl;
 
 #endif

+ 18 - 2
classifier/fpclassifier/randomforest/DTBOblique.h

@@ -29,8 +29,8 @@ class DTBOblique : public DecisionTreeBuilder
     /////////////////////////
     /////////////////////////
 
-    /** Amount of randomly chosen thresholds */
-    int random_split_tests;
+    /** Amount of steps for complete search for best threshold */
+    int split_steps;
 
     /** Maximum allowed depth of a tree */
     int max_depth;
@@ -53,6 +53,9 @@ class DTBOblique : public DecisionTreeBuilder
     /** Regularization parameter */
     double lambdaInit;
 
+    /** Regularization type */
+    int regularizationType;
+
     /////////////////////////
     /////////////////////////
     //  PROTECTED METHODS  //
@@ -74,6 +77,19 @@ class DTBOblique : public DecisionTreeBuilder
             NICE::Matrix &matX,
             NICE::Vector &vecY );
 
+    /**
+     * @brief compute the regularized matrix X'*X of size (dim)x(dim)
+     * @param X data matrix
+     * @param XTXreg return regularized X'*X
+     * @param regOption which kind of regularization
+     * @param lambda regularization parameter (weighting)
+     */
+    void regularizeDataMatrix (
+            const NICE::Matrix & X,
+            NICE::Matrix &XTXreg,
+            const int regOption,
+            const double lambda );
+
     /**
      * @brief recursive building method
      * @param fp feature pool