
added RTBLinear for RDF regression

Sven Sickert 11 years ago
parent
commit
086686f72f

+ 258 - 0
regression/randomforest/RTBLinear.cpp

@@ -0,0 +1,258 @@
+/**
+* @file RTBLinear.cpp
+* @brief random regression tree, which learns a linear least-squares (LSE) model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+*/
+#include <iostream>
+#include <cstdlib>   // rand(), srand()
+#include <ctime>     // time()
+
+#include "RTBLinear.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+using namespace OBJREC;
+using namespace std;
+using namespace NICE;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
+RTBLinear::RTBLinear( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  // note: this value is read from the global "RandomForest" section, not from `section`
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+  
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBLinear::~RTBLinear()
+{
+}
+
+void RTBLinear::computeLinearLSError( const VVector& x,
+          const Vector& y,
+          const int& numEx,
+          double& lsError)
+{
+  // fit a linear least-squares model and report its mean squared error
+  LinRegression lreg;
+  lreg.teach ( x, y );
+
+  NICE::Vector diff ( numEx );
+  for ( int i = 0; i < numEx; i++ ){
+    diff[i] = y[i] - lreg.predict ( x[i] );
+    diff[i] *= diff[i];
+  }
+
+  lsError = diff.Mean();
+}
+
+bool RTBLinear::errorReductionLeftRight(const vector< pair< double, int > > & values,
+          const Vector & y,
+          double threshold,
+          double& error_left,
+          double& error_right,
+          int& count_left,
+          int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+  
+  NICE::VVector xLeft;
+  NICE::VVector xRight;
+  
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+      NICE::Vector tmp(1,value);
+      xLeft.push_back( tmp );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+      NICE::Vector tmp2(1,value);
+      xRight.push_back( tmp2 );
+    }
+  }
+
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+  
+  if ( (count_left < min_examples)  || (count_right < min_examples) )
+    return false; // no split
+  
+
+  NICE::Vector yLeft (count_left);
+  for ( int i = 0; i < count_left; i++ ){
+    yLeft[i] = y[selection_left[i]];
+  }
+  computeLinearLSError(xLeft, yLeft, count_left, error_left);
+
+  NICE::Vector yRight (count_right);
+  for ( int i = 0; i < count_right; i++ ){
+    yRight[i] = y[selection_right[i]];
+  }
+  computeLinearLSError(xRight, yRight, count_right, error_right);
+  
+  return true;
+}
+
+RegressionNode *RTBLinear::buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth)
+{
+#ifdef DEBUGTREE
+    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+    (int)depth);
+#endif
+    
+  RegressionNode *node = new RegressionNode ();
+//  node->nodePrediction( y, selection );
+  double lsError;
+  // note: the error is computed over the full training set x/y here,
+  // not over the current selection
+  computeLinearLSError( x, y, (int)x.size(), lsError);
+  
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+   fprintf (stderr, "RTBLinear: maxmimum depth reached !\n");
+#endif
+   node->trainExamplesIndices = selection;
+   return node;
+  }
+  
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: minimum examples reached %d < %d !\n",
+      (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+//  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+  
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+    
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+    
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    if ( maxValue - minValue < 1e-7 ) continue;
+    
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      double threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+      
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+      
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+          lsError_right, count_left, count_right) )
+        continue;
+      
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+      
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold =  threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+  
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: error reduction to small !\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+  
+  node->f = best_feature;
+  node->threshold = best_threshold;
+  
+  // recalculate examples_left and examples_right for the best split
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+  
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+  
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+  
+  return node;
+}
+
+RegressionNode *RTBLinear::build( const NICE::VVector & x,
+          const NICE::Vector & y )
+{
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+    all.push_back( i );
+  
+  return buildRecursive( x, y, all, 0);
+} 

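For context, here is a minimal usage sketch of the new builder (not part of the commit): it trains a single RTBLinear tree on a toy 1-D regression set. The config file name and loading it via a NICE::Config file constructor are assumptions; the Vector/VVector calls mirror the ones used in the diff above.

#include "vislearning/regression/randomforest/RTBLinear.h"
#include "core/basics/Config.h"
#include "core/vector/VVector.h"

using namespace NICE;
using namespace OBJREC;

int main ()
{
  // assumption: Config can be populated from an INI-style file
  Config conf ( "rtblinear.conf" );

  // toy data: 100 scalar inputs in [0,1) with targets y = 2*x
  VVector x;
  Vector y ( 100 );
  for ( int i = 0; i < 100; i++ )
  {
    Vector xi ( 1, i / 100.0 );  // same Vector(size, value) ctor as in the diff
    x.push_back ( xi );
    y[i] = 2.0 * xi[0];
  }

  RTBLinear builder ( &conf, "RTBLinear" );
  RegressionNode *root = builder.build ( x, y );
  // root->f and root->threshold now describe the first split (if any)

  return 0;
}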
+ 77 - 0
regression/randomforest/RTBLinear.h

@@ -0,0 +1,77 @@
+/**
+* @file RTBLinear.h
+* @brief random regression tree, which learns a linear least-squares (LSE) model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+*/
+#ifndef RTBLINEARINCLUDE
+#define RTBLINEARINCLUDE
+
+#include <vector>
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "core/basics/Config.h"
+#include "RegressionTreeBuilder.h"
+
+
+namespace OBJREC {
+
+/** random regression tree that fits a linear least-squares model in every inner node */
+class RTBLinear : public RegressionTreeBuilder
+{
+  
+  protected:
+    int random_split_tests;
+    int random_features;
+    int max_depth;
+    int min_examples;
+    double minimum_error_reduction;
+    
+    // currently unused in RTBLinear
+    int random_split_mode;
+    
+    /** save indices in leaves */
+    bool save_indices;
+
+    // currently unused in RTBLinear
+    enum {
+      RANDOM_SPLIT_INDEX = 0,
+      RANDOM_SPLIT_UNIFORM
+    };
+    
+    RegressionNode *buildRecursive ( const NICE::VVector & x,
+          const NICE::Vector & y,
+          std::vector<int> & selection,
+          int depth);
+
+    void computeLinearLSError ( const NICE::VVector & x,
+                                const NICE::Vector & y,
+                                const int & numEx,
+                                double & lsError);
+
+    bool errorReductionLeftRight ( const std::vector< std::pair< double, int > > & values,
+          const NICE::Vector & y,
+          double threshold,
+          double & error_left,
+          double & error_right,
+          int & count_left,
+          int & count_right );
+
+  public:
+    
+    /** simple constructor */
+    RTBLinear( const NICE::Config *conf, std::string section = "RTBLinear" );
+    
+    /** simple destructor */
+    virtual ~RTBLinear();
+    
+    RegressionNode *build ( const NICE::VVector & x,
+          const NICE::Vector & y );
+    
+};
+  
+  
+} // namespace
+
+#endif // RTBLINEARINCLUDE

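For reference, the keys read by the RTBLinear constructor map onto a config section like the following sketch (INI-style layout assumed; key names and defaults are taken from the constructor in RTBLinear.cpp above). Note that minimum_error_reduction is read from the [RandomForest] section, not from [RTBLinear]:

[RTBLinear]
random_split_tests = 10
random_features = 500
max_depth = 10
min_examples = 50
save_indices = false
start_random_generator = false

[RandomForest]
minimum_error_reduction = 0.01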
+ 6 - 0
regression/randomforest/RegRandomForests.cpp

@@ -15,6 +15,7 @@
 
 #include "vislearning/regression/randomforest/RegRandomForests.h"
 #include "vislearning/regression/randomforest/RTBRandom.h"
+#include "vislearning/regression/randomforest/RTBLinear.h"
 #include "vislearning/regression/randomforest/RTBMinDist.h"
 #include "vislearning/regression/randomforest/RTBGrid.h"
 #include "vislearning/regression/randomforest/RTBClusterRandom.h"
@@ -61,6 +62,11 @@ RegRandomForests::RegRandomForests( const Config *_conf,
       std::string builder_section = conf->gS(section, "builder_section", "RTBMinDist");
       builder = new RTBMinDist ( conf, builder_section );
     }
+    else if ( builder_method == "linear" )
+    {
+      std::string builder_section = conf->gS(section, "builder_section", "RTBRandom");
+      builder = new RTBLinear ( conf, builder_section );
+    }
     else if ( builder_method == "grid" )
     {
       std::string builder_section = conf->gS(section, "builder_section", "RTBGrid");
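To activate the new builder from a forest configuration, something like the following sketch should work. The section name [RegRandomForests] and the key builder_method are inferred from the variable names in the surrounding code, not confirmed by this diff:

[RegRandomForests]
builder_method = linear
builder_section = RTBLinear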