@@ -0,0 +1,258 @@
+/**
+* @file RTBLinear.cpp
+* @brief random regression tree that learns a least-squares (LSE) model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+*/
+#include <iostream>
+#include <cstdlib> // rand(), srand(), RAND_MAX
+#include <ctime>   // time(), used for seeding the random generator
+
+#include "RTBLinear.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+using namespace OBJREC;
+using namespace std;
+using namespace NICE;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
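+/** read the tree parameters (number of random split tests, number of
+ *  random features, maximum depth, minimum examples per node, minimum
+ *  error reduction) from the config and optionally seed rand() */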
+RTBLinear::RTBLinear( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  // note: this value is read from the global "RandomForest" section,
+  // not from the section passed to the constructor
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBLinear::~RTBLinear()
+{
+}
+
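+/** fit a linear least-squares model to the examples (x,y) and return
+ *  the mean squared residual over the numEx examples in lsError */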
+void RTBLinear::computeLinearLSError( const VVector& x,
+                                      const Vector& y,
+                                      const int& numEx,
+                                      double& lsError)
+{
+  LinRegression *lreg = new LinRegression;
+  lreg->teach ( x, y);
+
+  NICE::Vector diff ( numEx );
+  for ( int i = 0; i < numEx; i++ ){
+    diff[i] = y[i] - lreg->predict ( x[i] );
+    diff[i] *= diff[i];
+  }
+
+  lsError = diff.Mean();
+  delete lreg;
+}
+
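+/** partition the (value,index) pairs in values at threshold, fit a
+ *  linear model to each side via computeLinearLSError and return the
+ *  two errors; returns false (no split) if either side has fewer than
+ *  min_examples examples */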
+bool RTBLinear::errorReductionLeftRight(const vector< pair< double, int > > values,
+                                        const Vector & y,
+                                        double threshold,
+                                        double& error_left,
+                                        double& error_right,
+                                        int& count_left,
+                                        int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+
+  NICE::VVector xLeft;
+  NICE::VVector xRight;
+
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+      NICE::Vector tmp(1,value);
+      xLeft.push_back( tmp );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+      NICE::Vector tmp2(1,value);
+      xRight.push_back( tmp2 );
+    }
+  }
+
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+
+  if ( (count_left < min_examples) || (count_right < min_examples) )
+    return false; // no split
+
+  NICE::Vector yLeft (count_left);
+  for ( int i = 0; i < count_left; i++ ){
+    yLeft[i] = y[selection_left[i]];
+  }
+  computeLinearLSError(xLeft, yLeft, count_left, error_left);
+
+  NICE::Vector yRight (count_right);
+  for ( int i = 0; i < count_right; i++ ){
+    yRight[i] = y[selection_right[i]];
+  }
+  computeLinearLSError(xRight, yRight, count_right, error_right);
+
+  return true;
+}
+
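+/** recursively grow the tree: compute the node's least-squares error,
+ *  stop when the maximum depth, the minimum number of examples or an
+ *  insufficient error reduction is reached, otherwise split at the best
+ *  random feature/threshold pair and recurse on both subsets */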
+RegressionNode *RTBLinear::buildRecursive ( const NICE::VVector & x,
+                                            const NICE::Vector & y,
+                                            std::vector<int> & selection,
+                                            int depth)
+{
+#ifdef DEBUGTREE
+  fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+           (int)depth);
+#endif
+
+  RegressionNode *node = new RegressionNode ();
+//  node->nodePrediction( y, selection );
+  double lsError;
+  // note: the baseline error is computed over all examples,
+  // not only over those in 'selection'
+  computeLinearLSError( x, y, (int)x.size(), lsError);
+
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: maximum depth reached!\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: minimum examples reached %d < %d!\n",
+             (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+//  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+
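+  // randomly sample candidate features and thresholds and keep the
+  // split with the largest reduction of the least-squares error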
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    // skip (nearly) constant features
+    if ( maxValue - minValue < 1e-7 ) continue;
+
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      // draw a threshold uniformly from [minValue, maxValue]
+      double threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+                                      lsError_right, count_left, count_right) )
+        continue;
+
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold = threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: error reduction too small!\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  node->f = best_feature;
+  node->threshold = best_threshold;
+
+  // recalculate examples_left and examples_right for the best split
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+
+  return node;
+}
+
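+/** entry point for training: build the tree using all examples in (x,y) */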
+RegressionNode *RTBLinear::build( const NICE::VVector & x,
+                                  const NICE::Vector & y )
+{
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+    all.push_back( (int)i );
+
+  return buildRecursive( x, y, all, 0);
+}
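+
+/* A minimal usage sketch (hypothetical; assumes a config file with an
+ * "RTBLinear" section and training data trainX/trainY already loaded
+ * as NICE::VVector / NICE::Vector):
+ *
+ *   Config conf ( "regression.conf" );
+ *   RTBLinear builder ( &conf, "RTBLinear" );
+ *   RegressionNode *root = builder.build ( trainX, trainY );
+ */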