@@ -0,0 +1,258 @@
+/**
+* @file RTBLinear.cpp
+* @brief random regression tree that learns a least-squares (LSE) model in every inner node during training
+* @author Frank Prüfer
+* @date 09/17/2013
+*/
+#include <iostream>
+#include <cstdlib> // rand(), srand(), RAND_MAX
+#include <ctime>   // time(), used for seeding the random generator
+
+#include "RTBLinear.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+using namespace OBJREC;
+using namespace std;
+using namespace NICE;
+
+#undef DEBUGTREE
+#undef DETAILTREE
+
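+/** read the tree parameters (number of random split tests, number of
+ *  random features, maximum depth, minimum examples per node, minimum
+ *  error reduction) from the config and optionally seed rand() */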
+RTBLinear::RTBLinear( const Config *conf, std::string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  min_examples = conf->gI(section, "min_examples", 50);
+  // note: this value is read from the global "RandomForest" section,
+  // not from the section passed to the constructor
+  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
+  save_indices = conf->gB(section, "save_indices", false);
+
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
+
+RTBLinear::~RTBLinear()
+{
+}
+
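+/** fit a linear least-squares model to the examples (x,y) and return
+ *  the mean squared residual over the numEx examples in lsError */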
+void RTBLinear::computeLinearLSError( const VVector& x,
+                                      const Vector& y,
+                                      const int& numEx,
+                                      double& lsError)
+{
+  LinRegression *lreg = new LinRegression;
+  lreg->teach ( x, y);
+
+  NICE::Vector diff ( numEx );
+  for ( int i = 0; i < numEx; i++ ){
+    diff[i] = y[i] - lreg->predict ( x[i] );
+    diff[i] *= diff[i];
+  }
+
+  lsError = diff.Mean();
+  delete lreg;
+}
+
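+/** partition the (value,index) pairs in values at threshold, fit a
+ *  linear model to each side via computeLinearLSError and return the
+ *  two errors; returns false (no split) if either side has fewer than
+ *  min_examples examples */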
+bool RTBLinear::errorReductionLeftRight(const vector< pair< double, int > > values,
+                                        const Vector & y,
+                                        double threshold,
+                                        double& error_left,
+                                        double& error_right,
+                                        int& count_left,
+                                        int& count_right)
+{
+  count_left = 0;
+  count_right = 0;
+  vector<int> selection_left;
+  vector<int> selection_right;
+
+  NICE::VVector xLeft;
+  NICE::VVector xRight;
+
+  for ( vector< pair< double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < threshold )
+    {
+      count_left++;
+      selection_left.push_back( it->second );
+      NICE::Vector tmp(1,value);
+      xLeft.push_back( tmp );
+    }
+    else
+    {
+      count_right++;
+      selection_right.push_back( it->second );
+      NICE::Vector tmp2(1,value);
+      xRight.push_back( tmp2 );
+    }
+  }
+
+  if ( (count_left == 0) || (count_right == 0) )
+    return false; // no split
+
+  if ( (count_left < min_examples) || (count_right < min_examples) )
+    return false; // no split
+
+  NICE::Vector yLeft (count_left);
+  for ( int i = 0; i < count_left; i++ ){
+    yLeft[i] = y[selection_left[i]];
+  }
+  computeLinearLSError(xLeft, yLeft, count_left, error_left);
+
+  NICE::Vector yRight (count_right);
+  for ( int i = 0; i < count_right; i++ ){
+    yRight[i] = y[selection_right[i]];
+  }
+  computeLinearLSError(xRight, yRight, count_right, error_right);
+
+  return true;
+}
+
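+/** recursively grow the tree: compute the node's least-squares error,
+ *  stop when the maximum depth, the minimum number of examples or an
+ *  insufficient error reduction is reached, otherwise split at the best
+ *  random feature/threshold pair and recurse on both subsets */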
+RegressionNode *RTBLinear::buildRecursive ( const NICE::VVector & x,
+                                            const NICE::Vector & y,
+                                            std::vector<int> & selection,
+                                            int depth)
+{
+#ifdef DEBUGTREE
+  fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
+           (int)depth);
+#endif
+
+  RegressionNode *node = new RegressionNode ();
+//  node->nodePrediction( y, selection );
+  double lsError;
+  // note: the baseline error is computed over all examples,
+  // not only over those in 'selection'
+  computeLinearLSError( x, y, (int)x.size(), lsError);
+
+  if ( depth > max_depth )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: maximum depth reached!\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  if ( (int)selection.size() < min_examples )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: minimum examples reached %d < %d!\n",
+             (int)selection.size(), min_examples );
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  int best_feature = 0;
+  double best_threshold = 0.0;
+  double best_reduct = -1.0;
+//  vector<pair<double, int> > best_values;
+  vector<pair<double, int> > values;
+  double lsError_left = 0.0;
+  double lsError_right = 0.0;
+
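+  // randomly sample candidate features and thresholds and keep the
+  // split with the largest reduction of the least-squares error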
+  for ( int k = 0; k < random_features; k++ )
+  {
+#ifdef DETAILTREE
+    fprintf (stderr, "calculating random feature %d\n", k );
+#endif
+    int f = rand() % x[0].size();
+
+    values.clear();
+    collectFeatureValues ( x, selection, f, values );
+
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+#ifdef DETAILTREE
+    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
+#endif
+    // skip (nearly) constant features
+    if ( maxValue - minValue < 1e-7 ) continue;
+
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      // draw a threshold uniformly from [minValue, maxValue]
+      double threshold = rand() * (maxValue - minValue) / RAND_MAX + minValue;
+
+#ifdef DETAILTREE
+      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
+#endif
+      lsError_left = 0.0;
+      lsError_right = 0.0;
+
+      int count_left, count_right;
+      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
+                                      lsError_right, count_left, count_right) )
+        continue;
+
+      //double pl = (count_left) / (count_left +count_right);
+      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
+      double errorReduction = lsError - lsError_left - lsError_right;
+
+      if ( errorReduction > best_reduct )
+      {
+        best_reduct = errorReduction;
+        best_threshold = threshold;
+        best_feature = f;
+#ifdef DETAILTREE
+        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
+#endif
+      }
+    }
+  }
+
+  if ( best_reduct < minimum_error_reduction )
+  {
+#ifdef DEBUGTREE
+    fprintf (stderr, "RTBLinear: error reduction too small!\n");
+#endif
+    node->trainExamplesIndices = selection;
+    return node;
+  }
+
+  node->f = best_feature;
+  node->threshold = best_threshold;
+
+  // recalculate examples_left and examples_right for the best split
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  collectFeatureValues( x, selection, best_feature, values);
+
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+
+  for ( vector< pair < double, int > >::const_iterator it = values.begin();
+        it != values.end(); it++ )
+  {
+    double value = it->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back( it->second );
+    else
+      best_examples_right.push_back( it->second );
+  }
+
+  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
+  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
+
+  return node;
+}
+
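+/** entry point for training: build the tree using all examples in (x,y) */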
+RegressionNode *RTBLinear::build( const NICE::VVector & x,
+                                  const NICE::Vector & y )
+{
+  vector<int> all;
+  all.reserve ( y.size() );
+  for ( uint i = 0; i < y.size(); i++ )
+    all.push_back( (int)i );
+
+  return buildRecursive( x, y, all, 0);
+}
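+
+/* A minimal usage sketch (hypothetical; assumes a config file with an
+ * "RTBLinear" section and training data trainX/trainY already loaded
+ * as NICE::VVector / NICE::Vector):
+ *
+ *   Config conf ( "regression.conf" );
+ *   RTBLinear builder ( &conf, "RTBLinear" );
+ *   RegressionNode *root = builder.build ( trainX, trainY );
+ */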