/** * @file RTBRandom.cpp * @brief random regression tree * @author Sven Sickert * @date 06/19/2013 */ #include #include "RTBRandom.h" using namespace OBJREC; #undef DEBUGTREE #undef DETAILTREE using namespace std; using namespace NICE; RTBRandom::RTBRandom( const Config *conf, std::string section ) { random_split_tests = conf->gI(section, "random_split_tests", 10 ); random_features = conf->gI(section, "random_features", 500 ); max_depth = conf->gI(section, "max_depth", 10 ); min_examples = conf->gI(section, "min_examples", 50); minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 ); save_indices = conf->gB(section, "save_indices", false); if ( conf->gB(section, "start_random_generator", false ) ) srand(time(NULL)); } RTBRandom::~RTBRandom() { } bool RTBRandom::errorReductionLeftRight(const vector< pair< double, int > > values, const Vector & y, double threshold, double& error_left, double& error_right, int& count_left, int& count_right) { count_left = 0; count_right = 0; vector selection_left; vector selection_right; for ( vector< pair< double, int > >::const_iterator it = values.begin(); it != values.end(); it++ ) { double value = it->first; if ( value < threshold ) { count_left++; selection_left.push_back( it->second ); } else { count_right++; selection_right.push_back( it->second ); } } // if ( (count_left == 0) || (count_right == 0) ) // return false; // no split if ( (count_left < min_examples) || (count_right < min_examples) ) return false; // no split RegressionNode *left = new RegressionNode (); left->nodePrediction( y, selection_left ); error_left = left->lsError; delete left; RegressionNode *right = new RegressionNode (); right->nodePrediction( y, selection_right ); error_right = right->lsError; delete right; return true; } RegressionNode *RTBRandom::buildRecursive ( const NICE::VVector & x, const NICE::Vector & y, std::vector & selection, int depth) { #ifdef DEBUGTREE fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(), (int)depth); #endif RegressionNode *node = new RegressionNode (); node->nodePrediction( y, selection ); double lsError = node->lsError; if ( depth > max_depth ) { #ifdef DEBUGTREE fprintf (stderr, "RTBRandom: maxmimum depth reached !\n"); #endif node->trainExamplesIndices = selection; return node; } if ( (int)selection.size() < min_examples ) { #ifdef DEBUGTREE fprintf (stderr, "RTBRandom: minimum examples reached %d < %d !\n", (int)selection.size(), min_examples ); #endif node->trainExamplesIndices = selection; return node; } int best_feature = 0; double best_threshold = 0.0; double best_reduct = -1.0; vector > best_values; vector > values; double lsError_left = 0.0; double lsError_right = 0.0; for ( int k = 0; k < random_features; k++ ) { #ifdef DETAILTREE fprintf (stderr, "calculating random feature %d\n", k ); #endif int f = rand() % x[0].size(); values.clear(); collectFeatureValues ( x, selection, f, values ); double minValue = (min_element ( values.begin(), values.end() ))->first; double maxValue = (max_element ( values.begin(), values.end() ))->first; #ifdef DETAILTREE fprintf (stderr, "max %f min %f\n", maxValue, minValue ); #endif if ( maxValue - minValue < 1e-7 ) continue; for ( int i = 0; i < random_split_tests; i++ ) { double threshold; threshold = rand() * (maxValue -minValue ) / RAND_MAX + minValue; #ifdef DETAILTREE fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold ); #endif lsError_left = 0.0; lsError_right = 0.0; int count_left, count_right; if ( ! errorReductionLeftRight( values, y, threshold, lsError_left, lsError_right, count_left, count_right) ) continue; //double pl = (count_left) / (count_left +count_right); //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right; double errorReduction = lsError - lsError_left - lsError_right; if ( errorReduction > best_reduct ) { best_reduct = errorReduction; best_threshold = threshold; best_feature = f; #ifdef DETAILTREE fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature ); #endif } } } if ( best_reduct < minimum_error_reduction ) { #ifdef DEBUGTREE fprintf (stderr, "RTBRandom: error reduction to small !\n"); #endif node->trainExamplesIndices = selection; return node; } node->f = best_feature; node->threshold = best_threshold; // re calculating examples_left and examples_right vector best_examples_left; vector best_examples_right; values.clear(); collectFeatureValues( x, selection, best_feature, values); best_examples_left.reserve ( values.size() / 2 ); best_examples_right.reserve ( values.size() / 2 ); for ( vector< pair < double, int > >::const_iterator it = values.begin(); it != values.end(); it++ ) { double value = it->first; if ( value < best_threshold ) best_examples_left.push_back( it->second ); else best_examples_right.push_back( it->second ); } node->left = buildRecursive( x, y, best_examples_left, depth+1 ); node->right = buildRecursive( x, y, best_examples_right, depth+1 ); return node; } RegressionNode *RTBRandom::build( const NICE::VVector & x, const NICE::Vector & y ) { int index = 0; vector all; all.reserve ( y.size() ); for ( uint i = 0; i < y.size(); i++ ) { all.push_back( index ); index++; } return buildRecursive( x, y, all, 0); }