/** * @file RTBGrid.cpp * @brief random regression tree * @author Sven Sickert * @date 07/15/2013 */ #include #include "RTBGrid.h" using namespace OBJREC; #undef DEBUGTREE #undef DETAILTREE using namespace std; using namespace NICE; RTBGrid::RTBGrid( const Config *conf, std::string section ) { max_depth = conf->gI(section, "max_depth", 20 ); min_examples = conf->gI(section, "min_examples", 10); save_indices = conf->gB(section, "save_indices", false); if ( conf->gB(section, "start_random_generator", false ) ) srand(time(NULL)); } RTBGrid::~RTBGrid() { } bool RTBGrid::balancingLeftRight(const vector< pair< double, int > > values, double threshold, int& count_left, int& count_right) { count_left = 0; count_right = 0; for ( vector< pair< double, int > >::const_iterator it = values.begin(); it != values.end(); it++ ) { double value = it->first; if ( value < threshold ) { count_left++; } else { count_right++; } } #ifdef DETAILTREE fprintf (stderr, "left vs. right: %d : %d\n", count_left, count_right ); #endif if ( (count_left == 0) || (count_right == 0) ) return false; // no split return true; } RegressionNode *RTBGrid::buildRecursive ( const NICE::VVector & x, const std::vector > & limits, std::vector & selection, int depth) { #ifdef DEBUGTREE fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(), (int)depth); #endif RegressionNode *node = new RegressionNode (); if ( depth > max_depth ) { #ifdef DEBUGTREE fprintf (stderr, "RTBGrid: maxmimum depth reached !\n"); #endif node->trainExamplesIndices = selection; return node; } if ( (int)selection.size() < min_examples ) { #ifdef DEBUGTREE fprintf (stderr, "RTBGrid: minimum examples reached %d < %d !\n", (int)selection.size(), min_examples ); #endif node->trainExamplesIndices = selection; return node; } vector > values; int f = depth % x[0].size(); values.clear(); collectFeatureValues ( x, selection, f, values ); #ifdef DETAILTREE double minValue = (min_element ( values.begin(), values.end() ))->first; double maxValue = (max_element ( values.begin(), values.end() ))->first; fprintf (stderr, "max %f min %f\n", maxValue, minValue ); #endif double threshold = 0.5 * (limits[f][0]+limits[f][1]); int tmp = depth; while( tmp > (int)x[0].size() ) { threshold *= 0.5; tmp -= x[0].size(); } int count_left, count_right; if ( ! balancingLeftRight( values, threshold, count_left, count_right) ) { fprintf ( stderr, "RTBGrid: no split possible (empty leaf)\n" ); node->trainExamplesIndices = selection; return node; } #ifdef DETAILTREE fprintf (stderr, "t %f for feature %d\n", threshold, f ); #endif node->f = f; node->threshold = threshold; // re calculating examples_left and examples_right vector best_examples_left; vector best_examples_right; best_examples_left.reserve ( values.size() / 2 ); best_examples_right.reserve ( values.size() / 2 ); for ( vector< pair < double, int > >::const_iterator it = values.begin(); it != values.end(); it++ ) { double value = it->first; if ( value < threshold ) best_examples_left.push_back( it->second ); else best_examples_right.push_back( it->second ); } node->left = buildRecursive( x, limits, best_examples_left, depth+1 ); node->right = buildRecursive( x, limits, best_examples_right, depth+1 ); return node; } RegressionNode *RTBGrid::build( const NICE::VVector & x, const NICE::Vector & y ) { int index = 0; vector all; all.reserve ( y.size() ); for ( uint i = 0; i < y.size(); i++ ) { all.push_back( index ); index++; } // get min/max values for all features int fcount = x[0].size(); vector< vector > limits; for ( int j = 0; j < fcount; j++ ) { double min = numeric_limits::max(); double max = numeric_limits::min(); for ( int i = 0; i < x.size(); i++ ) { double value = x[i][j]; if (value > max ) max = value; if (value < min ) min = value; } vector flimit; flimit.push_back(min); flimit.push_back(max); limits.push_back(flimit); } return buildRecursive( x, limits, all, 0); }