123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357 |
- /**
- * @file RegRandomForests.cpp
- * @brief implementation of random set forests for regression
- * @author Sven Sickert
- * @date 06/28/2013
- */
#include "vislearning/regression/randomforest/RegRandomForests.h"

#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <assert.h>
#include <cmath>
#include <iostream>

#include "vislearning/regression/randomforest/RTBClusterRandom.h"
#include "vislearning/regression/randomforest/RTBGrid.h"
#include "vislearning/regression/randomforest/RTBLinear.h"
#include "vislearning/regression/randomforest/RTBMeanPostImprovement.h"
#include "vislearning/regression/randomforest/RTBMinDist.h"
#include "vislearning/regression/randomforest/RTBRandom.h"
- using namespace OBJREC;
- using namespace std;
- using namespace NICE;
- RegRandomForests::RegRandomForests()
- {
- builder = NULL;
- minimum_error_reduction = 0.0;
- enableOutOfBagEstimates = false;
- }
/**
 * Config-driven constructor: reads ensemble parameters from the given config
 * section and instantiates the requested regression tree builder.
 *
 * Recognized "builder" values: none, random, min_dist, linear, grid,
 * cluster_random, mean_post_improvement. An unknown value aborts the program.
 */
RegRandomForests::RegRandomForests( const Config *_conf,
std::string section ) : conf(_conf)
{
std::string builder_method = conf->gS(section, "builder", "random");
// NOTE(review): 10e-3 is 0.01, not 0.001 -- the literal reads like a typo
// for 1e-3; confirm the intended default before changing it
minimum_error_reduction = conf->gD(section, "minimum_error_reduction", 10e-3);
enableOutOfBagEstimates = conf->gB(section, "enable_out_of_bag_estimates", false);

// remember the section so teach()/restore() can re-read settings if needed
confsection = section;

if ( builder_method == "none" ) {
// do not initialize
builder = NULL;
}
else {
// ensemble parameters are only needed when a builder is requested
number_of_trees = conf->gI(section, "number_of_trees", 20 );
features_per_tree = conf->gD(section, "features_per_tree", 1.0 );
samples_per_tree = conf->gD(section, "samples_per_tree", 0.2 );

if ( builder_method == "random" )
{
std::string builder_section = conf->gS(section, "builder_section", "RTBRandom");
builder = new RTBRandom ( conf, builder_section );
}
else if ( builder_method == "min_dist" )
{
std::string builder_section = conf->gS(section, "builder_section", "RTBMinDist");
builder = new RTBMinDist ( conf, builder_section );
}
else if ( builder_method == "linear" )
{
// NOTE(review): default section "RTBRandom" here looks like a copy-paste
// from the "random" branch -- possibly should be "RTBLinear"; confirm
// against existing config files before changing
std::string builder_section = conf->gS(section, "builder_section", "RTBRandom");
builder = new RTBLinear ( conf, builder_section );
}
else if ( builder_method == "grid" )
{
std::string builder_section = conf->gS(section, "builder_section", "RTBGrid");
builder = new RTBGrid ( conf, builder_section );
}
else if ( builder_method == "cluster_random" )
{
std::string builder_section = conf->gS(section, "builder_section", "RTBClusterRandom");
builder = new RTBClusterRandom ( conf, builder_section );
}
else if ( builder_method == "mean_post_improvement" )
{
std::string builder_section = conf->gS(section, "builder_section", "RTBMeanPostImprovement");
builder = new RTBMeanPostImprovement ( conf, builder_section );
} else {
// unknown builder name: fail hard instead of training a broken ensemble
fprintf (stderr, "RegressionTreeBuilder %s not yet implemented !\n", builder_method.c_str() );
exit(-1);
}
}
}
- RegRandomForests::~RegRandomForests()
- {
- for ( vector<RegressionTree *>::iterator it = forest.begin();
- it != forest.end(); it++ )
- delete (*it);
-
- if ( builder != NULL )
- delete builder;
- }
- void RegRandomForests::calcOutOfBagEstimates (
- std::vector< std::vector<int> > & outofbagtrees,
- NICE::VVector x,
- NICE::Vector y )
- {
- oobResults.clear();
-
- // calculate out of bag regression results
- // as suggested bei Breiman
- // out of bag = training data not used to build
- // a single tree is used as testing data for the tree
- long index = 0;
- for ( int i = 0; i < (int)x.size(); i++, index++ )
- {
- double trueValue = y[i];
- const vector<int> & trees = outofbagtrees[index];
-
- if ( trees.size() <= 0 ) continue;
-
- double predValue = predict ( x[i], trees );
-
- double predError = abs( trueValue - predValue );
- oobResults.push_back ( pair<double, double> ( predError, trueValue ) );
- }
- }
- void RegRandomForests::getLeafNodes ( NICE::Vector x,
- std::vector<RegressionNode *> & leafNodes,
- int depth )
- {
- leafNodes.reserve ( forest.size() );
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end(); it++ )
- {
- RegressionTree & rt = *(*it);
- RegressionNode *leaf = rt.getLeafNode ( x, depth );
- leafNodes.push_back ( leaf );
- }
- }
- void RegRandomForests::getAllLeafNodes ( vector<RegressionNode *> & leafNodes)
- {
- int z = 0;
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end(); it++, z++ )
- {
- RegressionTree & rt = *(*it);
- vector<RegressionNode *> leaves = rt.getAllLeafNodes();
- for ( int j = 0; j < (int)leaves.size(); j++ )
- {
- for ( int k = 0; k < (int)leaves[j]->trainExamplesIndices.size(); k++ )
- {
- leaves[j]->trainExamplesIndices[k] = exselection[z][leaves[j]->trainExamplesIndices[k]];
- }
- leafNodes.push_back(leaves[j]);
- }
- }
- }
/**
 * Train the ensemble: for each of number_of_trees trees, draw a random
 * subset of the training data (samples_per_tree fraction), build a tree
 * with the configured builder, and record which samples each tree did NOT
 * see (out-of-bag bookkeeping). Optionally computes out-of-bag estimates
 * afterwards.
 *
 * @param x training inputs, one NICE::Vector per sample
 * @param y training target values, aligned with x
 */
void RegRandomForests::teach ( const NICE::VVector & x, const NICE::Vector & y )
{
cerr << "RegRandomForests::teach()" << endl;
assert( builder != NULL );

int featuresCount = (int) (x[0].size() * features_per_tree );
fprintf(stderr, "RegRandomForests: number of features %d\n", (int)x[0].size() );

// outofbagtrees[i] collects the indices of all trees that were NOT
// trained on sample i
vector< vector<int> > outofbagtrees;
outofbagtrees.resize( x.size() );

// pre-size exselection so each parallel iteration only touches its own slot
for ( int k = 0; k < number_of_trees; k++ )
{
vector<int> tmp;
exselection.push_back(tmp);
}

#pragma omp parallel for
for ( int k = 0; k < number_of_trees; k++ )
{
fprintf( stderr, "[ -- building tree %d/%d -- ]\n", k + 1, number_of_trees);

vector<int> examples_index;
for ( int i = 0; i < (int)x.size(); i++ )
{
examples_index.push_back( i );
}

int trainingExamples = (int)(examples_index.size() * samples_per_tree);
fprintf (stderr, "RegRandomForests: selection of %d examples for each tree\n", trainingExamples );

if ( (trainingExamples < 3) && ((int)examples_index.size() > trainingExamples) )
{
// degenerate subsample -> fall back to using the full training set
fprintf(stderr, "RegRandomForests: number of examples < 3 !! minExamples=%d, trainingExamples=%d\n",
(int)x.size(), trainingExamples);
trainingExamples = examples_index.size();
fprintf(stderr, "RegRandomForests: I will use all %d examples. !!\n", trainingExamples);
}

// NOTE(review): random_shuffle uses a shared global RNG; calling it from
// multiple OpenMP threads is a data race and makes results
// non-deterministic -- consider per-thread RNGs
if ( samples_per_tree < 1.0 )
random_shuffle( examples_index.begin(), examples_index.end() );

// copy the selected subsample; exselection[k] is only written by this
// iteration, so no critical section is needed here
VVector subset;
Vector subval ( trainingExamples );
for ( int e = 0; e < trainingExamples; e++ )
{
exselection[k].push_back( examples_index[e] );
subset.push_back( x[ examples_index[e] ] );
subval.set( e, y[ examples_index[e] ] );
}

// set out of bag trees
for ( uint e = trainingExamples; e < examples_index.size(); e++ )
{
int index = examples_index[e];
// outofbagtrees[index] may be written by several threads
#pragma omp critical
outofbagtrees[index].push_back(k);
}

/******* select a random feature set *******/
// NOTE(review): features_subset is computed but never passed to the
// builder -- every tree is trained on ALL features; dead code or a
// missing builder parameter, confirm intent
vector<int> features_subset;
for ( int j = 0; j < (int)x[0].size(); j++ )
features_subset.push_back( j );

random_shuffle( features_subset.begin(), features_subset.end() );
while ((int)features_subset.size() > featuresCount)
features_subset.pop_back();

/******* training of an individual tree ****/
RegressionTree *tree = new RegressionTree( conf );

builder->build( *tree, subset, subval );

/******* prune tree using least squares criterion *****/
//if ( minimum_error_reduction > 0.0 )
// tree->pruneTreeLeastSquares( minimum_error_reduction );

/******* add individual tree to ensemble *****/
#pragma omp critical
forest.push_back(tree);
}

if (enableOutOfBagEstimates)
calcOutOfBagEstimates(outofbagtrees, x, y);
}
- double RegRandomForests::predict ( const NICE::Vector & x,
- const vector< int > & outofbagtrees )
- {
- // predict using only a selection of all trees
- // contained in outofbagtrees
-
- double overall_prediction = 0.0;
- int treecount = 0;
-
- for ( vector<int>::const_iterator it = outofbagtrees.begin();
- it != outofbagtrees.end();
- it++ )
- {
- assert ( *it < (int)forest.size() );
- RegressionTree & rt = *(forest[(*it)]);
- double predVal;
- rt.traverse( x, predVal );
-
- overall_prediction += predVal;
- treecount++;
- }
-
- overall_prediction /= treecount;
-
- return overall_prediction;
- }
- double RegRandomForests::predict ( const NICE::Vector & x )
- {
- double overall_prediction = 0.0;
- int treecount = 0;
-
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end();
- it++ )
- {
- RegressionTree & rt = *(*it);
- double predVal;
- rt.traverse( x, predVal );
-
- overall_prediction += predVal;
- treecount++;
- }
-
- overall_prediction /= treecount;
-
- return overall_prediction;
- }
/**
 * Restore the forest from a stream in the format written by store():
 * a sequence of "TREE <index>" headers each followed by a serialized tree.
 * Trees are appended to the current forest (the forest is not cleared).
 *
 * @param is input stream positioned at the first "TREE" token
 * @param format serialization format (currently ignored)
 */
void RegRandomForests::restore(istream & is, int format)
{
std::string tag;
int index;
// NOTE(review): assumes RegressionTree::restore() consumes the matching
// "ENDTREE" marker written by store(); otherwise the loop would stop
// after the first tree -- confirm against RegressionTree
while ( (is >> tag) && (tag == "TREE") )
{
is >> index;
RegressionTree *rt = new RegressionTree ( conf );
rt->restore ( is );
// prune right after loading; note teach() currently has pruning
// commented out, so behavior differs between trained and restored forests
if ( minimum_error_reduction > 0.0 )
rt->pruneTreeLeastSquares ( minimum_error_reduction );
forest.push_back(rt);
}
}
- void RegRandomForests::store(ostream & os, int format) const
- {
- int index = 0;
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end(); it++, index++ )
- {
- const RegressionTree & rt = *(*it);
- os << "TREE " << index << endl;
- rt.store ( os, format );
- os << "ENDTREE ";
- }
- }
- void RegRandomForests::clear()
- {
- for ( vector<RegressionTree *>::iterator it = forest.begin();
- it != forest.end(); it++ )
- delete (*it);
- forest.clear();
- }
- void RegRandomForests::indexDescendants(
- map<RegressionNode *, pair<long, int> > & index) const
- {
- long maxindex = 0;
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end(); it++ )
- (*it)->indexDescendants ( index, maxindex );
- }
- void RegRandomForests::resetCounters()
- {
- for ( vector<RegressionTree *>::const_iterator it = forest.begin();
- it != forest.end(); it++ )
- (*it)->resetCounters ();
- }
- void RegRandomForests::setComplexity(int size)
- {
- fprintf (stderr, "RegRandomForests: set complexity to %d, overwriting current value %d\n",
- size, number_of_trees );
- number_of_trees = size;
- }
|