/**
 * @file RegPreRandomForests.cpp
 * @brief Combination of a regression method with a pre-clustering using a random forest
 * @author Sven Sickert
 * @date 07/12/2013
 */

#include "vislearning/regression/regcombination/RegPreRandomForests.h"

#include <iostream>
#include <cassert>
#include <algorithm>

using namespace OBJREC;
using namespace std;
using namespace NICE;

/**
 * Constructor. Reads the pre-clustering forest settings from the config
 * and takes ownership of the leaf-regression prototype.
 *
 * @param conf                     configuration object
 * @param section                  config section of this combination method
 * @param _leafRegressionPrototype prototype cloned for each leaf (owned; deleted in dtor)
 */
RegPreRandomForests::RegPreRandomForests ( const Config *conf,
    const string & section,
    RegressionAlgorithm *_leafRegressionPrototype )
    : leafRegressionPrototype ( _leafRegressionPrototype )
{
  // config section that holds the settings of the pre-clustering forest
  string cluster_section = conf->gS ( section, "cluster_section", "RandomForest" );
  mEx = conf->gI ( "RTBRandom", "min_examples", 500 );
  randomforest = new RegRandomForests ( conf, cluster_section );
}

/** Destructor: releases the forest, all per-leaf regressors and the prototype. */
RegPreRandomForests::~RegPreRandomForests ()
{
  // delete the random forest
  if ( randomforest != NULL )
    delete randomforest;

  // delete all regression methods trained for the leafs
  for ( map<RegressionNode *, RegressionAlgorithm *>::const_iterator it = leafRegressions.begin();
        it != leafRegressions.end(); it++ )
  {
    RegressionAlgorithm *lr = it->second;
    if ( lr != NULL )
      delete lr;
  }

  // delete the regression prototype (ownership was transferred in the ctor)
  if ( leafRegressionPrototype != NULL )
    delete leafRegressionPrototype;
}

/**
 * Train the pre-clustering forest on (X, y), then train one clone of the
 * leaf-regression prototype on the training examples that ended up in each
 * leaf node of the forest.
 *
 * @param X training feature vectors
 * @param y training target values (same length as X)
 */
void RegPreRandomForests::teach ( const VVector & X, const Vector & y )
{
  randomforest->teach ( X, y );

  if ( leafRegressionPrototype != NULL )
  {
    vector<RegressionNode *> leafNodes;
    randomforest->getAllLeafNodes ( leafNodes );

    int lsize = leafNodes.size();
    cerr << "leafnodes: " << lsize << endl;

#pragma omp parallel for
    for ( int l = 0; l < lsize; l++ )
    {
      // NOTE(fix): previously a shared counter "leafNo" was incremented inside
      // the parallel loop (data race); the loop index identifies the leaf instead.
      RegressionNode *node = leafNodes[l];
      if ( ! node->isLeaf() )
      {
        // fix: message wrongly referred to "::predict" before
        fprintf ( stderr, "RegPreRandomForests::teach: ID #%d not a leaf node!", l );
        continue;
      }

      vector<int> leafTrainInds = node->trainExamplesIndices;
      cerr << "Teaching regression method for leaf " << l << "..." << endl;
      cerr << "examples in leave: " << leafTrainInds.size() << endl;
      assert ( leafTrainInds.size() > 0 );

      sort ( leafTrainInds.begin(), leafTrainInds.end() );

      // collect the training examples assigned to this leaf,
      // skipping indices that are out of range
      NICE::VVector leafTrainData;
      vector<double> tmpVals;
      for ( int i = 0; i < (int)leafTrainInds.size(); i++ )
      {
        if ( leafTrainInds[i] >= 0 && leafTrainInds[i] < (int)y.size() )
        {
          leafTrainData.push_back ( X[ leafTrainInds[i] ] );
          tmpVals.push_back ( y[ leafTrainInds[i] ] );
        }
      }

      if ( leafTrainData.size() == 0 )
        continue;

      NICE::Vector leafTrainVals ( tmpVals );

      RegressionAlgorithm *lr = leafRegressionPrototype->clone();
      lr->teach ( leafTrainData, leafTrainVals );

      // fix: std::map is not thread-safe, so serialize concurrent insertions
#pragma omp critical
      leafRegressions.insert ( pair<RegressionNode *, RegressionAlgorithm *> ( node, lr ) );
    }
  }
}

/**
 * Predict the target value for x by averaging, over all trees, either the
 * leaf-specific regressor's prediction or (if none was trained for that
 * leaf) the forest's own leaf value.
 *
 * @param x feature vector
 * @return averaged prediction (0.0 if no leaf node is reached)
 */
double RegPreRandomForests::predict ( const Vector & x )
{
  double pred = 0.0;

  vector<RegressionNode *> leafNodes;

  // traverse the forest and obtain all involved leaf nodes
  randomforest->getLeafNodes ( x, leafNodes );

  // fix: guard against division by zero when no leaf is reached
  if ( leafNodes.empty() )
    return pred;

  for ( vector<RegressionNode *>::const_iterator it = leafNodes.begin();
        it != leafNodes.end(); it++ )
  {
    RegressionNode *node = *it;
    map<RegressionNode *, RegressionAlgorithm *>::const_iterator leafRegressionIt =
      leafRegressions.find ( node );

    if ( leafRegressionIt == leafRegressions.end() )
    {
      // this leaf has no associated regression method
      // -> we will use the random forest result
      pred += node->predVal;
      continue;
    }

    RegressionAlgorithm *leafRegression = leafRegressionIt->second;
    pred += leafRegression->predict ( x );
  }

  pred /= leafNodes.size();

  return pred;
}

/** Reset the forest and every per-leaf regressor (objects stay allocated). */
void RegPreRandomForests::clear ()
{
  map<RegressionNode *, RegressionAlgorithm *>::iterator iter;
  for ( iter = leafRegressions.begin(); iter != leafRegressions.end(); iter++ )
  {
    iter->second->clear();
  }
  randomforest->clear();
}

/** Persistence is not implemented; only warns on stderr. */
void RegPreRandomForests::store ( ostream & os, int format ) const
{
  cerr << "RegPreRandomForest::store: not yet implemented" << endl;
}

/** Persistence is not implemented; only warns on stderr. */
void RegPreRandomForests::restore ( istream & is, int format )
{
  cerr << "RegPreRandomForest::restore: not yet implemented" << endl;
}