/** * @file DTBClusterRandom.cpp * @brief build a decision tree for clustering * @author Erik Rodner * @date 05/01/2010 */ #include #include "vislearning/classifier/fpclassifier/randomforest/DTBClusterRandom.h" using namespace OBJREC; using namespace std; using namespace NICE; DTBClusterRandom::DTBClusterRandom( const Config *conf, std::string section ) { max_depth = conf->gI(section, "max_depth", 10 ); min_examples = conf->gI(section, "min_examples", 50); } DTBClusterRandom::~DTBClusterRandom() { } DecisionNode *DTBClusterRandom::buildRecursive ( const FeaturePool & fp, const Examples & examples, vector & examples_selection, FullVector & distribution, int maxClassNo, int depth ) { #ifdef DEBUGTREE fprintf (stderr, "Examples: %d (depth %d)\n", (int)examples_selection.size(), (int)depth); #endif DecisionNode *node = new DecisionNode (); node->distribution = distribution; if ( depth > max_depth ) { #ifdef DEBUGTREE fprintf (stderr, "DTBClusterRandom: maxmimum depth reached !\n"); #endif return node; } if ( (int)examples_selection.size() < min_examples ) { #ifdef DEBUGTREE fprintf (stderr, "DTBClusterRandom: minimum examples reached %d < %d !\n", (int)examples_selection.size(), min_examples ); #endif return node; } Feature *f = fp.getRandomFeature (); FeatureValuesUnsorted values; values.clear(); f->calcFeatureValues ( examples, examples_selection, values ); sort ( values.begin(), values.end() ); double minValue = (values.begin())->first; double maxValue = (values.begin()+(values.size()-1))->first; double median = (values.begin()+values.size()/2)->first; #ifdef DETAILTREE fprintf (stderr, "max %f min %f median %f\n", maxValue, minValue, median ); #endif if ( maxValue - minValue < 1e-30 ) { #ifdef DEBUGTREE cerr << "DTBClusterRandom: max - min < 1e-30: exit" << endl; #endif return node; } node->f = f->clone(); node->threshold = median; // re calculating examples_left and examples_right vector best_examples_left; vector best_examples_right; FullVector best_distribution_left (maxClassNo+1); FullVector best_distribution_right (maxClassNo+1); best_distribution_left.set(0.0); best_distribution_right.set(0.0); best_examples_left.reserve ( values.size() / 2 ); best_examples_right.reserve ( values.size() / 2 ); for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ ) { double value = i->first; int classno = i->second; if ( value < median ) { best_examples_left.push_back ( i->third ); best_distribution_left[classno]++; } else { best_examples_right.push_back ( i->third ); best_distribution_right[classno]++; } } #ifdef DEBUGTREE node->f->store(cerr); cerr << endl; #endif node->left = buildRecursive ( fp, examples, best_examples_left, best_distribution_left, maxClassNo, depth+1 ); node->right = buildRecursive ( fp, examples, best_examples_right, best_distribution_right, maxClassNo, depth+1 ); return node; } DecisionNode *DTBClusterRandom::build ( const FeaturePool & fp, const Examples & examples, int maxClassNo ) { int index = 0; FullVector distribution ( maxClassNo+1 ); vector all; all.reserve ( examples.size() ); for ( Examples::const_iterator j = examples.begin(); j != examples.end(); j++ ) { int classno = j->first; distribution[classno] += j->second.weight; all.push_back ( index ); index++; } return buildRecursive ( fp, examples, all, distribution, maxClassNo, 0 ); }