VCPreRandomForest.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /**
  2. * @file VCPreRandomForest.cpp
  3. * @brief Combination of a classifier with a pre-clustering using a random forest
  4. * @author Erik Rodner
  5. * @date 06/17/2010
  6. */
  7. #include "VCPreRandomForest.h"
  8. #include <iostream>
  9. #include <vislearning/cbaselib/VectorFeature.h>
  10. using namespace OBJREC;
  11. using namespace std;
  12. using namespace NICE;
  13. VCPreRandomForest::VCPreRandomForest( const Config *conf, const std::string & section, VecClassifier *_leafClassifierPrototype )
  14. : leafClassifierPrototype(_leafClassifierPrototype), fp(conf)
  15. {
  16. string cluster_section = conf->gS(section, "cluster_section", "RandomForest");
  17. mEx = conf->gI("DTBRandom", "min_examples", numeric_limits<int>::max());
  18. mEx = 500;
  19. randomforest = new FPCRandomForests ( conf, cluster_section );
  20. }
  21. VCPreRandomForest::~VCPreRandomForest()
  22. {
  23. // delete the random forest
  24. if ( randomforest != NULL )
  25. delete randomforest;
  26. // delete all classifiers in the leafs
  27. for ( map<DecisionNode *, VecClassifier *>::const_iterator i = leafClassifiers.begin();
  28. i != leafClassifiers.end(); i++ )
  29. {
  30. VecClassifier *lc = i->second;
  31. delete lc;
  32. }
  33. }
  34. ClassificationResult VCPreRandomForest::classify ( const NICE::Vector & x ) const
  35. {
  36. NICE::Vector *v = new NICE::Vector(x);
  37. Example example(v);
  38. vector<DecisionNode *> leafNodes;
  39. // traverse the forest and obtain all involved leaf nodes
  40. randomforest->getLeafNodes(example, leafNodes);
  41. ClassificationResult r ( ClassificationResult::REJECTION_NONE, maxClassNo );
  42. r.scores.set(0.0);
  43. for ( vector<DecisionNode *>::const_iterator i = leafNodes.begin();
  44. i != leafNodes.end(); i++ )
  45. {
  46. DecisionNode *node = *i;
  47. map<DecisionNode *, VecClassifier *>::const_iterator leafClassifierIt =
  48. leafClassifiers.find ( node );
  49. if ( leafClassifierIt == leafClassifiers.end() ) {
  50. // this leaf has no associated classifier
  51. // -> we will use the random forest "score" :)
  52. //
  53. double sum = node->distribution.sum();
  54. for(uint k = 0; k < (uint)std::min(node->distribution.size(), r.scores.size());k++)
  55. {
  56. r.scores[k] += node->distribution[k] / sum;
  57. }
  58. //fthrow(Exception, "Unable to find this leaf node !! (implementation bug)");
  59. continue;
  60. }
  61. VecClassifier *leafClassifier = leafClassifierIt->second;
  62. ClassificationResult rSingle = leafClassifier->classify ( x );
  63. rSingle.scores.normalize();
  64. for(uint k = 0; k < (uint)std::min(rSingle.scores.size(), r.scores.size());k++)
  65. {
  66. r.scores[k] += rSingle.scores[k];
  67. }
  68. }
  69. r.scores.multiply ( 1.0/(leafNodes.size()) );
  70. r.classno = r.scores.maxElement();
  71. if ( fabs(r.scores.sum() - 1.0) > 1e-2 )
  72. {
  73. //fthrow(Exception, "Ups !\n");
  74. r.scores[0] = 1.0;
  75. }
  76. example.clean();
  77. return r;
  78. }
  79. void VCPreRandomForest::teach ( const LabeledSetVector & teachSet )
  80. {
  81. Examples examples;
  82. maxClassNo = teachSet.getMaxClassno();
  83. LOOP_ALL(teachSet)
  84. {
  85. EACH(classno, x);
  86. NICE::Vector *v = new Vector(x);
  87. examples.push_back( pair<int, Example> (classno, Example(v)));
  88. }
  89. uint dimension = teachSet.dimension();
  90. fp.clear();
  91. Feature *f = new VectorFeature(dimension);
  92. f->explode(fp);
  93. // train the forest
  94. randomforest->setMaxClassNo( teachSet.getMaxClassno() );
  95. randomforest->train ( fp, examples );
  96. // free some useless memory, we do not need this
  97. // data structure any more
  98. examples.clean();
  99. vector<DecisionNode *> leafNodes;
  100. randomforest->getAllLeafNodes ( leafNodes );
  101. int lsize = leafNodes.size();
  102. cout << "leafnodes: " << lsize << endl;
  103. int leafNo = 0;
  104. #pragma omp parallel for
  105. for ( int l = 0; l < lsize; l++)
  106. {
  107. cerr << "Training classifier for leaf " << leafNo << endl;
  108. leafNo++;
  109. DecisionNode *node = leafNodes[l];
  110. if ( node->distribution.entropy() <= 0.0) continue;
  111. if ( ! node->isLeaf() ) continue;
  112. vector<int> examplesSet = node->trainExamplesIndices;
  113. assert(examplesSet.size() > 0);
  114. sort (examplesSet.begin(), examplesSet.end());
  115. LabeledSetVector trainSubSet;
  116. vector<double> counter(maxClassNo,0.0);
  117. uint exampleIndex = 0;
  118. uint c = 0;
  119. LOOP_ALL(teachSet)
  120. {
  121. EACH(classno, x);
  122. if ( examplesSet[c] == exampleIndex )
  123. {
  124. c++;
  125. trainSubSet.add ( classno, x );
  126. }
  127. exampleIndex++;
  128. }
  129. VecClassifier *lc = leafClassifierPrototype->clone();
  130. lc->teach ( trainSubSet );
  131. leafClassifiers.insert ( pair<DecisionNode *, VecClassifier *> ( node, lc ) );
  132. }
  133. }
  134. void VCPreRandomForest::clear()
  135. {
  136. map<DecisionNode *, VecClassifier *>::iterator iter;
  137. for( iter = leafClassifiers.begin(); iter != leafClassifiers.end(); ++iter )
  138. {
  139. iter->second->clear();
  140. }
  141. randomforest->clear();
  142. }