CodebookRandomForest.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /**
  2. * @file CodebookRandomForest.cpp
  3. * @brief feature CodebookRandomForest
  4. * @author Erik Rodner
  5. * @date 02/15/2008
  6. */
  7. #include <queue>
  8. #include <iostream>
  9. #include "CodebookRandomForest.h"
  10. using namespace OBJREC;
  11. using namespace std;
  12. using namespace NICE;
  13. CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize )
  14. {
  15. this->clusterforest = NULL;
  16. this->maxDepth = maxDepth;
  17. this->restrictedCodebookSize = restrictedCodebookSize;
  18. }
  19. CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest,
  20. int maxDepth, int restrictedCodebookSize )
  21. {
  22. this->clusterforest = clusterforest;
  23. this->maxDepth = maxDepth;
  24. this->restrictedCodebookSize = restrictedCodebookSize;
  25. buildLeafMap();
  26. }
  27. CodebookRandomForest::~CodebookRandomForest()
  28. {
  29. if ( clusterforest != NULL )
  30. delete clusterforest;
  31. }
  32. void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest)
  33. {
  34. if(this->clusterforest != NULL)
  35. delete this->clusterforest;
  36. this->clusterforest = clusterforest;
  37. buildLeafMap();
  38. }
  39. void CodebookRandomForest::buildParentStructure ( DecisionNode *node,
  40. map<DecisionNode *, DecisionNode *> & parentStructure )
  41. {
  42. if ( node == NULL ) return;
  43. if ( node->left != NULL )
  44. {
  45. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->left, node ) );
  46. buildParentStructure ( node->left, parentStructure );
  47. }
  48. if ( node->right != NULL )
  49. {
  50. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->right, node ) );
  51. buildParentStructure ( node->right, parentStructure );
  52. }
  53. }
  54. void CodebookRandomForest::pruneForest ()
  55. {
  56. map<DecisionNode *, pair<long, int> > index;
  57. clusterforest->indexDescendants( index );
  58. map<DecisionNode *, DecisionNode *> parentStructure;
  59. vector<DecisionTree *> & trees = clusterforest->getForestNonConst();
  60. for ( vector<DecisionTree *>::const_iterator i = trees.begin();
  61. i != trees.end(); i++ )
  62. {
  63. DecisionTree *tree = *i;
  64. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( tree->getRoot(), NULL ) );
  65. buildParentStructure ( tree->getRoot(), parentStructure );
  66. }
  67. priority_queue< triplet<double, long, DecisionNode *> > lastLevelInnerNodes;
  68. long leafs = 0;
  69. for ( map<DecisionNode *, pair<long, int> >::const_iterator k = index.begin();
  70. k != index.end(); k++ )
  71. {
  72. DecisionNode *node = k->first;
  73. if ( (!node->isLeaf()) && ((node->left->isLeaf())
  74. || (node->right->isLeaf())) )
  75. {
  76. double mi = node->distribution.entropy() - (
  77. node->left->distribution.sum() * node->left->distribution.entropy() +
  78. node->right->distribution.sum() * node->right->distribution.entropy() )
  79. / node->distribution.sum();
  80. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  81. - mi, k->second.first, node ) );
  82. }
  83. if ( node->isLeaf() ) leafs++;
  84. }
  85. set< DecisionNode * > deletedRoots;
  86. /*********************************************
  87. * EVIL Pruning method *
  88. *********************************************/
  89. set<DecisionNode *> deletedNodes;
  90. while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) )
  91. {
  92. const triplet<double, long, DecisionNode *> & nodemi = lastLevelInnerNodes.top();
  93. //double current_mi = -nodemi.first;
  94. //fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi );
  95. DecisionNode *node = nodemi.third;
  96. lastLevelInnerNodes.pop();
  97. assert ( node != NULL );
  98. DecisionNode *left = node->left;
  99. DecisionNode *right = node->right;
  100. //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left,
  101. // (long int)right );
  102. if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) {
  103. // this is a tricky case...consider the subsequent contraction of
  104. // two childs of a node
  105. // After the first child is contracted, the node is added to lastLevelInnerNodes
  106. // If the second child is contracted, the node is still in the queue but
  107. // is now a leaf node.
  108. // A second problem exists if the parent node is contracted after the second
  109. // child but before the node. Therefore we introduced deletedNodes.
  110. continue;
  111. }
  112. #ifdef DEBUGPRUNING
  113. fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n",
  114. leafs, restrictedCodebookSize, current_mi );
  115. #endif
  116. assert ( parentStructure.find(node) != parentStructure.end() );
  117. DecisionNode *parent_node = parentStructure[node];
  118. //fprintf (stderr, "parent: %ld\n", (long int)parent_node );
  119. if ( parent_node == NULL )
  120. {
  121. #ifdef DEBUGPRUNING
  122. fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n");
  123. #endif
  124. DecisionNode *newParent = NULL;
  125. if ( (left->isLeaf()) && (right->isLeaf()) )
  126. {
  127. //fprintf (stderr, "case (a)\n");
  128. delete ( node->f );
  129. node->f = NULL;
  130. delete left;
  131. delete right;
  132. deletedNodes.insert ( left );
  133. deletedNodes.insert ( right );
  134. node->left = NULL;
  135. node->right = NULL;
  136. newParent = node;
  137. leafs--;
  138. } else if ( left->isLeaf() ) {
  139. // case (b) left child is a leaf
  140. delete left;
  141. delete node;
  142. deletedNodes.insert ( node );
  143. deletedNodes.insert ( left );
  144. parentStructure[right] = parent_node;
  145. newParent = right;
  146. leafs--;
  147. } else if ( right->isLeaf() ) {
  148. // case (b) right child is a leaf
  149. delete right;
  150. delete node;
  151. deletedNodes.insert ( right );
  152. deletedNodes.insert ( left );
  153. parentStructure[left] = parent_node;
  154. newParent = left;
  155. leafs--;
  156. } else {
  157. fprintf (stderr, "UNKNOWN CASE !!\n");
  158. exit(-1);
  159. }
  160. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; i++ )
  161. if ( (*i)->getRoot() == node )
  162. (*i)->setRoot(newParent);
  163. continue;
  164. }
  165. long int parent_index = index[parent_node].first;
  166. double mi = 0.0;
  167. bool nodeIsLeft = ( parent_node->left == node );
  168. DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left;
  169. if ( (left == NULL) || (right == NULL) )
  170. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1");
  171. if ( (left->isLeaf()) && (right->isLeaf()) )
  172. {
  173. /* ------------ case (a) left and right childs are leafs
  174. (p) (p)
  175. (n) (s) -> (n) (s) and add p to the last level nodes
  176. (l) (r) */
  177. #ifdef DEBUGPRUNING
  178. fprintf (stderr, "case (a)\n");
  179. #endif
  180. delete ( node->f );
  181. node->f = NULL;
  182. delete left;
  183. deletedNodes.insert ( left );
  184. delete right;
  185. deletedNodes.insert ( right );
  186. node->left = NULL;
  187. node->right = NULL;
  188. leafs--;
  189. double ep = parent_node->distribution.entropy();
  190. double en = node->distribution.entropy();
  191. double es = sibling->distribution.entropy();
  192. double pn = node->distribution.sum();
  193. double ps = sibling->distribution.sum();
  194. mi = ep - ( pn * en + ps * es ) / (pn+ps);
  195. #ifdef DEBUGPRUNING
  196. fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n",
  197. ep, en, es, pn, ps );
  198. parent_node->distribution.store(cerr);
  199. node->distribution.store(cerr);
  200. sibling->distribution.store(cerr);
  201. fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi,
  202. -lastLevelInnerNodes.top().first);
  203. #endif
  204. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  205. - mi, parent_index, parent_node ) );
  206. } else if ( left->isLeaf() ) {
  207. // --------------- case (b) left child is a leaf
  208. #ifdef DEBUGPRUNING
  209. fprintf (stderr, "case (b)\n");
  210. #endif
  211. if ( nodeIsLeft )
  212. parent_node->left = right;
  213. else
  214. parent_node->right = right;
  215. parentStructure[right] = parent_node;
  216. delete left;
  217. deletedNodes.insert ( left );
  218. delete node;
  219. deletedNodes.insert ( node );
  220. leafs--;
  221. } else if ( right->isLeaf() ) {
  222. // --------------- case (c) right child is a leaf
  223. #ifdef DEBUGPRUNING
  224. fprintf (stderr, "case (c)\n");
  225. #endif
  226. if ( nodeIsLeft )
  227. parent_node->left = left;
  228. else
  229. parent_node->right = left;
  230. delete right;
  231. deletedNodes.insert ( right );
  232. delete node;
  233. deletedNodes.insert ( node );
  234. parentStructure[left] = parent_node;
  235. leafs--;
  236. } else {
  237. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2");
  238. }
  239. }
  240. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; )
  241. {
  242. if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() )
  243. {
  244. delete (*i);
  245. trees.erase ( i );
  246. } else {
  247. i++;
  248. }
  249. }
  250. fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize );
  251. }
  252. void CodebookRandomForest::buildLeafMap ()
  253. {
  254. if ( restrictedCodebookSize > 0 ) {
  255. pruneForest ();
  256. }
  257. map<DecisionNode *, pair<long, int> > index;
  258. vector< pair<long, DecisionNode *> > index_reverse;
  259. clusterforest->indexDescendants ( index );
  260. leafMap.clear();
  261. for ( map<DecisionNode *, pair<long, int> >::const_iterator i = index.begin();
  262. i != index.end(); i++ )
  263. {
  264. DecisionNode *node = i->first;
  265. int depth = i->second.second;
  266. long index = i->second.first;
  267. if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) )
  268. index_reverse.push_back ( pair<long, DecisionNode *> ( index, node ) );
  269. }
  270. sort ( index_reverse.begin(), index_reverse.end() );
  271. /*************************************
  272. Recover a kind of canonical node
  273. permutation
  274. **************************************/
  275. for ( vector< pair<long, DecisionNode *> >::const_iterator i = index_reverse.begin();
  276. i != index_reverse.end(); i++ )
  277. {
  278. DecisionNode *node = i->second;
  279. leafMap.insert ( pair<DecisionNode *, int> ( node, leafMap.size() ) );
  280. }
  281. fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() );
  282. reinit ( leafMap.size() );
  283. }
  284. void CodebookRandomForest::copy ( const Codebook *codebook )
  285. {
  286. fthrow(Exception, "CodebookRandomForest::not yet implemented !\n");
  287. }
  288. void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const
  289. {
  290. fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n");
  291. }
  292. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram,
  293. int & codebookEntry, double & weight, double & distance ) const
  294. {
  295. SparseVector votes;
  296. vote ( feature, votes );
  297. for ( SparseVector::const_iterator i = votes.begin();
  298. i != votes.end(); i++ )
  299. {
  300. int index = i->first;
  301. double val = i->second;
  302. histogram[index] += val;
  303. if ( i == votes.begin() )
  304. {
  305. codebookEntry = index;
  306. weight = val;
  307. }
  308. }
  309. distance = 0.0;
  310. }
  311. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const
  312. {
  313. vector<DecisionNode *> leafNodes;
  314. NICE::Vector *x = new NICE::Vector ( feature );
  315. Example pe ( x );
  316. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  317. delete x;
  318. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  319. j != leafNodes.end(); j++ )
  320. {
  321. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  322. assert ( k != leafMap.end() );
  323. int leafindex = k->second;
  324. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  325. }
  326. }
  327. void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const
  328. {
  329. vector<DecisionNode *> leafNodes;
  330. NICE::Vector *x = new NICE::Vector ( feature );
  331. Example pe ( x );
  332. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  333. delete x;
  334. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  335. j != leafNodes.end(); j++ )
  336. {
  337. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  338. DecisionNode *node = *j;
  339. assert ( k != leafMap.end() );
  340. int leafindex = k->second;
  341. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  342. FullVector sDistribution ( node->distribution );
  343. sDistribution.normalize();
  344. if ( distribution.empty() )
  345. distribution = sDistribution;
  346. else
  347. distribution.add ( sDistribution );
  348. }
  349. distribution.normalize();
  350. }
  351. void CodebookRandomForest::add ( const Codebook *codebook )
  352. {
  353. fthrow ( Exception, "CodebookRandomForest::not yet implemented !");
  354. }
  355. Codebook *CodebookRandomForest::clone () const
  356. {
  357. return (new CodebookRandomForest(maxDepth, restrictedCodebookSize));
  358. }
  359. void CodebookRandomForest::clear ()
  360. {
  361. if ( clusterforest != NULL )
  362. clusterforest->clear();
  363. Codebook::clear();
  364. }
  365. void CodebookRandomForest::restore ( istream & is, int format )
  366. {
  367. if(this->clusterforest == NULL)
  368. this->clusterforest = new FPCRandomForests ();
  369. Codebook::restore(is, format);
  370. int maxClassNo = 0;
  371. is >> maxClassNo;
  372. clusterforest->setMaxClassNo( maxClassNo );
  373. clusterforest->restore ( is, format );
  374. buildLeafMap();
  375. }
  376. void CodebookRandomForest::store ( ostream & os, int format ) const
  377. {
  378. Codebook::store ( os, format );
  379. os << endl;
  380. os << clusterforest->getMaxClassNo() << endl;
  381. clusterforest->store ( os, format );
  382. os << endl;
  383. }