CodebookRandomForest.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. /**
  2. * @file CodebookRandomForest.cpp
  3. * @brief feature CodebookRandomForest
  4. * @author Erik Rodner
  5. * @date 02/15/2008
  6. */
  7. #include <queue>
  8. #include <iostream>
  9. #include "CodebookRandomForest.h"
  10. using namespace OBJREC;
  11. using namespace std;
  12. using namespace NICE;
  13. #undef DEBUGPRUNING
  14. CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize )
  15. {
  16. this->clusterforest = NULL;
  17. this->maxDepth = maxDepth;
  18. this->restrictedCodebookSize = restrictedCodebookSize;
  19. }
  20. CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest,
  21. int maxDepth, int restrictedCodebookSize )
  22. {
  23. this->clusterforest = clusterforest;
  24. this->maxDepth = maxDepth;
  25. this->restrictedCodebookSize = restrictedCodebookSize;
  26. buildLeafMap();
  27. }
  28. CodebookRandomForest::~CodebookRandomForest()
  29. {
  30. if ( clusterforest != NULL )
  31. delete clusterforest;
  32. }
  33. void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest)
  34. {
  35. if(this->clusterforest != NULL)
  36. delete this->clusterforest;
  37. this->clusterforest = clusterforest;
  38. buildLeafMap();
  39. }
  40. void CodebookRandomForest::buildParentStructure ( DecisionNode *node,
  41. map<DecisionNode *, DecisionNode *> & parentStructure )
  42. {
  43. if ( node == NULL ) return;
  44. if ( node->left != NULL )
  45. {
  46. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->left, node ) );
  47. buildParentStructure ( node->left, parentStructure );
  48. }
  49. if ( node->right != NULL )
  50. {
  51. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->right, node ) );
  52. buildParentStructure ( node->right, parentStructure );
  53. }
  54. }
  55. void CodebookRandomForest::pruneForest ()
  56. {
  57. map<DecisionNode *, pair<long, int> > index;
  58. clusterforest->indexDescendants( index );
  59. map<DecisionNode *, DecisionNode *> parentStructure;
  60. vector<DecisionTree *> & trees = clusterforest->getForestNonConst();
  61. for ( vector<DecisionTree *>::const_iterator i = trees.begin();
  62. i != trees.end(); i++ )
  63. {
  64. DecisionTree *tree = *i;
  65. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( tree->getRoot(), NULL ) );
  66. buildParentStructure ( tree->getRoot(), parentStructure );
  67. }
  68. priority_queue< triplet<double, long, DecisionNode *> > lastLevelInnerNodes;
  69. long leafs = 0;
  70. for ( map<DecisionNode *, pair<long, int> >::const_iterator k = index.begin();
  71. k != index.end(); k++ )
  72. {
  73. DecisionNode *node = k->first;
  74. if ( (!node->isLeaf()) && ((node->left->isLeaf())
  75. || (node->right->isLeaf())) )
  76. {
  77. double mi = node->distribution.entropy() - (
  78. node->left->distribution.sum() * node->left->distribution.entropy() +
  79. node->right->distribution.sum() * node->right->distribution.entropy() )
  80. / node->distribution.sum();
  81. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  82. - mi, k->second.first, node ) );
  83. }
  84. if ( node->isLeaf() ) leafs++;
  85. }
  86. set< DecisionNode * > deletedRoots;
  87. /*********************************************
  88. * EVIL Pruning method *
  89. *********************************************/
  90. set<DecisionNode *> deletedNodes;
  91. while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) )
  92. {
  93. const triplet<double, long, DecisionNode *> & nodemi = lastLevelInnerNodes.top();
  94. #ifdef DEBUGPRUNING
  95. double current_mi = -nodemi.first;
  96. fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi );
  97. #endif
  98. DecisionNode *node = nodemi.third;
  99. lastLevelInnerNodes.pop();
  100. assert ( node != NULL );
  101. DecisionNode *left = node->left;
  102. DecisionNode *right = node->right;
  103. //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left,
  104. // (long int)right );
  105. if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) {
  106. // this is a tricky case...consider the subsequent contraction of
  107. // two childs of a node
  108. // After the first child is contracted, the node is added to lastLevelInnerNodes
  109. // If the second child is contracted, the node is still in the queue but
  110. // is now a leaf node.
  111. // A second problem exists if the parent node is contracted after the second
  112. // child but before the node. Therefore we introduced deletedNodes.
  113. continue;
  114. }
  115. #ifdef DEBUGPRUNING
  116. fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n",
  117. leafs, restrictedCodebookSize, current_mi );
  118. #endif
  119. assert ( parentStructure.find(node) != parentStructure.end() );
  120. DecisionNode *parent_node = parentStructure[node];
  121. //fprintf (stderr, "parent: %ld\n", (long int)parent_node );
  122. if ( parent_node == NULL )
  123. {
  124. #ifdef DEBUGPRUNING
  125. fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n");
  126. #endif
  127. DecisionNode *newParent = NULL;
  128. if ( (left->isLeaf()) && (right->isLeaf()) )
  129. {
  130. //fprintf (stderr, "case (a)\n");
  131. delete ( node->f );
  132. node->f = NULL;
  133. delete left;
  134. delete right;
  135. deletedNodes.insert ( left );
  136. deletedNodes.insert ( right );
  137. node->left = NULL;
  138. node->right = NULL;
  139. newParent = node;
  140. leafs--;
  141. } else if ( left->isLeaf() ) {
  142. // case (b) left child is a leaf
  143. delete left;
  144. delete node;
  145. deletedNodes.insert ( node );
  146. deletedNodes.insert ( left );
  147. parentStructure[right] = parent_node;
  148. newParent = right;
  149. leafs--;
  150. } else if ( right->isLeaf() ) {
  151. // case (b) right child is a leaf
  152. delete right;
  153. delete node;
  154. deletedNodes.insert ( right );
  155. deletedNodes.insert ( left );
  156. parentStructure[left] = parent_node;
  157. newParent = left;
  158. leafs--;
  159. } else {
  160. fprintf (stderr, "UNKNOWN CASE !!\n");
  161. exit(-1);
  162. }
  163. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; i++ )
  164. if ( (*i)->getRoot() == node )
  165. (*i)->setRoot(newParent);
  166. continue;
  167. }
  168. long int parent_index = index[parent_node].first;
  169. double mi = 0.0;
  170. bool nodeIsLeft = ( parent_node->left == node );
  171. DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left;
  172. if ( (left == NULL) || (right == NULL) )
  173. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1");
  174. if ( (left->isLeaf()) && (right->isLeaf()) )
  175. {
  176. /* ------------ case (a) left and right childs are leafs
  177. (p) (p)
  178. (n) (s) -> (n) (s) and add p to the last level nodes
  179. (l) (r) */
  180. #ifdef DEBUGPRUNING
  181. fprintf (stderr, "case (a)\n");
  182. #endif
  183. delete ( node->f );
  184. node->f = NULL;
  185. delete left;
  186. deletedNodes.insert ( left );
  187. delete right;
  188. deletedNodes.insert ( right );
  189. node->left = NULL;
  190. node->right = NULL;
  191. leafs--;
  192. double ep = parent_node->distribution.entropy();
  193. double en = node->distribution.entropy();
  194. double es = sibling->distribution.entropy();
  195. double pn = node->distribution.sum();
  196. double ps = sibling->distribution.sum();
  197. mi = ep - ( pn * en + ps * es ) / (pn+ps);
  198. #ifdef DEBUGPRUNING
  199. fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n",
  200. ep, en, es, pn, ps );
  201. parent_node->distribution.store(cerr);
  202. node->distribution.store(cerr);
  203. sibling->distribution.store(cerr);
  204. fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi,
  205. -lastLevelInnerNodes.top().first);
  206. #endif
  207. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  208. - mi, parent_index, parent_node ) );
  209. } else if ( left->isLeaf() ) {
  210. // --------------- case (b) left child is a leaf
  211. #ifdef DEBUGPRUNING
  212. fprintf (stderr, "case (b)\n");
  213. #endif
  214. if ( nodeIsLeft )
  215. parent_node->left = right;
  216. else
  217. parent_node->right = right;
  218. parentStructure[right] = parent_node;
  219. delete left;
  220. deletedNodes.insert ( left );
  221. delete node;
  222. deletedNodes.insert ( node );
  223. leafs--;
  224. } else if ( right->isLeaf() ) {
  225. // --------------- case (c) right child is a leaf
  226. #ifdef DEBUGPRUNING
  227. fprintf (stderr, "case (c)\n");
  228. #endif
  229. if ( nodeIsLeft )
  230. parent_node->left = left;
  231. else
  232. parent_node->right = left;
  233. delete right;
  234. deletedNodes.insert ( right );
  235. delete node;
  236. deletedNodes.insert ( node );
  237. parentStructure[left] = parent_node;
  238. leafs--;
  239. } else {
  240. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2");
  241. }
  242. }
  243. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; )
  244. {
  245. if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() )
  246. {
  247. delete (*i);
  248. trees.erase ( i );
  249. } else {
  250. i++;
  251. }
  252. }
  253. fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize );
  254. }
  255. void CodebookRandomForest::buildLeafMap ()
  256. {
  257. if ( restrictedCodebookSize > 0 ) {
  258. pruneForest ();
  259. }
  260. map<DecisionNode *, pair<long, int> > index;
  261. vector< pair<long, DecisionNode *> > index_reverse;
  262. clusterforest->indexDescendants ( index );
  263. leafMap.clear();
  264. for ( map<DecisionNode *, pair<long, int> >::const_iterator i = index.begin();
  265. i != index.end(); i++ )
  266. {
  267. DecisionNode *node = i->first;
  268. int depth = i->second.second;
  269. long index = i->second.first;
  270. if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) )
  271. index_reverse.push_back ( pair<long, DecisionNode *> ( index, node ) );
  272. }
  273. sort ( index_reverse.begin(), index_reverse.end() );
  274. /*************************************
  275. Recover a kind of canonical node
  276. permutation
  277. **************************************/
  278. for ( vector< pair<long, DecisionNode *> >::const_iterator i = index_reverse.begin();
  279. i != index_reverse.end(); i++ )
  280. {
  281. DecisionNode *node = i->second;
  282. leafMap.insert ( pair<DecisionNode *, int> ( node, leafMap.size() ) );
  283. }
  284. fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() );
  285. reinit ( leafMap.size() );
  286. }
  287. void CodebookRandomForest::copy ( const Codebook *codebook )
  288. {
  289. fthrow(Exception, "CodebookRandomForest::not yet implemented !\n");
  290. }
  291. void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const
  292. {
  293. fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n");
  294. }
  295. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram,
  296. int & codebookEntry, double & weight, double & distance ) const
  297. {
  298. SparseVector votes;
  299. vote ( feature, votes );
  300. for ( SparseVector::const_iterator i = votes.begin();
  301. i != votes.end(); i++ )
  302. {
  303. int index = i->first;
  304. double val = i->second;
  305. histogram[index] += val;
  306. if ( i == votes.begin() )
  307. {
  308. codebookEntry = index;
  309. weight = val;
  310. }
  311. }
  312. distance = 0.0;
  313. }
  314. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const
  315. {
  316. vector<DecisionNode *> leafNodes;
  317. NICE::Vector *x = new NICE::Vector ( feature );
  318. Example pe ( x );
  319. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  320. delete x;
  321. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  322. j != leafNodes.end(); j++ )
  323. {
  324. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  325. assert ( k != leafMap.end() );
  326. int leafindex = k->second;
  327. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  328. }
  329. }
  330. void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const
  331. {
  332. vector<DecisionNode *> leafNodes;
  333. NICE::Vector *x = new NICE::Vector ( feature );
  334. Example pe ( x );
  335. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  336. delete x;
  337. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  338. j != leafNodes.end(); j++ )
  339. {
  340. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  341. DecisionNode *node = *j;
  342. assert ( k != leafMap.end() );
  343. int leafindex = k->second;
  344. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  345. FullVector sDistribution ( node->distribution );
  346. sDistribution.normalize();
  347. if ( distribution.empty() )
  348. distribution = sDistribution;
  349. else
  350. distribution.add ( sDistribution );
  351. }
  352. distribution.normalize();
  353. }
  354. void CodebookRandomForest::voteAndClassify(const Vector &feature, SparseVector &votes, Vector &distribution) const
  355. {
  356. vector<DecisionNode *> leafNodes;
  357. NICE::Vector *x = new NICE::Vector ( feature );
  358. Example pe ( x );
  359. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  360. delete x;
  361. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  362. j != leafNodes.end(); j++ )
  363. {
  364. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  365. DecisionNode *node = *j;
  366. assert ( k != leafMap.end() );
  367. int leafindex = k->second;
  368. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  369. FullVector sDistribution ( node->distribution );
  370. sDistribution.normalize();
  371. if ( distribution.size() == 0 )
  372. {
  373. distribution.resize(sDistribution.size() );
  374. distribution.set(0.0f);
  375. }
  376. for(int i = 0; i< sDistribution.size(); i++)
  377. distribution[i] += sDistribution[i];
  378. }
  379. distribution.normalizeL2();
  380. }
  381. void CodebookRandomForest::add ( const Codebook *codebook )
  382. {
  383. fthrow ( Exception, "CodebookRandomForest::not yet implemented !");
  384. }
  385. Codebook *CodebookRandomForest::clone () const
  386. {
  387. return (new CodebookRandomForest(maxDepth, restrictedCodebookSize));
  388. }
  389. void CodebookRandomForest::clear ()
  390. {
  391. if ( clusterforest != NULL )
  392. clusterforest->clear();
  393. Codebook::clear();
  394. }
  395. void CodebookRandomForest::restore ( istream & is, int format )
  396. {
  397. if (is.good())
  398. {
  399. std::string tmp;
  400. is >> tmp; //class name
  401. if ( ! this->isStartTag( tmp, "CodebookRandomForest" ) )
  402. {
  403. std::cerr << " WARNING - attempt to restore CodebookRandomForest, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  404. throw;
  405. }
  406. if(this->clusterforest == NULL)
  407. this->clusterforest = new FPCRandomForests ();
  408. bool b_endOfBlock = false;
  409. while ( !b_endOfBlock )
  410. {
  411. is >> tmp; // start of block
  412. if ( this->isEndTag( tmp, "CodebookRandomForest" ) || is.eof() )
  413. {
  414. b_endOfBlock = true;
  415. continue;
  416. }
  417. tmp = this->removeStartTag ( tmp );
  418. if ( tmp.compare("baseclass") == 0 )
  419. {
  420. Codebook::restore(is, format);
  421. is >> tmp; // end of block
  422. tmp = this->removeEndTag ( tmp );
  423. }
  424. else if ( tmp.compare("maxDepth") == 0 )
  425. {
  426. is >> maxDepth;
  427. is >> tmp; // end of block
  428. tmp = this->removeEndTag ( tmp );
  429. }
  430. else if ( tmp.compare("restrictedCodebookSize") == 0 )
  431. {
  432. is >> restrictedCodebookSize;
  433. is >> tmp; // end of block
  434. tmp = this->removeEndTag ( tmp );
  435. }
  436. else if ( tmp.compare("maxClassNo") == 0 )
  437. {
  438. int maxClassNo = 0;
  439. is >> maxClassNo;
  440. is >> tmp; // end of block
  441. tmp = this->removeEndTag ( tmp );
  442. if(clusterforest != NULL)
  443. clusterforest->setMaxClassNo(maxClassNo);
  444. }
  445. else if ( tmp.compare("clusterforest") == 0 )
  446. {
  447. clusterforest->restore ( is, format );
  448. is >> tmp; // end of block
  449. tmp = this->removeEndTag ( tmp );
  450. }
  451. }
  452. buildLeafMap();
  453. }
  454. }
  455. void CodebookRandomForest::store ( ostream & os, int format ) const
  456. {
  457. if (os.good())
  458. {
  459. // show starting point
  460. os << this->createStartTag( "CodebookRandomForest" ) << std::endl;
  461. os.precision (numeric_limits<double>::digits10 + 1);
  462. os << this->createStartTag( "baseclass" ) << std::endl;
  463. Codebook::store ( os, format );
  464. os << this->createEndTag( "baseclass" ) << std::endl;
  465. os << this->createStartTag( "maxDepth" ) << std::endl;
  466. os << maxDepth << std::endl;
  467. os << this->createEndTag( "maxDepth" ) << std::endl;
  468. os << this->createStartTag( "restrictedCodebookSize" ) << std::endl;
  469. os << restrictedCodebookSize << std::endl;
  470. os << this->createEndTag( "restrictedCodebookSize" ) << std::endl;
  471. os << this->createStartTag( "maxClassNo" ) << std::endl;
  472. os << clusterforest->getMaxClassNo() << endl;
  473. os << this->createEndTag( "maxClassNo" ) << std::endl;
  474. os << this->createStartTag( "clusterforest" ) << std::endl;
  475. clusterforest->store ( os, format );
  476. os << this->createEndTag( "clusterforest" ) << std::endl;
  477. /* Codebook::store ( os, format );
  478. os << maxDepth << endl;
  479. os << restrictedCodebookSize << endl;
  480. os << clusterforest->getMaxClassNo() << endl;
  481. clusterforest->store ( os, format );
  482. os << endl;
  483. */
  484. // done
  485. os << this->createEndTag( "CodebookRandomForest" ) << std::endl;
  486. }
  487. }