CodebookRandomForest.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. /**
  2. * @file CodebookRandomForest.cpp
  3. * @brief feature CodebookRandomForest
  4. * @author Erik Rodner
  5. * @date 02/15/2008
  6. */
  7. #include <queue>
  8. #include <iostream>
  9. #include "CodebookRandomForest.h"
  10. using namespace OBJREC;
  11. using namespace std;
  12. using namespace NICE;
  13. #undef DEBUGPRUNING
  14. CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize )
  15. {
  16. this->clusterforest = NULL;
  17. this->maxDepth = maxDepth;
  18. this->restrictedCodebookSize = restrictedCodebookSize;
  19. }
  20. CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest,
  21. int maxDepth, int restrictedCodebookSize )
  22. {
  23. this->clusterforest = clusterforest;
  24. this->maxDepth = maxDepth;
  25. this->restrictedCodebookSize = restrictedCodebookSize;
  26. buildLeafMap();
  27. }
  28. CodebookRandomForest::~CodebookRandomForest()
  29. {
  30. if ( clusterforest != NULL )
  31. delete clusterforest;
  32. }
  33. void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest)
  34. {
  35. if(this->clusterforest != NULL)
  36. delete this->clusterforest;
  37. this->clusterforest = clusterforest;
  38. buildLeafMap();
  39. }
  40. void CodebookRandomForest::buildParentStructure ( DecisionNode *node,
  41. map<DecisionNode *, DecisionNode *> & parentStructure )
  42. {
  43. if ( node == NULL ) return;
  44. if ( node->left != NULL )
  45. {
  46. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->left, node ) );
  47. buildParentStructure ( node->left, parentStructure );
  48. }
  49. if ( node->right != NULL )
  50. {
  51. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->right, node ) );
  52. buildParentStructure ( node->right, parentStructure );
  53. }
  54. }
  55. void CodebookRandomForest::pruneForest ()
  56. {
  57. map<DecisionNode *, pair<long, int> > index;
  58. clusterforest->indexDescendants( index );
  59. map<DecisionNode *, DecisionNode *> parentStructure;
  60. vector<DecisionTree *> & trees = clusterforest->getForestNonConst();
  61. for ( vector<DecisionTree *>::const_iterator i = trees.begin();
  62. i != trees.end(); i++ )
  63. {
  64. DecisionTree *tree = *i;
  65. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( tree->getRoot(), NULL ) );
  66. buildParentStructure ( tree->getRoot(), parentStructure );
  67. }
  68. priority_queue< triplet<double, long, DecisionNode *> > lastLevelInnerNodes;
  69. long leafs = 0;
  70. for ( map<DecisionNode *, pair<long, int> >::const_iterator k = index.begin();
  71. k != index.end(); k++ )
  72. {
  73. DecisionNode *node = k->first;
  74. if ( (!node->isLeaf()) && ((node->left->isLeaf())
  75. || (node->right->isLeaf())) )
  76. {
  77. double mi = node->distribution.entropy() - (
  78. node->left->distribution.sum() * node->left->distribution.entropy() +
  79. node->right->distribution.sum() * node->right->distribution.entropy() )
  80. / node->distribution.sum();
  81. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  82. - mi, k->second.first, node ) );
  83. }
  84. if ( node->isLeaf() ) leafs++;
  85. }
  86. set< DecisionNode * > deletedRoots;
  87. /*********************************************
  88. * EVIL Pruning method *
  89. *********************************************/
  90. set<DecisionNode *> deletedNodes;
  91. while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) )
  92. {
  93. const triplet<double, long, DecisionNode *> & nodemi = lastLevelInnerNodes.top();
  94. #ifdef DEBUGPRUNING
  95. double current_mi = -nodemi.first;
  96. fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi );
  97. #endif
  98. DecisionNode *node = nodemi.third;
  99. lastLevelInnerNodes.pop();
  100. assert ( node != NULL );
  101. DecisionNode *left = node->left;
  102. DecisionNode *right = node->right;
  103. //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left,
  104. // (long int)right );
  105. if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) {
  106. // this is a tricky case...consider the subsequent contraction of
  107. // two childs of a node
  108. // After the first child is contracted, the node is added to lastLevelInnerNodes
  109. // If the second child is contracted, the node is still in the queue but
  110. // is now a leaf node.
  111. // A second problem exists if the parent node is contracted after the second
  112. // child but before the node. Therefore we introduced deletedNodes.
  113. continue;
  114. }
  115. #ifdef DEBUGPRUNING
  116. fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n",
  117. leafs, restrictedCodebookSize, current_mi );
  118. #endif
  119. assert ( parentStructure.find(node) != parentStructure.end() );
  120. DecisionNode *parent_node = parentStructure[node];
  121. //fprintf (stderr, "parent: %ld\n", (long int)parent_node );
  122. if ( parent_node == NULL )
  123. {
  124. #ifdef DEBUGPRUNING
  125. fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n");
  126. #endif
  127. DecisionNode *newParent = NULL;
  128. if ( (left->isLeaf()) && (right->isLeaf()) )
  129. {
  130. //fprintf (stderr, "case (a)\n");
  131. delete ( node->f );
  132. node->f = NULL;
  133. delete left;
  134. delete right;
  135. deletedNodes.insert ( left );
  136. deletedNodes.insert ( right );
  137. node->left = NULL;
  138. node->right = NULL;
  139. newParent = node;
  140. leafs--;
  141. } else if ( left->isLeaf() ) {
  142. // case (b) left child is a leaf
  143. delete left;
  144. delete node;
  145. deletedNodes.insert ( node );
  146. deletedNodes.insert ( left );
  147. parentStructure[right] = parent_node;
  148. newParent = right;
  149. leafs--;
  150. } else if ( right->isLeaf() ) {
  151. // case (b) right child is a leaf
  152. delete right;
  153. delete node;
  154. deletedNodes.insert ( right );
  155. deletedNodes.insert ( left );
  156. parentStructure[left] = parent_node;
  157. newParent = left;
  158. leafs--;
  159. } else {
  160. fprintf (stderr, "UNKNOWN CASE !!\n");
  161. exit(-1);
  162. }
  163. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; i++ )
  164. if ( (*i)->getRoot() == node )
  165. (*i)->setRoot(newParent);
  166. continue;
  167. }
  168. long int parent_index = index[parent_node].first;
  169. double mi = 0.0;
  170. bool nodeIsLeft = ( parent_node->left == node );
  171. DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left;
  172. if ( (left == NULL) || (right == NULL) )
  173. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1");
  174. if ( (left->isLeaf()) && (right->isLeaf()) )
  175. {
  176. /* ------------ case (a) left and right childs are leafs
  177. (p) (p)
  178. (n) (s) -> (n) (s) and add p to the last level nodes
  179. (l) (r) */
  180. #ifdef DEBUGPRUNING
  181. fprintf (stderr, "case (a)\n");
  182. #endif
  183. delete ( node->f );
  184. node->f = NULL;
  185. delete left;
  186. deletedNodes.insert ( left );
  187. delete right;
  188. deletedNodes.insert ( right );
  189. node->left = NULL;
  190. node->right = NULL;
  191. leafs--;
  192. double ep = parent_node->distribution.entropy();
  193. double en = node->distribution.entropy();
  194. double es = sibling->distribution.entropy();
  195. double pn = node->distribution.sum();
  196. double ps = sibling->distribution.sum();
  197. mi = ep - ( pn * en + ps * es ) / (pn+ps);
  198. #ifdef DEBUGPRUNING
  199. fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n",
  200. ep, en, es, pn, ps );
  201. parent_node->distribution.store(cerr);
  202. node->distribution.store(cerr);
  203. sibling->distribution.store(cerr);
  204. fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi,
  205. -lastLevelInnerNodes.top().first);
  206. #endif
  207. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  208. - mi, parent_index, parent_node ) );
  209. } else if ( left->isLeaf() ) {
  210. // --------------- case (b) left child is a leaf
  211. #ifdef DEBUGPRUNING
  212. fprintf (stderr, "case (b)\n");
  213. #endif
  214. if ( nodeIsLeft )
  215. parent_node->left = right;
  216. else
  217. parent_node->right = right;
  218. parentStructure[right] = parent_node;
  219. delete left;
  220. deletedNodes.insert ( left );
  221. delete node;
  222. deletedNodes.insert ( node );
  223. leafs--;
  224. } else if ( right->isLeaf() ) {
  225. // --------------- case (c) right child is a leaf
  226. #ifdef DEBUGPRUNING
  227. fprintf (stderr, "case (c)\n");
  228. #endif
  229. if ( nodeIsLeft )
  230. parent_node->left = left;
  231. else
  232. parent_node->right = left;
  233. delete right;
  234. deletedNodes.insert ( right );
  235. delete node;
  236. deletedNodes.insert ( node );
  237. parentStructure[left] = parent_node;
  238. leafs--;
  239. } else {
  240. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2");
  241. }
  242. }
  243. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; )
  244. {
  245. if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() )
  246. {
  247. delete (*i);
  248. trees.erase ( i );
  249. } else {
  250. i++;
  251. }
  252. }
  253. #ifdef DEBUGPRUNING
  254. fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize );
  255. #endif
  256. }
  257. void CodebookRandomForest::buildLeafMap ()
  258. {
  259. if ( restrictedCodebookSize > 0 ) {
  260. pruneForest ();
  261. }
  262. map<DecisionNode *, pair<long, int> > index;
  263. vector< pair<long, DecisionNode *> > index_reverse;
  264. clusterforest->indexDescendants ( index );
  265. leafMap.clear();
  266. for ( map<DecisionNode *, pair<long, int> >::const_iterator i = index.begin();
  267. i != index.end(); i++ )
  268. {
  269. DecisionNode *node = i->first;
  270. int depth = i->second.second;
  271. long index = i->second.first;
  272. if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) )
  273. index_reverse.push_back ( pair<long, DecisionNode *> ( index, node ) );
  274. }
  275. sort ( index_reverse.begin(), index_reverse.end() );
  276. /*************************************
  277. Recover a kind of canonical node
  278. permutation
  279. **************************************/
  280. for ( vector< pair<long, DecisionNode *> >::const_iterator i = index_reverse.begin();
  281. i != index_reverse.end(); i++ )
  282. {
  283. DecisionNode *node = i->second;
  284. leafMap.insert ( pair<DecisionNode *, int> ( node, leafMap.size() ) );
  285. }
  286. #ifdef DEBUGPRUNING
  287. fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() );
  288. #endif
  289. reinit ( leafMap.size() );
  290. }
  291. void CodebookRandomForest::copy ( const Codebook *codebook )
  292. {
  293. fthrow(Exception, "CodebookRandomForest::not yet implemented !\n");
  294. }
  295. void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const
  296. {
  297. fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n");
  298. }
  299. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram,
  300. int & codebookEntry, double & weight, double & distance ) const
  301. {
  302. SparseVector votes;
  303. vote ( feature, votes );
  304. for ( SparseVector::const_iterator i = votes.begin();
  305. i != votes.end(); i++ )
  306. {
  307. int index = i->first;
  308. double val = i->second;
  309. histogram[index] += val;
  310. if ( i == votes.begin() )
  311. {
  312. codebookEntry = index;
  313. weight = val;
  314. }
  315. }
  316. distance = 0.0;
  317. }
  318. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const
  319. {
  320. vector<DecisionNode *> leafNodes;
  321. NICE::Vector *x = new NICE::Vector ( feature );
  322. Example pe ( x );
  323. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  324. delete x;
  325. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  326. j != leafNodes.end(); j++ )
  327. {
  328. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  329. assert ( k != leafMap.end() );
  330. int leafindex = k->second;
  331. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  332. }
  333. }
  334. void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const
  335. {
  336. vector<DecisionNode *> leafNodes;
  337. NICE::Vector *x = new NICE::Vector ( feature );
  338. Example pe ( x );
  339. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  340. delete x;
  341. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  342. j != leafNodes.end(); j++ )
  343. {
  344. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  345. DecisionNode *node = *j;
  346. assert ( k != leafMap.end() );
  347. int leafindex = k->second;
  348. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  349. FullVector sDistribution ( node->distribution );
  350. sDistribution.normalize();
  351. if ( distribution.empty() )
  352. distribution = sDistribution;
  353. else
  354. distribution.add ( sDistribution );
  355. }
  356. distribution.normalize();
  357. }
  358. void CodebookRandomForest::voteAndClassify(const Vector &feature, SparseVector &votes, Vector &distribution) const
  359. {
  360. vector<DecisionNode *> leafNodes;
  361. NICE::Vector *x = new NICE::Vector ( feature );
  362. Example pe ( x );
  363. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  364. delete x;
  365. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  366. j != leafNodes.end(); j++ )
  367. {
  368. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  369. DecisionNode *node = *j;
  370. assert ( k != leafMap.end() );
  371. int leafindex = k->second;
  372. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  373. FullVector sDistribution ( node->distribution );
  374. sDistribution.normalize();
  375. if ( distribution.size() == 0 )
  376. {
  377. distribution.resize(sDistribution.size() );
  378. distribution.set(0.0f);
  379. }
  380. for(int i = 0; i< sDistribution.size(); i++)
  381. distribution[i] += sDistribution[i];
  382. }
  383. distribution.normalizeL2();
  384. }
  385. void CodebookRandomForest::add ( const Codebook *codebook )
  386. {
  387. fthrow ( Exception, "CodebookRandomForest::not yet implemented !");
  388. }
  389. Codebook *CodebookRandomForest::clone () const
  390. {
  391. return (new CodebookRandomForest(maxDepth, restrictedCodebookSize));
  392. }
  393. void CodebookRandomForest::clear ()
  394. {
  395. if ( clusterforest != NULL )
  396. clusterforest->clear();
  397. Codebook::clear();
  398. }
  399. void CodebookRandomForest::restore ( istream & is, int format )
  400. {
  401. if (is.good())
  402. {
  403. std::string tmp;
  404. is >> tmp; //class name
  405. if ( ! this->isStartTag( tmp, "CodebookRandomForest" ) )
  406. {
  407. std::cerr << " WARNING - attempt to restore CodebookRandomForest, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  408. throw;
  409. }
  410. if(this->clusterforest == NULL)
  411. this->clusterforest = new FPCRandomForests ();
  412. bool b_endOfBlock = false;
  413. while ( !b_endOfBlock )
  414. {
  415. is >> tmp; // start of block
  416. if ( this->isEndTag( tmp, "CodebookRandomForest" ) || is.eof() )
  417. {
  418. b_endOfBlock = true;
  419. continue;
  420. }
  421. tmp = this->removeStartTag ( tmp );
  422. if ( tmp.compare("baseclass") == 0 )
  423. {
  424. Codebook::restore(is, format);
  425. is >> tmp; // end of block
  426. tmp = this->removeEndTag ( tmp );
  427. }
  428. else if ( tmp.compare("maxDepth") == 0 )
  429. {
  430. is >> maxDepth;
  431. is >> tmp; // end of block
  432. tmp = this->removeEndTag ( tmp );
  433. }
  434. else if ( tmp.compare("restrictedCodebookSize") == 0 )
  435. {
  436. is >> restrictedCodebookSize;
  437. is >> tmp; // end of block
  438. tmp = this->removeEndTag ( tmp );
  439. }
  440. else if ( tmp.compare("maxClassNo") == 0 )
  441. {
  442. int maxClassNo = 0;
  443. is >> maxClassNo;
  444. is >> tmp; // end of block
  445. tmp = this->removeEndTag ( tmp );
  446. if(clusterforest != NULL)
  447. clusterforest->setMaxClassNo(maxClassNo);
  448. }
  449. else if ( tmp.compare("clusterforest") == 0 )
  450. {
  451. clusterforest->restore ( is, format );
  452. is >> tmp; // end of block
  453. tmp = this->removeEndTag ( tmp );
  454. }
  455. }
  456. buildLeafMap();
  457. }
  458. }
  459. void CodebookRandomForest::store ( ostream & os, int format ) const
  460. {
  461. if (os.good())
  462. {
  463. // show starting point
  464. os << this->createStartTag( "CodebookRandomForest" ) << std::endl;
  465. os.precision (numeric_limits<double>::digits10 + 1);
  466. os << this->createStartTag( "baseclass" ) << std::endl;
  467. Codebook::store ( os, format );
  468. os << this->createEndTag( "baseclass" ) << std::endl;
  469. os << this->createStartTag( "maxDepth" ) << std::endl;
  470. os << maxDepth << std::endl;
  471. os << this->createEndTag( "maxDepth" ) << std::endl;
  472. os << this->createStartTag( "restrictedCodebookSize" ) << std::endl;
  473. os << restrictedCodebookSize << std::endl;
  474. os << this->createEndTag( "restrictedCodebookSize" ) << std::endl;
  475. os << this->createStartTag( "maxClassNo" ) << std::endl;
  476. os << clusterforest->getMaxClassNo() << endl;
  477. os << this->createEndTag( "maxClassNo" ) << std::endl;
  478. os << this->createStartTag( "clusterforest" ) << std::endl;
  479. clusterforest->store ( os, format );
  480. os << this->createEndTag( "clusterforest" ) << std::endl;
  481. /* Codebook::store ( os, format );
  482. os << maxDepth << endl;
  483. os << restrictedCodebookSize << endl;
  484. os << clusterforest->getMaxClassNo() << endl;
  485. clusterforest->store ( os, format );
  486. os << endl;
  487. */
  488. // done
  489. os << this->createEndTag( "CodebookRandomForest" ) << std::endl;
  490. }
  491. }