CodebookRandomForest.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /**
  2. * @file CodebookRandomForest.cpp
  3. * @brief feature CodebookRandomForest
  4. * @author Erik Rodner
  5. * @date 02/15/2008
  6. */
  7. //#ifdef NOVISUAL
  8. //#include <objrec/nice_nonvis.h>
  9. //#else
  10. //#include <objrec/nice.h>
  11. //#endif
  12. #include <queue>
  13. #include <iostream>
  14. #include "CodebookRandomForest.h"
  15. using namespace OBJREC;
  16. using namespace std;
  17. using namespace NICE;
  18. CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize )
  19. {
  20. this->clusterforest = NULL;
  21. this->maxDepth = maxDepth;
  22. this->restrictedCodebookSize = restrictedCodebookSize;
  23. }
  24. CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest,
  25. int maxDepth, int restrictedCodebookSize )
  26. {
  27. this->clusterforest = clusterforest;
  28. this->maxDepth = maxDepth;
  29. this->restrictedCodebookSize = restrictedCodebookSize;
  30. buildLeafMap();
  31. }
  32. CodebookRandomForest::~CodebookRandomForest()
  33. {
  34. if ( clusterforest != NULL )
  35. delete clusterforest;
  36. }
  37. void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest)
  38. {
  39. if(this->clusterforest != NULL)
  40. delete this->clusterforest;
  41. this->clusterforest = clusterforest;
  42. buildLeafMap();
  43. }
  44. void CodebookRandomForest::buildParentStructure ( DecisionNode *node,
  45. map<DecisionNode *, DecisionNode *> & parentStructure )
  46. {
  47. if ( node == NULL ) return;
  48. if ( node->left != NULL )
  49. {
  50. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->left, node ) );
  51. buildParentStructure ( node->left, parentStructure );
  52. }
  53. if ( node->right != NULL )
  54. {
  55. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( node->right, node ) );
  56. buildParentStructure ( node->right, parentStructure );
  57. }
  58. }
  59. void CodebookRandomForest::pruneForest ()
  60. {
  61. map<DecisionNode *, pair<long, int> > index;
  62. clusterforest->indexDescendants( index );
  63. map<DecisionNode *, DecisionNode *> parentStructure;
  64. vector<DecisionTree *> & trees = clusterforest->getForestNonConst();
  65. for ( vector<DecisionTree *>::const_iterator i = trees.begin();
  66. i != trees.end(); i++ )
  67. {
  68. DecisionTree *tree = *i;
  69. parentStructure.insert ( pair<DecisionNode *, DecisionNode *> ( tree->getRoot(), NULL ) );
  70. buildParentStructure ( tree->getRoot(), parentStructure );
  71. }
  72. priority_queue< triplet<double, long, DecisionNode *> > lastLevelInnerNodes;
  73. long leafs = 0;
  74. for ( map<DecisionNode *, pair<long, int> >::const_iterator k = index.begin();
  75. k != index.end(); k++ )
  76. {
  77. DecisionNode *node = k->first;
  78. if ( (!node->isLeaf()) && ((node->left->isLeaf())
  79. || (node->right->isLeaf())) )
  80. {
  81. double mi = node->distribution.entropy() - (
  82. node->left->distribution.sum() * node->left->distribution.entropy() +
  83. node->right->distribution.sum() * node->right->distribution.entropy() )
  84. / node->distribution.sum();
  85. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  86. - mi, k->second.first, node ) );
  87. }
  88. if ( node->isLeaf() ) leafs++;
  89. }
  90. set< DecisionNode * > deletedRoots;
  91. /*********************************************
  92. * EVIL Pruning method *
  93. *********************************************/
  94. set<DecisionNode *> deletedNodes;
  95. while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) )
  96. {
  97. const triplet<double, long, DecisionNode *> & nodemi = lastLevelInnerNodes.top();
  98. //double current_mi = -nodemi.first;
  99. //fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi );
  100. DecisionNode *node = nodemi.third;
  101. lastLevelInnerNodes.pop();
  102. assert ( node != NULL );
  103. DecisionNode *left = node->left;
  104. DecisionNode *right = node->right;
  105. //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left,
  106. // (long int)right );
  107. if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) {
  108. // this is a tricky case...consider the subsequent contraction of
  109. // two childs of a node
  110. // After the first child is contracted, the node is added to lastLevelInnerNodes
  111. // If the second child is contracted, the node is still in the queue but
  112. // is now a leaf node.
  113. // A second problem exists if the parent node is contracted after the second
  114. // child but before the node. Therefore we introduced deletedNodes.
  115. continue;
  116. }
  117. #ifdef DEBUGPRUNING
  118. fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n",
  119. leafs, restrictedCodebookSize, current_mi );
  120. #endif
  121. assert ( parentStructure.find(node) != parentStructure.end() );
  122. DecisionNode *parent_node = parentStructure[node];
  123. //fprintf (stderr, "parent: %ld\n", (long int)parent_node );
  124. if ( parent_node == NULL )
  125. {
  126. #ifdef DEBUGPRUNING
  127. fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n");
  128. #endif
  129. DecisionNode *newParent = NULL;
  130. if ( (left->isLeaf()) && (right->isLeaf()) )
  131. {
  132. //fprintf (stderr, "case (a)\n");
  133. delete ( node->f );
  134. node->f = NULL;
  135. delete left;
  136. delete right;
  137. deletedNodes.insert ( left );
  138. deletedNodes.insert ( right );
  139. node->left = NULL;
  140. node->right = NULL;
  141. newParent = node;
  142. leafs--;
  143. } else if ( left->isLeaf() ) {
  144. // case (b) left child is a leaf
  145. delete left;
  146. delete node;
  147. deletedNodes.insert ( node );
  148. deletedNodes.insert ( left );
  149. parentStructure[right] = parent_node;
  150. newParent = right;
  151. leafs--;
  152. } else if ( right->isLeaf() ) {
  153. // case (b) right child is a leaf
  154. delete right;
  155. delete node;
  156. deletedNodes.insert ( right );
  157. deletedNodes.insert ( left );
  158. parentStructure[left] = parent_node;
  159. newParent = left;
  160. leafs--;
  161. } else {
  162. fprintf (stderr, "UNKNOWN CASE !!\n");
  163. exit(-1);
  164. }
  165. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; i++ )
  166. if ( (*i)->getRoot() == node )
  167. (*i)->setRoot(newParent);
  168. continue;
  169. }
  170. long int parent_index = index[parent_node].first;
  171. double mi = 0.0;
  172. bool nodeIsLeft = ( parent_node->left == node );
  173. DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left;
  174. if ( (left == NULL) || (right == NULL) )
  175. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1");
  176. if ( (left->isLeaf()) && (right->isLeaf()) )
  177. {
  178. /* ------------ case (a) left and right childs are leafs
  179. (p) (p)
  180. (n) (s) -> (n) (s) and add p to the last level nodes
  181. (l) (r) */
  182. #ifdef DEBUGPRUNING
  183. fprintf (stderr, "case (a)\n");
  184. #endif
  185. delete ( node->f );
  186. node->f = NULL;
  187. delete left;
  188. deletedNodes.insert ( left );
  189. delete right;
  190. deletedNodes.insert ( right );
  191. node->left = NULL;
  192. node->right = NULL;
  193. leafs--;
  194. double ep = parent_node->distribution.entropy();
  195. double en = node->distribution.entropy();
  196. double es = sibling->distribution.entropy();
  197. double pn = node->distribution.sum();
  198. double ps = sibling->distribution.sum();
  199. mi = ep - ( pn * en + ps * es ) / (pn+ps);
  200. #ifdef DEBUGPRUNING
  201. fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n",
  202. ep, en, es, pn, ps );
  203. parent_node->distribution.store(cerr);
  204. node->distribution.store(cerr);
  205. sibling->distribution.store(cerr);
  206. fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi,
  207. -lastLevelInnerNodes.top().first);
  208. #endif
  209. lastLevelInnerNodes.push ( triplet<double, long, DecisionNode *> (
  210. - mi, parent_index, parent_node ) );
  211. } else if ( left->isLeaf() ) {
  212. // --------------- case (b) left child is a leaf
  213. #ifdef DEBUGPRUNING
  214. fprintf (stderr, "case (b)\n");
  215. #endif
  216. if ( nodeIsLeft )
  217. parent_node->left = right;
  218. else
  219. parent_node->right = right;
  220. parentStructure[right] = parent_node;
  221. delete left;
  222. deletedNodes.insert ( left );
  223. delete node;
  224. deletedNodes.insert ( node );
  225. leafs--;
  226. } else if ( right->isLeaf() ) {
  227. // --------------- case (c) right child is a leaf
  228. #ifdef DEBUGPRUNING
  229. fprintf (stderr, "case (c)\n");
  230. #endif
  231. if ( nodeIsLeft )
  232. parent_node->left = left;
  233. else
  234. parent_node->right = left;
  235. delete right;
  236. deletedNodes.insert ( right );
  237. delete node;
  238. deletedNodes.insert ( node );
  239. parentStructure[left] = parent_node;
  240. leafs--;
  241. } else {
  242. fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2");
  243. }
  244. }
  245. for ( vector<DecisionTree *>::iterator i = trees.begin(); i != trees.end() ; )
  246. {
  247. if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() )
  248. {
  249. delete (*i);
  250. trees.erase ( i );
  251. } else {
  252. i++;
  253. }
  254. }
  255. fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize );
  256. }
  257. void CodebookRandomForest::buildLeafMap ()
  258. {
  259. if ( restrictedCodebookSize > 0 ) {
  260. pruneForest ();
  261. }
  262. map<DecisionNode *, pair<long, int> > index;
  263. vector< pair<long, DecisionNode *> > index_reverse;
  264. clusterforest->indexDescendants ( index );
  265. leafMap.clear();
  266. for ( map<DecisionNode *, pair<long, int> >::const_iterator i = index.begin();
  267. i != index.end(); i++ )
  268. {
  269. DecisionNode *node = i->first;
  270. int depth = i->second.second;
  271. long index = i->second.first;
  272. if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) )
  273. index_reverse.push_back ( pair<long, DecisionNode *> ( index, node ) );
  274. }
  275. sort ( index_reverse.begin(), index_reverse.end() );
  276. /*************************************
  277. Recover a kind of canonical node
  278. permutation
  279. **************************************/
  280. for ( vector< pair<long, DecisionNode *> >::const_iterator i = index_reverse.begin();
  281. i != index_reverse.end(); i++ )
  282. {
  283. DecisionNode *node = i->second;
  284. leafMap.insert ( pair<DecisionNode *, int> ( node, leafMap.size() ) );
  285. }
  286. fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() );
  287. reinit ( leafMap.size() );
  288. }
  289. void CodebookRandomForest::copy ( const Codebook *codebook )
  290. {
  291. fthrow(Exception, "CodebookRandomForest::not yet implemented !\n");
  292. }
  293. void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const
  294. {
  295. fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n");
  296. }
  297. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram,
  298. int & codebookEntry, double & weight, double & distance ) const
  299. {
  300. SparseVector votes;
  301. vote ( feature, votes );
  302. for ( SparseVector::const_iterator i = votes.begin();
  303. i != votes.end(); i++ )
  304. {
  305. int index = i->first;
  306. double val = i->second;
  307. histogram[index] += val;
  308. if ( i == votes.begin() )
  309. {
  310. codebookEntry = index;
  311. weight = val;
  312. }
  313. }
  314. distance = 0.0;
  315. }
  316. void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const
  317. {
  318. vector<DecisionNode *> leafNodes;
  319. NICE::Vector *x = new NICE::Vector ( feature );
  320. Example pe ( x );
  321. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  322. delete x;
  323. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  324. j != leafNodes.end(); j++ )
  325. {
  326. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  327. assert ( k != leafMap.end() );
  328. int leafindex = k->second;
  329. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  330. }
  331. }
  332. void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const
  333. {
  334. vector<DecisionNode *> leafNodes;
  335. NICE::Vector *x = new NICE::Vector ( feature );
  336. Example pe ( x );
  337. clusterforest->getLeafNodes ( pe, leafNodes, maxDepth );
  338. delete x;
  339. for ( vector<DecisionNode *>::const_iterator j = leafNodes.begin();
  340. j != leafNodes.end(); j++ )
  341. {
  342. map<DecisionNode *, int>::const_iterator k = leafMap.find ( *j );
  343. DecisionNode *node = *j;
  344. assert ( k != leafMap.end() );
  345. int leafindex = k->second;
  346. votes.insert ( votes.begin(), pair<int, double> ( leafindex, 1.0 ) );
  347. FullVector sDistribution ( node->distribution );
  348. sDistribution.normalize();
  349. if ( distribution.empty() )
  350. distribution = sDistribution;
  351. else
  352. distribution.add ( sDistribution );
  353. }
  354. distribution.normalize();
  355. }
  356. void CodebookRandomForest::add ( const Codebook *codebook )
  357. {
  358. fthrow ( Exception, "CodebookRandomForest::not yet implemented !");
  359. }
  360. Codebook *CodebookRandomForest::clone () const
  361. {
  362. return (new CodebookRandomForest(maxDepth, restrictedCodebookSize));
  363. }
  364. void CodebookRandomForest::clear ()
  365. {
  366. if ( clusterforest != NULL )
  367. clusterforest->clear();
  368. Codebook::clear();
  369. }
  370. void CodebookRandomForest::restore ( istream & is, int format )
  371. {
  372. if(this->clusterforest == NULL)
  373. this->clusterforest = new FPCRandomForests ();
  374. Codebook::restore(is, format);
  375. int maxClassNo = 0;
  376. is >> maxClassNo;
  377. clusterforest->setMaxClassNo( maxClassNo );
  378. clusterforest->restore ( is, format );
  379. buildLeafMap();
  380. }
  381. void CodebookRandomForest::store ( ostream & os, int format ) const
  382. {
  383. Codebook::store ( os, format );
  384. os << endl;
  385. os << clusterforest->getMaxClassNo() << endl;
  386. clusterforest->store ( os, format );
  387. os << endl;
  388. }