/** * @file CodebookRandomForest.cpp * @brief feature CodebookRandomForest * @author Erik Rodner * @date 02/15/2008 */ //#ifdef NOVISUAL //#include //#else //#include //#endif #include #include #include "CodebookRandomForest.h" using namespace OBJREC; using namespace std; using namespace NICE; CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize ) { this->clusterforest = NULL; this->maxDepth = maxDepth; this->restrictedCodebookSize = restrictedCodebookSize; } CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest, int maxDepth, int restrictedCodebookSize ) { this->clusterforest = clusterforest; this->maxDepth = maxDepth; this->restrictedCodebookSize = restrictedCodebookSize; buildLeafMap(); } CodebookRandomForest::~CodebookRandomForest() { if ( clusterforest != NULL ) delete clusterforest; } void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest) { if(this->clusterforest != NULL) delete this->clusterforest; this->clusterforest = clusterforest; buildLeafMap(); } void CodebookRandomForest::buildParentStructure ( DecisionNode *node, map & parentStructure ) { if ( node == NULL ) return; if ( node->left != NULL ) { parentStructure.insert ( pair ( node->left, node ) ); buildParentStructure ( node->left, parentStructure ); } if ( node->right != NULL ) { parentStructure.insert ( pair ( node->right, node ) ); buildParentStructure ( node->right, parentStructure ); } } void CodebookRandomForest::pruneForest () { map > index; clusterforest->indexDescendants( index ); map parentStructure; vector & trees = clusterforest->getForestNonConst(); for ( vector::const_iterator i = trees.begin(); i != trees.end(); i++ ) { DecisionTree *tree = *i; parentStructure.insert ( pair ( tree->getRoot(), NULL ) ); buildParentStructure ( tree->getRoot(), parentStructure ); } priority_queue< triplet > lastLevelInnerNodes; long leafs = 0; for ( map >::const_iterator k = index.begin(); k != index.end(); k++ ) { DecisionNode *node = k->first; if ( (!node->isLeaf()) && ((node->left->isLeaf()) || (node->right->isLeaf())) ) { double mi = node->distribution.entropy() - ( node->left->distribution.sum() * node->left->distribution.entropy() + node->right->distribution.sum() * node->right->distribution.entropy() ) / node->distribution.sum(); lastLevelInnerNodes.push ( triplet ( - mi, k->second.first, node ) ); } if ( node->isLeaf() ) leafs++; } set< DecisionNode * > deletedRoots; /********************************************* * EVIL Pruning method * *********************************************/ set deletedNodes; while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) ) { const triplet & nodemi = lastLevelInnerNodes.top(); //double current_mi = -nodemi.first; //fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi ); DecisionNode *node = nodemi.third; lastLevelInnerNodes.pop(); assert ( node != NULL ); DecisionNode *left = node->left; DecisionNode *right = node->right; //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left, // (long int)right ); if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) { // this is a tricky case...consider the subsequent contraction of // two childs of a node // After the first child is contracted, the node is added to lastLevelInnerNodes // If the second child is contracted, the node is still in the queue but // is now a leaf node. // A second problem exists if the parent node is contracted after the second // child but before the node. Therefore we introduced deletedNodes. continue; } #ifdef DEBUGPRUNING fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n", leafs, restrictedCodebookSize, current_mi ); #endif assert ( parentStructure.find(node) != parentStructure.end() ); DecisionNode *parent_node = parentStructure[node]; //fprintf (stderr, "parent: %ld\n", (long int)parent_node ); if ( parent_node == NULL ) { #ifdef DEBUGPRUNING fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n"); #endif DecisionNode *newParent = NULL; if ( (left->isLeaf()) && (right->isLeaf()) ) { //fprintf (stderr, "case (a)\n"); delete ( node->f ); node->f = NULL; delete left; delete right; deletedNodes.insert ( left ); deletedNodes.insert ( right ); node->left = NULL; node->right = NULL; newParent = node; leafs--; } else if ( left->isLeaf() ) { // case (b) left child is a leaf delete left; delete node; deletedNodes.insert ( node ); deletedNodes.insert ( left ); parentStructure[right] = parent_node; newParent = right; leafs--; } else if ( right->isLeaf() ) { // case (b) right child is a leaf delete right; delete node; deletedNodes.insert ( right ); deletedNodes.insert ( left ); parentStructure[left] = parent_node; newParent = left; leafs--; } else { fprintf (stderr, "UNKNOWN CASE !!\n"); exit(-1); } for ( vector::iterator i = trees.begin(); i != trees.end() ; i++ ) if ( (*i)->getRoot() == node ) (*i)->setRoot(newParent); continue; } long int parent_index = index[parent_node].first; double mi = 0.0; bool nodeIsLeft = ( parent_node->left == node ); DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left; if ( (left == NULL) || (right == NULL) ) fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1"); if ( (left->isLeaf()) && (right->isLeaf()) ) { /* ------------ case (a) left and right childs are leafs (p) (p) (n) (s) -> (n) (s) and add p to the last level nodes (l) (r) */ #ifdef DEBUGPRUNING fprintf (stderr, "case (a)\n"); #endif delete ( node->f ); node->f = NULL; delete left; deletedNodes.insert ( left ); delete right; deletedNodes.insert ( right ); node->left = NULL; node->right = NULL; leafs--; double ep = parent_node->distribution.entropy(); double en = node->distribution.entropy(); double es = sibling->distribution.entropy(); double pn = node->distribution.sum(); double ps = sibling->distribution.sum(); mi = ep - ( pn * en + ps * es ) / (pn+ps); #ifdef DEBUGPRUNING fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n", ep, en, es, pn, ps ); parent_node->distribution.store(cerr); node->distribution.store(cerr); sibling->distribution.store(cerr); fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi, -lastLevelInnerNodes.top().first); #endif lastLevelInnerNodes.push ( triplet ( - mi, parent_index, parent_node ) ); } else if ( left->isLeaf() ) { // --------------- case (b) left child is a leaf #ifdef DEBUGPRUNING fprintf (stderr, "case (b)\n"); #endif if ( nodeIsLeft ) parent_node->left = right; else parent_node->right = right; parentStructure[right] = parent_node; delete left; deletedNodes.insert ( left ); delete node; deletedNodes.insert ( node ); leafs--; } else if ( right->isLeaf() ) { // --------------- case (c) right child is a leaf #ifdef DEBUGPRUNING fprintf (stderr, "case (c)\n"); #endif if ( nodeIsLeft ) parent_node->left = left; else parent_node->right = left; delete right; deletedNodes.insert ( right ); delete node; deletedNodes.insert ( node ); parentStructure[left] = parent_node; leafs--; } else { fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2"); } } for ( vector::iterator i = trees.begin(); i != trees.end() ; ) { if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() ) { delete (*i); trees.erase ( i ); } else { i++; } } fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize ); } void CodebookRandomForest::buildLeafMap () { if ( restrictedCodebookSize > 0 ) { pruneForest (); } map > index; vector< pair > index_reverse; clusterforest->indexDescendants ( index ); leafMap.clear(); for ( map >::const_iterator i = index.begin(); i != index.end(); i++ ) { DecisionNode *node = i->first; int depth = i->second.second; long index = i->second.first; if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) ) index_reverse.push_back ( pair ( index, node ) ); } sort ( index_reverse.begin(), index_reverse.end() ); /************************************* Recover a kind of canonical node permutation **************************************/ for ( vector< pair >::const_iterator i = index_reverse.begin(); i != index_reverse.end(); i++ ) { DecisionNode *node = i->second; leafMap.insert ( pair ( node, leafMap.size() ) ); } fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() ); reinit ( leafMap.size() ); } void CodebookRandomForest::copy ( const Codebook *codebook ) { fthrow(Exception, "CodebookRandomForest::not yet implemented !\n"); } void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const { fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n"); } void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram, int & codebookEntry, double & weight, double & distance ) const { SparseVector votes; vote ( feature, votes ); for ( SparseVector::const_iterator i = votes.begin(); i != votes.end(); i++ ) { int index = i->first; double val = i->second; histogram[index] += val; if ( i == votes.begin() ) { codebookEntry = index; weight = val; } } distance = 0.0; } void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const { vector leafNodes; NICE::Vector *x = new NICE::Vector ( feature ); Example pe ( x ); clusterforest->getLeafNodes ( pe, leafNodes, maxDepth ); delete x; for ( vector::const_iterator j = leafNodes.begin(); j != leafNodes.end(); j++ ) { map::const_iterator k = leafMap.find ( *j ); assert ( k != leafMap.end() ); int leafindex = k->second; votes.insert ( votes.begin(), pair ( leafindex, 1.0 ) ); } } void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const { vector leafNodes; NICE::Vector *x = new NICE::Vector ( feature ); Example pe ( x ); clusterforest->getLeafNodes ( pe, leafNodes, maxDepth ); delete x; for ( vector::const_iterator j = leafNodes.begin(); j != leafNodes.end(); j++ ) { map::const_iterator k = leafMap.find ( *j ); DecisionNode *node = *j; assert ( k != leafMap.end() ); int leafindex = k->second; votes.insert ( votes.begin(), pair ( leafindex, 1.0 ) ); FullVector sDistribution ( node->distribution ); sDistribution.normalize(); if ( distribution.empty() ) distribution = sDistribution; else distribution.add ( sDistribution ); } distribution.normalize(); } void CodebookRandomForest::add ( const Codebook *codebook ) { fthrow ( Exception, "CodebookRandomForest::not yet implemented !"); } Codebook *CodebookRandomForest::clone () const { return (new CodebookRandomForest(maxDepth, restrictedCodebookSize)); } void CodebookRandomForest::clear () { if ( clusterforest != NULL ) clusterforest->clear(); Codebook::clear(); } void CodebookRandomForest::restore ( istream & is, int format ) { if(this->clusterforest == NULL) this->clusterforest = new FPCRandomForests (); Codebook::restore(is, format); int maxClassNo = 0; is >> maxClassNo; clusterforest->setMaxClassNo( maxClassNo ); clusterforest->restore ( is, format ); buildLeafMap(); } void CodebookRandomForest::store ( ostream & os, int format ) const { Codebook::store ( os, format ); os << endl; os << clusterforest->getMaxClassNo() << endl; clusterforest->store ( os, format ); os << endl; }