/** * @file CodebookRandomForest.cpp * @brief feature CodebookRandomForest * @author Erik Rodner * @date 02/15/2008 */ #include #include #include "CodebookRandomForest.h" using namespace OBJREC; using namespace std; using namespace NICE; #undef DEBUGPRUNING CodebookRandomForest::CodebookRandomForest( int maxDepth, int restrictedCodebookSize ) { this->clusterforest = NULL; this->maxDepth = maxDepth; this->restrictedCodebookSize = restrictedCodebookSize; } CodebookRandomForest::CodebookRandomForest( FPCRandomForests *clusterforest, int maxDepth, int restrictedCodebookSize ) { this->clusterforest = clusterforest; this->maxDepth = maxDepth; this->restrictedCodebookSize = restrictedCodebookSize; buildLeafMap(); } CodebookRandomForest::~CodebookRandomForest() { if ( clusterforest != NULL ) delete clusterforest; } void CodebookRandomForest::setClusterForest( FPCRandomForests *clusterforest) { if(this->clusterforest != NULL) delete this->clusterforest; this->clusterforest = clusterforest; buildLeafMap(); } void CodebookRandomForest::buildParentStructure ( DecisionNode *node, map & parentStructure ) { if ( node == NULL ) return; if ( node->left != NULL ) { parentStructure.insert ( pair ( node->left, node ) ); buildParentStructure ( node->left, parentStructure ); } if ( node->right != NULL ) { parentStructure.insert ( pair ( node->right, node ) ); buildParentStructure ( node->right, parentStructure ); } } void CodebookRandomForest::pruneForest () { map > index; clusterforest->indexDescendants( index ); map parentStructure; vector & trees = clusterforest->getForestNonConst(); for ( vector::const_iterator i = trees.begin(); i != trees.end(); i++ ) { DecisionTree *tree = *i; parentStructure.insert ( pair ( tree->getRoot(), NULL ) ); buildParentStructure ( tree->getRoot(), parentStructure ); } priority_queue< triplet > lastLevelInnerNodes; long leafs = 0; for ( map >::const_iterator k = index.begin(); k != index.end(); k++ ) { DecisionNode *node = k->first; if ( (!node->isLeaf()) && ((node->left->isLeaf()) || (node->right->isLeaf())) ) { double mi = node->distribution.entropy() - ( node->left->distribution.sum() * node->left->distribution.entropy() + node->right->distribution.sum() * node->right->distribution.entropy() ) / node->distribution.sum(); lastLevelInnerNodes.push ( triplet ( - mi, k->second.first, node ) ); } if ( node->isLeaf() ) leafs++; } set< DecisionNode * > deletedRoots; /********************************************* * EVIL Pruning method * *********************************************/ set deletedNodes; while ( (leafs > restrictedCodebookSize) && (lastLevelInnerNodes.size() > 0) ) { const triplet & nodemi = lastLevelInnerNodes.top(); #ifdef DEBUGPRUNING double current_mi = -nodemi.first; fprintf (stderr, "CodebookRandomForest: %d contract leaf with mutual information %f\n", leafs, current_mi ); #endif DecisionNode *node = nodemi.third; lastLevelInnerNodes.pop(); assert ( node != NULL ); DecisionNode *left = node->left; DecisionNode *right = node->right; //fprintf (stderr, "node: %ld, left: %ld, right: %ld\n", (long int)node, (long int)left, // (long int)right ); if ( (deletedNodes.find(node) != deletedNodes.end() ) || node->isLeaf() ) { // this is a tricky case...consider the subsequent contraction of // two childs of a node // After the first child is contracted, the node is added to lastLevelInnerNodes // If the second child is contracted, the node is still in the queue but // is now a leaf node. // A second problem exists if the parent node is contracted after the second // child but before the node. Therefore we introduced deletedNodes. continue; } #ifdef DEBUGPRUNING fprintf (stderr, "CodebookRandomForest: nodes remaining %ld (min:%d); current mi %f\n", leafs, restrictedCodebookSize, current_mi ); #endif assert ( parentStructure.find(node) != parentStructure.end() ); DecisionNode *parent_node = parentStructure[node]; //fprintf (stderr, "parent: %ld\n", (long int)parent_node ); if ( parent_node == NULL ) { #ifdef DEBUGPRUNING fprintf (stderr, "CodebookRandomForest: Deleting the root node !!!\n"); #endif DecisionNode *newParent = NULL; if ( (left->isLeaf()) && (right->isLeaf()) ) { //fprintf (stderr, "case (a)\n"); delete ( node->f ); node->f = NULL; delete left; delete right; deletedNodes.insert ( left ); deletedNodes.insert ( right ); node->left = NULL; node->right = NULL; newParent = node; leafs--; } else if ( left->isLeaf() ) { // case (b) left child is a leaf delete left; delete node; deletedNodes.insert ( node ); deletedNodes.insert ( left ); parentStructure[right] = parent_node; newParent = right; leafs--; } else if ( right->isLeaf() ) { // case (b) right child is a leaf delete right; delete node; deletedNodes.insert ( right ); deletedNodes.insert ( left ); parentStructure[left] = parent_node; newParent = left; leafs--; } else { fprintf (stderr, "UNKNOWN CASE !!\n"); exit(-1); } for ( vector::iterator i = trees.begin(); i != trees.end() ; i++ ) if ( (*i)->getRoot() == node ) (*i)->setRoot(newParent); continue; } long int parent_index = index[parent_node].first; double mi = 0.0; bool nodeIsLeft = ( parent_node->left == node ); DecisionNode *sibling = nodeIsLeft ? parent_node->right : parent_node->left; if ( (left == NULL) || (right == NULL) ) fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=1"); if ( (left->isLeaf()) && (right->isLeaf()) ) { /* ------------ case (a) left and right childs are leafs (p) (p) (n) (s) -> (n) (s) and add p to the last level nodes (l) (r) */ #ifdef DEBUGPRUNING fprintf (stderr, "case (a)\n"); #endif delete ( node->f ); node->f = NULL; delete left; deletedNodes.insert ( left ); delete right; deletedNodes.insert ( right ); node->left = NULL; node->right = NULL; leafs--; double ep = parent_node->distribution.entropy(); double en = node->distribution.entropy(); double es = sibling->distribution.entropy(); double pn = node->distribution.sum(); double ps = sibling->distribution.sum(); mi = ep - ( pn * en + ps * es ) / (pn+ps); #ifdef DEBUGPRUNING fprintf (stderr, "ep %f en %f es %f pn %f ps %f\n", ep, en, es, pn, ps ); parent_node->distribution.store(cerr); node->distribution.store(cerr); sibling->distribution.store(cerr); fprintf (stderr, "add new pre-leaf %ld: mi %lf top %lf\n", (long int)parent_node, mi, -lastLevelInnerNodes.top().first); #endif lastLevelInnerNodes.push ( triplet ( - mi, parent_index, parent_node ) ); } else if ( left->isLeaf() ) { // --------------- case (b) left child is a leaf #ifdef DEBUGPRUNING fprintf (stderr, "case (b)\n"); #endif if ( nodeIsLeft ) parent_node->left = right; else parent_node->right = right; parentStructure[right] = parent_node; delete left; deletedNodes.insert ( left ); delete node; deletedNodes.insert ( node ); leafs--; } else if ( right->isLeaf() ) { // --------------- case (c) right child is a leaf #ifdef DEBUGPRUNING fprintf (stderr, "case (c)\n"); #endif if ( nodeIsLeft ) parent_node->left = left; else parent_node->right = left; delete right; deletedNodes.insert ( right ); delete node; deletedNodes.insert ( node ); parentStructure[left] = parent_node; leafs--; } else { fthrow(Exception, "There is a bug in this code: CodebookRandomForest (good luck!) bugid=2"); } } for ( vector::iterator i = trees.begin(); i != trees.end() ; ) { if ( deletedRoots.find((*i)->getRoot()) != deletedRoots.end() ) { delete (*i); trees.erase ( i ); } else { i++; } } #ifdef DEBUGPRUNING fprintf (stderr, "Final number of leafs: %ld (%d)\n", leafs, restrictedCodebookSize ); #endif } void CodebookRandomForest::buildLeafMap () { if ( restrictedCodebookSize > 0 ) { pruneForest (); } map > index; vector< pair > index_reverse; clusterforest->indexDescendants ( index ); leafMap.clear(); for ( map >::const_iterator i = index.begin(); i != index.end(); i++ ) { DecisionNode *node = i->first; int depth = i->second.second; long index = i->second.first; if ( ( (node->right == NULL) && (node->left == NULL) && (depth <= maxDepth) ) || ( depth == maxDepth ) ) index_reverse.push_back ( pair ( index, node ) ); } sort ( index_reverse.begin(), index_reverse.end() ); /************************************* Recover a kind of canonical node permutation **************************************/ for ( vector< pair >::const_iterator i = index_reverse.begin(); i != index_reverse.end(); i++ ) { DecisionNode *node = i->second; leafMap.insert ( pair ( node, leafMap.size() ) ); } #ifdef DEBUGPRUNING fprintf (stderr, "CSRandomForest::buildLeafMap: dimension = %d\n", (int)leafMap.size() ); #endif reinit ( leafMap.size() ); } void CodebookRandomForest::copy ( const Codebook *codebook ) { fthrow(Exception, "CodebookRandomForest::not yet implemented !\n"); } void CodebookRandomForest::vote ( const NICE::Vector & feature, int & codebookEntry, double & weight, double & distance ) const { fthrow(Exception, "CodebookRandomForest::not supported, please use multi voting feature\n"); } void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::Vector & histogram, int & codebookEntry, double & weight, double & distance ) const { SparseVector votes; vote ( feature, votes ); for ( SparseVector::const_iterator i = votes.begin(); i != votes.end(); i++ ) { int index = i->first; double val = i->second; histogram[index] += val; if ( i == votes.begin() ) { codebookEntry = index; weight = val; } } distance = 0.0; } void CodebookRandomForest::vote ( const NICE::Vector & feature, NICE::SparseVector & votes ) const { vector leafNodes; NICE::Vector *x = new NICE::Vector ( feature ); Example pe ( x ); clusterforest->getLeafNodes ( pe, leafNodes, maxDepth ); delete x; for ( vector::const_iterator j = leafNodes.begin(); j != leafNodes.end(); j++ ) { map::const_iterator k = leafMap.find ( *j ); assert ( k != leafMap.end() ); int leafindex = k->second; votes.insert ( votes.begin(), pair ( leafindex, 1.0 ) ); } } void CodebookRandomForest::voteAndClassify ( const NICE::Vector & feature, NICE::SparseVector & votes, FullVector & distribution ) const { vector leafNodes; NICE::Vector *x = new NICE::Vector ( feature ); Example pe ( x ); clusterforest->getLeafNodes ( pe, leafNodes, maxDepth ); delete x; for ( vector::const_iterator j = leafNodes.begin(); j != leafNodes.end(); j++ ) { map::const_iterator k = leafMap.find ( *j ); DecisionNode *node = *j; assert ( k != leafMap.end() ); int leafindex = k->second; votes.insert ( votes.begin(), pair ( leafindex, 1.0 ) ); FullVector sDistribution ( node->distribution ); sDistribution.normalize(); if ( distribution.empty() ) distribution = sDistribution; else distribution.add ( sDistribution ); } distribution.normalize(); } void CodebookRandomForest::voteAndClassify(const Vector &feature, SparseVector &votes, Vector &distribution) const { vector leafNodes; NICE::Vector *x = new NICE::Vector ( feature ); Example pe ( x ); clusterforest->getLeafNodes ( pe, leafNodes, maxDepth ); delete x; for ( vector::const_iterator j = leafNodes.begin(); j != leafNodes.end(); j++ ) { map::const_iterator k = leafMap.find ( *j ); DecisionNode *node = *j; assert ( k != leafMap.end() ); int leafindex = k->second; votes.insert ( votes.begin(), pair ( leafindex, 1.0 ) ); FullVector sDistribution ( node->distribution ); sDistribution.normalize(); if ( distribution.size() == 0 ) { distribution.resize(sDistribution.size() ); distribution.set(0.0f); } for(int i = 0; i< sDistribution.size(); i++) distribution[i] += sDistribution[i]; } distribution.normalizeL2(); } void CodebookRandomForest::add ( const Codebook *codebook ) { fthrow ( Exception, "CodebookRandomForest::not yet implemented !"); } Codebook *CodebookRandomForest::clone () const { return (new CodebookRandomForest(maxDepth, restrictedCodebookSize)); } void CodebookRandomForest::clear () { if ( clusterforest != NULL ) clusterforest->clear(); Codebook::clear(); } void CodebookRandomForest::restore ( istream & is, int format ) { if (is.good()) { std::string tmp; is >> tmp; //class name if ( ! this->isStartTag( tmp, "CodebookRandomForest" ) ) { std::cerr << " WARNING - attempt to restore CodebookRandomForest, but start flag " << tmp << " does not match! Aborting... " << std::endl; throw; } if(this->clusterforest == NULL) this->clusterforest = new FPCRandomForests (); bool b_endOfBlock = false; while ( !b_endOfBlock ) { is >> tmp; // start of block if ( this->isEndTag( tmp, "CodebookRandomForest" ) || is.eof() ) { b_endOfBlock = true; continue; } tmp = this->removeStartTag ( tmp ); if ( tmp.compare("baseclass") == 0 ) { Codebook::restore(is, format); is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("maxDepth") == 0 ) { is >> maxDepth; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("restrictedCodebookSize") == 0 ) { is >> restrictedCodebookSize; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("maxClassNo") == 0 ) { int maxClassNo = 0; is >> maxClassNo; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); if(clusterforest != NULL) clusterforest->setMaxClassNo(maxClassNo); } else if ( tmp.compare("clusterforest") == 0 ) { clusterforest->restore ( is, format ); is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } } buildLeafMap(); } } void CodebookRandomForest::store ( ostream & os, int format ) const { if (os.good()) { // show starting point os << this->createStartTag( "CodebookRandomForest" ) << std::endl; os.precision (numeric_limits::digits10 + 1); os << this->createStartTag( "baseclass" ) << std::endl; Codebook::store ( os, format ); os << this->createEndTag( "baseclass" ) << std::endl; os << this->createStartTag( "maxDepth" ) << std::endl; os << maxDepth << std::endl; os << this->createEndTag( "maxDepth" ) << std::endl; os << this->createStartTag( "restrictedCodebookSize" ) << std::endl; os << restrictedCodebookSize << std::endl; os << this->createEndTag( "restrictedCodebookSize" ) << std::endl; os << this->createStartTag( "maxClassNo" ) << std::endl; os << clusterforest->getMaxClassNo() << endl; os << this->createEndTag( "maxClassNo" ) << std::endl; os << this->createStartTag( "clusterforest" ) << std::endl; clusterforest->store ( os, format ); os << this->createEndTag( "clusterforest" ) << std::endl; /* Codebook::store ( os, format ); os << maxDepth << endl; os << restrictedCodebookSize << endl; os << clusterforest->getMaxClassNo() << endl; clusterforest->store ( os, format ); os << endl; */ // done os << this->createEndTag( "CodebookRandomForest" ) << std::endl; } }