/**
 * @file DTBOblique.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014
 */
#include <iostream>
#include <cmath>
#include <algorithm>

#include "DTBOblique.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"

#include "core/vector/Algorithms.h"

using namespace OBJREC;

#define DEBUGTREE

using namespace std;
using namespace NICE;

DTBOblique::DTBOblique ( const Config *conf, string section )
{
    split_steps = conf->gI(section, "split_steps", 20 );
    max_depth = conf->gI(section, "max_depth", 10 );
    minimum_information_gain = conf->gD(section, "minimum_information_gain", 0.0000001 );
    minimum_entropy = conf->gD(section, "minimum_entropy", 0.00001 );
    use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
    min_examples = conf->gI(section, "min_examples", 50);
    save_indices = conf->gB(section, "save_indices", false);
    lambdaInit = conf->gD(section, "lambda_init", 0.5 );
    regularizationType = conf->gI(section, "regularization_type", 1 );
}

DTBOblique::~DTBOblique()
{
}

bool DTBOblique::entropyLeftRight (
        const FeatureValuesUnsorted & values,
        double threshold,
        double* stat_left,
        double* stat_right,
        double & entropy_left,
        double & entropy_right,
        double & count_left,
        double & count_right,
        int maxClassNo )
{
    count_left = 0;
    count_right = 0;
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        int classno = i->second;
        double value = i->first;
        if ( value < threshold )
        {
            stat_left[classno] += i->fourth;
            count_left += i->fourth;
        }
        else
        {
            stat_right[classno] += i->fourth;
            count_right += i->fourth;
        }
    }

    if ( (count_left == 0) || (count_right == 0) )
        return false;

    // weighted entropy, computed as H = log(n) - (1/n) * sum_j c_j*log(c_j),
    // which equals -sum_j (c_j/n)*log(c_j/n) for class counts c_j and total n
    entropy_left = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_left[j] != 0 )
            entropy_left -= stat_left[j] * log(stat_left[j]);
    entropy_left /= count_left;
    entropy_left += log(count_left);

    entropy_right = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_right[j] != 0 )
            entropy_right -= stat_right[j] * log(stat_right[j]);
    entropy_right /= count_right;
    entropy_right += log(count_right);

    return true;
}

/** refresh data matrix X and label vector y */
void DTBOblique::getDataAndLabel(
        const FeaturePool & fp,
        const Examples & examples,
        const std::vector<int> & examples_selection,
        NICE::Matrix & matX,
        NICE::Vector & vecY )
{
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    int amountParams = f->getParameterLength();
    int amountExamples = examples_selection.size();

    NICE::Matrix X(amountExamples, amountParams, 0.0 );
    NICE::Vector y(amountExamples, 0.0);

    int matIndex = 0;
    for ( vector<int>::const_iterator si = examples_selection.begin();
          si != examples_selection.end();
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
        const Example & ce = p.second;

        NICE::Vector pixelRepr = f->getFeatureVector( &ce );

        double label = p.first * ce.weight;
        pixelRepr *= ce.weight;

        y.set( matIndex, label );
        X.setRow( matIndex, pixelRepr );
        matIndex++;
    }

    matX = X;
    vecY = y;
}

void DTBOblique::regularizeDataMatrix(
        const NICE::Matrix & X,
        NICE::Matrix & XTXreg,
        const int regOption,
        const double lambda )
{
    XTXreg = X.transpose()*X;
    NICE::Matrix R;
    const int dim = X.cols();

    switch (regOption)
    {
        // identity matrix
        case 0:
            R.resize(dim,dim);
            R.setIdentity();
            R *= lambda;
            XTXreg += R;
            break;

        // differences operator, k=1
        case 1:
            R.resize(dim-1,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-1; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // difference operator, k=2
        case 2:
            R.resize(dim-2,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-2; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -2.0;
                R(r,r+2) =  1.0;
            }
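            // Note: here R holds the (dim-2) x dim second-order finite-difference
            // operator, so the penalty R^T R formed below discourages curvature in
            // the parameter vector beta. Together with the data term this yields
            // the Tikhonov-regularized normal equations
            //     (X^T X + lambda * R^T R) beta = X^T y,
            // which buildRecursive() solves further down.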
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // as in [Chen et al., 2012]
        case 3:
        {
            NICE::Vector q ( dim, (1.0-lambda) );
            q[0] = 1;
            NICE::Matrix Q;
            Q.tensorProduct(q,q);
            R.multiply(XTXreg,Q);
            for ( int r = 0; r < dim; r++ )
                R(r,r) = q[r] * XTXreg(r,r);
            XTXreg = R;
            break;
        }

        // no regularization
        default:
            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
                      << std::endl;
            break;
    }
}

/** recursive building method */
DecisionNode *DTBOblique::buildRecursive(
        const FeaturePool & fp,
        const Examples & examples,
        std::vector<int> & examples_selection,
        FullVector & distribution,
        double e,
        int maxClassNo,
        int depth,
        double lambdaCurrent )
{
#ifdef DEBUGTREE
    std::cerr << "DTBOblique: Examples: " << (int)examples_selection.size()
              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
#endif

    // initialize new node
    DecisionNode *node = new DecisionNode ();
    node->distribution = distribution;

    // stop criteria: max_depth, min_examples, min_entropy
    if (    ( e <= minimum_entropy )
         || ( (int)examples_selection.size() < min_examples )
         || ( depth > max_depth ) )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Stopping criteria applied!" << std::endl;
#endif
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    // variables
    double best_threshold = 0.0;
    double best_ig = -1.0;
    FeatureValuesUnsorted values;

    double *best_distribution_left  = new double [maxClassNo+1];
    double *best_distribution_right = new double [maxClassNo+1];
    double *distribution_left       = new double [maxClassNo+1];
    double *distribution_right      = new double [maxClassNo+1];
    double best_entropy_left  = 0.0;
    double best_entropy_right = 0.0;

    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    NICE::Vector best_beta = f->getParameterVector();

    // creating data matrix X and label vector y
    NICE::Matrix X, XTXr, G, temp;
    NICE::Vector y, beta;
    getDataAndLabel( fp, examples, examples_selection, X, y );

    // preparing the system of linear equations
    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
    if (regularizationType == 3)
    {
        G = NICE::invert(XTXr);
        temp = G * X.transpose();
    }
    else
    {
        choleskyDecomp(XTXr, G);
        choleskyInvert(G, XTXr);
        temp = XTXr * X.transpose();
    }

    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
    {
        // one-vs-all: transforming into a {-1,+1} problem
        NICE::Vector yCur ( y.size(), -1.0 );
        int idx = 0;
        bool hasExamples = false;
        for ( vector<int>::const_iterator si = examples_selection.begin();
              si != examples_selection.end();
              si++, idx++ )
        {
            const pair<int, Example> & p = examples[*si];
            if ( p.first == curClass )
            {
                yCur.set( idx, 1.0 );
                hasExamples = true;
            }
        }

        // is there a positive example for the current class in the current set?
        if (!hasExamples) continue;

        // solve the system of linear equations in a least squares manner
        beta.multiply(temp,yCur,false);

        // update the parameter vector of the convolutional feature
        f->setParameterVector( beta );

        // feature values
        values.clear();
        f->calcFeatureValues( examples, examples_selection, values);

        double minValue = (min_element ( values.begin(), values.end() ))->first;
        double maxValue = (max_element ( values.begin(), values.end() ))->first;

        if ( maxValue - minValue < 1e-7 )
            std::cerr << "DTBOblique: Difference between min and max of feature values too small!"
                      << std::endl;
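        // The exhaustive search below evaluates split_steps equidistant
        // thresholds t in [minValue, maxValue) and keeps the one maximizing the
        // information gain
        //     IG = H(parent) - p_l * H(left) - (1 - p_l) * H(right),
        // where p_l is the (weighted) fraction of examples going left. With
        // use_shannon_entropy enabled, IG is normalized to the symmetric
        // uncertainty 2*IG / (H(parent) + H(split)).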
        // get the best threshold by complete search
        for ( int i = 0; i < split_steps; i++ )
        {
            double threshold = (i * (maxValue - minValue ) / (double)split_steps)
                               + minValue;
            // preparations
            double el, er;
            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
                distribution_left[k]  = 0.0;
                distribution_right[k] = 0.0;
            }

            /** Test the current split */
            // Does another split make sense?
            double count_left;
            double count_right;
            if ( ! entropyLeftRight ( values, threshold,
                                      distribution_left, distribution_right,
                                      el, er, count_left, count_right, maxClassNo ) )
                continue;

            // information gain and entropy
            double pl = (count_left) / (count_left + count_right);
            double ig = e - pl*el - (1-pl)*er;

            if ( use_shannon_entropy )
            {
                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
                ig = 2*ig / ( e + esplit );
            }

            if ( ig > best_ig )
            {
                best_ig = ig;
                best_threshold = threshold;
                best_beta = beta;
                for ( int k = 0 ; k <= maxClassNo ; k++ )
                {
                    best_distribution_left[k]  = distribution_left[k];
                    best_distribution_right[k] = distribution_right[k];
                }
                best_entropy_left  = el;
                best_entropy_right = er;
            }
        }
    }

    // suppress strange behaviour for values near zero (8.88178e-16)
    if (best_entropy_left < 1.0e-10 ) best_entropy_left = 0.0;
    if (best_entropy_right < 1.0e-10 ) best_entropy_right = 0.0;

    // cleaning up
    delete [] distribution_left;
    delete [] distribution_right;

    // stop criterion: minimum information gain
    if ( best_ig < minimum_information_gain )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
#endif
        delete [] best_distribution_left;
        delete [] best_distribution_right;
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    /** Save the best split to the current node */
    f->setParameterVector( best_beta );
    values.clear();
    f->calcFeatureValues( examples, examples_selection, values);
    node->f = f->clone();
    node->threshold = best_threshold;

    /** Split examples according to the best split function */
    vector<int> examples_left;
    vector<int> examples_right;

    examples_left.reserve ( values.size() / 2 );
    examples_right.reserve ( values.size() / 2 );
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        double value = i->first;
        if ( value < best_threshold )
            examples_left.push_back ( i->third );
        else
            examples_right.push_back ( i->third );
    }

#ifdef DEBUGTREE
    node->f->store( std::cerr );
    std::cerr << std::endl;
    std::cerr << "DTBOblique: Information Gain: " << best_ig
              << ", Left Entropy: " << best_entropy_left
              << ", Right Entropy: " << best_entropy_right << std::endl;
#endif

    FullVector distribution_left_sparse ( distribution.size() );
    FullVector distribution_right_sparse ( distribution.size() );
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
        double l = best_distribution_left[k];
        double r = best_distribution_right[k];
        if ( l != 0 )
            distribution_left_sparse[k] = l;
        if ( r != 0 )
            distribution_right_sparse[k] = r;
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Split of Class " << k << " ("
                  << l << " <-> " << r << ") " << std::endl;
#endif
    }

    delete [] best_distribution_left;
    delete [] best_distribution_right;

    // update lambda by heuristic [Laptev/Buhmann, 2014]
    double lambdaLeft = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_left.size()),
            (2./f->getParameterLength()));
    double lambdaRight = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_right.size()),
            (2./f->getParameterLength()));

//#ifdef DEBUGTREE
//    std::cerr << "regularization parameter lambda left " << lambdaLeft
//              << " right " << lambdaRight << std::endl;
//#endif
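    // The heuristic above scales lambda for each child node by
    //     lambda_child = lambda_parent * (N_parent / N_child)^(2/d),
    // with d = f->getParameterLength(): smaller child sets receive stronger
    // regularization to counteract the higher risk of overfitting beta.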
    /** Recursion */
    // left child
    node->left = buildRecursive ( fp, examples, examples_left,
                                  distribution_left_sparse, best_entropy_left,
                                  maxClassNo, depth+1, lambdaLeft );
    // right child
    node->right = buildRecursive ( fp, examples, examples_right,
                                   distribution_right_sparse, best_entropy_right,
                                   maxClassNo, depth+1, lambdaRight );

    return node;
}

/** initial building method */
DecisionNode *DTBOblique::build ( const FeaturePool & fp,
                                  const Examples & examples,
                                  int maxClassNo )
{
    int index = 0;

    FullVector distribution ( maxClassNo+1 );
    vector<int> all;

    all.reserve ( examples.size() );
    for ( Examples::const_iterator j = examples.begin();
          j != examples.end();
          j++ )
    {
        int classno = j->first;
        distribution[classno] += j->second.weight;
        all.push_back ( index );
        index++;
    }

    // entropy of the root node, again via H = log(n) - (1/n) * sum_j c_j*log(c_j)
    double entropy = 0.0;
    double sum = 0.0;
    for ( int i = 0 ; i < distribution.size(); i++ )
    {
        double val = distribution[i];
        if ( val <= 0.0 ) continue;
        entropy -= val*log(val);
        sum += val;
    }
    entropy /= sum;
    entropy += log(sum);

    return buildRecursive ( fp, examples, all, distribution,
                            entropy, maxClassNo, 0, lambdaInit );
}
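
/* Usage sketch (a minimal example; the config file name "oblique.conf" and the
   section name "DTBOblique" are placeholders, not prescribed by this file):

       NICE::Config conf ( "oblique.conf" );
       DTBOblique builder ( &conf, "DTBOblique" );
       DecisionNode *root = builder.build ( fp, examples, maxClassNo );

   The FeaturePool fp must hold a ConvolutionFeature as its first entry, since
   getDataAndLabel() and buildRecursive() cast fp.begin()->second accordingly. */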