@@ -11,6 +11,8 @@
 #include "DTBRandomOblique.h"
 #include "vislearning/features/fpfeatures/ConvolutionFeature.h"
 
+#include "core/vector/Algorithms.h"
+
 using namespace OBJREC;
 
 #define DEBUGTREE
@@ -22,17 +24,17 @@ using namespace NICE;
 
 DTBRandomOblique::DTBRandomOblique ( const Config *conf, string section )
 {
-  random_split_tests = conf->gI(section, "random_split_tests", 10 );
-  random_features = conf->gI(section, "random_features", 500 );
-  max_depth = conf->gI(section, "max_depth", 10 );
-  minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
-  minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
-  use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
-  min_examples = conf->gI(section, "min_examples", 50);
-  save_indices = conf->gB(section, "save_indices", false);
-
-  if ( conf->gB(section, "start_random_generator", false ) )
-    srand(time(NULL));
+    random_split_tests = conf->gI(section, "random_split_tests", 10 );
+    random_features = conf->gI(section, "random_features", 500 );
+    max_depth = conf->gI(section, "max_depth", 10 );
+    minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
+    minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
+    use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
+    min_examples = conf->gI(section, "min_examples", 50);
+    save_indices = conf->gB(section, "save_indices", false);
+
+    if ( conf->gB(section, "start_random_generator", false ) )
+        srand(time(NULL));
 }
 
 DTBRandomOblique::~DTBRandomOblique()
@@ -40,62 +42,103 @@ DTBRandomOblique::~DTBRandomOblique()
 
 }
 
-bool DTBRandomOblique::entropyLeftRight ( const FeatureValuesUnsorted & values,
-                                          double threshold,
-                                          double* stat_left,
-                                          double* stat_right,
-                                          double & entropy_left,
-                                          double & entropy_right,
-                                          double & count_left,
-                                          double & count_right,
-                                          int maxClassNo )
+bool DTBRandomOblique::entropyLeftRight (
+        const FeatureValuesUnsorted & values,
+        double threshold,
+        double* stat_left,
+        double* stat_right,
+        double & entropy_left,
+        double & entropy_right,
+        double & count_left,
+        double & count_right,
+        int maxClassNo )
 {
-  count_left = 0;
-  count_right = 0;
-  for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
-  {
-    int classno = i->second;
-    double value = i->first;
-    if ( value < threshold ) {
-      stat_left[classno] += i->fourth;
-      count_left+=i->fourth;
+    count_left = 0;
+    count_right = 0;
+    for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
+    {
+        int classno = i->second;
+        double value = i->first;
+        if ( value < threshold ) {
+            stat_left[classno] += i->fourth;
+            count_left+=i->fourth;
+        }
+        else
+        {
+            stat_right[classno] += i->fourth;
+            count_right+=i->fourth;
+        }
     }
-    else
+
+    if ( (count_left == 0) || (count_right == 0) )
+        return false;
+
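+    // per-branch entropy -sum_j p_j*log(p_j) with p_j = stat[j]/count,
+    // computed as log(count) - (1/count) * sum_j stat[j]*log(stat[j])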
+    entropy_left = 0.0;
+    for ( int j = 0 ; j <= maxClassNo ; j++ )
+        if ( stat_left[j] != 0 )
+            entropy_left -= stat_left[j] * log(stat_left[j]);
+    entropy_left /= count_left;
+    entropy_left += log(count_left);
+
+    entropy_right = 0.0;
+    for ( int j = 0 ; j <= maxClassNo ; j++ )
+        if ( stat_right[j] != 0 )
+            entropy_right -= stat_right[j] * log(stat_right[j]);
+    entropy_right /= count_right;
+    entropy_right += log (count_right);
+
+    return true;
+}
+
+
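+/** collect the feature vectors of the selected examples as rows of a
+ *  data matrix X and their class labels in a response vector y */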
+void DTBRandomOblique::getDataAndLabel(
+        const FeaturePool &fp,
+        const Examples &examples,
+        const std::vector<int> &examples_selection,
+        NICE::Matrix & matX,
+        NICE::Vector & vecY )
+{
+    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
+    int amountParams = f->getParameterLength();
+    int amountExamples = examples_selection.size();
+
+    NICE::Matrix X(amountExamples, amountParams, 0.0);
+    NICE::Vector y(amountExamples, 0.0);
+
+    int matIndex = 0;
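+    // fill X row by row with the examples' pixel representations, y with their class labels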
+    for ( vector<int>::const_iterator si = examples_selection.begin();
+          si != examples_selection.end();
+          si++ )
     {
-      stat_right[classno] += i->fourth;
-      count_right+=i->fourth;
+        const pair<int, Example> & p = examples[*si];
+        int classno = p.first;
+        const Example & ce = p.second;
+
+        NICE::Vector pixelRepr = f->getFeatureVector( &ce );
+        X.setRow(matIndex,pixelRepr);
+        y.set(matIndex,(double)classno);
+
+        matIndex++;
     }
-  }
-
-  if ( (count_left == 0) || (count_right == 0) )
-    return false;
-
-  entropy_left = 0.0;
-  for ( int j = 0 ; j <= maxClassNo ; j++ )
-    if ( stat_left[j] != 0 )
-      entropy_left -= stat_left[j] * log(stat_left[j]);
-  entropy_left /= count_left;
-  entropy_left += log(count_left);
-
-  entropy_right = 0.0;
-  for ( int j = 0 ; j <= maxClassNo ; j++ )
-    if ( stat_right[j] != 0 )
-      entropy_right -= stat_right[j] * log(stat_right[j]);
-  entropy_right /= count_right;
-  entropy_right += log (count_right);
-
-  return true;
+
+    matX = X;
+    vecY = y;
 }
 
 
 DecisionNode *DTBRandomOblique::buildRecursive(
-    const FeaturePool & fp,
-    const Examples & examples,
-    std::vector<int> & examples_selection,
-    FullVector & distribution,
-    double e,
-    int maxClassNo,
-    int depth)
+        const FeaturePool & fp,
+        const Examples & examples,
+        std::vector<int> & examples_selection,
+        FullVector & distribution,
+        double e,
+        int maxClassNo,
+        int depth)
 {
 
 #ifdef DEBUGTREE
@@ -103,223 +146,239 @@ DecisionNode *DTBRandomOblique::buildRecursive(
             << " (depth " << (int)depth << ")" << std::endl;
 #endif
 
-
-  DecisionNode *node = new DecisionNode ();
-  node->distribution = distribution;
+
+    DecisionNode *node = new DecisionNode ();
+    node->distribution = distribution;
 
-
-  if ( depth > max_depth
-    || (int)examples_selection.size() < min_examples
-    || ( (e <= minimum_entropy) && (e != 0.0) ) )
+
+    if ( depth > max_depth
+        || (int)examples_selection.size() < min_examples
+        || ( (e <= minimum_entropy) && (e != 0.0) ) )
 
-  {
+    {
 #ifdef DEBUGTREE
-    std::cerr << "DTBRandomOblique: Stopping criteria applied!" << std::endl;
+        std::cerr << "DTBRandomOblique: Stopping criteria applied!" << std::endl;
 #endif
-    node->trainExamplesIndices = examples_selection;
-    return node;
-  }
-
-  Feature *best_feature = NULL;
-  double best_threshold = 0.0;
-  double best_ig = -1.0;
-  FeatureValuesUnsorted values;
-  double *best_distribution_left = new double [maxClassNo+1];
-  double *best_distribution_right = new double [maxClassNo+1];
-  double *distribution_left = new double [maxClassNo+1];
-  double *distribution_right = new double [maxClassNo+1];
-  double best_entropy_left = 0.0;
-  double best_entropy_right = 0.0;
-
-
-  for ( int k = 0 ; k < random_features ; k++ )
-  {
-
+        node->trainExamplesIndices = examples_selection;
+        return node;
+    }
+
+
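+    // least squares fit of the linear model X * params = y via the normal
+    // equations: params = (X^T X)^{-1} X^T y; the L2-normalized solution
+    // serves as projection direction for the oblique splits below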
+    NICE::Matrix X;
+    NICE::Vector y;
+    getDataAndLabel(fp, examples, examples_selection, X, y);
+    NICE::Matrix XTX = X.transpose()*X;
+    XTX = NICE::invert(XTX);
+    NICE::Matrix temp = XTX * X.transpose();
+    NICE::Vector params;
+    params.multiply(temp,y,false);
+    params.normalizeL2();
+
+    Feature *best_feature = NULL;
+    double best_threshold = 0.0;
+    double best_ig = -1.0;
+    FeatureValuesUnsorted values;
+    double *best_distribution_left = new double [maxClassNo+1];
+    double *best_distribution_right = new double [maxClassNo+1];
+    double *distribution_left = new double [maxClassNo+1];
+    double *distribution_right = new double [maxClassNo+1];
+    double best_entropy_left = 0.0;
+    double best_entropy_right = 0.0;
+
+
+    for ( int k = 0 ; k < random_features ; k++ )
+    {
+
 #ifdef DETAILTREE
-    std::cerr << "Calculating random parameter vector #" << k << std::endl;
+        std::cerr << "Calculating random parameter vector #" << k << std::endl;
 #endif
-    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
+        ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
 
-    Vector param ( f->getParameterLength(), 0.0 );
-    for ( NICE::Vector::iterator it = param.begin();
-          it != param.end(); ++it )
-      *it = ( double ) rand() / ( double ) RAND_MAX;
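+        // every iteration now reuses the same least squares direction;
+        // randomness only enters through the thresholds sampled below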
+        f->setParameterVector( params );
 
-    f->setParameterVector( param );
-
-
-    values.clear();
-    f->calcFeatureValues( examples, examples_selection, values);
+
+        values.clear();
+        f->calcFeatureValues( examples, examples_selection, values);
 
-    double minValue = (min_element ( values.begin(), values.end() ))->first;
-    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+        double minValue = (min_element ( values.begin(), values.end() ))->first;
+        double maxValue = (max_element ( values.begin(), values.end() ))->first;
 
-    if ( maxValue - minValue < 1e-7 ) continue;
+        if ( maxValue - minValue < 1e-7 ) continue;
 
-
-    for ( int i = 0; i < random_split_tests; i++ )
-    {
-      double threshold = rand() * (maxValue - minValue ) / RAND_MAX + minValue;
+
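+        // sample random_split_tests thresholds uniformly from [minValue, maxValue]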
+        for ( int i = 0; i < random_split_tests; i++ )
+        {
+            double threshold = rand() * (maxValue - minValue ) / RAND_MAX + minValue;
 #ifdef DETAILTREE
-      std::cerr << "Testing split #" << i << " for vector #" << k
-        << ": t=" << threshold << std::endl;
+            std::cerr << "Testing split #" << i << " for vector #" << k
+                << ": t=" << threshold << std::endl;
 #endif
 
-
-      double el, er;
-      for ( int k = 0 ; k <= maxClassNo ; k++ )
-      {
-        distribution_left[k] = 0;
-        distribution_right[k] = 0;
-      }
-
-
-
-      double count_left;
-      double count_right;
-      if ( ! entropyLeftRight ( values, threshold,
-             distribution_left, distribution_right,
-             el, er, count_left, count_right, maxClassNo ) )
-        continue;
-
-
-      double pl = (count_left) / (count_left + count_right);
-      double ig = e - pl*el - (1-pl)*er;
-
-      if ( use_shannon_entropy )
-      {
-        double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-        ig = 2*ig / ( e + esplit );
-      }
-
-      if ( ig > best_ig )
-      {
-        best_ig = ig;
-        best_threshold = threshold;
-
-        best_feature = f;
-        for ( int k = 0 ; k <= maxClassNo ; k++ )
-        {
-          best_distribution_left[k] = distribution_left[k];
-          best_distribution_right[k] = distribution_right[k];
+
+            double el, er;
+            for ( int k = 0 ; k <= maxClassNo ; k++ )
+            {
+                distribution_left[k] = 0;
+                distribution_right[k] = 0;
+            }
+
+
+
+            double count_left;
+            double count_right;
+            if ( ! entropyLeftRight ( values, threshold,
+                    distribution_left, distribution_right,
+                    el, er, count_left, count_right, maxClassNo ) )
+                continue;
+
+
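+            // information gain of the split; with use_shannon_entropy the
+            // gain is normalized by node and split entropy (gain ratio style)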
+            double pl = (count_left) / (count_left + count_right);
+            double ig = e - pl*el - (1-pl)*er;
+
+            if ( use_shannon_entropy )
+            {
+                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+                ig = 2*ig / ( e + esplit );
+            }
+
+            if ( ig > best_ig )
+            {
+                best_ig = ig;
+                best_threshold = threshold;
+
+                best_feature = f;
+                for ( int k = 0 ; k <= maxClassNo ; k++ )
+                {
+                    best_distribution_left[k] = distribution_left[k];
+                    best_distribution_right[k] = distribution_right[k];
+                }
+                best_entropy_left = el;
+                best_entropy_right = er;
+            }
         }
-        best_entropy_left = el;
-        best_entropy_right = er;
-      }
     }
-  }
 
-
-  delete [] distribution_left;
-  delete [] distribution_right;
+
+    delete [] distribution_left;
+    delete [] distribution_right;
 
-
-  if ( best_ig < minimum_information_gain )
-  {
+
+    if ( best_ig < minimum_information_gain )
+    {
 #ifdef DEBUGTREE
-    std::cerr << "DTBRandomOblique: Minimum information gain reached!" << std::endl;
+        std::cerr << "DTBRandomOblique: Minimum information gain reached!" << std::endl;
 #endif
-    delete [] best_distribution_left;
-    delete [] best_distribution_right;
-    node->trainExamplesIndices = examples_selection;
-    return node;
-  }
-
-
-  node->f = best_feature->clone();
-  node->threshold = best_threshold;
-
-
-  vector<int> best_examples_left;
-  vector<int> best_examples_right;
-  values.clear();
-  best_feature->calcFeatureValues ( examples, examples_selection, values );
-
-  best_examples_left.reserve ( values.size() / 2 );
-  best_examples_right.reserve ( values.size() / 2 );
-  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
-        i != values.end(); i++ )
-  {
-    double value = i->first;
-    if ( value < best_threshold )
-      best_examples_left.push_back ( i->third );
-    else
-      best_examples_right.push_back ( i->third );
-  }
+        delete [] best_distribution_left;
+        delete [] best_distribution_right;
+        node->trainExamplesIndices = examples_selection;
+        return node;
+    }
+
+
+    node->f = best_feature->clone();
+    node->threshold = best_threshold;
+
+
+    vector<int> best_examples_left;
+    vector<int> best_examples_right;
+    values.clear();
+    best_feature->calcFeatureValues ( examples, examples_selection, values );
+
+    best_examples_left.reserve ( values.size() / 2 );
+    best_examples_right.reserve ( values.size() / 2 );
+    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
+          i != values.end(); i++ )
+    {
+        double value = i->first;
+        if ( value < best_threshold )
+            best_examples_left.push_back ( i->third );
+        else
+            best_examples_right.push_back ( i->third );
+    }
 
 #ifdef DEBUGTREE
-  node->f->store( std::cerr );
-  std::cerr << std::endl;
-  std::cerr << "mutual information / shannon entropy " << best_ig << " entropy "
-    << e << " left entropy " << best_entropy_left << " right entropy "
-    << best_entropy_right << std::endl;
+    node->f->store( std::cerr );
+    std::cerr << std::endl;
+    std::cerr << "mutual information / shannon entropy " << best_ig << " entropy "
+        << e << " left entropy " << best_entropy_left << " right entropy "
+        << best_entropy_right << std::endl;
 #endif
 
-  FullVector best_distribution_left_sparse ( distribution.size() );
-  FullVector best_distribution_right_sparse ( distribution.size() );
-  for ( int k = 0 ; k <= maxClassNo ; k++ )
-  {
-    double l = best_distribution_left[k];
-    double r = best_distribution_right[k];
-    if ( l != 0 )
-      best_distribution_left_sparse[k] = l;
-    if ( r != 0 )
-      best_distribution_right_sparse[k] = r;
-#ifdef DEBUGTREE
-    if ( (l>0)||(r>0) )
+    FullVector best_distribution_left_sparse ( distribution.size() );
+    FullVector best_distribution_right_sparse ( distribution.size() );
+    for ( int k = 0 ; k <= maxClassNo ; k++ )
     {
-      std::cerr << "DTBRandomOblique: split of class " << k << " ("
-        << l << " <-> " << r << ") " << std::endl;
-    }
+        double l = best_distribution_left[k];
+        double r = best_distribution_right[k];
+        if ( l != 0 )
+            best_distribution_left_sparse[k] = l;
+        if ( r != 0 )
+            best_distribution_right_sparse[k] = r;
+#ifdef DEBUGTREE
+        if ( (l>0)||(r>0) )
+        {
+            std::cerr << "DTBRandomOblique: split of class " << k << " ("
+                << l << " <-> " << r << ") " << std::endl;
+        }
 #endif
-  }
+    }
 
-  delete [] best_distribution_left;
-  delete [] best_distribution_right;
+    delete [] best_distribution_left;
+    delete [] best_distribution_right;
 
-
-
-  node->left = buildRecursive ( fp, examples, best_examples_left,
-    best_distribution_left_sparse, best_entropy_left, maxClassNo, depth+1 );
-
-  node->right = buildRecursive ( fp, examples, best_examples_right,
-    best_distribution_right_sparse, best_entropy_right, maxClassNo, depth+1 );
+
+
+    node->left = buildRecursive ( fp, examples, best_examples_left,
+        best_distribution_left_sparse, best_entropy_left, maxClassNo, depth+1 );
+
+    node->right = buildRecursive ( fp, examples, best_examples_right,
+        best_distribution_right_sparse, best_entropy_right, maxClassNo, depth+1 );
 
-  return node;
+    return node;
 }
 
 
 DecisionNode *DTBRandomOblique::build ( const FeaturePool & fp,
-                                        const Examples & examples,
-                                        int maxClassNo )
+        const Examples & examples,
+        int maxClassNo )
 {
-  int index = 0;
-
-  FullVector distribution ( maxClassNo+1 );
-  vector<int> all;
-
-  all.reserve ( examples.size() );
-  for ( Examples::const_iterator j = examples.begin();
-        j != examples.end(); j++ )
-  {
-    int classno = j->first;
-    distribution[classno] += j->second.weight;
-
-    all.push_back ( index );
-    index++;
-  }
-
-  double entropy = 0.0;
-  double sum = 0.0;
-  for ( int i = 0 ; i < distribution.size(); i++ )
-  {
-    double val = distribution[i];
-    if ( val <= 0.0 ) continue;
-    entropy -= val*log(val);
-    sum += val;
-  }
-  entropy /= sum;
-  entropy += log(sum);
-
-  return buildRecursive ( fp, examples, all, distribution, entropy, maxClassNo, 0 );
+    int index = 0;
+
+    FullVector distribution ( maxClassNo+1 );
+    vector<int> all;
+
+    all.reserve ( examples.size() );
+    for ( Examples::const_iterator j = examples.begin();
+          j != examples.end(); j++ )
+    {
+        int classno = j->first;
+        distribution[classno] += j->second.weight;
+
+        all.push_back ( index );
+        index++;
+    }
+
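+    // entropy of the weighted class histogram, evaluated as
+    // log(sum) - (1/sum) * sum_i d_i*log(d_i)  ( = -sum_i p_i*log(p_i) )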
+    double entropy = 0.0;
+    double sum = 0.0;
+    for ( int i = 0 ; i < distribution.size(); i++ )
+    {
+        double val = distribution[i];
+        if ( val <= 0.0 ) continue;
+        entropy -= val*log(val);
+        sum += val;
+    }
+    entropy /= sum;
+    entropy += log(sum);
+
+    return buildRecursive ( fp, examples, all, distribution, entropy, maxClassNo, 0 );
 }