|
@@ -202,6 +202,74 @@ void DTBOblique::regularizeDataMatrix(
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+void DTBOblique::findBestSplitThreshold (
|
|
|
+ FeatureValuesUnsorted &values,
|
|
|
+ SplitInfo &bestSplitInfo,
|
|
|
+ const NICE::Vector &beta,
|
|
|
+ const double &e,
|
|
|
+ const int &maxClassNo )
|
|
|
+{
|
|
|
+ double *distribution_left = new double [maxClassNo+1];
|
|
|
+ double *distribution_right = new double [maxClassNo+1];
|
|
|
+ double minValue = (min_element ( values.begin(), values.end() ))->first;
|
|
|
+ double maxValue = (max_element ( values.begin(), values.end() ))->first;
|
|
|
+
|
|
|
+ if ( maxValue - minValue < 1e-7 )
|
|
|
+        std::cerr << "DTBOblique: Difference between min and max of feature values too small!" << std::endl;
|
|
|
+
|
|
|
+ // get best thresholds using complete search
|
|
|
+ for ( int i = 0; i < splitSteps; i++ )
|
|
|
+ {
|
|
|
+ double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
|
|
|
+ + minValue;
|
|
|
+ // preparations
|
|
|
+ double el, er;
|
|
|
+ for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
+ {
|
|
|
+ distribution_left[k] = 0.0;
|
|
|
+ distribution_right[k] = 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /** Test the current split */
|
|
|
+ // Does another split make sense?
|
|
|
+ double count_left;
|
|
|
+ double count_right;
|
|
|
+ if ( ! entropyLeftRight ( values, threshold,
|
|
|
+ distribution_left, distribution_right,
|
|
|
+ el, er, count_left, count_right, maxClassNo ) )
|
|
|
+ continue;
|
|
|
+
|
|
|
+ // information gain and entropy
|
|
|
+ double pl = (count_left) / (count_left + count_right);
|
|
|
+ double ig = e - pl*el - (1-pl)*er;
|
|
|
+
|
|
|
+ if ( useShannonEntropy )
|
|
|
+ {
|
|
|
+ double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
|
|
|
+ ig = 2*ig / ( e + esplit );
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( ig > bestSplitInfo.informationGain )
|
|
|
+ {
|
|
|
+ bestSplitInfo.informationGain = ig;
|
|
|
+ bestSplitInfo.threshold = threshold;
|
|
|
+ bestSplitInfo.params = beta;
|
|
|
+
|
|
|
+ for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
+ {
|
|
|
+ bestSplitInfo.distLeft[k] = distribution_left[k];
|
|
|
+ bestSplitInfo.distRight[k] = distribution_right[k];
|
|
|
+ }
|
|
|
+ bestSplitInfo.entropyLeft = el;
|
|
|
+ bestSplitInfo.entropyRight = er;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //cleaning up
|
|
|
+ delete [] distribution_left;
|
|
|
+ delete [] distribution_right;
|
|
|
+}
|
|
|
+
|
|
|
/** recursive building method */
|
|
|
DecisionNode *DTBOblique::buildRecursive(
|
|
|
const FeaturePool & fp,
|
|
@@ -237,18 +305,24 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
}
|
|
|
|
|
|
// variables
|
|
|
- double best_threshold = 0.0;
|
|
|
- double best_ig = -1.0;
|
|
|
FeatureValuesUnsorted values;
|
|
|
- double *best_distribution_left = new double [maxClassNo+1];
|
|
|
- double *best_distribution_right = new double [maxClassNo+1];
|
|
|
- double *distribution_left = new double [maxClassNo+1];
|
|
|
- double *distribution_right = new double [maxClassNo+1];
|
|
|
- double best_entropy_left = 0.0;
|
|
|
- double best_entropy_right = 0.0;
|
|
|
+ SplitInfo bestSplitInfo;
|
|
|
+ bestSplitInfo.threshold = 0.0;
|
|
|
+ bestSplitInfo.informationGain = -1.0;
|
|
|
+ bestSplitInfo.distLeft = new double [maxClassNo+1];
|
|
|
+ bestSplitInfo.distRight = new double [maxClassNo+1];
|
|
|
+ bestSplitInfo.entropyLeft = 0.0;
|
|
|
+ bestSplitInfo.entropyRight = 0.0;
|
|
|
+
|
|
|
+// double best_threshold = 0.0;
|
|
|
+// double best_ig = -1.0;
|
|
|
+// double *best_distribution_left = new double [maxClassNo+1];
|
|
|
+// double *best_distribution_right = new double [maxClassNo+1];
|
|
|
+// double best_entropy_left = 0.0;
|
|
|
+// double best_entropy_right = 0.0;
|
|
|
|
|
|
ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
|
|
|
- NICE::Vector best_beta = f->getParameterVector();
|
|
|
+ bestSplitInfo.params = f->getParameterVector();
|
|
|
|
|
|
// Creating data matrix X and label vector y
|
|
|
NICE::Matrix X, XTXr, G, temp;
|
|
@@ -257,134 +331,121 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
|
|
|
// Preparing system of linear equations
|
|
|
regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
|
|
|
-
|
|
|
- if (regularizationType == 3)
|
|
|
- {
|
|
|
- G = NICE::invert(XTXr);
|
|
|
- temp = G * X.transpose();
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- choleskyDecomp(XTXr, G);
|
|
|
- choleskyInvert(G, XTXr);
|
|
|
- temp = XTXr * X.transpose();
|
|
|
- }
|
|
|
+ choleskyDecomp(XTXr, G);
|
|
|
+ choleskyInvert(G, XTXr);
|
|
|
+ temp = XTXr * X.transpose();
|
|
|
|
|
|
|
|
|
- for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
|
|
|
+ if ( useOneVsOne )
|
|
|
{
|
|
|
- // One-vs-all: Transforming into {-1,+1} problem
|
|
|
- NICE::Vector yCur ( y.size(), -1.0 );
|
|
|
- int idx = 0;
|
|
|
- bool hasExamples = false;
|
|
|
- for ( vector<int>::const_iterator si = examples_selection.begin();
|
|
|
- si != examples_selection.end();
|
|
|
- si++, idx++ )
|
|
|
- {
|
|
|
- const pair<int, Example> & p = examples[*si];
|
|
|
- if (p.first == curClass)
|
|
|
+ // One-vs-one: Transforming into {-1,0,+1} problem
|
|
|
+ for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
|
|
|
+ for ( int opClass = 0; opClass <= maxClassNo; opClass++ )
|
|
|
{
|
|
|
- yCur.set( idx, 1.0 );
|
|
|
- hasExamples = true;
|
|
|
- }
|
|
|
- }
|
|
|
+ if ( curClass == opClass ) continue;
|
|
|
|
|
|
- // TODO: One-vs-one: Transforming into {-1,0,+1} problem
|
|
|
+ NICE::Vector yCur ( y.size(), 0.0 );
|
|
|
+ int idx = 0;
|
|
|
+ bool curHasExamples = false;
|
|
|
+ bool opHasExamples = false;
|
|
|
|
|
|
- // is there a positive example for current class in current set?
|
|
|
- if (!hasExamples) continue;
|
|
|
-
|
|
|
- // Solve system of linear equations in a least squares manner
|
|
|
- beta.multiply(temp,yCur,false);
|
|
|
+ for ( vector<int>::const_iterator si = examples_selection.begin();
|
|
|
+ si != examples_selection.end();
|
|
|
+ si++, idx++ )
|
|
|
+ {
|
|
|
+ const pair<int, Example> & p = examples[*si];
|
|
|
+ if ( p.first == curClass )
|
|
|
+ {
|
|
|
+ yCur.set( idx, 1.0 );
|
|
|
+ curHasExamples = true;
|
|
|
+ }
|
|
|
+ else if ( p.first == opClass )
|
|
|
+ {
|
|
|
+ yCur.set( idx, -1.0 );
|
|
|
+ opHasExamples = true;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- // Updating parameter vector in convolutional feature
|
|
|
- f->setParameterVector( beta );
|
|
|
+ // are there positive examples for current and opposition class in current set?
|
|
|
+ if ( !curHasExamples || !opHasExamples ) continue;
|
|
|
|
|
|
- // Feature Values
|
|
|
- values.clear();
|
|
|
- f->calcFeatureValues( examples, examples_selection, values);
|
|
|
+ // Solve system of linear equations in a least squares manner
|
|
|
+ beta.multiply(temp,yCur,false);
|
|
|
|
|
|
- double minValue = (min_element ( values.begin(), values.end() ))->first;
|
|
|
- double maxValue = (max_element ( values.begin(), values.end() ))->first;
|
|
|
+ // Updating parameter vector in convolutional feature
|
|
|
+ f->setParameterVector( beta );
|
|
|
|
|
|
- if ( maxValue - minValue < 1e-7 )
|
|
|
- std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
|
|
|
+ // Feature Values
|
|
|
+ values.clear();
|
|
|
+ f->calcFeatureValues( examples, examples_selection, values);
|
|
|
|
|
|
- // get best thresholds using complete search
|
|
|
- for ( int i = 0; i < splitSteps; i++ )
|
|
|
+ // complete search for threshold
|
|
|
+ findBestSplitThreshold ( values, bestSplitInfo, beta, e,
|
|
|
+ maxClassNo );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ // One-vs-all: Transforming into {-1,+1} problem
|
|
|
+ for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
|
|
|
{
|
|
|
- double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
|
|
|
- + minValue;
|
|
|
- // preparations
|
|
|
- double el, er;
|
|
|
- for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
+ NICE::Vector yCur ( y.size(), -1.0 );
|
|
|
+ int idx = 0;
|
|
|
+ bool hasExamples = false;
|
|
|
+ for ( vector<int>::const_iterator si = examples_selection.begin();
|
|
|
+ si != examples_selection.end();
|
|
|
+ si++, idx++ )
|
|
|
{
|
|
|
- distribution_left[k] = 0.0;
|
|
|
- distribution_right[k] = 0.0;
|
|
|
+ const pair<int, Example> & p = examples[*si];
|
|
|
+ if ( p.first == curClass )
|
|
|
+ {
|
|
|
+ yCur.set( idx, 1.0 );
|
|
|
+ hasExamples = true;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- /** Test the current split */
|
|
|
- // Does another split make sense?
|
|
|
- double count_left;
|
|
|
- double count_right;
|
|
|
- if ( ! entropyLeftRight ( values, threshold,
|
|
|
- distribution_left, distribution_right,
|
|
|
- el, er, count_left, count_right, maxClassNo ) )
|
|
|
- continue;
|
|
|
+ // is there a positive example for current class in current set?
|
|
|
+ if (!hasExamples) continue;
|
|
|
|
|
|
- // information gain and entropy
|
|
|
- double pl = (count_left) / (count_left + count_right);
|
|
|
- double ig = e - pl*el - (1-pl)*er;
|
|
|
+ // Solve system of linear equations in a least squares manner
|
|
|
+ beta.multiply(temp,yCur,false);
|
|
|
|
|
|
- if ( useShannonEntropy )
|
|
|
- {
|
|
|
- double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
|
|
|
- ig = 2*ig / ( e + esplit );
|
|
|
- }
|
|
|
+ // Updating parameter vector in convolutional feature
|
|
|
+ f->setParameterVector( beta );
|
|
|
|
|
|
- if ( ig > best_ig )
|
|
|
- {
|
|
|
- best_ig = ig;
|
|
|
- best_threshold = threshold;
|
|
|
- best_beta = beta;
|
|
|
+ // Feature Values
|
|
|
+ values.clear();
|
|
|
+ f->calcFeatureValues( examples, examples_selection, values);
|
|
|
+
|
|
|
+ // complete search for threshold
|
|
|
+ findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );
|
|
|
|
|
|
- for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
- {
|
|
|
- best_distribution_left[k] = distribution_left[k];
|
|
|
- best_distribution_right[k] = distribution_right[k];
|
|
|
- }
|
|
|
- best_entropy_left = el;
|
|
|
- best_entropy_right = er;
|
|
|
- }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // supress strange behaviour for values near zero (8.88178e-16)
|
|
|
- if (best_entropy_left < 1.0e-10 ) best_entropy_left = 0.0;
|
|
|
- if (best_entropy_right < 1.0e-10 ) best_entropy_right = 0.0;
|
|
|
|
|
|
- //cleaning up
|
|
|
- delete [] distribution_left;
|
|
|
- delete [] distribution_right;
|
|
|
+    // suppress strange behaviour for values near zero (8.88178e-16)
|
|
|
+ if (bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
|
|
|
+ if (bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;
|
|
|
|
|
|
// stop criteria: minimum information gain
|
|
|
- if ( best_ig < minimumInformationGain )
|
|
|
+ if ( bestSplitInfo.informationGain < minimumInformationGain )
|
|
|
{
|
|
|
#ifdef DEBUGTREE
|
|
|
std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
|
|
|
#endif
|
|
|
- delete [] best_distribution_left;
|
|
|
- delete [] best_distribution_right;
|
|
|
+ delete [] bestSplitInfo.distLeft;
|
|
|
+ delete [] bestSplitInfo.distRight;
|
|
|
node->trainExamplesIndices = examples_selection;
|
|
|
return node;
|
|
|
}
|
|
|
|
|
|
/** Save the best split to current node */
|
|
|
- f->setParameterVector( best_beta );
|
|
|
+ f->setParameterVector( bestSplitInfo.params );
|
|
|
values.clear();
|
|
|
f->calcFeatureValues( examples, examples_selection, values);
|
|
|
node->f = f->clone();
|
|
|
- node->threshold = best_threshold;
|
|
|
+ node->threshold = bestSplitInfo.threshold;
|
|
|
|
|
|
/** Split examples according to best split function */
|
|
|
vector<int> examples_left;
|
|
@@ -396,7 +457,7 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
i != values.end(); i++ )
|
|
|
{
|
|
|
double value = i->first;
|
|
|
- if ( value < best_threshold )
|
|
|
+ if ( value < bestSplitInfo.threshold )
|
|
|
examples_left.push_back ( i->third );
|
|
|
else
|
|
|
examples_right.push_back ( i->third );
|
|
@@ -405,17 +466,17 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
#ifdef DEBUGTREE
|
|
|
node->f->store( std::cerr );
|
|
|
std::cerr << std::endl;
|
|
|
- std::cerr << "DTBOblique: Information Gain: " << best_ig
|
|
|
- << ", Left Entropy: " << best_entropy_left << ", Right Entropy: "
|
|
|
- << best_entropy_right << std::endl;
|
|
|
+ std::cerr << "DTBOblique: Information Gain: " << bestSplitInfo.informationGain
|
|
|
+ << ", Left Entropy: " << bestSplitInfo.entropyLeft << ", Right Entropy: "
|
|
|
+ << bestSplitInfo.entropyRight << std::endl;
|
|
|
#endif
|
|
|
|
|
|
FullVector distribution_left_sparse ( distribution.size() );
|
|
|
FullVector distribution_right_sparse ( distribution.size() );
|
|
|
for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
{
|
|
|
- double l = best_distribution_left[k];
|
|
|
- double r = best_distribution_right[k];
|
|
|
+ double l = bestSplitInfo.distLeft[k];
|
|
|
+ double r = bestSplitInfo.distRight[k];
|
|
|
if ( l != 0 )
|
|
|
distribution_left_sparse[k] = l;
|
|
|
if ( r != 0 )
|
|
@@ -426,8 +487,9 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
- delete [] best_distribution_left;
|
|
|
- delete [] best_distribution_right;
|
|
|
+    // TODO: memory leak — bestSplitInfo.distLeft/distRight are new[]'d above but only freed on the early-return path; restore the deletes here
|
|
|
+ //delete [] best_distribution_left;
|
|
|
+ //delete [] best_distribution_right;
|
|
|
|
|
|
// update lambda by heuristic [Laptev/Buhmann, 2014]
|
|
|
double lambdaLeft = lambdaCurrent *
|
|
@@ -444,11 +506,11 @@ DecisionNode *DTBOblique::buildRecursive(
|
|
|
/** Recursion */
|
|
|
// left child
|
|
|
node->left = buildRecursive ( fp, examples, examples_left,
|
|
|
- distribution_left_sparse, best_entropy_left,
|
|
|
+ distribution_left_sparse, bestSplitInfo.entropyLeft,
|
|
|
maxClassNo, depth+1, lambdaLeft );
|
|
|
// right child
|
|
|
node->right = buildRecursive ( fp, examples, examples_right,
|
|
|
- distribution_right_sparse, best_entropy_right,
|
|
|
+ distribution_right_sparse, bestSplitInfo.entropyRight,
|
|
|
maxClassNo, depth+1, lambdaRight );
|
|
|
|
|
|
return node;
|