|
@@ -25,13 +25,13 @@ using namespace NICE;
|
|
DTBRandomOblique::DTBRandomOblique ( const Config *conf, string section )
|
|
DTBRandomOblique::DTBRandomOblique ( const Config *conf, string section )
|
|
{
|
|
{
|
|
random_split_tests = conf->gI(section, "random_split_tests", 10 );
|
|
random_split_tests = conf->gI(section, "random_split_tests", 10 );
|
|
- random_features = conf->gI(section, "random_features", 500 );
|
|
|
|
max_depth = conf->gI(section, "max_depth", 10 );
|
|
max_depth = conf->gI(section, "max_depth", 10 );
|
|
minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
|
|
minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
|
|
minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
|
|
minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
|
|
use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
|
|
use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
|
|
min_examples = conf->gI(section, "min_examples", 50);
|
|
min_examples = conf->gI(section, "min_examples", 50);
|
|
save_indices = conf->gB(section, "save_indices", false);
|
|
save_indices = conf->gB(section, "save_indices", false);
|
|
|
|
+ lambda = conf->gD(section, "lambda", 0.5 );
|
|
|
|
|
|
if ( conf->gB(section, "start_random_generator", false ) )
|
|
if ( conf->gB(section, "start_random_generator", false ) )
|
|
srand(time(NULL));
|
|
srand(time(NULL));
|
|
@@ -115,8 +115,14 @@ void DTBRandomOblique::getDataAndLabel(
|
|
const Example & ce = p.second;
|
|
const Example & ce = p.second;
|
|
|
|
|
|
NICE::Vector pixelRepr = f->getFeatureVector( &ce );
|
|
NICE::Vector pixelRepr = f->getFeatureVector( &ce );
|
|
|
|
+ pixelRepr /= pixelRepr.Max();
|
|
X.setRow(matIndex,pixelRepr);
|
|
X.setRow(matIndex,pixelRepr);
|
|
- y.set(matIndex,(double)classno);
|
|
|
|
|
|
+
|
|
|
|
+ // TODO: multiclass scenarios need a one-vs-all scheme; for now labels are binarized (class 0 -> -1.0, any other class -> +1.0)
|
|
|
|
+ if ( classno == 0 )
|
|
|
|
+ y.set(matIndex,-1.0);
|
|
|
|
+ else
|
|
|
|
+ y.set(matIndex, 1.0);
|
|
|
|
|
|
matIndex++;
|
|
matIndex++;
|
|
}
|
|
}
|
|
@@ -163,13 +169,18 @@ DecisionNode *DTBRandomOblique::buildRecursive(
|
|
NICE::Vector y;
|
|
NICE::Vector y;
|
|
getDataAndLabel(fp, examples, examples_selection, X, y);
|
|
getDataAndLabel(fp, examples, examples_selection, X, y);
|
|
NICE::Matrix XTX = X.transpose()*X;
|
|
NICE::Matrix XTX = X.transpose()*X;
|
|
- XTX = NICE::invert(XTX);
|
|
|
|
|
|
+ XTX.addDiagonal ( NICE::Vector( XTX.rows(), lambda) );
|
|
|
|
+
|
|
|
|
+ //TODO: incorporate weighting according to the class distribution
|
|
|
|
+
|
|
|
|
+ NICE::Matrix G;
|
|
|
|
+ choleskyDecomp(XTX, G);
|
|
|
|
+ choleskyInvert(G, XTX);
|
|
NICE::Matrix temp = XTX * X.transpose();
|
|
NICE::Matrix temp = XTX * X.transpose();
|
|
- NICE::Vector params;
|
|
|
|
- params.multiply(temp,y,false);
|
|
|
|
- params.normalizeL2();
|
|
|
|
|
|
+ NICE::Vector beta;
|
|
|
|
+ beta.multiply(temp,y,false);
|
|
|
|
|
|
- Feature *best_feature = NULL;
|
|
|
|
|
|
+ // variables
|
|
double best_threshold = 0.0;
|
|
double best_threshold = 0.0;
|
|
double best_ig = -1.0;
|
|
double best_ig = -1.0;
|
|
FeatureValuesUnsorted values;
|
|
FeatureValuesUnsorted values;
|
|
@@ -180,76 +191,69 @@ DecisionNode *DTBRandomOblique::buildRecursive(
|
|
double best_entropy_left = 0.0;
|
|
double best_entropy_left = 0.0;
|
|
double best_entropy_right = 0.0;
|
|
double best_entropy_right = 0.0;
|
|
|
|
|
|
- // random parameter vectors
|
|
|
|
- for ( int k = 0 ; k < random_features ; k++ )
|
|
|
|
- {
|
|
|
|
- /** Create random parameter vector */
|
|
|
|
-#ifdef DETAILTREE
|
|
|
|
- std::cerr << "Calculating random parameter vector #" << k << std::endl;
|
|
|
|
-#endif
|
|
|
|
- ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
|
|
|
|
-
|
|
|
|
- f->setParameterVector( params );
|
|
|
|
|
|
+ // Setting Convolutional Feature
|
|
|
|
+ ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
|
|
|
|
+ f->setParameterVector( beta );
|
|
|
|
|
|
- /** Compute feature values for current parameters */
|
|
|
|
- values.clear();
|
|
|
|
- f->calcFeatureValues( examples, examples_selection, values);
|
|
|
|
|
|
+ // Feature Values
|
|
|
|
+ values.clear();
|
|
|
|
+ f->calcFeatureValues( examples, examples_selection, values);
|
|
|
|
|
|
- double minValue = (min_element ( values.begin(), values.end() ))->first;
|
|
|
|
- double maxValue = (max_element ( values.begin(), values.end() ))->first;
|
|
|
|
|
|
+ double minValue = (min_element ( values.begin(), values.end() ))->first;
|
|
|
|
+ double maxValue = (max_element ( values.begin(), values.end() ))->first;
|
|
|
|
|
|
- if ( maxValue - minValue < 1e-7 ) continue;
|
|
|
|
|
|
+ if ( maxValue - minValue < 1e-7 )
|
|
|
|
+ std::cerr << "DTBRandomOblique: Difference between min and max of features values to small!" << std::endl;
|
|
|
|
|
|
- // randomly chosen thresholds
|
|
|
|
- for ( int i = 0; i < random_split_tests; i++ )
|
|
|
|
- {
|
|
|
|
- double threshold = rand() * (maxValue - minValue ) / RAND_MAX + minValue;
|
|
|
|
|
|
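+ // threshold candidates -- NOTE(review): no longer random; the expression below does not depend on i, so every iteration tests the same threshold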
|
|
|
|
+ for ( int i = 0; i < random_split_tests; i++ )
|
|
|
|
+ {
|
|
|
|
+ double threshold = ((maxValue - minValue ) / (double)random_split_tests)
|
|
|
|
+ + minValue;
|
|
#ifdef DETAILTREE
|
|
#ifdef DETAILTREE
|
|
- std::cerr << "Testing split #" << i << " for vector #" << k
|
|
|
|
- << ": t=" << threshold << std::endl;
|
|
|
|
|
|
+ std::cerr << "Testing split #" << i << " for vector #" << k
|
|
|
|
+ << ": t=" << threshold << std::endl;
|
|
#endif
|
|
#endif
|
|
|
|
|
|
- // preparations
|
|
|
|
- double el, er;
|
|
|
|
- for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
|
- {
|
|
|
|
- distribution_left[k] = 0;
|
|
|
|
- distribution_right[k] = 0;
|
|
|
|
- }
|
|
|
|
|
|
+ // preparations
|
|
|
|
+ double el, er;
|
|
|
|
+ for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
|
+ {
|
|
|
|
+ distribution_left[k] = 0.0;
|
|
|
|
+ distribution_right[k] = 0.0;
|
|
|
|
+ }
|
|
|
|
|
|
- /** Test the current split */
|
|
|
|
- // Does another split make sense?
|
|
|
|
- double count_left;
|
|
|
|
- double count_right;
|
|
|
|
- if ( ! entropyLeftRight ( values, threshold,
|
|
|
|
- distribution_left, distribution_right,
|
|
|
|
- el, er, count_left, count_right, maxClassNo ) )
|
|
|
|
- continue;
|
|
|
|
|
|
+ /** Test the current split */
|
|
|
|
+ // Does another split make sense?
|
|
|
|
+ double count_left;
|
|
|
|
+ double count_right;
|
|
|
|
+ if ( ! entropyLeftRight ( values, threshold,
|
|
|
|
+ distribution_left, distribution_right,
|
|
|
|
+ el, er, count_left, count_right, maxClassNo ) )
|
|
|
|
+ continue;
|
|
|
|
|
|
- // information gain and entropy
|
|
|
|
- double pl = (count_left) / (count_left + count_right);
|
|
|
|
- double ig = e - pl*el - (1-pl)*er;
|
|
|
|
|
|
+ // information gain and entropy
|
|
|
|
+ double pl = (count_left) / (count_left + count_right);
|
|
|
|
+ double ig = e - pl*el - (1-pl)*er;
|
|
|
|
|
|
- if ( use_shannon_entropy )
|
|
|
|
- {
|
|
|
|
- double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
|
|
|
|
- ig = 2*ig / ( e + esplit );
|
|
|
|
- }
|
|
|
|
|
|
+ if ( use_shannon_entropy )
|
|
|
|
+ {
|
|
|
|
+ double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
|
|
|
|
+ ig = 2*ig / ( e + esplit );
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if ( ig > best_ig )
|
|
|
|
+ {
|
|
|
|
+ best_ig = ig;
|
|
|
|
+ best_threshold = threshold;
|
|
|
|
|
|
- if ( ig > best_ig )
|
|
|
|
|
|
+ for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
{
|
|
{
|
|
- best_ig = ig;
|
|
|
|
- best_threshold = threshold;
|
|
|
|
-
|
|
|
|
- best_feature = f;
|
|
|
|
- for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
|
|
- {
|
|
|
|
- best_distribution_left[k] = distribution_left[k];
|
|
|
|
- best_distribution_right[k] = distribution_right[k];
|
|
|
|
- }
|
|
|
|
- best_entropy_left = el;
|
|
|
|
- best_entropy_right = er;
|
|
|
|
|
|
+ best_distribution_left[k] = distribution_left[k];
|
|
|
|
+ best_distribution_right[k] = distribution_right[k];
|
|
}
|
|
}
|
|
|
|
+ best_entropy_left = el;
|
|
|
|
+ best_entropy_right = er;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -270,25 +274,23 @@ DecisionNode *DTBRandomOblique::buildRecursive(
|
|
}
|
|
}
|
|
|
|
|
|
/** Save the best split to current node */
|
|
/** Save the best split to current node */
|
|
- node->f = best_feature->clone();
|
|
|
|
|
|
+ node->f = f->clone();
|
|
node->threshold = best_threshold;
|
|
node->threshold = best_threshold;
|
|
|
|
|
|
- /** Recalculate examples using best split */
|
|
|
|
- vector<int> best_examples_left;
|
|
|
|
- vector<int> best_examples_right;
|
|
|
|
- values.clear();
|
|
|
|
- best_feature->calcFeatureValues ( examples, examples_selection, values );
|
|
|
|
|
|
+ /** Split examples according to split function */
|
|
|
|
+ vector<int> examples_left;
|
|
|
|
+ vector<int> examples_right;
|
|
|
|
|
|
- best_examples_left.reserve ( values.size() / 2 );
|
|
|
|
- best_examples_right.reserve ( values.size() / 2 );
|
|
|
|
|
|
+ examples_left.reserve ( values.size() / 2 );
|
|
|
|
+ examples_right.reserve ( values.size() / 2 );
|
|
for ( FeatureValuesUnsorted::const_iterator i = values.begin();
|
|
for ( FeatureValuesUnsorted::const_iterator i = values.begin();
|
|
i != values.end(); i++ )
|
|
i != values.end(); i++ )
|
|
{
|
|
{
|
|
double value = i->first;
|
|
double value = i->first;
|
|
if ( value < best_threshold )
|
|
if ( value < best_threshold )
|
|
- best_examples_left.push_back ( i->third );
|
|
|
|
|
|
+ examples_left.push_back ( i->third );
|
|
else
|
|
else
|
|
- best_examples_right.push_back ( i->third );
|
|
|
|
|
|
+ examples_right.push_back ( i->third );
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef DEBUGTREE
|
|
#ifdef DEBUGTREE
|
|
@@ -299,16 +301,16 @@ DecisionNode *DTBRandomOblique::buildRecursive(
|
|
<< best_entropy_right << std::endl;
|
|
<< best_entropy_right << std::endl;
|
|
#endif
|
|
#endif
|
|
|
|
|
|
- FullVector best_distribution_left_sparse ( distribution.size() );
|
|
|
|
- FullVector best_distribution_right_sparse ( distribution.size() );
|
|
|
|
|
|
+ FullVector distribution_left_sparse ( distribution.size() );
|
|
|
|
+ FullVector distribution_right_sparse ( distribution.size() );
|
|
for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
for ( int k = 0 ; k <= maxClassNo ; k++ )
|
|
{
|
|
{
|
|
double l = best_distribution_left[k];
|
|
double l = best_distribution_left[k];
|
|
double r = best_distribution_right[k];
|
|
double r = best_distribution_right[k];
|
|
if ( l != 0 )
|
|
if ( l != 0 )
|
|
- best_distribution_left_sparse[k] = l;
|
|
|
|
|
|
+ distribution_left_sparse[k] = l;
|
|
if ( r != 0 )
|
|
if ( r != 0 )
|
|
- best_distribution_right_sparse[k] = r;
|
|
|
|
|
|
+ distribution_right_sparse[k] = r;
|
|
#ifdef DEBUGTREE
|
|
#ifdef DEBUGTREE
|
|
if ( (l>0)||(r>0) )
|
|
if ( (l>0)||(r>0) )
|
|
{
|
|
{
|
|
@@ -323,11 +325,13 @@ DecisionNode *DTBRandomOblique::buildRecursive(
|
|
|
|
|
|
/** Recursion */
|
|
/** Recursion */
|
|
// left child
|
|
// left child
|
|
- node->left = buildRecursive ( fp, examples, best_examples_left,
|
|
|
|
- best_distribution_left_sparse, best_entropy_left, maxClassNo, depth+1 );
|
|
|
|
|
|
+ node->left = buildRecursive ( fp, examples, examples_left,
|
|
|
|
+ distribution_left_sparse, best_entropy_left,
|
|
|
|
+ maxClassNo, depth+1 );
|
|
// right child
|
|
// right child
|
|
- node->right = buildRecursive ( fp, examples, best_examples_right,
|
|
|
|
- best_distribution_right_sparse, best_entropy_right, maxClassNo, depth+1 );
|
|
|
|
|
|
+ node->right = buildRecursive ( fp, examples, examples_right,
|
|
|
|
+ distribution_right_sparse, best_entropy_right,
|
|
|
|
+ maxClassNo, depth+1 );
|
|
|
|
|
|
return node;
|
|
return node;
|
|
}
|
|
}
|