@@ -23,14 +23,15 @@ using namespace NICE;

DTBOblique::DTBOblique ( const Config *conf, string section )
{
-    random_split_tests = conf->gI(section, "random_split_tests", 10 );
+    split_steps = conf->gI(section, "split_steps", 20 );
    max_depth = conf->gI(section, "max_depth", 10 );
    minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
    minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
    use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
    min_examples = conf->gI(section, "min_examples", 50);
    save_indices = conf->gB(section, "save_indices", false);
-    lambdaInit = conf->gD(section, "lambdaInit", 0.5 );
+    lambdaInit = conf->gD(section, "lambda_init", 0.5 );
+    regularizationType = conf->gI(section, "regularization_type", 1 );
}

DTBOblique::~DTBOblique()
@@ -51,7 +52,9 @@ bool DTBOblique::entropyLeftRight (
{
    count_left = 0;
    count_right = 0;
-    for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
+    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
+          i != values.end();
+          i++ )
    {
        int classno = i->second;
        double value = i->first;
@@ -107,18 +110,12 @@ void DTBOblique::getDataAndLabel(
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
-        int classno = p.first;
        const Example & ce = p.second;

        NICE::Vector pixelRepr = f->getFeatureVector( &ce );
        pixelRepr /= pixelRepr.Max();

-        // TODO for multiclass scenarios we need ONEvsALL!
-
-        // {0,1} -> {-1,+1}
-        double label = 2*classno-1;
-
-        label *= ce.weight;
+        double label = p.first * ce.weight;
        pixelRepr *= ce.weight;

        y.set( matIndex, label );
@@ -131,6 +128,77 @@ void DTBOblique::getDataAndLabel(
    vecY = y;
}

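+/** Builds the regularized system matrix for the least-squares solver below:
+ *    regOption 0: X^T X + lambda * I          (identity / ridge penalty)
+ *    regOption 1: X^T X + lambda * D1^T D1    (first-order difference operator)
+ *    regOption 2: X^T X + lambda * D2^T D2    (second-order difference operator)
+ *    regOption 3: rescaling of X^T X as in [Chen et al., 2012]
+ *    otherwise:   no regularization is applied
+ */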
+void DTBOblique::regularizeDataMatrix(
+    const NICE::Matrix &X,
+    NICE::Matrix &XTXreg,
+    const int regOption,
+    const double lambda )
+{
+    XTXreg = X.transpose()*X;
+    NICE::Matrix R;
+    const int dim = X.cols();
+
+    switch (regOption)
+    {
+        // identity matrix
+        case 0:
+            R.resize(dim,dim);
+            R.setIdentity();
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // difference operator, k=1
+        case 1:
+            R.resize(dim-1,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-1; r++ )
+            {
+                R(r,r) = 1.0;
+                R(r,r+1) = -1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // difference operator, k=2
+        case 2:
+            R.resize(dim-2,dim);
+            R.set( 0.0 );
+            for ( int r = 0; r < dim-2; r++ )
+            {
+                R(r,r) = 1.0;
+                R(r,r+1) = -2.0;
+                R(r,r+2) = 1.0;
+            }
+            R = R.transpose()*R;
+            R *= lambda;
+            XTXreg += R;
+            break;
+
+        // as in [Chen et al., 2012]
+        case 3:
+        {
+            NICE::Vector q ( dim, (1.0-lambda) );
+            q[0] = 1;
+            NICE::Matrix Q;
+            Q.tensorProduct(q,q);
+            R.multiply(XTXreg,Q);
+            for ( int r = 0; r < dim; r++ )
+                R(r,r) = q[r] * XTXreg(r,r);
+            XTXreg = R;
+            break;
+        }
+
+        // no regularization
+        default:
+            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
+                      << std::endl;
+            break;
+    }
+}
+
/** recursive building method */
DecisionNode *DTBOblique::buildRecursive(
    const FeaturePool & fp,
@@ -165,19 +233,6 @@ DecisionNode *DTBOblique::buildRecursive(
        return node;
    }

-    // refresh/set X and y
-    NICE::Matrix X, G;
-    NICE::Vector y, beta;
-    getDataAndLabel( fp, examples, examples_selection, X, y );
-
-    // least squares solution
-    NICE::Matrix XTX = X.transpose()*X;
-    XTX.addDiagonal ( NICE::Vector( XTX.rows(), lambdaCurrent) );
-    choleskyDecomp(XTX, G);
-    choleskyInvert(G, XTX);
-    NICE::Matrix temp = XTX * X.transpose();
-    beta.multiply(temp,y,false);
-
    // variables
    double best_threshold = 0.0;
    double best_ig = -1.0;
@@ -189,64 +244,107 @@ DecisionNode *DTBOblique::buildRecursive(
    double best_entropy_left = 0.0;
    double best_entropy_right = 0.0;

-    // Setting Convolutional Feature
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
-    f->setParameterVector( beta );
+    NICE::Vector best_beta = f->getParameterVector();

-    // Feature Values
-    values.clear();
-    f->calcFeatureValues( examples, examples_selection, values);
+    // Creating data matrix X and label vector y
+    NICE::Matrix X, XTXr, G;
+    NICE::Vector y, beta;
+    getDataAndLabel( fp, examples, examples_selection, X, y );

-    double minValue = (min_element ( values.begin(), values.end() ))->first;
-    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+    // Preparing system of linear equations
+    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );

-    if ( maxValue - minValue < 1e-7 )
-        std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;
+    G = NICE::invert(XTXr);
+    NICE::Matrix temp = G * X.transpose();
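+    // note: temp = (X^T X + lambda*R)^{-1} * X^T is computed only once per
+    // node and reused for every one-vs-all label vector below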

-    // get best thresholds by complete search
-    for ( int i = 0; i < random_split_tests; i++ )
+    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
    {
-        double threshold = (i * (maxValue - minValue ) / (double)random_split_tests)
-                            + minValue;
-        // preparations
-        double el, er;
-        for ( int k = 0 ; k <= maxClassNo ; k++ )
+        // One-vs-all: Transforming into {-1,+1} problem
+        NICE::Vector yCur ( y.size(), -1.0 );
+        int idx = 0;
+        bool hasExamples = false;
+        for ( vector<int>::const_iterator si = examples_selection.begin();
+              si != examples_selection.end();
+              si++, idx++ )
        {
-            distribution_left[k] = 0.0;
-            distribution_right[k] = 0.0;
+            const pair<int, Example> & p = examples[*si];
+            if (p.first == curClass)
+            {
+                yCur.set( idx, 1.0 );
+                hasExamples = true;
+            }
        }

-        /** Test the current split */
-        // Does another split make sense?
-        double count_left;
-        double count_right;
-        if ( ! entropyLeftRight ( values, threshold,
-                 distribution_left, distribution_right,
-                 el, er, count_left, count_right, maxClassNo ) )
-            continue;
+        // is there a positive example for current class in current set?
+        if (!hasExamples) continue;

-        // information gain and entropy
-        double pl = (count_left) / (count_left + count_right);
-        double ig = e - pl*el - (1-pl)*er;
+        // Solve system of linear equations in a least squares manner
+        beta.multiply(temp,yCur,false);
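+        // i.e. beta = (X^T X + lambda*R)^{-1} * X^T * yCur, the regularized
+        // least-squares solution for the current one-vs-all problem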

-        if ( use_shannon_entropy )
-        {
-            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
-            ig = 2*ig / ( e + esplit );
-        }
+        // Updating parameter vector in convolutional feature
+        f->setParameterVector( beta );

-        if ( ig > best_ig )
-        {
-            best_ig = ig;
-            best_threshold = threshold;
+        // Feature Values
+        values.clear();
+        f->calcFeatureValues( examples, examples_selection, values);
+
+        double minValue = (min_element ( values.begin(), values.end() ))->first;
+        double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+        if ( maxValue - minValue < 1e-7 )
+            std::cerr << "DTBOblique: Difference between min and max of feature values too small!" << std::endl;

+        // get best thresholds by complete search
+        for ( int i = 0; i < split_steps; i++ )
+        {
+            double threshold = (i * (maxValue - minValue ) / (double)split_steps)
+                                + minValue;
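+            // i = 0 .. split_steps-1 sweeps a uniform grid over
+            // [minValue, maxValue); maxValue itself is never tested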
+            // preparations
+            double el, er;
            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
-                best_distribution_left[k] = distribution_left[k];
-                best_distribution_right[k] = distribution_right[k];
+                distribution_left[k] = 0.0;
+                distribution_right[k] = 0.0;
+            }
+
+            /** Test the current split */
+            // Does another split make sense?
+            double count_left;
+            double count_right;
+            if ( ! entropyLeftRight ( values, threshold,
+                     distribution_left, distribution_right,
+                     el, er, count_left, count_right, maxClassNo ) )
+                continue;
+
+            // information gain and entropy
+            double pl = (count_left) / (count_left + count_right);
+            double ig = e - pl*el - (1-pl)*er;
+
+            if ( use_shannon_entropy )
+            {
+                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+                ig = 2*ig / ( e + esplit );
+            }
+
+            if ( ig > best_ig )
+            {
+                best_ig = ig;
+                best_threshold = threshold;
+                best_beta = beta;
+
+                for ( int k = 0 ; k <= maxClassNo ; k++ )
+                {
+                    best_distribution_left[k] = distribution_left[k];
+                    best_distribution_right[k] = distribution_right[k];
+                }
+                best_entropy_left = el;
+                best_entropy_right = er;
            }
-        best_entropy_left = el;
-        best_entropy_right = er;
        }
    }

@@ -267,10 +365,13 @@ DecisionNode *DTBOblique::buildRecursive(
    }

    /** Save the best split to current node */
+    f->setParameterVector( best_beta );
+    values.clear();
+    f->calcFeatureValues( examples, examples_selection, values);
    node->f = f->clone();
    node->threshold = best_threshold;

-    /** Split examples according to split function */
+    /** Split examples according to best split function */
    vector<int> examples_left;
    vector<int> examples_right;

@@ -305,11 +406,8 @@ DecisionNode *DTBOblique::buildRecursive(
            if ( r != 0 )
                distribution_right_sparse[k] = r;
#ifdef DEBUGTREE
-            if ( (l>0)||(r>0) )
-            {
-                std::cerr << "DTBOblique: split of class " << k << " ("
-                          << l << " <-> " << r << ") " << std::endl;
-            }
+            std::cerr << "DTBOblique: split of class " << k << " ("
+                      << l << " <-> " << r << ") " << std::endl;
#endif
        }

@@ -323,7 +421,7 @@ DecisionNode *DTBOblique::buildRecursive(
        pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));

#ifdef DEBUGTREE
-    std::cerr << "regularization parameter lambda: left " << lambdaLeft
+    std::cerr << "regularization parameter lambda left " << lambdaLeft
              << " right " << lambdaRight << std::endl;

#endif
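
For reference, a minimal sketch of driving the new constructor options through NICE::Config.
The section name "DTBOblique" and the sI/sD setters are assumptions mirroring the gI/gD
getters used above, not confirmed API:

    NICE::Config conf;
    conf.sI( "DTBOblique", "split_steps", 20 );          // number of threshold candidates per feature
    conf.sD( "DTBOblique", "lambda_init", 0.5 );         // initial regularization strength
    conf.sI( "DTBOblique", "regularization_type", 1 );   // 0: identity, 1: k=1, 2: k=2, 3: [Chen et al., 2012]
    DTBOblique builder ( &conf, "DTBOblique" );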