
/**
 * @file DTBObliqueLS.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014
 */

#include <iostream>
#include <time.h>

#include "DTBObliqueLS.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"

#include "core/vector/Algorithms.h"

using namespace OBJREC;

#define DEBUGTREE

using namespace std;
using namespace NICE;
DTBObliqueLS::DTBObliqueLS ( const Config *conf, string section )
{
    saveIndices = conf->gB ( section, "save_indices", false );
    useShannonEntropy = conf->gB ( section, "use_shannon_entropy", false );
    useOneVsOne = conf->gB ( section, "use_one_vs_one", false );
    useDynamicRegularization = conf->gB ( section, "use_dynamic_regularization", true );

    splitSteps = conf->gI ( section, "split_steps", 20 );
    maxDepth = conf->gI ( section, "max_depth", 10 );
    minExamples = conf->gI ( section, "min_examples", 50 );
    regularizationType = conf->gI ( section, "regularization_type", 1 );

    minimumEntropy = conf->gD ( section, "minimum_entropy", 10e-5 );
    minimumInformationGain = conf->gD ( section, "minimum_information_gain", 10e-7 );
    lambdaInit = conf->gD ( section, "lambda_init", 0.5 );
}
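
/* For reference, a hypothetical config section exercising the keys read
 * above (the section name "DTBObliqueLS" and all values are made up;
 * the defaults are the third arguments of the gB/gI/gD calls):
 *
 *   [DTBObliqueLS]
 *   save_indices = false
 *   use_shannon_entropy = false
 *   use_one_vs_one = false
 *   use_dynamic_regularization = true
 *   split_steps = 20
 *   max_depth = 10
 *   min_examples = 50
 *   regularization_type = 1
 *   minimum_entropy = 10e-5
 *   minimum_information_gain = 10e-7
 *   lambda_init = 0.5
 */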

DTBObliqueLS::~DTBObliqueLS()
{
}

bool DTBObliqueLS::entropyLeftRight (
    const FeatureValuesUnsorted & values,
    double threshold,
    double* stat_left,
    double* stat_right,
    double & entropy_left,
    double & entropy_right,
    double & count_left,
    double & count_right,
    int maxClassNo )
{
    // accumulate weighted class histograms for both sides of the split
    count_left = 0;
    count_right = 0;

    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        int classno = i->second;
        double value = i->first;
        if ( value < threshold )
        {
            stat_left[classno] += i->fourth;
            count_left += i->fourth;
        }
        else
        {
            stat_right[classno] += i->fourth;
            count_right += i->fourth;
        }
    }

    // a split that leaves one side empty is useless
    if ( (count_left == 0) || (count_right == 0) )
        return false;

    // entropy of the unnormalized histograms (see note below)
    entropy_left = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_left[j] != 0 )
            entropy_left -= stat_left[j] * log(stat_left[j]);
    entropy_left /= count_left;
    entropy_left += log(count_left);

    entropy_right = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_right[j] != 0 )
            entropy_right -= stat_right[j] * log(stat_right[j]);
    entropy_right /= count_right;
    entropy_right += log(count_right);

    return true;
}
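
/* Note on the entropy form used above: with weighted class counts c_j and
 * total weight n = sum_j c_j, the normalized entropy expands as
 *
 *   H = -sum_j (c_j/n) * log(c_j/n)
 *     = -(1/n) * sum_j c_j * log(c_j) + log(n),
 *
 * which is exactly the "accumulate -c_j*log(c_j), divide by count, add
 * log(count)" sequence applied to entropy_left and entropy_right. It avoids
 * normalizing the histograms explicitly.
 */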

bool DTBObliqueLS::adaptDataAndLabelForMultiClass (
    const int posClass,
    const int negClass,
    NICE::Matrix & X,
    NICE::Vector & y )
{
    bool posHasExamples = false;
    bool negHasExamples = false;

    // One-vs-one: transforming into a {-1,0,+1} problem
    if ( useOneVsOne )
        for ( int i = 0; i < (int)y.size(); i++ )
        {
            if ( y[i] == posClass )
            {
                y[i] = 1.0;
                posHasExamples = true;
            }
            else if ( y[i] == negClass )
            {
                y[i] = -1.0;
                negHasExamples = true;
            }
            else
            {
                // examples of other classes are ignored by zeroing them out
                y[i] = 0.0;
                X.setRow( i, NICE::Vector( X.cols(), 0.0 ) );
            }
        }
    // One-vs-all: transforming into a {-1,+1} problem
    else
        for ( int i = 0; i < (int)y.size(); i++ )
        {
            if ( y[i] == posClass )
            {
                y[i] = 1.0;
                posHasExamples = true;
            }
            else
            {
                y[i] = -1.0;
                negHasExamples = true;
            }
        }

    // a valid binary problem needs examples on both sides
    return posHasExamples && negHasExamples;
}
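
/* Worked example with made-up labels: for y = (0, 1, 2, 1), posClass = 1 and
 * negClass = 2, the one-vs-one branch yields y = (0, +1, -1, +1) and zeroes
 * out the feature row of example 0, while the one-vs-all branch yields
 * y = (-1, +1, -1, +1) (negClass plays no role there).
 */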

/** refresh data matrix X and label vector y */
void DTBObliqueLS::getDataAndLabel(
    const FeaturePool & fp,
    const Examples & examples,
    const std::vector<int> & examples_selection,
    NICE::Matrix & X,
    NICE::Vector & y,
    NICE::Vector & w )
{
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    int amountParams = f->getParameterLength();
    int amountExamples = examples_selection.size();

    X = NICE::Matrix ( amountExamples, amountParams, 0.0 );
    y = NICE::Vector ( amountExamples, 0.0 );
    w = NICE::Vector ( amountExamples, 1.0 );

    int matIndex = 0;
    for ( vector<int>::const_iterator si = examples_selection.begin();
          si != examples_selection.end();
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
        const Example & ex = p.second;

        NICE::Vector pixelRepr = f->getFeatureVector( &ex );

        double label = p.first;
        pixelRepr *= ex.weight;

        w.set ( matIndex, ex.weight );
        y.set ( matIndex, label );
        X.setRow ( matIndex, pixelRepr );

        matIndex++;
    }
}

void DTBObliqueLS::regularizeDataMatrix(
    const NICE::Matrix & X,
    NICE::Matrix & XTXreg,
    const int regOption,
    const double lambda )
{
    XTXreg = X.transpose()*X;
    NICE::Matrix R;
    const int dim = X.cols();

    switch ( regOption )
    {
        // identity matrix
        case 0:
            R.resize(dim,dim);
            R.setIdentity();
            R *= lambda;
            XTXreg += R;
            break;

        // difference operator, k=1
        case 1:
            R.resize(dim-1,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-1; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // difference operator, k=2
        case 2:
            R.resize(dim-2,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-2; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -2.0;
                R(r,r+2) =  1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // as in [Chen et al., 2012]
        case 3:
        {
            NICE::Vector q ( dim, (1.0-lambda) );
            q[0] = 1.0;
            NICE::Matrix Q;
            Q.tensorProduct(q,q);
            R.resize(dim,dim);
            for ( int r = 0; r < dim; r++ )
            {
                for ( int c = 0; c < dim; c++ )
                    R(r,c) = XTXreg(r,c) * Q(r,c);
                R(r,r) = q[r] * XTXreg(r,r);
            }
            XTXreg = R;
            break;
        }

        // no regularization
        default:
            std::cerr << "DTBObliqueLS::regularizeDataMatrix: No regularization applied!"
                      << std::endl;
            break;
    }
}
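
/* Illustration for a hypothetical dim = 4: regOption 1 first builds the
 * first-order difference operator
 *
 *       (  1 -1  0  0 )
 *   R = (  0  1 -1  0 )
 *       (  0  0  1 -1 )
 *
 * and returns XTXreg = X^T X + lambda * R^T R; option 0 adds lambda times
 * the identity instead, and option 3 rescales X^T X entrywise rather than
 * adding a penalty matrix.
 */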

void DTBObliqueLS::findBestSplitThreshold (
    FeatureValuesUnsorted & values,
    SplitInfo & bestSplitInfo,
    const NICE::Vector & beta,
    const double & e,
    const int & maxClassNo )
{
    double *distribution_left = new double [maxClassNo+1];
    double *distribution_right = new double [maxClassNo+1];
    double minValue = (min_element ( values.begin(), values.end() ))->first;
    double maxValue = (max_element ( values.begin(), values.end() ))->first;

    if ( maxValue - minValue < 1e-7 )
        std::cerr << "DTBObliqueLS: Difference between min and max of feature values is too small!"
                  << " [" << minValue << "," << maxValue << "]" << std::endl;

    // get best threshold using complete search
    for ( int i = 0; i < splitSteps; i++ )
    {
        double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
                           + minValue;
        // preparations
        double el, er;
        for ( int k = 0 ; k <= maxClassNo ; k++ )
        {
            distribution_left[k] = 0.0;
            distribution_right[k] = 0.0;
        }

        /** Test the current split */
        // Does another split make sense?
        double count_left;
        double count_right;
        if ( ! entropyLeftRight ( values, threshold,
                                  distribution_left, distribution_right,
                                  el, er, count_left, count_right, maxClassNo ) )
            continue;

        // information gain and entropy
        double pl = (count_left) / (count_left + count_right);
        double ig = e - pl*el - (1-pl)*er;

        if ( useShannonEntropy )
        {
            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
            ig = 2*ig / ( e + esplit );
        }

        if ( ig > bestSplitInfo.informationGain )
        {
            bestSplitInfo.informationGain = ig;
            bestSplitInfo.threshold = threshold;
            bestSplitInfo.params = beta;

            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
                bestSplitInfo.distLeft[k] = distribution_left[k];
                bestSplitInfo.distRight[k] = distribution_right[k];
            }
            bestSplitInfo.entropyLeft = el;
            bestSplitInfo.entropyRight = er;
        }
    }

    // cleaning up
    delete [] distribution_left;
    delete [] distribution_right;
}
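
/* The candidate grid above is threshold_i = minValue + i*(maxValue-minValue)
 * / splitSteps for i = 0, ..., splitSteps-1. Since i = 0 gives
 * threshold = minValue, which sends every value to the right child, that
 * candidate is always rejected by entropyLeftRight; effectively
 * splitSteps - 1 thresholds are evaluated.
 */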

/** recursive building method */
DecisionNode *DTBObliqueLS::buildRecursive(
    const FeaturePool & fp,
    const Examples & examples,
    std::vector<int> & examples_selection,
    FullVector & distribution,
    double e,
    int maxClassNo,
    int depth,
    double lambdaCurrent )
{
#ifdef DEBUGTREE
    std::cerr << "DTBObliqueLS: Examples: " << (int)examples_selection.size()
              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
#endif

    // initialize new node
    DecisionNode *node = new DecisionNode ();
    node->distribution = distribution;

    // stop criteria: maxDepth, minExamples, minimumEntropy
    if ( ( e <= minimumEntropy )
         || ( (int)examples_selection.size() < minExamples )
         || ( depth > maxDepth ) )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBObliqueLS: Stopping criteria applied!" << std::endl;
#endif
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    // variables
    FeatureValuesUnsorted values;
    SplitInfo bestSplitInfo;
    bestSplitInfo.threshold = 0.0;
    bestSplitInfo.informationGain = -1.0;
    bestSplitInfo.distLeft = new double [maxClassNo+1];
    bestSplitInfo.distRight = new double [maxClassNo+1];
    bestSplitInfo.entropyLeft = 0.0;
    bestSplitInfo.entropyRight = 0.0;

    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    bestSplitInfo.params = f->getParameterVector();

    // creating data matrix X and label vector y
    NICE::Matrix X;
    NICE::Vector y, beta, weights;
    getDataAndLabel( fp, examples, examples_selection, X, y, weights );

    // transforming the multi-class problem into a set of binary problems
    for ( int posClass = 0; posClass <= maxClassNo; posClass++ )
    {
        bool gotInnerIteration = false;
        for ( int negClass = 0; negClass <= maxClassNo; negClass++ )
        {
            if ( posClass == negClass ) continue;

            NICE::Vector yCur = y;
            NICE::Matrix XCur = X;

            bool hasExamples = adaptDataAndLabelForMultiClass(
                posClass, negClass, XCur, yCur );

            yCur *= weights;

            // are there examples for positive and negative class?
            if ( !hasExamples ) continue;

            // one-vs-all setting: only one iteration for inner loop
            if ( !useOneVsOne && gotInnerIteration ) continue;

            // preparing the regularized system of linear equations
            NICE::Matrix XTXr, G, temp;
            regularizeDataMatrix( XCur, XTXr, regularizationType, lambdaCurrent );
            choleskyDecomp(XTXr, G);
            choleskyInvert(G, XTXr);
            temp = XTXr * XCur.transpose();

            // solve the system of linear equations in a least squares manner
            beta.multiply(temp,yCur,false);

            // updating the parameter vector of the convolutional feature
            f->setParameterVector( beta );

            // feature values
            values.clear();
            f->calcFeatureValues( examples, examples_selection, values );

            // complete search for the best threshold
            findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );
            gotInnerIteration = true;
        }
    }
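
    /* The inner solve above computes the regularized least-squares parameters
     * in closed form: beta = XTXr^{-1} * X^T * y, where XTXr is the regularized
     * Gram matrix from regularizeDataMatrix (for options 0-2 this is the
     * familiar ridge-style solution beta = (X^T X + lambda*Omega)^{-1} X^T y).
     * The inverse is obtained via the Cholesky helpers pulled in through the
     * core includes.
     */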

    // alternative: random parameter vector instead of the least-squares fit (disabled)
    // f->setRandomParameterVector();
    // beta = f->getParameterVector();
    // f->calcFeatureValues( examples, examples_selection, values);
    // findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );

    // suppress strange behaviour for values near zero (8.88178e-16)
    if ( bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
    if ( bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;

    // stop criterion: minimum information gain
    if ( bestSplitInfo.informationGain < minimumInformationGain )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBObliqueLS: Minimum information gain reached!" << std::endl;
#endif
        delete [] bestSplitInfo.distLeft;
        delete [] bestSplitInfo.distRight;
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    /** Save the best split to current node */
    f->setParameterVector( bestSplitInfo.params );
    values.clear();
    f->calcFeatureValues( examples, examples_selection, values );
    node->f = f->clone();
    node->threshold = bestSplitInfo.threshold;

    /** Split examples according to best split function */
    vector<int> examples_left;
    vector<int> examples_right;

    examples_left.reserve ( values.size() / 2 );
    examples_right.reserve ( values.size() / 2 );
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end(); i++ )
    {
        if ( i->first < bestSplitInfo.threshold )
            examples_left.push_back ( i->third );
        else
            examples_right.push_back ( i->third );
    }

#ifdef DEBUGTREE
    // node->f->store( std::cerr );
    // std::cerr << std::endl;
    std::cerr << "DTBObliqueLS: Information Gain: " << bestSplitInfo.informationGain
              << ", Left Entropy: " << bestSplitInfo.entropyLeft << ", Right Entropy: "
              << bestSplitInfo.entropyRight << std::endl;
#endif

    FullVector distribution_left_sparse ( distribution.size() );
    FullVector distribution_right_sparse ( distribution.size() );
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
        double l = bestSplitInfo.distLeft[k];
        double r = bestSplitInfo.distRight[k];
        if ( l != 0 )
            distribution_left_sparse[k] = l;
        if ( r != 0 )
            distribution_right_sparse[k] = r;
//#ifdef DEBUGTREE
//        std::cerr << "DTBObliqueLS: Split of Class " << k << " ("
//                  << l << " <-> " << r << ") " << std::endl;
//#endif
    }
    delete [] bestSplitInfo.distLeft;
    delete [] bestSplitInfo.distRight;

    // update lambda by heuristic [Laptev/Buhmann, 2014]
    double lambdaLeft, lambdaRight;
    if ( useDynamicRegularization )
    {
        lambdaLeft = lambdaCurrent *
            pow(((double)examples_selection.size()/(double)examples_left.size()),(2./f->getParameterLength()));
        lambdaRight = lambdaCurrent *
            pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));
    }
    else
    {
        lambdaLeft = lambdaCurrent;
        lambdaRight = lambdaCurrent;
    }
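
    /* The dynamic update above implements
     *
     *   lambda_child = lambda_parent * (n_parent / n_child)^(2/d),
     *
     * where n is the number of examples reaching a node and d the parameter
     * length of the convolutional feature, following the cited heuristic of
     * [Laptev/Buhmann, 2014].
     */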

    /** Recursion */
    // left child
    node->left = buildRecursive ( fp, examples, examples_left,
                                  distribution_left_sparse, bestSplitInfo.entropyLeft,
                                  maxClassNo, depth+1, lambdaLeft );
    // right child
    node->right = buildRecursive ( fp, examples, examples_right,
                                   distribution_right_sparse, bestSplitInfo.entropyRight,
                                   maxClassNo, depth+1, lambdaRight );

    return node;
}

/** initial building method */
DecisionNode *DTBObliqueLS::build ( const FeaturePool & fp,
                                    const Examples & examples,
                                    int maxClassNo )
{
    int index = 0;

    FullVector distribution ( maxClassNo+1 );
    vector<int> all;

    all.reserve ( examples.size() );
    for ( Examples::const_iterator j = examples.begin();
          j != examples.end(); j++ )
    {
        int classno = j->first;
        distribution[classno] += j->second.weight;
        all.push_back ( index );
        index++;
    }

    // entropy of the root distribution, using the same
    // unnormalized-histogram form as in entropyLeftRight
    double entropy = 0.0;
    double sum = 0.0;
    for ( int i = 0 ; i < distribution.size() ; i++ )
    {
        double val = distribution[i];
        if ( val <= 0.0 ) continue;
        entropy -= val*log(val);
        sum += val;
    }
    entropy /= sum;
    entropy += log(sum);

    return buildRecursive ( fp, examples, all, distribution,
                            entropy, maxClassNo, 0, lambdaInit );
}
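
/* Usage sketch (hypothetical driver code, not part of this file; it assumes
 * the usual vislearning setup with a FeaturePool fp holding a
 * ConvolutionFeature and a filled Examples container):
 *
 *   NICE::Config conf ( "oblique_tree.conf" );
 *   OBJREC::DTBObliqueLS builder ( &conf, "DTBObliqueLS" );
 *   OBJREC::DecisionNode *root = builder.build ( fp, examples, maxClassNo );
 */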