
/**
 * @file DTBObliqueLS.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014
 */
#include <iostream>
#include <cmath>    // log, pow
#include <cstdlib>  // rand, srand
#include <ctime>    // time

#include "DTBObliqueLS.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"
#include "core/vector/Algorithms.h"

//#define DEBUGTREE

using namespace OBJREC;
using namespace std;
using namespace NICE;
DTBObliqueLS::DTBObliqueLS ( const Config *conf, string section )
{
  saveIndices = conf->gB( section, "save_indices", false );
  useShannonEntropy = conf->gB( section, "use_shannon_entropy", false );
  useOneVsOne = conf->gB( section, "use_one_vs_one", false );
  useDynamicRegularization = conf->gB( section, "use_dynamic_regularization", true );

  splitSteps = conf->gI( section, "split_steps", 20 );
  maxDepth = conf->gI( section, "max_depth", 10 );
  minExamples = conf->gI( section, "min_examples", 50 );
  regularizationType = conf->gI( section, "regularization_type", 1 );

  minimumEntropy = conf->gD( section, "minimum_entropy", 10e-5 );
  minimumInformationGain = conf->gD( section, "minimum_information_gain", 10e-7 );
  lambdaInit = conf->gD( section, "lambda_init", 0.5 );

  if ( conf->gB( section, "start_random_generator", true ) )
    srand( time(NULL) );
}
DTBObliqueLS::~DTBObliqueLS()
{
}
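
/* Computes the class-weighted entropy of each side of a candidate split.
 * With weighted class counts c_j and total weight n = sum_j c_j, the
 * entropy of a side is
 *
 *   H = - sum_j (c_j/n) * log(c_j/n)
 *     = log(n) - (1/n) * sum_j c_j * log(c_j),
 *
 * which is the "divide by count, then add log(count)" form evaluated
 * below. Returns false if either side receives fewer than minExamples
 * (unweighted) examples, so such thresholds are skipped.
 */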
bool DTBObliqueLS::entropyLeftRight (
    const FeatureValuesUnsorted & values,
    double threshold,
    double* stat_left,
    double* stat_right,
    double & entropy_left,
    double & entropy_right,
    double & count_left,
    double & count_right,
    int maxClassNo )
{
  count_left = 0;
  count_right = 0;
  int count_unweighted_left = 0;
  int count_unweighted_right = 0;

  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
        i != values.end();
        i++ )
  {
    int classno = i->second;
    double value = i->first;
    if ( value < threshold )
    {
      stat_left[classno] += i->fourth;
      count_left += i->fourth;
      count_unweighted_left++;
    }
    else
    {
      stat_right[classno] += i->fourth;
      count_right += i->fourth;
      count_unweighted_right++;
    }
  }

  if ( (count_unweighted_left < minExamples)
    || (count_unweighted_right < minExamples) )
    return false;

  entropy_left = 0.0;
  for ( int j = 0 ; j <= maxClassNo ; j++ )
    if ( stat_left[j] != 0 )
      entropy_left -= stat_left[j] * log(stat_left[j]);
  entropy_left /= count_left;
  entropy_left += log(count_left);

  entropy_right = 0.0;
  for ( int j = 0 ; j <= maxClassNo ; j++ )
    if ( stat_right[j] != 0 )
      entropy_right -= stat_right[j] * log(stat_right[j]);
  entropy_right /= count_right;
  entropy_right += log(count_right);

  return true;
}
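
/* Turns the multi-class label vector into a two-class regression target.
 * In one-vs-one mode, examples of classes other than posClass/negClass
 * get label 0 and their rows in X are zeroed, so they drop out of the
 * least-squares fit; in one-vs-all mode every example stays and is
 * labeled +1 or -1. Returns false if either class has no examples.
 */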
bool DTBObliqueLS::adaptDataAndLabelForMultiClass (
    const int posClass,
    const int negClass,
    NICE::Matrix & X,
    NICE::Vector & y )
{
  bool posHasExamples = false;
  bool negHasExamples = false;
  int posCount = 0;
  int negCount = 0;

  // One-vs-one: Transforming into {-1,0,+1} problem
  if ( useOneVsOne )
    for ( int i = 0; i < (int)y.size(); i++ )
    {
      if ( y[i] == posClass )
      {
        y[i] = 1.0;
        posHasExamples = true;
        posCount++;
      }
      else if ( y[i] == negClass )
      {
        y[i] = -1.0;
        negHasExamples = true;
        negCount++;
      }
      else
      {
        y[i] = 0.0;
        X.setRow( i, NICE::Vector( X.cols(), 0.0 ) );
      }
    }
  // One-vs-all: Transforming into {-1,+1} problem
  else
    for ( int i = 0; i < (int)y.size(); i++ )
    {
      if ( y[i] == posClass )
      {
        y[i] = 1.0;
        posHasExamples = true;
        posCount++;
      }
      else
      {
        y[i] = -1.0;
        negHasExamples = true;
        negCount++;
      }
    }

  return ( posHasExamples && negHasExamples );
}
/** fill data matrix X, label vector y and weight vector w
    for the selected examples */
void DTBObliqueLS::getDataAndLabel(
    const FeaturePool &fp,
    const Examples &examples,
    const std::vector<int> &examples_selection,
    NICE::Matrix & X,
    NICE::Vector & y,
    NICE::Vector & w )
{
  ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
  int amountParams = f->getParameterLength();
  int amountExamples = examples_selection.size();

  X = NICE::Matrix( amountExamples, amountParams, 0.0 );
  y = NICE::Vector( amountExamples, 0.0 );
  w = NICE::Vector( amountExamples, 1.0 );

  int matIndex = 0;
  for ( vector<int>::const_iterator si = examples_selection.begin();
        si != examples_selection.end();
        si++ )
  {
    const pair<int, Example> & p = examples[*si];
    const Example & ex = p.second;

    NICE::Vector pixelRepr ( amountParams, 1.0 );
    f->getFeatureVector( &ex, pixelRepr );

    double label = p.first;
    // scale the feature row by the example weight (weighted least squares)
    pixelRepr *= ex.weight;

    w.set ( matIndex, ex.weight );
    y.set ( matIndex, label );
    X.setRow ( matIndex, pixelRepr );

    matIndex++;
  }
}
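
/* Builds the regularized Gram matrix X^T X + lambda*R. Depending on
 * regOption, R is the identity (ridge regression, case 0) or D^T D for a
 * first- or second-order finite-difference operator D that penalizes
 * non-smooth parameter vectors (cases 1 and 2); case 3 instead damps the
 * off-diagonal entries of X^T X element-wise, as in [Chen et al., 2012].
 */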
void DTBObliqueLS::regularizeDataMatrix(
    const NICE::Matrix &X,
    NICE::Matrix &XTXreg,
    const int regOption,
    const double lambda )
{
  XTXreg = X.transpose()*X;
  NICE::Matrix R;
  const int dim = X.cols();

  switch (regOption)
  {
    // identity matrix
    case 0:
      R.resize(dim,dim);
      R.setIdentity();
      R *= lambda;
      XTXreg += R;
      break;

    // difference operator, k=1
    case 1:
      R.resize(dim-1,dim);
      R.set( 0.0 );
      for ( int r = 0; r < dim-1; r++ )
      {
        R(r,r) = 1.0;
        R(r,r+1) = -1.0;
      }
      R = R.transpose()*R;
      R *= lambda;
      XTXreg += R;
      break;

    // difference operator, k=2
    case 2:
      R.resize(dim-2,dim);
      R.set( 0.0 );
      for ( int r = 0; r < dim-2; r++ )
      {
        R(r,r) = 1.0;
        R(r,r+1) = -2.0;
        R(r,r+2) = 1.0;
      }
      R = R.transpose()*R;
      R *= lambda;
      XTXreg += R;
      break;

    // as in [Chen et al., 2012]
    case 3:
    {
      NICE::Vector q ( dim, (1.0-lambda) );
      q[0] = 1.0;
      NICE::Matrix Q;
      Q.tensorProduct(q,q);
      R.resize(dim,dim);
      for ( int r = 0; r < dim; r++ )
      {
        for ( int c = 0; c < dim; c++ )
          R(r,c) = XTXreg(r,c) * Q(r,c);
        R(r,r) = q[r] * XTXreg(r,r);
      }
      XTXreg = R;
      break;
    }

    // no regularization
    default:
      std::cerr << "DTBObliqueLS::regularizeDataMatrix: No regularization applied!"
                << std::endl;
      break;
  }
}
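
/* Scans splitSteps equidistant thresholds between the smallest and the
 * largest projected feature value and keeps the one that maximizes the
 * information gain
 *
 *   IG = H(parent) - p_l * H(left) - (1-p_l) * H(right),
 *
 * where p_l is the weighted fraction of examples going left. With
 * use_shannon_entropy enabled, the gain is normalized by the split
 * entropy, IG' = 2*IG / ( H(parent) + H(split) ), a symmetrized variant
 * of the C4.5 gain ratio.
 */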
void DTBObliqueLS::findBestSplitThreshold (
    FeatureValuesUnsorted &values,
    SplitInfo &bestSplitInfo,
    const NICE::Vector &params,
    const double &e,
    const int &maxClassNo )
{
  double *distribution_left = new double [maxClassNo+1];
  double *distribution_right = new double [maxClassNo+1];
  double minValue = (min_element ( values.begin(), values.end() ))->first;
  double maxValue = (max_element ( values.begin(), values.end() ))->first;

  if ( maxValue - minValue < 1e-7 )
    std::cerr << "DTBObliqueLS: Difference between min and max of feature values too small!"
              << " [" << minValue << "," << maxValue << "]" << std::endl;

  // get best threshold using complete search
  for ( int i = 0; i < splitSteps; i++ )
  {
    double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
                       + minValue;
    // preparations
    double el, er;
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
      distribution_left[k] = 0.0;
      distribution_right[k] = 0.0;
    }

    /** Test the current split */
    // Does another split make sense?
    double count_left;
    double count_right;
    if ( ! entropyLeftRight ( values, threshold,
            distribution_left, distribution_right,
            el, er, count_left, count_right, maxClassNo ) )
      continue;

    // information gain and entropy
    double pl = (count_left) / (count_left + count_right);
    double ig = e - pl*el - (1-pl)*er;

    if ( useShannonEntropy )
    {
      double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
      ig = 2*ig / ( e + esplit );
    }

    if ( ig > bestSplitInfo.informationGain )
    {
      bestSplitInfo.informationGain = ig;
      bestSplitInfo.threshold = threshold;
      bestSplitInfo.params = params;
      for ( int k = 0 ; k <= maxClassNo ; k++ )
      {
        bestSplitInfo.distLeft[k] = distribution_left[k];
        bestSplitInfo.distRight[k] = distribution_right[k];
      }
      bestSplitInfo.entropyLeft = el;
      bestSplitInfo.entropyRight = er;
    }
  }

  // cleaning up
  delete [] distribution_left;
  delete [] distribution_right;
}
/** recursive building method */
DecisionNode *DTBObliqueLS::buildRecursive(
    const FeaturePool & fp,
    const Examples & examples,
    std::vector<int> & examples_selection,
    FullVector & distribution,
    double e,
    int maxClassNo,
    int depth,
    double lambdaCurrent )
{
  std::cerr << "DTBObliqueLS: Examples: " << (int)examples_selection.size()
            << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;

  // initialize new node
  DecisionNode *node = new DecisionNode ();
  node->distribution = distribution;

  // stopping criteria: maxDepth, minExamples, minimumEntropy
  if ( ( e <= minimumEntropy )
//    || ( (int)examples_selection.size() < minExamples )
    || ( depth > maxDepth ) )
  {
#ifdef DEBUGTREE
    std::cerr << "DTBObliqueLS: Stopping criteria applied!" << std::endl;
#endif
    node->trainExamplesIndices = examples_selection;
    return node;
  }

  // variables
  FeatureValuesUnsorted values;
  SplitInfo bestSplitInfo;
  bestSplitInfo.threshold = 0.0;
  bestSplitInfo.informationGain = -1.0;
  bestSplitInfo.distLeft = new double [maxClassNo+1];
  bestSplitInfo.distRight = new double [maxClassNo+1];
  bestSplitInfo.entropyLeft = 0.0;
  bestSplitInfo.entropyRight = 0.0;

  ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
  bestSplitInfo.params = f->getParameterVector();

  // Creating data matrix X and label vector y
  NICE::Matrix X;
  NICE::Vector y, params, weights;
  getDataAndLabel( fp, examples, examples_selection, X, y, weights );

  // Reducing the multi-class problem to a two-class problem by randomly
  // drawing a positive and a negative class
  bool hasExamples = false;
  NICE::Vector yCur;
  NICE::Matrix XCur;
  while ( !hasExamples )
  {
    int posClass, negClass;
    posClass = rand() % (maxClassNo+1);
    negClass = posClass;
    while ( posClass == negClass )
      negClass = rand() % (maxClassNo+1);

    yCur = y;
    XCur = X;
    hasExamples = adaptDataAndLabelForMultiClass(
        posClass, negClass, XCur, yCur );
  }
  yCur *= weights; // element-wise weighting of the regression targets
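
  /* Regularized least-squares solution for the oblique split parameters:
   *
   *   beta = ( X^T X + lambda*R )^{-1} X^T y,
   *
   * computed by inverting the Cholesky factorization of the regularized
   * Gram matrix instead of solving the normal equations directly.
   */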
  // Preparing system of linear equations
  NICE::Matrix XTXr, G, temp;
  regularizeDataMatrix( XCur, XTXr, regularizationType, lambdaCurrent );
  choleskyDecomp(XTXr, G);
  choleskyInvert(G, XTXr);
  temp = XTXr * XCur.transpose();

  // Solve system of linear equations in a least squares manner
  params.multiply(temp,yCur,false);

  // Updating parameter vector in convolutional feature
  f->setParameterVector( params );

  // Feature values
  values.clear();
  f->calcFeatureValues( examples, examples_selection, values);

  // complete search for threshold
  findBestSplitThreshold ( values, bestSplitInfo, params, e, maxClassNo );

//  f->setRandomParameterVector();
//  params = f->getParameterVector();
//  f->calcFeatureValues( examples, examples_selection, values);
//  findBestSplitThreshold ( values, bestSplitInfo, params, e, maxClassNo );

  // suppress strange behaviour for values near zero (8.88178e-16)
  if ( bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
  if ( bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;
  // stopping criterion: minimum information gain
  if ( bestSplitInfo.informationGain < minimumInformationGain )
  {
#ifdef DEBUGTREE
    std::cerr << "DTBObliqueLS: Minimum information gain reached!" << std::endl;
#endif
    delete [] bestSplitInfo.distLeft;
    delete [] bestSplitInfo.distRight;
    node->trainExamplesIndices = examples_selection;
    return node;
  }

  /** Save the best split to current node */
  f->setParameterVector( bestSplitInfo.params );
  values.clear();
  f->calcFeatureValues( examples, examples_selection, values);
  node->f = f->clone();
  node->threshold = bestSplitInfo.threshold;

  /** Split examples according to best split function */
  vector<int> examples_left;
  vector<int> examples_right;

  examples_left.reserve ( values.size() / 2 );
  examples_right.reserve ( values.size() / 2 );
  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
        i != values.end(); i++ )
  {
    if ( i->first < bestSplitInfo.threshold )
      examples_left.push_back ( i->third );
    else
      examples_right.push_back ( i->third );
  }

#ifdef DEBUGTREE
//  node->f->store( std::cerr );
//  std::cerr << std::endl;
  std::cerr << "DTBObliqueLS: Information Gain: " << bestSplitInfo.informationGain
            << ", Left Entropy: " << bestSplitInfo.entropyLeft << ", Right Entropy: "
            << bestSplitInfo.entropyRight << std::endl;
#endif

  FullVector distribution_left_sparse ( distribution.size() );
  FullVector distribution_right_sparse ( distribution.size() );
  for ( int k = 0 ; k <= maxClassNo ; k++ )
  {
    double l = bestSplitInfo.distLeft[k];
    double r = bestSplitInfo.distRight[k];
    if ( l != 0 )
      distribution_left_sparse[k] = l;
    if ( r != 0 )
      distribution_right_sparse[k] = r;
//#ifdef DEBUGTREE
//    std::cerr << "DTBObliqueLS: Split of Class " << k << " ("
//              << l << " <-> " << r << ") " << std::endl;
//#endif
  }
  delete [] bestSplitInfo.distLeft;
  delete [] bestSplitInfo.distRight;
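
  /* The children inherit a regularization strength scaled as
   *
   *   lambda_child = lambda_parent * ( N_parent / N_child )^( 2/d ),
   *
   * where d is the parameter length: the fewer examples reach a child,
   * the stronger its regularization.
   */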
  // update lambda by heuristic [Laptev/Buhmann, 2014]
  double lambdaLeft, lambdaRight;
  if ( useDynamicRegularization )
  {
    lambdaLeft = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_left.size()),
            (2./f->getParameterLength()));
    lambdaRight = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_right.size()),
            (2./f->getParameterLength()));
  }
  else
  {
    lambdaLeft = lambdaCurrent;
    lambdaRight = lambdaCurrent;
  }

  /** Recursion */
  // left child
  node->left = buildRecursive ( fp, examples, examples_left,
      distribution_left_sparse, bestSplitInfo.entropyLeft,
      maxClassNo, depth+1, lambdaLeft );
  // right child
  node->right = buildRecursive ( fp, examples, examples_right,
      distribution_right_sparse, bestSplitInfo.entropyRight,
      maxClassNo, depth+1, lambdaRight );

  return node;
}
/** initial building method */
DecisionNode *DTBObliqueLS::build ( const FeaturePool & fp,
    const Examples & examples,
    int maxClassNo )
{
  int index = 0;

  FullVector distribution ( maxClassNo+1 );
  vector<int> all;

  all.reserve ( examples.size() );
  for ( Examples::const_iterator j = examples.begin();
        j != examples.end(); j++ )
  {
    int classno = j->first;
    distribution[classno] += j->second.weight;
    all.push_back ( index );
    index++;
  }
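
  // Entropy of the full training set, in the same
  // log(n) - (1/n) * sum_j c_j * log(c_j) form used in entropyLeftRight()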
  double entropy = 0.0;
  double sum = 0.0;
  for ( int i = 0 ; i < distribution.size(); i++ )
  {
    double val = distribution[i];
    if ( val <= 0.0 ) continue;
    entropy -= val*log(val);
    sum += val;
  }
  entropy /= sum;
  entropy += log(sum);

  return buildRecursive ( fp, examples, all, distribution,
      entropy, maxClassNo, 0, lambdaInit );
}