/**
 * @file DTBOblique.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014
 */

#include <iostream>
#include <time.h>

#include "DTBOblique.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"
#include "core/vector/Algorithms.h"

#define DEBUGTREE

using namespace OBJREC;
using namespace std;
using namespace NICE;
DTBOblique::DTBOblique ( const Config *conf, string section )
{
    saveIndices = conf->gB( section, "save_indices", false );
    useShannonEntropy = conf->gB( section, "use_shannon_entropy", false );
    useOneVsOne = conf->gB( section, "use_one_vs_one", false );

    splitSteps = conf->gI( section, "split_steps", 20 );
    maxDepth = conf->gI( section, "max_depth", 10 );
    minExamples = conf->gI( section, "min_examples", 50 );
    regularizationType = conf->gI( section, "regularization_type", 1 );

    minimumEntropy = conf->gD( section, "minimum_entropy", 10e-5 );
    minimumInformationGain = conf->gD( section, "minimum_information_gain", 10e-7 );
    lambdaInit = conf->gD( section, "lambda_init", 0.5 );
}
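/* Example of a configuration section read by the constructor above
 * (a sketch; the section name and the chosen values are assumptions,
 * the keys and defaults are those passed to gB/gI/gD):
 *
 *   [DTBOblique]
 *   use_shannon_entropy = false
 *   use_one_vs_one = false
 *   split_steps = 20
 *   max_depth = 10
 *   min_examples = 50
 *   regularization_type = 1   ; 0: identity, 1: differences (k=1),
 *                             ; 2: differences (k=2), 3: [Chen et al., 2012]
 *   lambda_init = 0.5
 */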
DTBOblique::~DTBOblique()
{
}
bool DTBOblique::entropyLeftRight (
        const FeatureValuesUnsorted & values,
        double threshold,
        double* stat_left,
        double* stat_right,
        double & entropy_left,
        double & entropy_right,
        double & count_left,
        double & count_right,
        int maxClassNo )
{
    count_left = 0;
    count_right = 0;

    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        int classno = i->second;
        double value = i->first;
        if ( value < threshold )
        {
            stat_left[classno] += i->fourth;
            count_left += i->fourth;
        }
        else
        {
            stat_right[classno] += i->fourth;
            count_right += i->fourth;
        }
    }

    if ( (count_left == 0) || (count_right == 0) )
        return false;

    entropy_left = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_left[j] != 0 )
            entropy_left -= stat_left[j] * log(stat_left[j]);
    entropy_left /= count_left;
    entropy_left += log(count_left);

    entropy_right = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_right[j] != 0 )
            entropy_right -= stat_right[j] * log(stat_right[j]);
    entropy_right /= count_right;
    entropy_right += log(count_right);

    return true;
}
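/* The accumulation in entropyLeftRight uses the identity (c_j: weighted
 * count of class j, n = sum_j c_j):
 *
 *   H = - sum_j (c_j/n) * log(c_j/n)
 *     = log(n) - (1/n) * sum_j c_j * log(c_j)
 *
 * which is why the code accumulates -sum_j c_j*log(c_j) first, then
 * divides by the count and adds its logarithm.
 */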
void DTBOblique::adaptDataAndLabelForMultiClass (
        const int posClass,
        const int negClass,
        NICE::Matrix & matX,
        NICE::Vector & vecY,
        bool & posHasExamples,
        bool & negHasExamples )
{
    int posCount = 0;
    int negCount = 0;
    int outCount = 0;

    // One-vs-one: Transforming into {-1,0,+1} problem
    if ( useOneVsOne )
        for ( int i = 0; i < vecY.size(); i++ )
        {
            if ( vecY[i] == posClass )
            {
                vecY[i] = 1.0;
                posHasExamples = true;
                posCount++;
            }
            else if ( vecY[i] == negClass )
            {
                vecY[i] = -1.0;
                negHasExamples = true;
                negCount++;
            }
            else
            {
                vecY[i] = 0.0;
                matX.setRow( i, NICE::Vector( matX.cols(), 0.0 ) );
                outCount++;
            }
        }
    // One-vs-all: Transforming into {-1,+1} problem
    else
        for ( int i = 0; i < vecY.size(); i++ )
        {
            if ( vecY[i] == posClass )
            {
                vecY[i] = 1.0;
                posHasExamples = true;
                posCount++;
            }
            else
            {
                vecY[i] = -1.0;
                negHasExamples = true;
                negCount++;
            }
        }
}
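/* Example of the adaptation above (the class labels are made up for
 * illustration): for y = (2, 0, 1, 2) with posClass = 1 and negClass = 2,
 *
 *   one-vs-one : y -> (-1,  0, +1, -1)  (rows of class 0 are zeroed in X)
 *   one-vs-all : y -> (-1, -1, +1, -1)  (negClass plays no special role)
 */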
/** refresh data matrix X and label vector y */
void DTBOblique::getDataAndLabel(
        const FeaturePool &fp,
        const Examples &examples,
        const std::vector<int> &examples_selection,
        NICE::Matrix & matX,
        NICE::Vector & vecY )
{
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    int amountParams = f->getParameterLength();
    int amountExamples = examples_selection.size();

    NICE::Matrix X( amountExamples, amountParams, 0.0 );
    NICE::Vector y( amountExamples, 0.0 );

    int matIndex = 0;
    for ( vector<int>::const_iterator si = examples_selection.begin();
          si != examples_selection.end();
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
        const Example & ce = p.second;

        NICE::Vector pixelRepr = f->getFeatureVector( &ce );
        double label = p.first; // * ce.weight;
        pixelRepr *= ce.weight;

        y.set( matIndex, label );
        X.setRow( matIndex, pixelRepr );
        matIndex++;
    }

    matX = X;
    vecY = y;
}
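/* Note on the loop above: each feature row is multiplied by ce.weight
 * while the label keeps the raw class index (the weight factor on the
 * label is commented out), so example weights enter the least squares
 * fit only through the data matrix X.
 */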
void DTBOblique::regularizeDataMatrix(
        const NICE::Matrix &X,
        NICE::Matrix &XTXreg,
        const int regOption,
        const double lambda )
{
    XTXreg = X.transpose()*X;
    NICE::Matrix R;
    const int dim = X.cols();

    switch (regOption)
    {
        // identity matrix
        case 0:
            R.resize(dim,dim);
            R.setIdentity();
            R *= lambda;
            XTXreg += R;
            break;

        // differences operator, k=1
        case 1:
            R.resize(dim-1,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-1; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // differences operator, k=2
        case 2:
            R.resize(dim-2,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-2; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -2.0;
                R(r,r+2) =  1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // as in [Chen et al., 2012]
        case 3:
        {
            NICE::Vector q ( dim, (1.0-lambda) );
            q[0] = 1;
            NICE::Matrix Q;
            Q.tensorProduct(q,q);
            R.multiply(XTXreg,Q);
            for ( int r = 0; r < dim; r++ )
                R(r,r) = q[r] * XTXreg(r,r);
            XTXreg = R;
            break;
        }

        // no regularization
        default:
            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
                      << std::endl;
            break;
    }
}
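/* Illustration of the k=1 case above (dim = 4 as an example):
 *
 *       (  1 -1  0  0 )
 *   R = (  0  1 -1  0 )
 *       (  0  0  1 -1 )
 *
 * so adding lambda * R^T R to X^T X corresponds to the penalty
 * lambda * sum_i (beta_i - beta_{i+1})^2, which favours smooth filter
 * parameters; the k=2 variant penalizes second differences analogously.
 */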
void DTBOblique::findBestSplitThreshold (
        FeatureValuesUnsorted &values,
        SplitInfo &bestSplitInfo,
        const NICE::Vector &beta,
        const double &e,
        const int &maxClassNo )
{
    double *distribution_left  = new double [maxClassNo+1];
    double *distribution_right = new double [maxClassNo+1];
    double minValue = (min_element ( values.begin(), values.end() ))->first;
    double maxValue = (max_element ( values.begin(), values.end() ))->first;

    if ( maxValue - minValue < 1e-7 )
        std::cerr << "DTBOblique: Difference between min and max of feature values too small!"
                  << " [" << minValue << "," << maxValue << "]" << std::endl;

    // get the best threshold using complete search
    for ( int i = 0; i < splitSteps; i++ )
    {
        double threshold = (i * (maxValue - minValue) / (double)splitSteps)
                            + minValue;

        // preparations
        double el, er;
        for ( int k = 0 ; k <= maxClassNo ; k++ )
        {
            distribution_left[k]  = 0.0;
            distribution_right[k] = 0.0;
        }

        /** Test the current split */
        // Does another split make sense?
        double count_left;
        double count_right;
        if ( ! entropyLeftRight ( values, threshold,
                                  distribution_left, distribution_right,
                                  el, er, count_left, count_right, maxClassNo ) )
            continue;

        // information gain and entropy
        double pl = (count_left) / (count_left + count_right);
        double ig = e - pl*el - (1-pl)*er;

        if ( useShannonEntropy )
        {
            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
            ig = 2*ig / ( e + esplit );
        }

        if ( ig > bestSplitInfo.informationGain )
        {
            bestSplitInfo.informationGain = ig;
            bestSplitInfo.threshold = threshold;
            bestSplitInfo.params = beta;

            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
                bestSplitInfo.distLeft[k]  = distribution_left[k];
                bestSplitInfo.distRight[k] = distribution_right[k];
            }
            bestSplitInfo.entropyLeft = el;
            bestSplitInfo.entropyRight = er;
        }
    }

    // cleaning up
    delete [] distribution_left;
    delete [] distribution_right;
}
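/* findBestSplitThreshold above scores each candidate threshold by the
 * information gain
 *
 *   IG = H(S) - p_l * H(S_l) - (1 - p_l) * H(S_r),
 *   p_l = count_left / (count_left + count_right),
 *
 * and, when use_shannon_entropy is set, normalizes it to
 * 2*IG / ( H(S) + H_split ) with
 * H_split = -( p_l*log(p_l) + (1-p_l)*log(1-p_l) ).
 */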
/** recursive building method */
DecisionNode *DTBOblique::buildRecursive(
        const FeaturePool & fp,
        const Examples & examples,
        std::vector<int> & examples_selection,
        FullVector & distribution,
        double e,
        int maxClassNo,
        int depth,
        double lambdaCurrent )
{
#ifdef DEBUGTREE
    std::cerr << "DTBOblique: Examples: " << (int)examples_selection.size()
              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
#endif

    // initialize new node
    DecisionNode *node = new DecisionNode ();
    node->distribution = distribution;

    // stop criteria: maxDepth, minExamples, minimumEntropy
    if ( ( e <= minimumEntropy )
         || ( (int)examples_selection.size() < minExamples )
         || ( depth > maxDepth ) )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Stopping criteria applied!" << std::endl;
#endif
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    // variables
    FeatureValuesUnsorted values;
    SplitInfo bestSplitInfo;
    bestSplitInfo.threshold = 0.0;
    bestSplitInfo.informationGain = -1.0;
    bestSplitInfo.distLeft  = new double [maxClassNo+1];
    bestSplitInfo.distRight = new double [maxClassNo+1];
    bestSplitInfo.entropyLeft = 0.0;
    bestSplitInfo.entropyRight = 0.0;

    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    bestSplitInfo.params = f->getParameterVector();

    // Creating data matrix X and label vector y
    NICE::Matrix X;
    NICE::Vector y, beta;
    getDataAndLabel( fp, examples, examples_selection, X, y );
    // Transforming into multi-class problem
    for ( int posClass = 0; posClass <= maxClassNo; posClass++ )
    {
        bool gotInnerIteration = false;
        for ( int negClass = 0; negClass <= maxClassNo; negClass++ )
        {
            if ( posClass == negClass ) continue;

            NICE::Vector yCur = y;
            NICE::Matrix XCur = X;

            bool posHasExamples = false;
            bool negHasExamples = false;
            adaptDataAndLabelForMultiClass(
                posClass, negClass, XCur, yCur, posHasExamples, negHasExamples );

            // are there examples for positive and negative class?
            if ( !posHasExamples || !negHasExamples ) continue;

            // one-vs-all setting: only one iteration for inner loop
            if ( !useOneVsOne && gotInnerIteration ) continue;

            // Preparing system of linear equations
            NICE::Matrix XTXr, G, temp;
            regularizeDataMatrix( XCur, XTXr, regularizationType, lambdaCurrent );
            choleskyDecomp(XTXr, G);
            choleskyInvert(G, XTXr);
            temp = XTXr * XCur.transpose();

            // Solve system of linear equations in a least squares manner
            beta.multiply(temp,yCur,false);

            // Updating parameter vector in convolutional feature
            f->setParameterVector( beta );

            // Feature values
            values.clear();
            f->calcFeatureValues( examples, examples_selection, values );

            // complete search for threshold
            findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );
            gotInnerIteration = true;
        }
    }
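    /* Each inner iteration above fits the oblique split direction by
     * regularized least squares: with Omega the (lambda-scaled)
     * regularizer assembled in regularizeDataMatrix, the normal equations
     *
     *   (X^T X + Omega) beta = X^T y
     *
     * are solved by Cholesky-factorizing the left-hand side
     * (choleskyDecomp), inverting it (choleskyInvert), and multiplying
     * the result onto X^T y.
     */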
    // suppress strange behaviour for values near zero (8.88178e-16)
    if ( bestSplitInfo.entropyLeft  < 1.0e-10 ) bestSplitInfo.entropyLeft  = 0.0;
    if ( bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;

    // stop criterion: minimum information gain
    if ( bestSplitInfo.informationGain < minimumInformationGain )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
#endif
        delete [] bestSplitInfo.distLeft;
        delete [] bestSplitInfo.distRight;
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    /** Save the best split to current node */
    f->setParameterVector( bestSplitInfo.params );
    values.clear();
    f->calcFeatureValues( examples, examples_selection, values );
    node->f = f->clone();
    node->threshold = bestSplitInfo.threshold;

    /** Split examples according to best split function */
    vector<int> examples_left;
    vector<int> examples_right;

    examples_left.reserve ( values.size() / 2 );
    examples_right.reserve ( values.size() / 2 );
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end(); i++ )
    {
        double value = i->first;
        if ( value < bestSplitInfo.threshold )
            examples_left.push_back ( i->third );
        else
            examples_right.push_back ( i->third );
    }
#ifdef DEBUGTREE
    node->f->store( std::cerr );
    std::cerr << std::endl;
    std::cerr << "DTBOblique: Information Gain: " << bestSplitInfo.informationGain
              << ", Left Entropy: " << bestSplitInfo.entropyLeft << ", Right Entropy: "
              << bestSplitInfo.entropyRight << std::endl;
#endif

    FullVector distribution_left_sparse ( distribution.size() );
    FullVector distribution_right_sparse ( distribution.size() );
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
        double l = bestSplitInfo.distLeft[k];
        double r = bestSplitInfo.distRight[k];
        if ( l != 0 )
            distribution_left_sparse[k] = l;
        if ( r != 0 )
            distribution_right_sparse[k] = r;
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Split of Class " << k << " ("
                  << l << " <-> " << r << ") " << std::endl;
#endif
    }

    // the raw distributions were copied into the sparse vectors above
    delete [] bestSplitInfo.distLeft;
    delete [] bestSplitInfo.distRight;

    // update lambda by heuristic [Laptev/Buhmann, 2014]
    double lambdaLeft = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_left.size()),(2./f->getParameterLength()));
    double lambdaRight = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));

//#ifdef DEBUGTREE
//    std::cerr << "regularization parameter lambda left " << lambdaLeft
//              << " right " << lambdaRight << std::endl;
//#endif
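    /* A worked instance of the lambda update above (the numbers are made
     * up): with 200 examples in the parent node, 50 of them going left,
     * and a filter with getParameterLength() == 16,
     *
     *   lambdaLeft = lambdaCurrent * (200/50)^(2/16)
     *              = lambdaCurrent * 4^0.125 ~= 1.19 * lambdaCurrent
     *
     * i.e. regularization grows as child nodes see fewer examples.
     */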
    /** Recursion */
    // left child
    node->left = buildRecursive ( fp, examples, examples_left,
                                  distribution_left_sparse, bestSplitInfo.entropyLeft,
                                  maxClassNo, depth+1, lambdaLeft );
    // right child
    node->right = buildRecursive ( fp, examples, examples_right,
                                   distribution_right_sparse, bestSplitInfo.entropyRight,
                                   maxClassNo, depth+1, lambdaRight );

    return node;
}
/** initial building method */
DecisionNode *DTBOblique::build ( const FeaturePool & fp,
                                  const Examples & examples,
                                  int maxClassNo )
{
    int index = 0;

    FullVector distribution ( maxClassNo+1 );
    vector<int> all;

    all.reserve ( examples.size() );
    for ( Examples::const_iterator j = examples.begin();
          j != examples.end(); j++ )
    {
        int classno = j->first;
        distribution[classno] += j->second.weight;
        all.push_back ( index );
        index++;
    }

    double entropy = 0.0;
    double sum = 0.0;
    for ( int i = 0 ; i < distribution.size(); i++ )
    {
        double val = distribution[i];
        if ( val <= 0.0 ) continue;
        entropy -= val*log(val);
        sum += val;
    }
    entropy /= sum;
    entropy += log(sum);

    return buildRecursive ( fp, examples, all, distribution,
                            entropy, maxClassNo, 0, lambdaInit );
}
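/* Usage sketch (hypothetical caller; fp must hold a ConvolutionFeature
 * and maxClassNo the largest class index, both prepared elsewhere; the
 * config file name is an assumption):
 *
 *   Config conf ( "train.conf" );
 *   DTBOblique builder ( &conf, "DTBOblique" );
 *   DecisionNode *root = builder.build ( fp, examples, maxClassNo );
 */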