DTBOblique.cpp

/**
 * @file DTBOblique.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014
 */

#include <iostream>
#include <time.h>

#include "DTBOblique.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"
#include "core/vector/Algorithms.h"

using namespace OBJREC;

#define DEBUGTREE

using namespace std;
using namespace NICE;
DTBOblique::DTBOblique ( const Config *conf, string section )
{
    split_steps = conf->gI( section, "split_steps", 20 );
    max_depth = conf->gI( section, "max_depth", 10 );
    minimum_information_gain = conf->gD( section, "minimum_information_gain", 0.0000001 );
    minimum_entropy = conf->gD( section, "minimum_entropy", 0.00001 );
    use_shannon_entropy = conf->gB( section, "use_shannon_entropy", false );
    min_examples = conf->gI( section, "min_examples", 50 );
    save_indices = conf->gB( section, "save_indices", false );
    lambdaInit = conf->gD( section, "lambda_init", 0.5 );
    // 0: identity, 1: first-order differences, 2: second-order differences,
    // 3: scaling as in [Chen et al., 2012] (see regularizeDataMatrix)
    regularizationType = conf->gI( section, "regularization_type", 1 );
}
DTBOblique::~DTBOblique()
{
}
bool DTBOblique::entropyLeftRight (
        const FeatureValuesUnsorted & values,
        double threshold,
        double* stat_left,
        double* stat_right,
        double & entropy_left,
        double & entropy_right,
        double & count_left,
        double & count_right,
        int maxClassNo )
{
    count_left = 0;
    count_right = 0;

    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        int classno = i->second;
        double value = i->first;
        if ( value < threshold ) {
            stat_left[classno] += i->fourth;
            count_left += i->fourth;
        }
        else
        {
            stat_right[classno] += i->fourth;
            count_right += i->fourth;
        }
    }

    if ( (count_left == 0) || (count_right == 0) )
        return false;

    entropy_left = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_left[j] != 0 )
            entropy_left -= stat_left[j] * log(stat_left[j]);
    entropy_left /= count_left;
    entropy_left += log(count_left);

    entropy_right = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_right[j] != 0 )
            entropy_right -= stat_right[j] * log(stat_right[j]);
    entropy_right /= count_right;
    entropy_right += log(count_right);

    return true;
}
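
/* Note on the entropy computation above: with accumulated class masses c_j
 * and total mass n = sum_j c_j, the loops evaluate
 *     H = log(n) - (1/n) * sum_j c_j * log(c_j)
 *       = - sum_j (c_j/n) * log(c_j/n),
 * i.e. the usual (natural-log) entropy of the class distribution, without
 * ever forming the normalized probabilities c_j/n explicitly. */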
/** refresh data matrix X and label vector y */
void DTBOblique::getDataAndLabel(
        const FeaturePool &fp,
        const Examples &examples,
        const std::vector<int> &examples_selection,
        NICE::Matrix & matX,
        NICE::Vector & vecY )
{
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    int amountParams = f->getParameterLength();
    int amountExamples = examples_selection.size();

    NICE::Matrix X( amountExamples, amountParams, 0.0 );
    NICE::Vector y( amountExamples, 0.0 );

    int matIndex = 0;
    for ( vector<int>::const_iterator si = examples_selection.begin();
          si != examples_selection.end();
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
        const Example & ce = p.second;
        NICE::Vector pixelRepr = f->getFeatureVector( &ce );

        double label = p.first * ce.weight;
        pixelRepr *= ce.weight;

        y.set( matIndex, label );
        X.setRow( matIndex, pixelRepr );
        matIndex++;
    }

    matX = X;
    vecY = y;
}
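
/* Note: the rows of X and the entries of y are both scaled by the example
 * weights here. In buildRecursive below, however, only the dimension of y
 * is used; the regression targets are rebuilt there as per-class {-1,+1}
 * vectors, so the example weighting effectively enters through X alone. */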
void DTBOblique::regularizeDataMatrix(
        const NICE::Matrix &X,
        NICE::Matrix &XTXreg,
        const int regOption,
        const double lambda )
{
    XTXreg = X.transpose()*X;
    NICE::Matrix R;
    const int dim = X.cols();

    switch (regOption)
    {
        // identity matrix
        case 0:
            R.resize(dim,dim);
            R.setIdentity();
            R *= lambda;
            XTXreg += R;
            break;

        // differences operator, k=1
        case 1:
            R.resize(dim-1,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-1; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // difference operator, k=2
        case 2:
            R.resize(dim-2,dim);
            R.set( 0.0 );
            for ( int r = 0; r < dim-2; r++ )
            {
                R(r,r)   =  1.0;
                R(r,r+1) = -2.0;
                R(r,r+2) =  1.0;
            }
            R = R.transpose()*R;
            R *= lambda;
            XTXreg += R;
            break;

        // as in [Chen et al., 2012]
        case 3:
        {
            NICE::Vector q ( dim, (1.0-lambda) );
            q[0] = 1;
            NICE::Matrix Q;
            Q.tensorProduct(q,q);
            R.multiply(XTXreg,Q);
            for ( int r = 0; r < dim; r++ )
                R(r,r) = q[r] * XTXreg(r,r);
            XTXreg = R;
            break;
        }

        // no regularization
        default:
            std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
                      << std::endl;
            break;
    }
}
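
/* regularizeDataMatrix builds the left-hand side of the regularized normal
 * equations
 *     (X^T X + lambda * R^T R) * beta = X^T y
 * (for option 3 the multiplicative scaling of [Chen et al., 2012] is applied
 * instead of an additive penalty). For k=1 the stacked difference operator
 * is, e.g. for dim = 4,
 *     [ 1 -1  0  0 ]
 *     [ 0  1 -1  0 ]
 *     [ 0  0  1 -1 ]
 * so lambda * R^T R penalizes jumps between neighboring entries of the
 * convolution filter parametrized by beta. */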
/** recursive building method */
DecisionNode *DTBOblique::buildRecursive(
        const FeaturePool & fp,
        const Examples & examples,
        std::vector<int> & examples_selection,
        FullVector & distribution,
        double e,
        int maxClassNo,
        int depth,
        double lambdaCurrent )
{
#ifdef DEBUGTREE
    std::cerr << "DTBOblique: Examples: " << (int)examples_selection.size()
              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
#endif

    // initialize new node
    DecisionNode *node = new DecisionNode ();
    node->distribution = distribution;

    // stop criteria: max_depth, min_examples, min_entropy
    if (    ( e <= minimum_entropy )
         || ( (int)examples_selection.size() < min_examples )
         || ( depth > max_depth ) )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Stopping criteria applied!" << std::endl;
#endif
        node->trainExamplesIndices = examples_selection;
        return node;
    }
    // variables
    double best_threshold = 0.0;
    double best_ig = -1.0;
    FeatureValuesUnsorted values;

    double *best_distribution_left = new double [maxClassNo+1];
    double *best_distribution_right = new double [maxClassNo+1];
    double *distribution_left = new double [maxClassNo+1];
    double *distribution_right = new double [maxClassNo+1];

    double best_entropy_left = 0.0;
    double best_entropy_right = 0.0;

    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    NICE::Vector best_beta = f->getParameterVector();

    // Creating data matrix X and label vector y
    NICE::Matrix X, XTXr, G, temp;
    NICE::Vector y, beta;
    getDataAndLabel( fp, examples, examples_selection, X, y );

    // Preparing system of linear equations
    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
    if (regularizationType == 3)
    {
        G = NICE::invert(XTXr);
        temp = G * X.transpose();
    }
    else
    {
        choleskyDecomp(XTXr, G);
        choleskyInvert(G, XTXr);
        temp = XTXr * X.transpose();
    }
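
    /* temp now holds the inverse of the regularized X^T X multiplied by X^T,
     * i.e. a regularized pseudo-inverse of X. It depends only on the data,
     * not on the labels, so it is computed once here and reused for every
     * one-vs-all target vector yCur below via beta = temp * yCur. */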
    for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
    {
        // One-vs-all: Transforming into {-1,+1} problem
        NICE::Vector yCur ( y.size(), -1.0 );
        int idx = 0;
        bool hasExamples = false;
        for ( vector<int>::const_iterator si = examples_selection.begin();
              si != examples_selection.end();
              si++, idx++ )
        {
            const pair<int, Example> & p = examples[*si];
            if (p.first == curClass)
            {
                yCur.set( idx, 1.0 );
                hasExamples = true;
            }
        }

        // is there a positive example for the current class in the current set?
        if (!hasExamples) continue;

        // Solve system of linear equations in a least squares manner
        beta.multiply(temp,yCur,false);

        // Update parameter vector of the convolutional feature
        f->setParameterVector( beta );

        // Feature values
        values.clear();
        f->calcFeatureValues( examples, examples_selection, values);

        double minValue = (min_element ( values.begin(), values.end() ))->first;
        double maxValue = (max_element ( values.begin(), values.end() ))->first;
        if ( maxValue - minValue < 1e-7 )
            std::cerr << "DTBOblique: Difference between min and max of feature values too small!" << std::endl;
        // get best threshold by complete search
        for ( int i = 0; i < split_steps; i++ )
        {
            double threshold = (i * (maxValue - minValue ) / (double)split_steps)
                               + minValue;
            // preparations
            double el, er;
            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
                distribution_left[k] = 0.0;
                distribution_right[k] = 0.0;
            }

            /** Test the current split */
            // Does another split make sense?
            double count_left;
            double count_right;
            if ( ! entropyLeftRight ( values, threshold,
                                      distribution_left, distribution_right,
                                      el, er, count_left, count_right, maxClassNo ) )
                continue;

            // information gain and entropy
            double pl = (count_left) / (count_left + count_right);
            double ig = e - pl*el - (1-pl)*er;

            if ( use_shannon_entropy )
            {
                double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
                ig = 2*ig / ( e + esplit );
            }

            if ( ig > best_ig )
            {
                best_ig = ig;
                best_threshold = threshold;
                best_beta = beta;
                for ( int k = 0 ; k <= maxClassNo ; k++ )
                {
                    best_distribution_left[k] = distribution_left[k];
                    best_distribution_right[k] = distribution_right[k];
                }
                best_entropy_left = el;
                best_entropy_right = er;
            }
        }
    }
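
    /* In the search above, when use_shannon_entropy is set the raw gain is
     * normalized as
     *     ig' = 2 * ig / ( e + esplit ),
     * where esplit is the entropy of the left/right partition sizes. This
     * penalizes very unbalanced splits, similar in spirit to the gain-ratio
     * criterion of C4.5-style trees. */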
    // suppress strange behaviour for values near zero (8.88178e-16)
    if (best_entropy_left < 1.0e-10 ) best_entropy_left = 0.0;
    if (best_entropy_right < 1.0e-10 ) best_entropy_right = 0.0;

    // cleaning up
    delete [] distribution_left;
    delete [] distribution_right;

    // stop criterion: minimum information gain
    if ( best_ig < minimum_information_gain )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
#endif
        delete [] best_distribution_left;
        delete [] best_distribution_right;
        node->trainExamplesIndices = examples_selection;
        return node;
    }
    /** Save the best split to current node */
    f->setParameterVector( best_beta );
    values.clear();
    f->calcFeatureValues( examples, examples_selection, values);
    node->f = f->clone();
    node->threshold = best_threshold;

    /** Split examples according to best split function */
    vector<int> examples_left;
    vector<int> examples_right;

    examples_left.reserve ( values.size() / 2 );
    examples_right.reserve ( values.size() / 2 );
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end(); i++ )
    {
        double value = i->first;
        if ( value < best_threshold )
            examples_left.push_back ( i->third );
        else
            examples_right.push_back ( i->third );
    }

#ifdef DEBUGTREE
    node->f->store( std::cerr );
    std::cerr << std::endl;
    std::cerr << "DTBOblique: Information Gain: " << best_ig
              << ", Left Entropy: " << best_entropy_left << ", Right Entropy: "
              << best_entropy_right << std::endl;
#endif

    FullVector distribution_left_sparse ( distribution.size() );
    FullVector distribution_right_sparse ( distribution.size() );
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
        double l = best_distribution_left[k];
        double r = best_distribution_right[k];
        if ( l != 0 )
            distribution_left_sparse[k] = l;
        if ( r != 0 )
            distribution_right_sparse[k] = r;
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Split of Class " << k << " ("
                  << l << " <-> " << r << ") " << std::endl;
#endif
    }
    delete [] best_distribution_left;
    delete [] best_distribution_right;

    // update lambda by heuristic [Laptev/Buhmann, 2014]
    double lambdaLeft = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_left.size()),(2./f->getParameterLength()));
    double lambdaRight = lambdaCurrent *
        pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));

//#ifdef DEBUGTREE
//    std::cerr << "regularization parameter lambda left " << lambdaLeft
//              << " right " << lambdaRight << std::endl;
//#endif
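
    /* Lambda update heuristic: with N parent examples, N_c examples in a
     * child and d = f->getParameterLength() filter parameters, each child
     * receives
     *     lambda_child = lambda_parent * (N / N_c)^(2/d),
     * i.e. the regularization strength grows as the training set shrinks
     * towards the leaves (cf. [Laptev/Buhmann, 2014]). */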
    /** Recursion */
    // left child
    node->left = buildRecursive ( fp, examples, examples_left,
                                  distribution_left_sparse, best_entropy_left,
                                  maxClassNo, depth+1, lambdaLeft );
    // right child
    node->right = buildRecursive ( fp, examples, examples_right,
                                   distribution_right_sparse, best_entropy_right,
                                   maxClassNo, depth+1, lambdaRight );

    return node;
}
/** initial building method */
DecisionNode *DTBOblique::build ( const FeaturePool & fp,
        const Examples & examples,
        int maxClassNo )
{
    int index = 0;

    FullVector distribution ( maxClassNo+1 );
    vector<int> all;

    all.reserve ( examples.size() );
    for ( Examples::const_iterator j = examples.begin();
          j != examples.end(); j++ )
    {
        int classno = j->first;
        distribution[classno] += j->second.weight;
        all.push_back ( index );
        index++;
    }

    double entropy = 0.0;
    double sum = 0.0;
    for ( int i = 0 ; i < distribution.size(); i++ )
    {
        double val = distribution[i];
        if ( val <= 0.0 ) continue;
        entropy -= val*log(val);
        sum += val;
    }
    entropy /= sum;
    entropy += log(sum);

    return buildRecursive ( fp, examples, all, distribution,
                            entropy, maxClassNo, 0, lambdaInit );
}
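
/* Minimal usage sketch (assumptions: a NICE::Config with a "DTBOblique"
 * section, and a FeaturePool whose first entry is a ConvolutionFeature, as
 * this builder expects; fp, trainingExamples and maxClassNo are placeholder
 * names prepared elsewhere, not defined in this file):
 *
 *     Config conf ( "classifier.conf" );
 *     DTBOblique builder ( &conf, "DTBOblique" );
 *     DecisionNode *root = builder.build ( fp, trainingExamples, maxClassNo );
 */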