DTBRandomOblique.cpp 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. /**
  2. * @file DTBRandomOblique.cpp
  3. * @brief random oblique decision tree
  4. * @author Sven Sickert
  5. * @date 10/15/2014
  6. */
  7. #include <iostream>
  8. #include <time.h>
  9. #include "DTBRandomOblique.h"
  10. #include "vislearning/features/fpfeatures/ConvolutionFeature.h"
  11. using namespace OBJREC;
  12. #define DEBUGTREE
  13. #undef DETAILTREE
  14. using namespace std;
  15. using namespace NICE;
  16. DTBRandomOblique::DTBRandomOblique ( const Config *conf, string section )
  17. {
  18. random_split_tests = conf->gI(section, "random_split_tests", 10 );
  19. random_features = conf->gI(section, "random_features", 500 );
  20. max_depth = conf->gI(section, "max_depth", 10 );
  21. minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
  22. minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
  23. use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
  24. min_examples = conf->gI(section, "min_examples", 50);
  25. save_indices = conf->gB(section, "save_indices", false);
  26. if ( conf->gB(section, "start_random_generator", false ) )
  27. srand(time(NULL));
  28. }
/** Destructor: nothing to release here — the tree nodes created by build()
 *  are owned by the caller. */
DTBRandomOblique::~DTBRandomOblique()
{
}
  32. bool DTBRandomOblique::entropyLeftRight ( const FeatureValuesUnsorted & values,
  33. double threshold,
  34. double* stat_left,
  35. double* stat_right,
  36. double & entropy_left,
  37. double & entropy_right,
  38. double & count_left,
  39. double & count_right,
  40. int maxClassNo )
  41. {
  42. count_left = 0;
  43. count_right = 0;
  44. for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
  45. {
  46. int classno = i->second;
  47. double value = i->first;
  48. if ( value < threshold ) {
  49. stat_left[classno] += i->fourth;
  50. count_left+=i->fourth;
  51. }
  52. else
  53. {
  54. stat_right[classno] += i->fourth;
  55. count_right+=i->fourth;
  56. }
  57. }
  58. if ( (count_left == 0) || (count_right == 0) )
  59. return false;
  60. entropy_left = 0.0;
  61. for ( int j = 0 ; j <= maxClassNo ; j++ )
  62. if ( stat_left[j] != 0 )
  63. entropy_left -= stat_left[j] * log(stat_left[j]);
  64. entropy_left /= count_left;
  65. entropy_left += log(count_left);
  66. entropy_right = 0.0;
  67. for ( int j = 0 ; j <= maxClassNo ; j++ )
  68. if ( stat_right[j] != 0 )
  69. entropy_right -= stat_right[j] * log(stat_right[j]);
  70. entropy_right /= count_right;
  71. entropy_right += log (count_right);
  72. return true;
  73. }
  74. /** recursive building method */
  75. DecisionNode *DTBRandomOblique::buildRecursive(
  76. const FeaturePool & fp,
  77. const Examples & examples,
  78. std::vector<int> & examples_selection,
  79. FullVector & distribution,
  80. double e,
  81. int maxClassNo,
  82. int depth)
  83. {
  84. #ifdef DEBUGTREE
  85. std::cerr << "Examples: " << (int)examples_selection.size()
  86. << " (depth " << (int)depth << ")" << std::endl;
  87. #endif
  88. // initialize new node
  89. DecisionNode *node = new DecisionNode ();
  90. node->distribution = distribution;
  91. // stop criteria: max_depth, min_examples, min_entropy
  92. if ( depth > max_depth
  93. || (int)examples_selection.size() < min_examples
  94. || ( (e <= minimum_entropy) && (e != 0.0) ) ) // FIXME
  95. {
  96. #ifdef DEBUGTREE
  97. std::cerr << "DTBRandomOblique: Stopping criteria applied!" << std::endl;
  98. #endif
  99. node->trainExamplesIndices = examples_selection;
  100. return node;
  101. }
  102. Feature *best_feature = NULL;
  103. double best_threshold = 0.0;
  104. double best_ig = -1.0;
  105. FeatureValuesUnsorted values;
  106. double *best_distribution_left = new double [maxClassNo+1];
  107. double *best_distribution_right = new double [maxClassNo+1];
  108. double *distribution_left = new double [maxClassNo+1];
  109. double *distribution_right = new double [maxClassNo+1];
  110. double best_entropy_left = 0.0;
  111. double best_entropy_right = 0.0;
  112. // random parameter vectors
  113. for ( int k = 0 ; k < random_features ; k++ )
  114. {
  115. /** Create random parameter vector */
  116. #ifdef DETAILTREE
  117. std::cerr << "Calculating random parameter vector #" << k << std::endl;
  118. #endif
  119. ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
  120. Vector param ( f->getParameterLength(), 0.0 );
  121. for ( NICE::Vector::iterator it = param.begin();
  122. it != param.end(); ++it )
  123. *it = ( double ) rand() / ( double ) RAND_MAX;
  124. f->setParameterVector( param );
  125. /** Compute feature values for current parameters */
  126. values.clear();
  127. f->calcFeatureValues( examples, examples_selection, values);
  128. double minValue = (min_element ( values.begin(), values.end() ))->first;
  129. double maxValue = (max_element ( values.begin(), values.end() ))->first;
  130. if ( maxValue - minValue < 1e-7 ) continue;
  131. // randomly chosen thresholds
  132. for ( int i = 0; i < random_split_tests; i++ )
  133. {
  134. double threshold = rand() * (maxValue - minValue ) / RAND_MAX + minValue;
  135. #ifdef DETAILTREE
  136. std::cerr << "Testing split #" << i << " for vector #" << k
  137. << ": t=" << threshold << std::endl;
  138. #endif
  139. // preparations
  140. double el, er;
  141. for ( int k = 0 ; k <= maxClassNo ; k++ )
  142. {
  143. distribution_left[k] = 0;
  144. distribution_right[k] = 0;
  145. }
  146. /** Test the current split */
  147. // Does another split make sense?
  148. double count_left;
  149. double count_right;
  150. if ( ! entropyLeftRight ( values, threshold,
  151. distribution_left, distribution_right,
  152. el, er, count_left, count_right, maxClassNo ) )
  153. continue;
  154. // information gain and entropy
  155. double pl = (count_left) / (count_left + count_right);
  156. double ig = e - pl*el - (1-pl)*er;
  157. if ( use_shannon_entropy )
  158. {
  159. double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
  160. ig = 2*ig / ( e + esplit );
  161. }
  162. if ( ig > best_ig )
  163. {
  164. best_ig = ig;
  165. best_threshold = threshold;
  166. best_feature = f;
  167. for ( int k = 0 ; k <= maxClassNo ; k++ )
  168. {
  169. best_distribution_left[k] = distribution_left[k];
  170. best_distribution_right[k] = distribution_right[k];
  171. }
  172. best_entropy_left = el;
  173. best_entropy_right = er;
  174. }
  175. }
  176. }
  177. //cleaning up
  178. delete [] distribution_left;
  179. delete [] distribution_right;
  180. // stop criteria: minimum information gain
  181. if ( best_ig < minimum_information_gain )
  182. {
  183. #ifdef DEBUGTREE
  184. std::cerr << "DTBRandomOblique: Minimum information gain reached!" << std::endl;
  185. #endif
  186. delete [] best_distribution_left;
  187. delete [] best_distribution_right;
  188. node->trainExamplesIndices = examples_selection;
  189. return node;
  190. }
  191. /** Save the best split to current node */
  192. node->f = best_feature->clone();
  193. node->threshold = best_threshold;
  194. /** Recalculate examples using best split */
  195. vector<int> best_examples_left;
  196. vector<int> best_examples_right;
  197. values.clear();
  198. best_feature->calcFeatureValues ( examples, examples_selection, values );
  199. best_examples_left.reserve ( values.size() / 2 );
  200. best_examples_right.reserve ( values.size() / 2 );
  201. for ( FeatureValuesUnsorted::const_iterator i = values.begin();
  202. i != values.end(); i++ )
  203. {
  204. double value = i->first;
  205. if ( value < best_threshold )
  206. best_examples_left.push_back ( i->third );
  207. else
  208. best_examples_right.push_back ( i->third );
  209. }
  210. #ifdef DEBUGTREE
  211. node->f->store( std::cerr );
  212. std::cerr << std::endl;
  213. std::cerr << "mutual information / shannon entropy " << best_ig << " entropy "
  214. << e << " left entropy " << best_entropy_left << " right entropy "
  215. << best_entropy_right << std::endl;
  216. #endif
  217. FullVector best_distribution_left_sparse ( distribution.size() );
  218. FullVector best_distribution_right_sparse ( distribution.size() );
  219. for ( int k = 0 ; k <= maxClassNo ; k++ )
  220. {
  221. double l = best_distribution_left[k];
  222. double r = best_distribution_right[k];
  223. if ( l != 0 )
  224. best_distribution_left_sparse[k] = l;
  225. if ( r != 0 )
  226. best_distribution_right_sparse[k] = r;
  227. #ifdef DEBUGTREE
  228. if ( (l>0)||(r>0) )
  229. {
  230. std::cerr << "DTBRandomOblique: split of class " << k << " ("
  231. << l << " <-> " << r << ") " << std::endl;
  232. }
  233. #endif
  234. }
  235. delete [] best_distribution_left;
  236. delete [] best_distribution_right;
  237. /** Recursion */
  238. // left child
  239. node->left = buildRecursive ( fp, examples, best_examples_left,
  240. best_distribution_left_sparse, best_entropy_left, maxClassNo, depth+1 );
  241. // right child
  242. node->right = buildRecursive ( fp, examples, best_examples_right,
  243. best_distribution_right_sparse, best_entropy_right, maxClassNo, depth+1 );
  244. return node;
  245. }
  246. /** initial building method */
  247. DecisionNode *DTBRandomOblique::build ( const FeaturePool & fp,
  248. const Examples & examples,
  249. int maxClassNo )
  250. {
  251. int index = 0;
  252. FullVector distribution ( maxClassNo+1 );
  253. vector<int> all;
  254. all.reserve ( examples.size() );
  255. for ( Examples::const_iterator j = examples.begin();
  256. j != examples.end(); j++ )
  257. {
  258. int classno = j->first;
  259. distribution[classno] += j->second.weight;
  260. all.push_back ( index );
  261. index++;
  262. }
  263. double entropy = 0.0;
  264. double sum = 0.0;
  265. for ( int i = 0 ; i < distribution.size(); i++ )
  266. {
  267. double val = distribution[i];
  268. if ( val <= 0.0 ) continue;
  269. entropy -= val*log(val);
  270. sum += val;
  271. }
  272. entropy /= sum;
  273. entropy += log(sum);
  274. return buildRecursive ( fp, examples, all, distribution, entropy, maxClassNo, 0 );
  275. }