SCInformationGain.cpp

/**
 * @file SCInformationGain.cpp
 * @brief The information gain splitting criterion
 * @author Sven Sickert
 * @date 01/12/2017
 */

#include <cmath>   // for log() used in computePurity()

#include "SCInformationGain.h"

using namespace OBJREC;

/* default constructor */
SCInformationGain::SCInformationGain()
    : SplittingCriterion ()
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = false;
}
/* simple constructor */
SCInformationGain::SCInformationGain( int _min_examples )
    : SplittingCriterion ( _min_examples )
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = false;
}
/* config constructor */
SCInformationGain::SCInformationGain( const NICE::Config *conf )
    : SplittingCriterion ( conf )
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = conf->gB ( "SplittingCriterion",
                                   "use_shannon_entropy",
                                   false );
}
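/* Example config snippet for the constructor above (illustrative only; the
   section and key names are the ones queried via conf->gB above, while the
   "key = value" file syntax is assumed from the usual NICE config style and
   may differ in your setup):

   [SplittingCriterion]
   use_shannon_entropy = true
*/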
/* copy constructor */
SCInformationGain::SCInformationGain( const SCInformationGain &obj )
{
  min_examples = obj.min_examples;
  min_entropy = obj.min_entropy;
  min_purity = obj.min_purity;
  entropy_left = obj.entropy_left;
  entropy_right = obj.entropy_right;
  entropy_cur = obj.entropy_cur;
  count_left = obj.count_left;
  count_right = obj.count_right;
  use_shannon_entropy = obj.use_shannon_entropy;
}
/* simple destructor */
SCInformationGain::~SCInformationGain()
{
}

/* cloning function */
SplittingCriterion* SCInformationGain::clone()
{
  SplittingCriterion* sc = new SCInformationGain( *this );
  return sc;
}
bool SCInformationGain::evaluateSplit(
    const FeatureValuesUnsorted & values,
    double threshold,
    double* distribution_left,
    double* distribution_right,
    int maxClassNo )
{
  this->count_left = 0;
  this->count_right = 0;
  int count_unweighted_left = 0;
  int count_unweighted_right = 0;

  // weighted class distribution of the parent node
  double *distribution = new double [maxClassNo+1];
  for ( int c = 0; c <= maxClassNo; c++ )
    distribution[c] = 0.0;

  // split the examples at the threshold and accumulate the
  // weighted class distributions of both children
  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
        i != values.end();
        i++ )
  {
    int classno = i->second;
    double value = i->first;
    double weight = i->fourth;

    distribution[classno] += weight;

    if ( value < threshold ) {
      distribution_left[classno] += weight;
      this->count_left += weight;
      count_unweighted_left++;
    }
    else
    {
      distribution_right[classno] += weight;
      this->count_right += weight;
      count_unweighted_right++;
    }
  }

  // reject splits that leave too few examples in one of the children
  if ( (count_unweighted_left < this->min_examples)
       || (count_unweighted_right < this->min_examples) )
  {
    delete [] distribution;
    return false;
  }

  // current entropy
  this->entropy_cur = computeEntropy( distribution, this->count_left + this->count_right, maxClassNo );
  // entropy for left child
  this->entropy_left = computeEntropy( distribution_left, this->count_left, maxClassNo );
  // entropy for right child
  this->entropy_right = computeEntropy( distribution_right, this->count_right, maxClassNo );

  delete [] distribution;

  return true;
}
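/* The purity returned below is the information gain of the split,

     IG = H(parent) - p_left * H(left) - (1 - p_left) * H(right),

   where H(.) are the entropies cached by evaluateSplit() and
   p_left = count_left / (count_left + count_right) is the weighted fraction
   of examples sent to the left child. With use_shannon_entropy enabled the
   gain is additionally rescaled by the parent and split entropies,

     IG_norm = 2 * IG / ( H(parent) + H_split ),
     H_split = -( p_left * log(p_left) + (1 - p_left) * log(1 - p_left) ),

   a normalization in the spirit of the gain ratio. */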
double SCInformationGain::computePurity() const
{
  double p_left = (this->count_left) / (this->count_left + this->count_right);
  double ig = this->entropy_cur - p_left*this->entropy_left - (1-p_left)*this->entropy_right;

  if ( use_shannon_entropy )
  {
    double entropy_split = -( p_left*log(p_left) + (1-p_left)*log(1-p_left) );
    ig = 2*ig / ( this->entropy_cur + entropy_split );
  }

  return ig;
}
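/* Usage sketch (illustrative, not part of the original file): how a tree
   node might score one candidate threshold with this criterion. The names
   "conf", "values", "threshold" and "maxClassNo" are assumed to be provided
   by the surrounding node-building code; the child distributions must be
   allocated and zeroed by the caller, since evaluateSplit() only adds to them.

   SCInformationGain criterion ( conf );

   double *dist_left  = new double [maxClassNo+1];
   double *dist_right = new double [maxClassNo+1];
   for ( int c = 0; c <= maxClassNo; c++ )
     dist_left[c] = dist_right[c] = 0.0;

   if ( criterion.evaluateSplit ( values, threshold, dist_left, dist_right, maxClassNo ) )
   {
     // larger value = better split at this threshold
     double gain = criterion.computePurity ();
   }

   delete [] dist_left;
   delete [] dist_right;
*/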