/**
* @file SCInformationGain.cpp
* @brief the information gain splitting criterion
* @author Sven Sickert
* @date 01/12/2017
*/
#include "SCInformationGain.h"

#include <cmath>
#include <vector>

using namespace OBJREC;

/**
 * @brief default constructor
 *
 * Resets all cached split statistics (entropies and weighted counts) to zero
 * and disables the Shannon-entropy normalization.
 */
SCInformationGain::SCInformationGain()
    : SplittingCriterion ()
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  // also reset the parent entropy: the copy constructor reads it
  entropy_cur = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = false;
}

/**
 * @brief simple constructor
 * @param _min_examples forwarded to the SplittingCriterion base class
 *
 * Resets all cached split statistics to zero and disables the
 * Shannon-entropy normalization.
 */
SCInformationGain::SCInformationGain( int _min_examples )
    : SplittingCriterion ( _min_examples )
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  // also reset the parent entropy: the copy constructor reads it
  entropy_cur = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = false;
}

/**
 * @brief config constructor
 * @param conf configuration, forwarded to the SplittingCriterion base class
 *
 * Reads the boolean option "use_shannon_entropy" from section
 * "SplittingCriterion" (default: false). All cached split statistics are
 * reset to zero.
 */
SCInformationGain::SCInformationGain( const NICE::Config *conf )
    : SplittingCriterion ( conf )
{
  entropy_left = 0.0;
  entropy_right = 0.0;
  // also reset the parent entropy: the copy constructor reads it
  entropy_cur = 0.0;
  count_left = 0.0;
  count_right = 0.0;
  use_shannon_entropy = conf->gB ( "SplittingCriterion", "use_shannon_entropy", false );
}

/**
 * @brief copy constructor
 * @param obj criterion to copy settings and cached split statistics from
 *
 * The base class part is default-constructed first; afterwards the
 * thresholds, the cached statistics and the normalization flag are copied
 * member by member.
 */
SCInformationGain::SCInformationGain( const SCInformationGain &obj )
{
  min_examples = obj.min_examples;
  min_entropy = obj.min_entropy;
  min_purity = obj.min_purity;
  entropy_left = obj.entropy_left;
  entropy_right = obj.entropy_right;
  entropy_cur = obj.entropy_cur;
  count_left = obj.count_left;
  count_right = obj.count_right;
  use_shannon_entropy = obj.use_shannon_entropy;
}

/** @brief simple destructor (nothing to release) */
SCInformationGain::~SCInformationGain()
{
}

/**
 * @brief cloning function
 * @return pointer to a copy of this object created with new; it is not
 *         released anywhere in this file
 */
SplittingCriterion* SCInformationGain::clone()
{
  SplittingCriterion* sc = new SCInformationGain( *this );
  return sc;
}

/**
 * @brief evaluate a threshold split and cache its statistics
 *
 * Every example with a feature value strictly below the threshold is assigned
 * to the left child, all others to the right child. The example weights are
 * accumulated per class and per child; afterwards the entropies of the parent
 * and of both children are computed and stored for computePurity().
 *
 * @param values feature values with class number (second) and weight (fourth)
 * @param threshold split threshold applied to the feature value (first)
 * @param distribution_left per-class weights of the left child; the weights
 *        are added to the existing content, the array is not reset here
 * @param distribution_right per-class weights of the right child; the weights
 *        are added to the existing content, the array is not reset here
 * @param maxClassNo highest class number; class numbers are used as array
 *        indices without a range check, so they presumably have to lie in
 *        [0, maxClassNo] (to be confirmed against the callers)
 * @return false if one child receives fewer than min_examples examples
 *         (unweighted); in that case the cached entropies are left untouched.
 *         true otherwise.
 */
bool SCInformationGain::evaluateSplit(
    const FeatureValuesUnsorted & values,
    double threshold,
    double* distribution_left,
    double* distribution_right,
    int maxClassNo )
{
  this->count_left = 0;
  this->count_right = 0;

  int count_unweighted_left = 0;
  int count_unweighted_right = 0;

  // class distribution of the unsplit node; owned by a vector so that it is
  // released on every exit path (including exceptions)
  std::vector<double> distribution (
      static_cast<std::vector<double>::size_type>( maxClassNo + 1 ), 0.0 );

  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
        i != values.end();
        ++i )
  {
    int classno = i->second;
    double value = i->first;
    double weight = i->fourth;

    distribution[classno] += weight;

    if ( value < threshold )
    {
      distribution_left[classno] += weight;
      this->count_left += weight;
      count_unweighted_left++;
    }
    else
    {
      distribution_right[classno] += weight;
      this->count_right += weight;
      count_unweighted_right++;
    }
  }

  // the minimum size of a child is checked on the number of examples,
  // not on their accumulated weight
  if ( (count_unweighted_left < this->min_examples)
       || (count_unweighted_right < this->min_examples) )
    return false;

  // taking the address of element 0 is only valid for a non-empty vector
  double *distribution_all =
      distribution.empty() ? static_cast<double*>( 0 ) : &distribution[0];

  // current entropy
  this->entropy_cur = computeEntropy( distribution_all,
                                      this->count_left + this->count_right,
                                      maxClassNo );

  // entropy for left child
  this->entropy_left = computeEntropy( distribution_left,
                                       this->count_left,
                                       maxClassNo );

  // entropy for right child
  this->entropy_right = computeEntropy( distribution_right,
                                        this->count_right,
                                        maxClassNo );

  return true;
}

/**
 * @brief information gain of the split cached by the last evaluateSplit() call
 *
 * Computes entropy_cur - p_left * entropy_left - p_right * entropy_right,
 * where p_left / p_right are the weighted fractions of examples in the
 * children. If use_shannon_entropy is set, the gain is normalized to
 * 2 * gain / ( entropy_cur + entropy_split ) with entropy_split being the
 * entropy of the split proportions.
 *
 * Degenerate configurations yield 0.0 instead of NaN or infinity:
 * no weight at all, or a normalization term of zero. A child without weight
 * contributes nothing, and 0 * log(0) is treated as 0.
 *
 * @return (optionally normalized) information gain
 */
double SCInformationGain::computePurity() const
{
  const double count_total = this->count_left + this->count_right;

  // nothing was split, hence nothing can be gained (avoids 0/0)
  if ( count_total <= 0.0 )
    return 0.0;

  const double p_left = this->count_left / count_total;
  const double p_right = 1 - p_left;

  // skip children without weight: their entropy is meaningless and must not
  // leak a possible NaN into the result
  double ig = this->entropy_cur;
  if ( p_left > 0.0 )
    ig -= p_left * this->entropy_left;
  if ( p_right > 0.0 )
    ig -= p_right * this->entropy_right;

  if ( use_shannon_entropy )
  {
    // entropy of the split proportions, using the convention 0 * log(0) = 0
    const double term_left = ( p_left > 0.0 ) ? p_left * std::log( p_left ) : 0.0;
    const double term_right = ( p_right > 0.0 ) ? p_right * std::log( p_right ) : 0.0;
    const double entropy_split = -( term_left + term_right );

    const double normalizer = this->entropy_cur + entropy_split;

    // pure node combined with a one-sided split: no gain, avoid dividing by zero
    if ( normalizer <= 0.0 )
      return 0.0;

    ig = 2 * ig / normalizer;
  }

  return ig;
}