/**
 * @file SCInformationGain.cpp
 * @brief The information gain splitting criterion
 * @author Sven Sickert
 * @date 01/12/2017
 */
#include "SCInformationGain.h"

#include <cmath>
#include <vector>

using namespace OBJREC;
- /* default constructor */
- SCInformationGain::SCInformationGain()
- : SplittingCriterion ()
- {
- entropy_left = 0.0;
- entropy_right = 0.0;
- count_left = 0.0;
- count_right = 0.0;
- use_shannon_entropy = false;
- }
- /* simple constructor */
- SCInformationGain::SCInformationGain( int _min_examples )
- : SplittingCriterion ( _min_examples )
- {
- entropy_left = 0.0;
- entropy_right = 0.0;
- count_left = 0.0;
- count_right = 0.0;
- use_shannon_entropy = false;
- }
- /* config constructor */
- SCInformationGain::SCInformationGain( const NICE::Config *conf )
- : SplittingCriterion ( conf )
- {
- entropy_left = 0.0;
- entropy_right = 0.0;
- count_left = 0.0;
- count_right = 0.0;
- use_shannon_entropy = conf->gB ( "SplittingCriterion",
- "use_shannon_entropy",
- false );
- }
- /* copy constructor */
- SCInformationGain::SCInformationGain( const SCInformationGain &obj )
- {
- min_examples = obj.min_examples;
- min_entropy = obj.min_entropy;
- min_purity = obj.min_purity;
- entropy_left = obj.entropy_left;
- entropy_right = obj.entropy_right;
- entropy_cur = obj.entropy_cur;
- count_left = obj.count_left;
- count_right = obj.count_right;
- use_shannon_entropy = obj.use_shannon_entropy;
- }
- /* simple destructor */
- SCInformationGain::~SCInformationGain()
- {
- }
- /* cloning function */
- SplittingCriterion* SCInformationGain::clone()
- {
- SplittingCriterion* sc = new SCInformationGain( *this );
- return sc;
- }
- bool SCInformationGain::evaluateSplit(
- const FeatureValuesUnsorted & values,
- double threshold,
- double* distribution_left,
- double* distribution_right,
- int maxClassNo )
- {
- this->count_left = 0;
- this->count_right = 0;
- int count_unweighted_left = 0;
- int count_unweighted_right = 0;
- double *distribution = new double [maxClassNo+1];
- for ( int c = 0; c <= maxClassNo; c++ )
- distribution[c] = 0.0;
- for ( FeatureValuesUnsorted::const_iterator i = values.begin();
- i != values.end();
- i++ )
- {
- int classno = i->second;
- double value = i->first;
- double weight = i->fourth;
-
- distribution[classno] += weight;
- if ( value < threshold ) {
- distribution_left[classno] += weight;
- this->count_left += weight;
- count_unweighted_left++;
- }
- else
- {
- distribution_right[classno] += weight;
- this->count_right += weight;
- count_unweighted_right++;
- }
- }
- if ( (count_unweighted_left < this->min_examples)
- || (count_unweighted_right < this->min_examples) )
- {
- delete [] distribution;
- return false;
- }
- // current entropy
- this->entropy_cur = computeEntropy( distribution, this->count_left+this->count_right, maxClassNo );
- // entropy for left child
- this->entropy_left = computeEntropy( distribution_left, this->count_left, maxClassNo );
- // entropy for right child
- this->entropy_right = computeEntropy( distribution_right, this->count_right, maxClassNo );
- delete [] distribution;
- return true;
- }
- double SCInformationGain::computePurity() const
- {
- double p_left = (this->count_left) / (this->count_left + this->count_right);
- double ig = this->entropy_cur - p_left*this->entropy_left - (1-p_left)*this->entropy_right;
- if ( use_shannon_entropy )
- {
- double entropy_split = -( p_left*log(p_left) + (1-p_left)*log(1-p_left) );
- ig = 2*ig / ( this->entropy_cur + entropy_split );
- }
-
- return ig;
- }
|