|
@@ -0,0 +1,143 @@
|
|
|
|
+/**
|
|
|
|
+ * @file SCInformationGain.cpp
|
|
|
|
+ * @brief the information gain splitting criterion
|
|
|
|
+ * @author Sven Sickert
|
|
|
|
+ * @date 01/12/2017
|
|
|
|
+
|
|
|
|
+*/
|
|
|
|
#include "SCInformationGain.h"

#include <vector>
|
|
|
|
+
|
|
|
|
+using namespace OBJREC;
|
|
|
|
+
|
|
|
|
+/* default constructor */
|
|
|
|
+SCInformationGain::SCInformationGain()
|
|
|
|
+ : SplittingCriterion ()
|
|
|
|
+{
|
|
|
|
+ entropy_left = 0.0;
|
|
|
|
+ entropy_right = 0.0;
|
|
|
|
+ count_left = 0.0;
|
|
|
|
+ count_right = 0.0;
|
|
|
|
+ use_shannon_entropy = false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* simple constructor */
|
|
|
|
+SCInformationGain::SCInformationGain( int _min_examples )
|
|
|
|
+ : SplittingCriterion ( _min_examples )
|
|
|
|
+{
|
|
|
|
+ entropy_left = 0.0;
|
|
|
|
+ entropy_right = 0.0;
|
|
|
|
+ count_left = 0.0;
|
|
|
|
+ count_right = 0.0;
|
|
|
|
+ use_shannon_entropy = false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* config constructor */
|
|
|
|
+SCInformationGain::SCInformationGain( const NICE::Config *conf )
|
|
|
|
+ : SplittingCriterion ( conf )
|
|
|
|
+{
|
|
|
|
+ entropy_left = 0.0;
|
|
|
|
+ entropy_right = 0.0;
|
|
|
|
+ count_left = 0.0;
|
|
|
|
+ count_right = 0.0;
|
|
|
|
+ use_shannon_entropy = conf->gB ( "SplittingCriterion",
|
|
|
|
+ "use_shannon_entropy",
|
|
|
|
+ false );
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* copy constructor */
|
|
|
|
+SCInformationGain::SCInformationGain( const SCInformationGain &obj )
|
|
|
|
+{
|
|
|
|
+ min_examples = obj.min_examples;
|
|
|
|
+ min_entropy = obj.min_entropy;
|
|
|
|
+ min_purity = obj.min_purity;
|
|
|
|
+ entropy_left = obj.entropy_left;
|
|
|
|
+ entropy_right = obj.entropy_right;
|
|
|
|
+ entropy_cur = obj.entropy_cur;
|
|
|
|
+ count_left = obj.count_left;
|
|
|
|
+ count_right = obj.count_right;
|
|
|
|
+ use_shannon_entropy = obj.use_shannon_entropy;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* simple destructor */
|
|
|
|
+SCInformationGain::~SCInformationGain()
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* cloning function */
|
|
|
|
+SplittingCriterion* SCInformationGain::clone()
|
|
|
|
+{
|
|
|
|
+ SplittingCriterion* sc = new SCInformationGain( *this );
|
|
|
|
+ return sc;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+bool SCInformationGain::evaluateSplit(
|
|
|
|
+ const FeatureValuesUnsorted & values,
|
|
|
|
+ double threshold,
|
|
|
|
+ double* distribution_left,
|
|
|
|
+ double* distribution_right,
|
|
|
|
+ int maxClassNo )
|
|
|
|
+{
|
|
|
|
+ this->count_left = 0;
|
|
|
|
+ this->count_right = 0;
|
|
|
|
+ int count_unweighted_left = 0;
|
|
|
|
+ int count_unweighted_right = 0;
|
|
|
|
+
|
|
|
|
+ double *distribution = new double [maxClassNo+1];
|
|
|
|
+ for ( int c = 0; c <= maxClassNo; c++ )
|
|
|
|
+ distribution[c] = 0.0;
|
|
|
|
+
|
|
|
|
+ for ( FeatureValuesUnsorted::const_iterator i = values.begin();
|
|
|
|
+ i != values.end();
|
|
|
|
+ i++ )
|
|
|
|
+ {
|
|
|
|
+ int classno = i->second;
|
|
|
|
+ double value = i->first;
|
|
|
|
+ double weight = i->fourth;
|
|
|
|
+
|
|
|
|
+ distribution[classno] += weight;
|
|
|
|
+ if ( value < threshold ) {
|
|
|
|
+ distribution_left[classno] += weight;
|
|
|
|
+ this->count_left += weight;
|
|
|
|
+ count_unweighted_left++;
|
|
|
|
+ }
|
|
|
|
+ else
|
|
|
|
+ {
|
|
|
|
+ distribution_right[classno] += weight;
|
|
|
|
+ this->count_right += weight;
|
|
|
|
+ count_unweighted_right++;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if ( (count_unweighted_left < this->min_examples)
|
|
|
|
+ || (count_unweighted_right < this->min_examples) )
|
|
|
|
+ {
|
|
|
|
+ delete [] distribution;
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // current entropy
|
|
|
|
+ this->entropy_cur = computeEntropy( distribution, this->count_left+this->count_right, maxClassNo );
|
|
|
|
+
|
|
|
|
+ // entropy for left child
|
|
|
|
+ this->entropy_left = computeEntropy( distribution_left, this->count_left, maxClassNo );
|
|
|
|
+
|
|
|
|
+ // entropy for right child
|
|
|
|
+ this->entropy_right = computeEntropy( distribution_right, this->count_right, maxClassNo );
|
|
|
|
+
|
|
|
|
+ delete [] distribution;
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+double SCInformationGain::computePurity() const
|
|
|
|
+{
|
|
|
|
+ double p_left = (this->count_left) / (this->count_left + this->count_right);
|
|
|
|
+ double ig = this->entropy_cur - p_left*this->entropy_left - (1-p_left)*this->entropy_right;
|
|
|
|
+
|
|
|
|
+ if ( use_shannon_entropy )
|
|
|
|
+ {
|
|
|
|
+ double entropy_split = -( p_left*log(p_left) + (1-p_left)*log(1-p_left) );
|
|
|
|
+ ig = 2*ig / ( this->entropy_cur + entropy_split );
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return ig;
|
|
|
|
+}
|