Ver código fonte

workaround for strange split behaviour near entropy=0

Sven Sickert 10 anos atrás
pai
commit
0b396330d2
1 arquivo alterado com 18 adições e 14 exclusões
  1. 18 14
      classifier/fpclassifier/randomforest/DTBOblique.cpp

+ 18 - 14
classifier/fpclassifier/randomforest/DTBOblique.cpp

@@ -25,8 +25,8 @@ DTBOblique::DTBOblique ( const Config *conf, string section )
 {
     split_steps = conf->gI(section, "split_steps", 20 );
     max_depth = conf->gI(section, "max_depth", 10 );
-    minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
-    minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
+    minimum_information_gain = conf->gD(section, "minimum_information_gain", 0.0000001 );
+    minimum_entropy = conf->gD(section, "minimum_entropy", 0.00001 );
     use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
     min_examples = conf->gI(section, "min_examples", 50);
     save_indices = conf->gB(section, "save_indices", false);
@@ -212,8 +212,8 @@ DecisionNode *DTBOblique::buildRecursive(
 {
 
 #ifdef DEBUGTREE
-    std::cerr << "Examples: " << (int)examples_selection.size()
-              << " (depth " << (int)depth << ")" << std::endl;
+    std::cerr << "DTBOblique: Examples: " << (int)examples_selection.size()
+              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
 #endif
 
     // initialize new node
@@ -221,9 +221,9 @@ DecisionNode *DTBOblique::buildRecursive(
     node->distribution = distribution;
 
     // stop criteria: max_depth, min_examples, min_entropy
-    if ( depth > max_depth
-         || (int)examples_selection.size() < min_examples
-         || ( (e <= minimum_entropy) && (e != 0.0) ) )  // FIXME
+    if (    ( e <= minimum_entropy )
+         || ( (int)examples_selection.size() < min_examples )
+         || ( depth > max_depth ) )
 
     {
 #ifdef DEBUGTREE
@@ -348,6 +348,10 @@ DecisionNode *DTBOblique::buildRecursive(
         }
     }
 
+    // suppress strange behaviour for values near zero (8.88178e-16)
+    if (best_entropy_left < 1.0e-10 ) best_entropy_left = 0.0;
+    if (best_entropy_right < 1.0e-10 ) best_entropy_right = 0.0;
+
     //cleaning up
     delete [] distribution_left;
     delete [] distribution_right;
@@ -390,8 +394,8 @@ DecisionNode *DTBOblique::buildRecursive(
 #ifdef DEBUGTREE
     node->f->store( std::cerr );
     std::cerr << std::endl;
-    std::cerr << "mutual information / shannon entropy " << best_ig << " entropy "
-              << e << " left entropy " <<  best_entropy_left << " right entropy "
+    std::cerr << "DTBOblique: Information Gain: " << best_ig
+              << ", Left Entropy: " <<  best_entropy_left << ", Right Entropy: "
               << best_entropy_right << std::endl;
 #endif
 
@@ -406,7 +410,7 @@ DecisionNode *DTBOblique::buildRecursive(
         if ( r != 0 )
             distribution_right_sparse[k] = r;
 #ifdef DEBUGTREE
-        std::cerr << "DTBOblique: split of class " << k << " ("
+        std::cerr << "DTBOblique: Split of Class " << k << " ("
                   << l << " <-> " << r << ") " << std::endl;
 #endif
     }
@@ -420,11 +424,11 @@ DecisionNode *DTBOblique::buildRecursive(
     double lambdaRight = lambdaCurrent *
             pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));
 
-#ifdef DEBUGTREE
-    std::cerr << "regularization parameter lambda left " << lambdaLeft
-              << " right " << lambdaRight << std::endl;
+//#ifdef DEBUGTREE
+//    std::cerr << "regularization parameter lambda left " << lambdaLeft
+//              << " right " << lambdaRight << std::endl;
 
-#endif
+//#endif
 
     /** Recursion */
     // left child