@@ -0,0 +1,325 @@
+/**
+ * @file DTBRandomOblique.cpp
+ * @brief Random oblique decision tree builder
+ * @author Sven Sickert
+ * @date 10/15/2014
+
+*/
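+
+/* Each tree node draws random_features random parameter vectors for a
+ * ConvolutionFeature and tests random_split_tests random thresholds per
+ * vector; the candidate split with the highest information gain is kept.
+ * Recursion stops at max_depth, min_examples, minimum_entropy or
+ * minimum_information_gain. */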
+#include <iostream>
+#include <time.h>
+
+#include "DTBRandomOblique.h"
+#include "vislearning/features/fpfeatures/ConvolutionFeature.h"
+
+using namespace OBJREC;
+
+#define DEBUGTREE
+#undef DETAILTREE
+
+using namespace std;
+using namespace NICE;
+
+DTBRandomOblique::DTBRandomOblique ( const Config *conf, string section )
+{
+  random_split_tests = conf->gI(section, "random_split_tests", 10 );
+  random_features = conf->gI(section, "random_features", 500 );
+  max_depth = conf->gI(section, "max_depth", 10 );
+  minimum_information_gain = conf->gD(section, "minimum_information_gain", 10e-7 );
+  minimum_entropy = conf->gD(section, "minimum_entropy", 10e-5 );
+  use_shannon_entropy = conf->gB(section, "use_shannon_entropy", false );
+  min_examples = conf->gI(section, "min_examples", 50);
+  save_indices = conf->gB(section, "save_indices", false);
+
+  if ( conf->gB(section, "start_random_generator", false ) )
+    srand(time(NULL));
+}
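+
+/* A typical config section for this builder (the values shown are the
+ * defaults read above; the section name is whatever is passed to the
+ * constructor):
+ *
+ *   [DTBRandomOblique]
+ *   random_split_tests = 10
+ *   random_features = 500
+ *   max_depth = 10
+ *   minimum_information_gain = 10e-7
+ *   minimum_entropy = 10e-5
+ *   use_shannon_entropy = false
+ *   min_examples = 50
+ *   save_indices = false
+ *   start_random_generator = false
+ */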
+
+DTBRandomOblique::~DTBRandomOblique()
+{
+
+}
+
+bool DTBRandomOblique::entropyLeftRight ( const FeatureValuesUnsorted & values,
+        double threshold,
+        double* stat_left,
+        double* stat_right,
+        double & entropy_left,
+        double & entropy_right,
+        double & count_left,
+        double & count_right,
+        int maxClassNo )
+{
+  count_left = 0;
+  count_right = 0;
+  for ( FeatureValuesUnsorted::const_iterator i = values.begin(); i != values.end(); i++ )
+  {
+    int classno = i->second;
+    double value = i->first;
+    if ( value < threshold ) {
+      stat_left[classno] += i->fourth;
+      count_left += i->fourth;
+    }
+    else
+    {
+      stat_right[classno] += i->fourth;
+      count_right += i->fourth;
+    }
+  }
+
+  if ( (count_left == 0) || (count_right == 0) )
+    return false;
+
+  entropy_left = 0.0;
+  for ( int j = 0 ; j <= maxClassNo ; j++ )
+    if ( stat_left[j] != 0 )
+      entropy_left -= stat_left[j] * log(stat_left[j]);
+  entropy_left /= count_left;
+  entropy_left += log(count_left);
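+  // note: together with the loop above this computes
+  // log(n) - (1/n)*sum_j c_j*log(c_j) = -sum_j (c_j/n)*log(c_j/n),
+  // i.e. the natural-log entropy of the left label distribution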
+
+  entropy_right = 0.0;
+  for ( int j = 0 ; j <= maxClassNo ; j++ )
+    if ( stat_right[j] != 0 )
+      entropy_right -= stat_right[j] * log(stat_right[j]);
+  entropy_right /= count_right;
+  entropy_right += log(count_right);
+
+  return true;
+}
+
+/** recursive building method */
+DecisionNode *DTBRandomOblique::buildRecursive(
+    const FeaturePool & fp,
+    const Examples & examples,
+    std::vector<int> & examples_selection,
+    FullVector & distribution,
+    double e,
+    int maxClassNo,
+    int depth)
+{
+#ifdef DEBUGTREE
+  std::cerr << "Examples: " << (int)examples_selection.size()
+            << " (depth " << (int)depth << ")" << std::endl;
+#endif
+
+  // initialize new node
+  DecisionNode *node = new DecisionNode ();
+  node->distribution = distribution;
+
+  // stop criteria: max_depth, min_examples, min_entropy
+  if ( depth > max_depth
+       || (int)examples_selection.size() < min_examples
+       || ( (e <= minimum_entropy) && (e != 0.0) ) ) // FIXME
+  {
+#ifdef DEBUGTREE
+    std::cerr << "DTBRandomOblique: Stopping criteria applied!" << std::endl;
+#endif
+    node->trainExamplesIndices = examples_selection;
+    return node;
+  }
+
+  Feature *best_feature = NULL;
+  double best_threshold = 0.0;
+  double best_ig = -1.0;
+  FeatureValuesUnsorted values;
+  double *best_distribution_left = new double [maxClassNo+1];
+  double *best_distribution_right = new double [maxClassNo+1];
+  double *distribution_left = new double [maxClassNo+1];
+  double *distribution_right = new double [maxClassNo+1];
+  double best_entropy_left = 0.0;
+  double best_entropy_right = 0.0;
+  // parameter vector of the best split so far; the pool's feature object
+  // itself is re-parameterized in every candidate iteration below
+  NICE::Vector best_param;
+
+  // random parameter vectors
+  for ( int k = 0 ; k < random_features ; k++ )
+  {
+    /** Create random parameter vector */
+#ifdef DETAILTREE
+    std::cerr << "Calculating random parameter vector #" << k << std::endl;
+#endif
+    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
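+    // (the cast assumes the pool's first feature is a ConvolutionFeature)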
+
+    Vector param ( f->getParameterLength(), 0.0 );
+    for ( NICE::Vector::iterator it = param.begin();
+          it != param.end(); ++it )
+      *it = ( double ) rand() / ( double ) RAND_MAX;
+
+    f->setParameterVector( param );
+
+    /** Compute feature values for current parameters */
+    values.clear();
+    f->calcFeatureValues( examples, examples_selection, values );
+
+    double minValue = (min_element ( values.begin(), values.end() ))->first;
+    double maxValue = (max_element ( values.begin(), values.end() ))->first;
+
+    // skip (nearly) constant features: no threshold can separate them
+    if ( maxValue - minValue < 1e-7 ) continue;
+
+    // randomly chosen thresholds
+    for ( int i = 0; i < random_split_tests; i++ )
+    {
+      // threshold drawn uniformly from [minValue, maxValue]
+      double threshold = rand() * (maxValue - minValue ) / RAND_MAX + minValue;
+#ifdef DETAILTREE
+      std::cerr << "Testing split #" << i << " for vector #" << k
+                << ": t=" << threshold << std::endl;
+#endif
+
+      // preparations (c indexes classes; k above indexes feature candidates)
+      double el, er;
+      for ( int c = 0 ; c <= maxClassNo ; c++ )
+      {
+        distribution_left[c] = 0;
+        distribution_right[c] = 0;
+      }
+
+      /** Test the current split */
+      // Does another split make sense?
+      double count_left;
+      double count_right;
+      if ( ! entropyLeftRight ( values, threshold,
+                                distribution_left, distribution_right,
+                                el, er, count_left, count_right, maxClassNo ) )
+        continue;
+
+      // information gain: parent entropy minus weighted child entropies
+      double pl = (count_left) / (count_left + count_right);
+      double ig = e - pl*el - (1-pl)*er;
+
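+      // the normalization below, 2*IG / (H_parent + H_split), is a
+      // symmetric-uncertainty / gain-ratio-style variant; it penalizes
+      // splits that send almost all examples to one side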
+      if ( use_shannon_entropy )
+      {
+        double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
+        ig = 2*ig / ( e + esplit );
+      }
+
+      if ( ig > best_ig )
+      {
+        best_ig = ig;
+        best_threshold = threshold;
+        best_feature = f;
+        // remember the parameters of this split; f itself will be
+        // re-parameterized by later candidates
+        best_param = param;
+        for ( int c = 0 ; c <= maxClassNo ; c++ )
+        {
+          best_distribution_left[c] = distribution_left[c];
+          best_distribution_right[c] = distribution_right[c];
+        }
+        best_entropy_left = el;
+        best_entropy_right = er;
+      }
+    }
+  }
+
+  // cleaning up
+  delete [] distribution_left;
+  delete [] distribution_right;
+
+  // stop criteria: minimum information gain
+  if ( best_ig < minimum_information_gain )
+  {
+#ifdef DEBUGTREE
+    std::cerr << "DTBRandomOblique: Minimum information gain reached!" << std::endl;
+#endif
+    delete [] best_distribution_left;
+    delete [] best_distribution_right;
+    node->trainExamplesIndices = examples_selection;
+    return node;
+  }
+
+  /** Save the best split to current node */
+  // restore the parameter vector of the winning candidate before cloning
+  // and before recomputing the feature values for the partition below
+  best_feature->setParameterVector( best_param );
+  node->f = best_feature->clone();
+  node->threshold = best_threshold;
+
+  /** Recalculate examples using best split */
+  vector<int> best_examples_left;
+  vector<int> best_examples_right;
+  values.clear();
+  best_feature->calcFeatureValues ( examples, examples_selection, values );
+
+  best_examples_left.reserve ( values.size() / 2 );
+  best_examples_right.reserve ( values.size() / 2 );
+  for ( FeatureValuesUnsorted::const_iterator i = values.begin();
+        i != values.end(); i++ )
+  {
+    double value = i->first;
+    if ( value < best_threshold )
+      best_examples_left.push_back ( i->third );
+    else
+      best_examples_right.push_back ( i->third );
+  }
+
+#ifdef DEBUGTREE
+  node->f->store( std::cerr );
+  std::cerr << std::endl;
+  std::cerr << "mutual information / shannon entropy " << best_ig << " entropy "
+            << e << " left entropy " << best_entropy_left << " right entropy "
+            << best_entropy_right << std::endl;
+#endif
+
+  FullVector best_distribution_left_sparse ( distribution.size() );
+  FullVector best_distribution_right_sparse ( distribution.size() );
+  for ( int c = 0 ; c <= maxClassNo ; c++ )
+  {
+    double l = best_distribution_left[c];
+    double r = best_distribution_right[c];
+    if ( l != 0 )
+      best_distribution_left_sparse[c] = l;
+    if ( r != 0 )
+      best_distribution_right_sparse[c] = r;
+#ifdef DEBUGTREE
+    if ( (l>0) || (r>0) )
+    {
+      std::cerr << "DTBRandomOblique: split of class " << c << " ("
+                << l << " <-> " << r << ") " << std::endl;
+    }
+#endif
+  }
+
+  delete [] best_distribution_left;
+  delete [] best_distribution_right;
+
+  /** Recursion */
+  // left child
+  node->left = buildRecursive ( fp, examples, best_examples_left,
+      best_distribution_left_sparse, best_entropy_left, maxClassNo, depth+1 );
+  // right child
+  node->right = buildRecursive ( fp, examples, best_examples_right,
+      best_distribution_right_sparse, best_entropy_right, maxClassNo, depth+1 );
+
+  return node;
+}
+
+/** initial building method */
+DecisionNode *DTBRandomOblique::build ( const FeaturePool & fp,
+        const Examples & examples,
+        int maxClassNo )
+{
+  int index = 0;
+
+  FullVector distribution ( maxClassNo+1 );
+  vector<int> all;
+
+  all.reserve ( examples.size() );
+  for ( Examples::const_iterator j = examples.begin();
+        j != examples.end(); j++ )
+  {
+    int classno = j->first;
+    distribution[classno] += j->second.weight;
+
+    all.push_back ( index );
+    index++;
+  }
+
+  double entropy = 0.0;
+  double sum = 0.0;
+  for ( int i = 0 ; i < distribution.size(); i++ )
+  {
+    double val = distribution[i];
+    if ( val <= 0.0 ) continue;
+    entropy -= val*log(val);
+    sum += val;
+  }
+  entropy /= sum;
+  entropy += log(sum);
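+  // same identity as in entropyLeftRight:
+  // entropy = log(sum) - (1/sum)*sum_i val_i*log(val_i)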
+
+  return buildRecursive ( fp, examples, all, distribution, entropy, maxClassNo, 0 );
+}
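+
+/* Usage sketch (hypothetical driver code, not part of this file; assumes
+ * conf, fp, examples and maxClassNo are prepared as for the framework's
+ * other tree builders, with a ConvolutionFeature seeded into fp):
+ *
+ *   DTBRandomOblique builder ( conf, "DTBRandomOblique" );
+ *   DecisionNode *root = builder.build ( fp, examples, maxClassNo );
+ */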