Explorar o código

Merge branch 'master' of /home/dbv/git/nice/vislearning

Bjoern Froehlich hai 13 anos
pai
achega
66ebd11308
Modificáronse 3 ficheiros con 331 adicións e 3 borrados
  1. 9 3
      baselib/Globals.cpp
  2. 229 0
      cbaselib/MutualInformation.cpp
  3. 93 0
      cbaselib/MutualInformation.h

+ 9 - 3
baselib/Globals.cpp

@@ -3,6 +3,7 @@
 #include "core/basics/StringTools.h"
 #include "vislearning/baselib/Globals.h"
 #include "core/basics/ossettings.h"
+#include "core/basics/Exception.h"
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -28,9 +29,14 @@ std::string Globals::getCurrentImgFN ( )
 
 std::string Globals::getCacheFilename ( const std::string & root, int cache_mode )
 {
-    std::string filename = Globals::getCurrentImgFN();
-    vector<string> mylistdir;
-    StringTools::split ( filename, FILESEP, mylistdir );
+  std::string filename = Globals::getCurrentImgFN();
+
+  if ( filename.length() == 0 ) {
+    fthrow(Exception, "Globals::getCacheFilename: current image filename was not set, please use Globals::setCurrentImgFN()");
+  }
+
+  vector<string> mylistdir;
+  StringTools::split ( filename, FILESEP, mylistdir );
 
 	int dirpart = 2;
 	if ( cache_mode == SORT_CATEGORIES_SECONDPART )

+ 229 - 0
cbaselib/MutualInformation.cpp

@@ -0,0 +1,229 @@
+/** 
+* @file MutualInformation.cpp
+* @brief Part Selection with Mutual Information
+* @author Erik Rodner
+* @date 02/20/2008
+
+*/
+#include <iostream>
+
+#include "MutualInformation.h"
+#include "vislearning/baselib/Gnuplot.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+
+
+// Stores the verbosity flag; it is consulted by computeThresholdsOverall
+// to decide whether progress messages are written to stderr.
+MutualInformation::MutualInformation( bool verbose )
+    : verbose ( verbose )
+{
+}
+
+// Empty destructor: no cleanup is performed here.
+MutualInformation::~MutualInformation() {}
+
+// Adds to `ones` the number of vectors in `v` whose component `d` is strictly
+// greater than `threshold`. `ones` is accumulated into, never reset, so callers
+// may sum over several vector lists with the same counter.
+void MutualInformation::addStatistics ( const vector<Vector *> & v, size_t d, double threshold, size_t & ones ) const
+{
+  vector<Vector *>::const_iterator it = v.begin();
+  while ( it != v.end() )
+  {
+    Vector * feature = *it;
+    if ( (*feature)[d] > threshold )
+      ++ones;
+    ++it;
+  }
+}
+
+/**
+ * Entropy (natural logarithm) of a two-bin histogram given by its counts.
+ *
+ * With N = n1 + n2 and p_i = n_i / N the textbook form is
+ *   H = - p_1 log p_1 - p_2 log p_2 .
+ * Taking logarithms of values in [0,1] is numerically unstable, so the
+ * equivalent form
+ *   H = p_1 log (1/p_1) + p_2 log (1/p_2) = log N - ( p_1 log n1 + p_2 log n2 )
+ * is evaluated on the raw counts instead. An empty bin contributes 0.
+ *
+ * @param n1 number of elements in bin 1
+ * @param n2 number of elements in bin 2
+ * @return entropy value; 0 if both bins are empty
+ */
+double MutualInformation::entropy ( size_t n1, size_t n2 ) const
+{
+  const double c1 = static_cast<double>(n1);
+  const double c2 = static_cast<double>(n2);
+  const double sum = c1 + c2;
+
+  // Without this guard an empty histogram evaluates 0/0 and returns NaN,
+  // which then poisons every mutual-information value derived from it.
+  if ( sum <= 0.0 )
+    return 0.0;
+
+  const double log1 = n1 > 0 ? log(c1) : 0.0;
+  const double log2 = n2 > 0 ? log(c2) : 0.0;
+  const double logsum = log(sum);
+
+  return - (c1/sum*log1 + c2/sum*log2) + logsum;
+}
+
+
+
+/**
+ * Mutual information between the binarized feature (f[dimension] > threshold)
+ * and the class label, computed as H(B) - H(B|C).
+ *
+ * H(B|C) is the plain average of the per-class two-bin entropies, i.e. the sum
+ * over all entries of `ls` divided by ls.size(); classes are NOT weighted by
+ * their number of examples. H(B) is the two-bin entropy over all examples.
+ *
+ * @param ls labeled set of multi-dimensional feature vectors
+ * @param dimension feature index to use
+ * @param threshold threshold used to binarize the feature
+ * @return mutual information value; 0 for an empty labeled set
+ */
+double MutualInformation::mutualInformationOverall ( const LabeledSetVector & ls,
+                                                     size_t dimension,
+                                                     double threshold ) const
+{
+  // An empty set would divide by zero below and return NaN.
+  if ( ls.size() == 0 )
+    return 0.0;
+
+  double entropy_conditional = 0.0;
+  size_t ones = 0;
+  size_t count = 0;
+
+  // Single pass: per-class statistics feed the conditional entropy, their
+  // running totals feed the overall entropy.
+  for ( LabeledSetVector::const_iterator i = ls.begin();
+           i != ls.end();
+           ++i )
+  {
+    size_t ones_class = 0;
+    addStatistics ( i->second, dimension, threshold, ones_class );
+
+    entropy_conditional += entropy ( ones_class, i->second.size() - ones_class );
+
+    ones += ones_class;
+    count += i->second.size();
+  }
+  entropy_conditional /= ls.size();
+
+  const double entropy_joint = entropy ( ones, count - ones );
+
+  return entropy_joint - entropy_conditional;
+}
+
+/**
+ * Mutual information between the binarized feature (f[dimension] > threshold)
+ * and the binary indicator "example belongs to class classno", computed as
+ * H(B) - 0.5 * ( H(B | class) + H(B | all other classes) ).
+ *
+ * Terminates the process with exit(-1) if `classno` is not present in `ls`.
+ *
+ * @param ls labeled set of multi-dimensional feature vectors
+ * @param classno label of the positive class
+ * @param dimension feature index to use
+ * @param threshold threshold used to binarize the feature
+ * @return mutual information value
+ */
+double MutualInformation::mutualInformationClass ( const LabeledSetVector & ls,
+                                                   size_t classno,
+                                                   size_t dimension,
+                                                   double threshold ) const
+{
+  LabeledSetVector::const_iterator iclassno = ls.find(classno);
+  if ( iclassno == ls.end() )
+  {
+    // size_t must not be printed with %u; cast to a type with a portable specifier.
+    fprintf (stderr, "MutualInformation::mutualInformationClass: classno %lu not found\n",
+             static_cast<unsigned long>(classno) );
+    exit(-1);
+  }
+
+  // statistics of the positive class
+  size_t ones_p  = 0;
+  const size_t count_p = iclassno->second.size();
+  addStatistics ( iclassno->second, dimension, threshold, ones_p );
+  const double entropy_conditional_p = entropy ( ones_p, count_p - ones_p );
+
+  // statistics of all remaining classes
+  size_t ones_n  = 0;
+  size_t count_n = 0;
+  for ( LabeledSetVector::const_iterator i = ls.begin();
+           i != ls.end();
+           ++i )
+  {
+    // The positive class must be excluded here; otherwise it is counted as
+    // negative data as well and enters the joint entropy twice.
+    if ( i->first == static_cast<int>(classno) )
+      continue;
+
+    addStatistics ( i->second, dimension, threshold, ones_n );
+    count_n += i->second.size();
+  }
+  const double entropy_conditional_n = entropy ( ones_n, count_n - ones_n );
+
+  const double entropy_conditional = 0.5 * ( entropy_conditional_n + entropy_conditional_p );
+  const double entropy_joint = entropy ( ones_p + ones_n, count_p + count_n - ones_p - ones_n );
+
+  return entropy_joint - entropy_conditional;
+}
+
+
+
+// Searches the threshold on feature `dimension` that maximizes
+// mutualInformationClass for class `classno`.
+// Candidates are the midpoints between consecutive distinct values the feature
+// takes over all vectors of `ls`. `opt_threshold` receives the best candidate,
+// the return value is its mutual information; both stay 0 when no candidate
+// reaches a mutual information above 0.
+double MutualInformation::computeThresholdClass ( const LabeledSetVector & ls, size_t classno,
+						  size_t dimension, double & opt_threshold ) const
+{
+  // gather the feature value of every example, regardless of its class
+  vector<double> values;
+  LOOP_ALL(ls)
+  {
+    EACH(classno, v);
+    values.push_back ( v[dimension] );
+  }
+
+  // reduce to the sorted set of distinct values
+  sort ( values.begin(), values.end() );
+  values.erase( std::unique( values.begin(), values.end()), values.end());
+
+  double best_mi = 0.0;
+  opt_threshold = 0.0;
+
+  // walk over neighbouring pairs (values[k], values[k+1])
+  for ( size_t k = 0; k + 1 < values.size(); ++k )
+  {
+    const double candidate = 0.5 * ( values[k] + values[k+1] );
+    const double mi = mutualInformationClass ( ls, classno, dimension, candidate );
+    if ( mi > best_mi )
+    {
+      best_mi = mi;
+      opt_threshold = candidate;
+    }
+  }
+
+  return best_mi;
+}
+
+
+/**
+ * Searches the threshold on feature `dimension` that maximizes
+ * mutualInformationOverall.
+ *
+ * Candidates are the midpoints between consecutive distinct values the feature
+ * takes over all vectors of `ls`; every candidate is evaluated.
+ *
+ * @param ls labeled set of multi-dimensional feature vectors
+ * @param dimension feature index to use
+ * @param opt_threshold receives the best threshold (0 if no candidate has a
+ *        mutual information above 0)
+ * @return mutual information of the best threshold (0 if none was found)
+ */
+double MutualInformation::computeThresholdOverall ( const LabeledSetVector & ls, size_t dimension, double & opt_threshold ) const
+{
+  vector<double> thresholds;
+  LOOP_ALL(ls)
+  {
+    EACH(classno, v);
+    double val = v[dimension];
+    thresholds.push_back ( val );
+  }
+  sort ( thresholds.begin(), thresholds.end() );
+  thresholds.erase( std::unique( thresholds.begin(), thresholds.end()), thresholds.end());
+
+  opt_threshold = 0.0;
+  double opt_mi = 0.0;
+
+  // NOTE: an earlier version skipped candidates that were not at a "class
+  // border", but it looked the labels up in storage order while the values had
+  // already been sorted and de-duplicated, so the test compared unrelated
+  // labels and discarded almost every candidate. All midpoints are evaluated
+  // now, which guarantees the maximum over the candidate set is found.
+  for ( vector<double>::const_iterator i = thresholds.begin();
+               i != thresholds.end(); ++i )
+  {
+    vector<double>::const_iterator j = i + 1;
+    if ( j == thresholds.end() ) break;
+
+    double threshold = 0.5 * ((*i) + (*j));
+
+    // FIXME: This call is pretty inefficient!!
+    // The statistics could be counted directly here instead of re-scanning
+    // the whole set for every candidate.
+    double mi = mutualInformationOverall ( ls, dimension, threshold );
+    if ( mi > opt_mi ) {
+        opt_mi = mi;
+        opt_threshold = threshold;
+    }
+  }
+
+  return opt_mi;
+}
+
+// Runs computeThresholdClass for every feature dimension of `ls` (as reported
+// by ls.dimension()). `thresholds` and `mis` are resized to that dimension;
+// entry k receives the best threshold and its mutual information for feature k.
+void MutualInformation::computeThresholdsClass ( const LabeledSetVector & ls, size_t classno, 
+						 NICE::Vector & thresholds, NICE::Vector & mis ) const
+{
+  const size_t feature_count = ls.dimension();
+
+  thresholds.resize(feature_count);
+  mis.resize(feature_count);
+
+  for ( size_t dim = 0 ; dim < feature_count ; ++dim )
+  {
+    double best_threshold = 0.0;
+    const double best_mi = computeThresholdClass ( ls, classno, dim, best_threshold );
+    mis[dim] = best_mi;
+    thresholds[dim] = best_threshold;
+  }
+}
+
+// Runs computeThresholdOverall for every feature dimension of `ls` (as reported
+// by ls.dimension()). `thresholds` and `mis` are resized to that dimension;
+// entry k receives the best threshold and its mutual information for feature k.
+// When the verbose flag is set, one progress line per feature goes to stderr.
+void MutualInformation::computeThresholdsOverall ( const LabeledSetVector & ls, NICE::Vector & thresholds, NICE::Vector & mis ) const
+{
+  const size_t feature_count = ls.dimension();
+
+  thresholds.resize(feature_count);
+  mis.resize(feature_count);
+
+  for ( size_t dim = 0 ; dim < feature_count ; ++dim )
+  {
+    if ( verbose )
+    {
+      cerr << "MutualInformation: Optimizing threshold for feature " << dim << " / " << feature_count << endl;
+    }
+
+    double best_threshold = 0.0;
+    const double best_mi = computeThresholdOverall ( ls, dim, best_threshold );
+    mis[dim] = best_mi;
+    thresholds[dim] = best_threshold;
+  }
+}

+ 93 - 0
cbaselib/MutualInformation.h

@@ -0,0 +1,93 @@
+/** 
+* @file MutualInformation.h
+* @brief Part selection and thresholding with Mutual Information
+* @author Erik Rodner
+* @date 02/20/2008
+
+*/
+#ifndef MUTUALINFORMATIONINCLUDE
+#define MUTUALINFORMATIONINCLUDE
+
+#include "core/vector/VVector.h"
+#include "vislearning/cbaselib/LabeledSet.h"
+
+
+namespace OBJREC {
+
+/**
+ * Part selection and thresholding with mutual information.
+ *
+ * A real-valued feature is binarized with a threshold (f[d] > threshold) and
+ * the mutual information between that binary feature and the class label is
+ * used to rate thresholds and feature dimensions.
+ */
+class MutualInformation
+{
+  public:
+
+    /** Constructor; @p verbose enables progress output during threshold optimization */
+    MutualInformation( bool verbose = false );
+
+    /** Virtual destructor */
+    virtual ~MutualInformation();
+
+    /**
+    * @brief Mutual information between a thresholded one-dimensional feature and the class label
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param dimension feature index to use
+    * @param threshold threshold used to binarize the feature
+    *
+    * @return mutual information value
+    */
+    double mutualInformationOverall ( const LabeledSetVector & ls, size_t dimension, double threshold ) const;
+
+    /**
+    * @brief Class-specific variant of mutualInformationOverall: the examples of
+    * class @p classno are contrasted with the remaining data instead of
+    * distinguishing all classes
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param classno label of the class of interest (has to be present in ls)
+    * @param dimension feature index to use
+    * @param threshold threshold used to binarize the feature
+    *
+    * @return mutual information value
+    */
+    double mutualInformationClass ( const LabeledSetVector & ls, size_t classno, size_t dimension, double threshold ) const;
+
+    /**
+    * @brief Find the threshold for a one-dimensional feature that maximizes the
+    * mutual information between the binarized feature B and the class label C
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param dimension feature index to use
+    * @param opt_threshold resulting optimal threshold
+    *
+    * @return mutual information obtained with the optimal threshold
+    */
+    double computeThresholdOverall ( const LabeledSetVector & ls, size_t dimension, double & opt_threshold ) const;
+
+    /**
+    * @brief Same search as computeThresholdOverall, but rated with mutualInformationClass
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param classno label of the class of interest
+    * @param dimension feature index to use
+    * @param opt_threshold resulting optimal threshold
+    *
+    * @return mutual information obtained with the optimal threshold
+    */
+    double computeThresholdClass ( const LabeledSetVector & ls, size_t classno, size_t dimension, double & opt_threshold ) const;
+
+    /**
+    * @brief Run computeThresholdOverall for every dimension of the feature vectors
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param thresholds output: optimal threshold for each dimension
+    * @param mis output: mutual information value reached in each dimension
+    */
+    void computeThresholdsOverall ( const LabeledSetVector & ls, NICE::Vector & thresholds, NICE::Vector & mis ) const;
+
+    /**
+    * @brief Run computeThresholdClass for every dimension of the feature vectors
+    *
+    * @param ls labeled set of multi-dimensional feature vectors
+    * @param classno label of the class of interest
+    * @param thresholds output: optimal threshold for each dimension
+    * @param mis output: mutual information value reached in each dimension
+    */
+    void computeThresholdsClass ( const LabeledSetVector & ls, size_t classno, NICE::Vector & thresholds, NICE::Vector & mis ) const;
+
+  protected:
+
+    /**
+    * @brief Helper: walk through all vectors and count how often f[d] > threshold
+    *
+    * @param v multi-dimensional features
+    * @param d feature index used
+    * @param threshold used to generate the binary feature
+    * @param ones counter that is increased by the number of vectors with
+    *        f[d] > threshold (it is not reset by the call)
+    */
+    void addStatistics ( const std::vector<NICE::Vector *> & v, size_t d, double threshold, size_t & ones ) const;
+
+    /**
+    * @brief Entropy of a discrete distribution with two bins
+    *
+    * @param n1 number of elements for bin 1
+    * @param n2 number of elements for bin 2
+    *
+    * @return entropy value
+    */
+    double entropy ( size_t n1, size_t n2 ) const;
+
+    //! print progress information while optimizing thresholds
+    bool verbose;
+
+};
+
+
+} // namespace
+
+#endif