123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230 |
- /**
- * @file MutualInformation.cpp
- * @brief Part Selection with Mutual Information
- * @author Erik Rodner
- * @date 02/20/2008
- */
- #include <iostream>
- #include "MutualInformation.h"
- #include "vislearning/baselib/Gnuplot.h"
- using namespace OBJREC;
- using namespace std;
- using namespace NICE;
- MutualInformation::MutualInformation( bool verbose )
- {
- this->verbose = verbose;
- }
- MutualInformation::~MutualInformation()
- {
- }
- void MutualInformation::addStatistics ( const vector<Vector *> & v, size_t d, double threshold, size_t & ones ) const
- {
- for ( vector<Vector *>::const_iterator j = v.begin() ; j != v.end(); j++ )
- if ( (*(*j))[d] > threshold ) ones++;
- }
- double MutualInformation::entropy ( size_t n1, size_t n2 ) const
- {
- // - p_1 log p_1 - p_2 log p_2
- // aber log im bereich [0,1] numerisch instabil
- // daher
- // p_1 log 1/p_1 + p_2 log 1/p_2
- double sum = n1 + n2;
- double log1 = n1 > 0 ? log( (double)n1) : 0;
- double log2 = n2 > 0 ? log( (double)n2) : 0;
- double logsum = (sum > 0) ? log(sum) : 0;
- return - (n1/sum*log1 + n2/sum*log2) + logsum;
- }
- double MutualInformation::mutualInformationOverall ( const LabeledSetVector & ls,
- size_t dimension,
- double threshold ) const
- {
- double entropy_conditional = 0.0;
- for ( LabeledSetVector::const_iterator i = ls.begin();
- i != ls.end();
- i++ )
- {
- size_t ones = 0;
- addStatistics ( i->second, dimension, threshold, ones );
-
- double entropy_conditional_class = entropy ( ones, i->second.size() - ones );
- entropy_conditional += entropy_conditional_class;
- }
- entropy_conditional /= ls.size();
- double entropy_joint = 0.0;
- size_t ones = 0;
- size_t count = 0;
- for ( LabeledSetVector::const_iterator i = ls.begin();
- i != ls.end();
- i++ )
- {
- addStatistics ( i->second, dimension, threshold, ones );
- count += i->second.size();
- }
- entropy_joint = entropy ( ones, count - ones );
-
- return entropy_joint - entropy_conditional;
- }
- double MutualInformation::mutualInformationClass ( const LabeledSetVector & ls,
- size_t classno,
- size_t dimension,
- double threshold ) const
- {
- size_t ones_p = 0;
- LabeledSetVector::const_iterator iclassno = ls.find(classno);
- if ( iclassno == ls.end() )
- {
- fprintf (stderr, "MutualInformation::mutualInformationClass: classno %u not found\n", classno );
- exit(-1);
- }
- size_t count_p = iclassno->second.size();
- addStatistics ( iclassno->second, dimension, threshold, ones_p );
-
- double entropy_conditional_p = entropy ( ones_p, count_p - ones_p );
- size_t ones_n = 0;
- size_t count_n = 0;
- for ( LabeledSetVector::const_iterator i = ls.begin();
- i != ls.end();
- i++ )
- {
- if ( i->first != (int)classno );
- addStatistics ( i->second, dimension, threshold, ones_n );
- count_n += i->second.size();
- fprintf(stderr,"This exception means: review code and check the statement 'if ( i->first != (int)classno );' in line 108 / file MutualInformation.cpp. It contains an empty control statement and might be an error. If nonetheless desired behavoir, delete this exception throw.");
- }
- double entropy_conditional_n = entropy ( ones_n, count_n - ones_n );
- double entropy_conditional = 0.5 * ( entropy_conditional_n + entropy_conditional_p );
- double entropy_joint = entropy ( ones_p + ones_n, count_p + count_n - ones_p - ones_n );
-
- return entropy_joint - entropy_conditional;
- }
- double MutualInformation::computeThresholdClass ( const LabeledSetVector & ls, size_t classno,
- size_t dimension, double & opt_threshold ) const
- {
- vector<double> thresholds;
- LOOP_ALL(ls)
- {
- EACH(classno, v);
- double val = v[dimension];
- thresholds.push_back ( val );
- }
- sort ( thresholds.begin(), thresholds.end() );
- thresholds.erase( std::unique( thresholds.begin(), thresholds.end()), thresholds.end());
-
- opt_threshold = 0.0;
- double opt_mi = 0.0;
- for ( vector<double>::const_iterator i = thresholds.begin();
- i != thresholds.end();
- i++ )
- {
- vector<double>::const_iterator j = i + 1;
- if ( j == thresholds.end() ) break;
- double threshold = 0.5 * ((*i) + (*j));
- double mi = mutualInformationClass ( ls, classno, dimension, threshold );
- if ( mi > opt_mi ) {
- opt_mi = mi;
- opt_threshold = threshold;
- }
- }
- return opt_mi;
- }
- double MutualInformation::computeThresholdOverall ( const LabeledSetVector & ls, size_t dimension, double & opt_threshold ) const
- {
- vector<double> thresholds;
- vector<int> y;
- LOOP_ALL(ls)
- {
- EACH(classno, v);
- double val = v[dimension];
- thresholds.push_back ( val );
- y.push_back(classno);
- }
- sort ( thresholds.begin(), thresholds.end() );
- thresholds.erase( std::unique( thresholds.begin(), thresholds.end()), thresholds.end());
-
- opt_threshold = 0.0;
- double opt_mi = 0.0;
- uint ind = 0;
- for ( vector<double>::const_iterator i = thresholds.begin();
- i != thresholds.end(); i++, ind++ )
- {
- vector<double>::const_iterator j = i + 1;
- if ( j == thresholds.end() ) break;
- // the optimimum can not be found at non-class borders
- if ( y[ind] == y[ind+1] ) continue;
- double threshold = 0.5 * ((*i) + (*j));
- // FIXME: This call is pretty inefficient!!
- // We can directly count the features here...might be 100times faster :)
- double mi = mutualInformationOverall ( ls, dimension, threshold );
- if ( mi > opt_mi ) {
- opt_mi = mi;
- opt_threshold = threshold;
- }
- }
-
- return opt_mi;
- }
- void MutualInformation::computeThresholdsClass ( const LabeledSetVector & ls, size_t classno,
- NICE::Vector & thresholds, NICE::Vector & mis ) const
- {
- size_t max_dimension = ls.dimension();
- thresholds.resize(max_dimension);
- mis.resize(max_dimension);
- for ( size_t k = 0 ; k < max_dimension ; k++ )
- {
- double t, mi;
- mi = computeThresholdClass ( ls, classno, k, t );
- mis[k] = mi;
- thresholds[k] = t;
- }
- }
- void MutualInformation::computeThresholdsOverall ( const LabeledSetVector & ls, NICE::Vector & thresholds, NICE::Vector & mis ) const
- {
- size_t max_dimension = ls.dimension();
- thresholds.resize(max_dimension);
- mis.resize(max_dimension);
- for ( size_t k = 0 ; k < max_dimension ; k++ )
- {
- if ( verbose )
- cerr << "MutualInformation: Optimizing threshold for feature " << k << " / " << max_dimension << endl;
- double t, mi;
- mi = computeThresholdOverall ( ls, k, t );
- mis[k] = mi;
- thresholds[k] = t;
- }
- }
|