/**
 * @file FPCBoosting.cpp
 * @brief Implementation of boosting algorithms (AdaBoost, Real AdaBoost, GentleBoost)
 * @author Erik Rodner
 * @date 04/24/2008
 */
#include <iostream>

#include "vislearning/classifier/fpclassifier/randomforest/FPCRandomForests.h"
#include "vislearning/baselib/Gnuplot.h"

#include "FPCBoosting.h"
#include "FPCFullSearch.h"

using namespace OBJREC;
using namespace std;
using namespace NICE;

#undef DEBUG_BOOST
#define ESTIMATETRAINERROR
#undef DEBUG_ERROR_ESTIMATION
/** Odds ratio p/(1-p), with p clamped to [eps, 1-eps] so that the
 *  log-odds used by Real AdaBoost stay finite. */
static inline double
pRatio( double val )
{
  const double eps = 1e-5;
  if ( val < eps ) val = eps;
  if ( val > 1 - eps ) val = 1 - eps;
  return val / (1. - val);
}
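
// Illustrative values (not from the original source): p = 0.9 gives a
// clamped ratio of 9.0 and a Real AdaBoost confidence of
// 0.5 * log(9.0) ≈ 1.0986; p = 0.5 gives ratio 1.0 and confidence 0,
// i.e. the hypothesis effectively abstains.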
FPCBoosting::FPCBoosting( const Config *_conf,
                          std::string section ) : conf(_conf)
{
  std::string weakClassifier_s = conf->gS(section, "weak_classifier", "full_search" );
  if ( weakClassifier_s == "random_forest" )
  {
    weakClassifier = new FPCRandomForests ( _conf, "RandomForest" );
    memory_efficient = _conf->gB("RandomForest", "memory_efficient", false );
  } else if ( weakClassifier_s == "full_search" ) {
    fprintf (stderr, "Boost: using full search methods\n");
    weakClassifier = new FPCFullSearch ( _conf );
    memory_efficient = false;
  } else {
    fprintf (stderr, "FPCBoosting: unknown weak classifier type!\n");
    exit(-1);
  }

  std::string boosting_method_s = conf->gS(section, "method", "realboost" );
  if ( boosting_method_s == "realboost" )
  {
    boosting_method = BOOSTINGMETHOD_REAL_ADABOOST;
  } else if ( boosting_method_s == "adaboost" ) {
    boosting_method = BOOSTINGMETHOD_ADABOOST;
  } else if ( boosting_method_s == "gentleboost" ) {
    boosting_method = BOOSTINGMETHOD_GENTLEBOOST;
  } else {
    fprintf (stderr, "FPCBoosting: unknown boosting method!\n");
    exit(-1);
  }

  classwise_normalization = _conf->gB(section, "classwise_normalization", false );
  positive_class = _conf->gI(section, "positive_class", 1 );
  maxRounds = _conf->gI(section, "max_rounds", 100 );
}
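
// A config section read by this constructor might look as follows
// (illustrative sketch; the section name is whatever string is passed in,
// only the key names are taken from the gS/gB/gI calls above):
//
//   [Boosting]
//   weak_classifier         = full_search   ; or: random_forest
//   method                  = realboost     ; or: adaboost, gentleboost
//   classwise_normalization = false
//   positive_class          = 1
//   max_rounds              = 100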
FPCBoosting::~FPCBoosting()
{
  if ( weakClassifier != NULL )
    delete weakClassifier;
  for ( StrongClassifier::iterator i = strongClassifier.begin();
        i != strongClassifier.end();
        i++ )
    delete ( i->third );
}
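
// classify() evaluates the strong classifier as a weighted vote,
//   H(x) = argmax_c sum_t ( alpha_t * h_t(x)[c] + beta_t ),
// where (alpha_t, beta_t, h_t) are the stored triplets. For Real AdaBoost
// the weak scores are first mapped to half log-odds and alpha_t = 1,
// beta_t = 0 (see boosting() below).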
ClassificationResult FPCBoosting::classify ( Example & pce )
{
  FullVector overall_distribution;
  for ( StrongClassifier::const_iterator i = strongClassifier.begin();
        i != strongClassifier.end();
        i++ )
  {
    double alpha = i->first;
    double beta = i->second;
    FeaturePoolClassifier *classifier = i->third;
    ClassificationResult r = classifier->classify ( pce );

    if ( boosting_method == BOOSTINGMETHOD_REAL_ADABOOST )
    {
      // transform the probability into an additive half log-odds score
      // (binary setting: class 0 is treated as the negative class)
      double p = r.scores[positive_class];
      r.scores[positive_class] = 0.5 * log ( pRatio ( p ) );
      r.scores[0] = - r.scores[positive_class];
    }

    if ( overall_distribution.empty() )
    {
      overall_distribution = r.scores;
      overall_distribution.multiply(alpha);
    } else {
      overall_distribution.add ( r.scores, alpha );
    }

    overall_distribution.add ( beta );
  }

  int overall_classno = overall_distribution.maxElement();
  return ClassificationResult ( overall_classno, overall_distribution );
}

void FPCBoosting::train ( FeaturePool & fp,
                          Examples & examples )
{
  maxClassNo = examples.getMaxClassNo();
  FeatureStorage featureStorage;

#ifdef DEBUG_BOOST
  fprintf (stderr, "FPCBoosting : training examples %d\n", (int)examples.size() );
#endif

  boosting ( featureStorage, fp, examples );
}
void FPCBoosting::normalizeWeights ( Examples & examples ) const
{
  if ( classwise_normalization )
  {
    // normalize the weights of each class separately, such that
    // positive and negative examples each carry a total weight of 1/2
    double sump = 0.0;
    double sumn = 0.0;
    int np = 0;
    int nn = 0;
    for ( Examples::const_iterator i = examples.begin();
          i != examples.end();
          i++ )
      if ( i->first == positive_class )
      {
        sump += i->second.weight;
        np++;
      } else {
        sumn += i->second.weight;
        nn++;
      }

    if ( fabs(sump) < 1e-10 ) sump = np;
    if ( fabs(sumn) < 1e-10 ) sumn = nn;

    for ( Examples::iterator i = examples.begin();
          i != examples.end();
          i++ )
      if ( i->first == positive_class )
        i->second.weight /= 2*sump;
      else
        i->second.weight /= 2*sumn;
  } else {
    // normalize all weights to sum to one
    double sum = 0.0;
    for ( Examples::const_iterator i = examples.begin();
          i != examples.end();
          i++ )
      sum += i->second.weight;

    if ( fabs(sum) < 1e-10 ) sum = examples.size();

    for ( Examples::iterator i = examples.begin();
          i != examples.end();
          i++ )
      i->second.weight /= sum;
  }
}
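
// Illustrative example (numbers are not from the original source): with two
// positive examples of weight 0.3 each and one negative example of weight
// 0.4, classwise normalization rescales the positives to 0.3/(2*0.6) = 0.25
// each and the negative to 0.4/(2*0.4) = 0.5, so each class sums to 1/2.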
void FPCBoosting::boosting ( const FeatureStorage & featureStorage,
                             FeaturePool & fp,
                             Examples & examples )
{
  normalizeWeights ( examples );

#ifdef ESTIMATETRAINERROR
  vector<double> error_boosting;
  vector<double> weak_error_boosting;
#endif
  for ( uint iteration = 0 ; iteration < (uint)maxRounds ; iteration++ )
  {
    // fit a weak classifier using the current weights
    FeaturePoolClassifier *best = weakClassifier->clone();
    best->train ( fp, examples );

    // ---------------------------------------------
    // estimate the error of the current hypothesis
    // ---------------------------------------------
    double error = 0.0;
    long int index = 0;
    vector<double> h_values (examples.size(), 0.0);
    vector<int> h_results (examples.size(), 0);

    for ( Examples::iterator i = examples.begin();
          i != examples.end();
          i++, index++ )
    {
      double weight = i->second.weight;
      ClassificationResult r = best->classify ( i->second );
      double p_value = r.scores[positive_class];
      int h_result = r.classno == positive_class ? 1 : -1;
      double h_value;

      // p_value is assumed to lie in [0,1]
      if ( boosting_method == BOOSTINGMETHOD_REAL_ADABOOST )
      {
        if ( (p_value < 0.0) || (p_value > 1.0) )
        {
          fprintf (stderr, "FPCBoosting: do not use Real AdaBoost with hypothesis values outside of [0,1]\n");
          exit(-1);
        }
        h_value = 0.5 * log (pRatio ( p_value ));
      } else {
        h_value = p_value;
      }

      assert ( index < (int)h_values.size() );
      assert ( index < (int)h_results.size() );
      h_values[index] = h_value;
      h_results[index] = h_result;

      if ( r.classno != i->first )
        error += weight;

#ifdef DEBUG_BOOST
      fprintf (stderr, "FPCBoosting: w(%ld) = %f gt %d est %d\n", index, weight, i->first, h_result );
#endif

      if ( memory_efficient ) i->second.ce->dropPreCached();
    }
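
    // After normalizeWeights() the example weights sum to one, so `error`
    // is the weighted empirical error of this round: eps_t = the summed
    // weight of all misclassified examples, a value in [0,1].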
#ifdef DEBUG_ERROR_ESTIMATION
    fprintf (stderr, "Boost: iteration %u error %lf\n", iteration, error );
    FPCFullSearch *search = dynamic_cast< FPCFullSearch * > ( best );

    /*
    if ( fabs(error - search->last_error) > 1e-5 ) {
      fprintf (stderr, "Boost: FIX THIS BUG postest=%lf preest=%lf %e\n", error, search->last_error, fabs(error - search->last_error) );
      exit(-1);
    }
    */

    if ( error > 0.5 )
    {
      fprintf (stderr, "Boost: weak hypothesis with an error > 0.5 ? postest=%lf preest=%lf\n", error, search->last_error);
      exit(-1);
    }
#endif
    double likelihood_ratio = 1.0;
    if ( (boosting_method == BOOSTINGMETHOD_REAL_ADABOOST) ||
         (boosting_method == BOOSTINGMETHOD_GENTLEBOOST) )
    {
      // the weak hypothesis already outputs a confidence-rated score,
      // so it enters the strong classifier unweighted
      strongClassifier.push_back ( triplet<double, double, FeaturePoolClassifier *> ( 1.0, 0.0, best ) );
    } else {
      // likelihood_ratio corresponds to \beta_t in the Viola & Jones paper
      likelihood_ratio = pRatio ( error );
      double alpha = - log ( likelihood_ratio );
      double beta = alpha/2;

#ifdef DEBUG_BOOST
      fprintf (stderr, "estimated parameters: lratio=%f alpha=%f beta=%f\n", likelihood_ratio, alpha, beta);
#endif

      strongClassifier.push_back ( triplet<double, double, FeaturePoolClassifier *> ( alpha, beta, best ) );
    }
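
    // For discrete AdaBoost this is the classical weak learner weight
    //   alpha_t = -log( eps_t / (1 - eps_t) ) = log( (1 - eps_t) / eps_t ),
    // e.g. eps_t = 0.1 yields alpha ≈ 2.197, while a random guesser with
    // eps_t = 0.5 yields alpha = 0. Note that beta = alpha/2 is added to
    // every class score in classify(), so it leaves the argmax decision
    // unchanged and merely shifts the absolute score values (cf. the
    // 1/2 * sum_t alpha_t threshold in Viola & Jones).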
#ifdef ESTIMATETRAINERROR
    // --------- estimate the current training error
    double error_sum = 0.0;
    index = 0;
    for ( Examples::iterator i = examples.begin();
          i != examples.end();
          i++, index++ )
    {
      ClassificationResult r = classify ( i->second );
      if ( r.classno != i->first )
        error_sum += 1.0 / examples.size();
    }
    fprintf (stderr, "Boost: training error %f\n", error_sum );

    error_boosting.push_back ( error_sum );
    weak_error_boosting.push_back ( error );
#endif
    // ---------- readjust the example weights
    index = 0;
    for ( Examples::iterator i = examples.begin();
          i != examples.end();
          i++, index++ )
    {
      double weight = i->second.weight;
      assert ( index < (int)h_values.size() );
      assert ( index < (int)h_results.size() );
      double h_value = h_values[index];
      int h_result = h_results[index];
      int y = (i->first == positive_class) ? 1 : -1;

      if (boosting_method == BOOSTINGMETHOD_REAL_ADABOOST)
      {
        // weight update of Real AdaBoost as presented by Friedman et al.:
        // w <- w * exp( -y * h(x) ), with h(x) the half log-odds score
        weight *= exp( - y*h_value );
      } else if (boosting_method == BOOSTINGMETHOD_GENTLEBOOST) {
        // the same multiplicative update; here h_value is still in [0,1]
        weight *= exp( - y*h_value );
      } else {
        // standard AdaBoost weight update according to Viola & Jones:
        // correctly classified examples are multiplied by
        // beta_t = eps_t/(1-eps_t), which is < 1 for eps_t < 0.5
        if ( y == h_result )
          weight *= likelihood_ratio;
      }

#ifdef DEBUG_BOOST
      fprintf (stderr, "Boost: iteration %u y %d p %f w %f\n", iteration, y, h_value, weight );
#endif

      i->second.weight = weight;
    }
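
    // Illustrative effect (numbers not from the original source): with
    // eps_t = 0.2 the discrete update multiplies every correctly classified
    // example by 0.2/0.8 = 0.25; the subsequent renormalization then raises
    // the relative weight of the misclassified examples.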
    normalizeWeights ( examples );
  }
#ifdef ESTIMATETRAINERROR
  Gnuplot gp ("lines");
  vector<double> upper_bound;

  // Compute the upper bound on the training error; the formula and an
  // explanation can be found in the paper of Freund & Schapire. The factor
  // 2^T is incorporated into the product, and a quick glance at the formula
  // shows that the bound is only informative once it drops below 1
  // (otherwise it would be of little use).
  double prod = 1.0;
  for ( vector<double>::const_iterator i = weak_error_boosting.begin();
        i != weak_error_boosting.end(); i++ )
  {
    double epsilon_t = *i;
    prod *= 2 * sqrt( epsilon_t * (1 - epsilon_t) );
    // using the improvement of the upper bound given in the paper
    upper_bound.push_back ( 0.5*prod );
  }
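
  // In symbols, the bound computed above is
  //   err_train(H) <= prod_{t=1..T} 2 * sqrt( eps_t * (1 - eps_t) ),
  // e.g. a round with eps_t = 0.3 contributes a factor of
  // 2 * sqrt(0.21) ≈ 0.917, so the bound decays geometrically as long as
  // every weak learner is better than random guessing.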
  gp.plot_x ( error_boosting, "Boosting Training Error" );
  gp.plot_x ( weak_error_boosting, "Error of the weak learner" );
  gp.plot_x ( upper_bound, "Upper bound of the training error (AdaBoost using a soft decision)" );

  // keep the plots on screen until a key is pressed
  getchar();
#endif
}
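
// Serialization format, as written by store() below (one block per round):
//   WEAKCLASSIFIER
//   <alpha>
//   <beta>
//   <weak classifier data>
//   ENDWEAKCLASSIFIER
// restore() assumes that the weak classifier's own restore() consumes its
// data up to and including the ENDWEAKCLASSIFIER token; otherwise the tag
// loop would stop after the first block.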
void FPCBoosting::restore (istream & is, int format)
{
  std::string tag;
  weakClassifier->maxClassNo = maxClassNo;

  while ( (is >> tag) && (tag == "WEAKCLASSIFIER") )
  {
    FeaturePoolClassifier *classifier = weakClassifier->clone();
    double alpha;
    double beta;
    is >> alpha;
    is >> beta;
    classifier->restore ( is );
    strongClassifier.push_back ( triplet<double, double, FeaturePoolClassifier *>
                                 ( alpha, beta, classifier ) );
  }
  cerr << "TAG: " << tag << endl;

  assert ( strongClassifier.size() > 0 );
}
void FPCBoosting::store (ostream & os, int format) const
{
  for ( StrongClassifier::const_iterator i = strongClassifier.begin();
        i != strongClassifier.end();
        i++ )
  {
    const FeaturePoolClassifier *classifier = i->third;
    double alpha = i->first;
    double beta = i->second;

    os << "WEAKCLASSIFIER" << endl;
    os << alpha << endl;
    os << beta << endl;
    classifier->store (os);
    os << "ENDWEAKCLASSIFIER" << endl;
  }
}
void FPCBoosting::clear ()
{
  strongClassifier.clear();
}

FeaturePoolClassifier *FPCBoosting::clone () const
{
  FPCBoosting *fpcBoost = new FPCBoosting ();
  fpcBoost->maxRounds = maxRounds;
  fpcBoost->weakClassifier = weakClassifier->clone();
  fpcBoost->positive_class = positive_class;
  fpcBoost->memory_efficient = memory_efficient;
  fpcBoost->maxClassNo = maxClassNo;
  // copy the boosting settings as well, so that the clone uses the same
  // method and normalization as this instance
  fpcBoost->boosting_method = boosting_method;
  fpcBoost->classwise_normalization = classwise_normalization;
  return fpcBoost;
}
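
// Typical usage (illustrative sketch, not from the original source; assumes
// a Config *conf, a configured FeaturePool `fp` and weighted Examples
// `examples` set up elsewhere in vislearning):
//
//   FPCBoosting booster ( conf, "Boosting" );
//   booster.train ( fp, examples );
//   ClassificationResult r = booster.classify ( someExample );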
void FPCBoosting::setComplexity ( int size )
{
  fprintf (stderr, "FPCBoosting: setting complexity (number of boosting rounds) to %d, overwriting the current value %d\n",
           size, maxRounds );
  maxRounds = size;
}