- /**
- * @file KCGPRegOneVsAll.cpp
- * @brief One vs. All interface for kernel classifiers
- * @author Erik Rodner
- * @date 12/10/2009
- */
- #include <iostream>
- #include <sstream>
- #ifdef NICE_USELIB_OPENMP
- #include <omp.h>
- #endif
- #include "core/vector/Algorithms.h"
- #include "core/optimization/gradientBased/OptimizationAlgorithmFirst.h"
- #include "core/optimization/gradientBased/FirstOrderTrustRegion.h"
- #include "core/optimization/gradientBased/FirstOrderRasmussen.h"
- #include "vislearning/regression/gpregression/GPRegressionOptimizationProblem.h"
- #include "core/algebra/CholeskyRobust.h"
- #include "core/algebra/CholeskyRobustAuto.h"
- #include "KCGPRegOneVsAll.h"
- #include <limits>
- #undef DEBUG
- using namespace NICE;
- using namespace std;
- using namespace OBJREC;
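- /*
- * Configuration keys read from the given section (see constructor below):
- * verbose, optimize_parameters, optimization_maxiterations,
- * calibrated_probabilities, calibrated_probabilities_numsamples,
- * approximate_trace_term, use_loo_parameters, loo_crit,
- * compute_uncertainty
- */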
- KCGPRegOneVsAll::KCGPRegOneVsAll( const Config *conf, Kernel *kernelFunction, const string & section )
- : KernelClassifier ( conf, kernelFunction )
- {
- this->maxClassNo = 0;
- this->verbose = conf->gB( section, "verbose", false );
- this->optimizeParameters = (kernelFunction == NULL) ? false : conf->gB( section, "optimize_parameters", true );
- this->maxIterations = conf->gI( section, "optimization_maxiterations", 500 );
- this->numSamplesCalibration = conf->gI( section, "calibrated_probabilities_numsamples", 2000 );
- this->calibrateProbabilities = conf->gB( section, "calibrated_probabilities", false );
- if ( verbose && this->calibrateProbabilities )
- cerr << "KCGPRegOneVsAll: probability calibration is turned on, this can result in massive additional load!" << endl;
- // we perform joint optimization; therefore, the individual classifiers
- // (cloned from the prototype) must not optimize their parameters themselves
- Config confNoOptimize ( *conf );
- confNoOptimize.sB( section, "optimize_parameters", false );
- this->prototype = new RegGaussianProcess ( &confNoOptimize, kernelFunction, section );
- // Do not use this option unless you know what you are doing!
- // It is only needed for hyperparameter optimization with very large
- // kernel matrices that have a Kronecker structure.
- bool approximateTraceTerm = conf->gB(section, "approximate_trace_term", false);
- if ( approximateTraceTerm )
- traceApproximation = new TraceApproximation ( conf, section );
- else
- traceApproximation = NULL;
- // select final hyperparameters according to leave-one-out
- useLooParameters = conf->gB(section, "use_loo_parameters", false );
- // select the criterion
- modelselcrit = NULL;
- if ( useLooParameters ) {
- string modelselcrit_s = conf->gS(section, "loo_crit", "loo_pred_prob" );
- modelselcrit = GenericGPModelSelection::selectModelSel ( conf, modelselcrit_s );
- cerr << "KCGPRegOneVsAll: using additional model selection criterion " << modelselcrit_s << endl;
- }
- // do we want to compute the uncertainty of the estimate?
- computeUncertainty = conf->gB(section, "compute_uncertainty", false );
- }
- KCGPRegOneVsAll::KCGPRegOneVsAll( const KCGPRegOneVsAll &vcova ): KernelClassifier(vcova)
- {
- prototype = vcova.prototype->clone();
- optimizeParameters = vcova.optimizeParameters;
- verbose = vcova.verbose;
- maxIterations = vcova.maxIterations;
- useLooParameters = vcova.useLooParameters;
- computeUncertainty = vcova.computeUncertainty;
- choleskyMatrix = vcova.choleskyMatrix;
- calibrateProbabilities = vcova.calibrateProbabilities;
- numSamplesCalibration = vcova.numSamplesCalibration;
- for (int i = 0; i < (int)vcova.classifiers.size(); i++)
- {
- classifiers.push_back(pair<int, RegGaussianProcess*>(vcova.classifiers[i].first, vcova.classifiers[i].second->clone()));
- }
- if ( vcova.traceApproximation == NULL )
- traceApproximation = NULL;
- else
- traceApproximation = new TraceApproximation(*vcova.traceApproximation);
- if ( vcova.modelselcrit == NULL )
- modelselcrit = NULL;
- else
- modelselcrit = new GPMSCLooLikelihoodRegression(*vcova.modelselcrit);
- }
- KCGPRegOneVsAll::~KCGPRegOneVsAll()
- {
- if ( traceApproximation != NULL )
- delete traceApproximation;
- if ( modelselcrit != NULL )
- delete modelselcrit;
- // free the prototype and all trained one-vs-all regressors
- if ( prototype != NULL )
- delete prototype;
- for ( uint i = 0 ; i < classifiers.size() ; i++ )
- delete classifiers[i].second;
- }
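- /**
- * @brief Train one GP regression model for each class that occurs in the
- * label vector (one-vs-all); optionally performs a joint hyperparameter
- * optimization beforehand
- * @param kernelData kernel matrix data of the training examples
- * @param y discrete class labels of the training examples
- */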
- void KCGPRegOneVsAll::teach ( KernelData *kernelData, const NICE::Vector & y )
- {
- maxClassNo = (int)y.Max();
- set<int> classesUsed;
- for ( uint i = 0 ; i < y.size(); i++ )
- classesUsed.insert ( (int)y[i] );
- clear(); // also frees any previously trained classifiers
- VVector ySet;
- VVector ySetZeroMean;
- for ( set<int>::const_iterator it = classesUsed.begin();
- it != classesUsed.end(); it++)
- {
- int i = *it;
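- // build two label encodings for class i: ySub in {0,1} and the zero-mean
- // variant ySubZeroMean in {-1,+1}; the latter is used both for joint
- // hyperparameter optimization and for training the individual regressors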
- NICE::Vector ySub ( y.size() );
- NICE::Vector ySubZeroMean ( y.size() );
- for ( size_t j = 0 ; j < ySub.size() ; j++ )
- {
- ySub[j] = ((int)y[j] == i) ? 1 : 0;
- ySubZeroMean[j] = ((int)y[j] == i) ? 1 : -1;
- }
- ySet.push_back ( ySub );
- ySetZeroMean.push_back ( ySubZeroMean );
- }
- // Hyperparameter optimization
- if ( optimizeParameters )
- {
- ParameterizedKernel *kernelPara = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
- if ( kernelPara == NULL ) {
- fthrow(Exception, "KCGPRegOneVsAll: you have to specify a parameterized kernel !");
- }
- GPRegressionOptimizationProblem gpopt ( kernelData, ySetZeroMean, kernelPara, verbose, modelselcrit, traceApproximation );
- // the trust region method turned out to work better than Rasmussen's
- // gradient descent on our large collection of one-vs-all problems
- // FirstOrderRasmussen optimizer;
- FirstOrderTrustRegion optimizer;
- optimizer.setMaxIterations ( maxIterations );
- optimizer.setEpsilonG ( 0.01 );
- cout << "KCGPRegOneVsAll: Hyperparameter optimization ..." << endl;
- optimizer.optimizeFirst ( gpopt );
- cout << "KCGPRegOneVsAll: Hyperparameter optimization ...done" << endl;
- if ( useLooParameters )
- {
- cerr << "KCGPRegOneVsAll: using best loo parameters" << endl;
- gpopt.useLooParameters();
- }
- gpopt.update();
- Vector parameters;
- kernelPara->getParameters ( parameters );
- cout << "KCGPRegOneVsAll: Optimization finished: " << parameters << endl << endl;
- } else {
- kernelData->updateCholeskyFactorization();
- }
- // special treatment of binary problems (shortcut currently disabled via "&& false")
- if (classesUsed.size() == 2 && false)
- {
- set<int>::const_iterator it = classesUsed.begin();
- int classno = *it;
- it++;
- int classno2 = *it;
- const Vector & ySub = ySet[0];
- RegGaussianProcess *classifier;
- classifier = prototype->clone();
- if (verbose)
- fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> %d\n", classno, classno2 );
- classifier->teach ( kernelData, ySub );
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno2, classifier) );
- }
- else
- {
- int i = 0;
- for ( set<int>::const_iterator it = classesUsed.begin(); it != classesUsed.end(); it++, i++)
- {
- int classno = *it;
- const Vector & ySubZeroMean = ySetZeroMean[i];
- RegGaussianProcess *classifier;
- classifier = prototype->clone();
- if (verbose)
- fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> remainder\n", classno );
- classifier->teach ( kernelData, ySubZeroMean );
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
- }
- }
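- // keep the Cholesky factor of the kernel matrix: it is required at
- // test time to compute predictive variances (see classifyKernel)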
- if ( computeUncertainty || calibrateProbabilities )
- choleskyMatrix = kernelData->getCholeskyMatrix();
- }
- /**
- * @brief Identical to the teach method above, except for the data structure used to store the labels
- * @author Alexander Lütz
- * @date 18/08/2011
- */
- void KCGPRegOneVsAll::teach ( KernelData *kernelData, const std::vector<double> & y )
- {
- // FIXME: Do we really need this function? (erik)
- Vector y_nice (y);
- teach ( kernelData, y_nice );
- }
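- /**
- * @brief Classify an example given its kernel values. The scores are the
- * raw one-vs-all GP regression outputs or, if calibration is enabled,
- * Monte Carlo estimates of class probabilities; r.uncertainty holds the
- * predictive variance.
- *
- * Minimal usage sketch (hypothetical variable names; assumes a trained
- * classifier and precomputed kernel values):
- * @code
- * NICE::Vector kStar; // kernel values k(x*, x_i) for all training examples
- * double kSelf = 1.0; // self kernel value k(x*, x*), e.g. of an RBF kernel
- * // ... fill kStar using the kernel function ...
- * ClassificationResult r = classifier.classifyKernel ( kStar, kSelf );
- * int bestClass = r.classno;
- * @endcode
- */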
- ClassificationResult KCGPRegOneVsAll::classifyKernel ( const NICE::Vector & kernelVector, double kernelSelf ) const
- {
- if ( classifiers.size() <= 0 )
- fthrow(Exception, "The classifier was not trained with training data (use teach(...))");
- FullVector scores ( maxClassNo + 1 );
- scores.set(0);
- // special treatment of binary problems (shortcut currently disabled via "&& false")
- if (classifiers.size() == 2 && false)
- {
- int classno = classifiers[0].first;
- int classno2 = classifiers[1].first;
- RegGaussianProcess *classifier = classifiers[0].second;
- double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
- scores[classno] = yEstimate;
- scores[classno2] = 0;//1-yEstimate;
- //cout << "i: " << 0 << ": " << scores[classno] << endl;
- //cout << "i: " << 1 << ": " << scores[classno2] << endl;
- }
- else
- {
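- // evaluate all one-vs-all regressors; the iterations are independent
- // (each classno occurs only once), so the loop is safe to parallelize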
- #pragma omp parallel for
- for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
- {
- int classno = classifiers[(uint)classifierIndex].first;
- RegGaussianProcess *classifier = classifiers[(uint)classifierIndex].second;
- double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
- scores[classno] += yEstimate;
- //cout << "i: " << classifierIndex << ": " << scores[classno] << endl;
- }
- }
- double uncertainty = 0.0;
- if ( computeUncertainty || calibrateProbabilities )
- {
- Vector tmp;
- choleskySolveLargeScale ( choleskyMatrix, kernelVector, tmp );
- // tmp = K^{-1} k*
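- // predictive GP variance: sigma_*^2 = k(x*,x*) - k_*^T K^{-1} k_*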
- uncertainty = kernelSelf - kernelVector.scalarProduct ( tmp );
- /*if(uncertainty < 0.0)
- // uncertainty = 0.0;*/
- if ( calibrateProbabilities )
- {
- /*********************************************************************
- * Calibration of probabilities is based on the following idea:
- *
- * The scores \mu_i (i.e. scores[i]) and the uncertainty
- * r.uncertainty are the mean and the variance of the predictive
- * distribution p(y_i | ...). We do not know the correct value of
- * y_i, so simply taking the maximum score ignores this uncertainty
- * completely!
- * What we would like to have instead is, for each class k, the
- * probability that it wins the argmax:
- * p_k = p( k = argmax_i y_i )
- *
- * Calculating this probability in closed form is intractable for
- * n > 2, so we resort to the Monte Carlo estimation performed in the
- * following code.
- *
- * An alternative would be to approximate p_k by
- * p( mu_k >= max_{i != k} y_i ) = prod_{i != k} F_i(mu_k),
- * with F_i being the cumulative distribution function of the normal
- * distribution i.
- *
- * Details: Erik Rodner, "Learning with Few Examples for Visual
- * Recognition Problems", PhD thesis
- ********************************************************************/
- double stddev = sqrt(uncertainty);
- FullVector calibratedScores ( maxClassNo + 1 );
- calibratedScores.set(0.0);
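- // Monte Carlo estimation: draw a sample from each predictive normal
- // distribution and count how often each class achieves the maximum;
- // the normalized counts approximate p_k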
- for ( uint i = 0 ; i < numSamplesCalibration ; i++ )
- {
- uint cmaxClassno = 0;
- double cmax = - std::numeric_limits<double>::max();
- for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
- {
- int classno = classifiers[(uint)classifierIndex].first;
- double s = randGaussDouble ( stddev ) + scores[classno];
- if ( s > cmax )
- {
- cmax = s;
- cmaxClassno = classno;
- }
- }
- calibratedScores[ cmaxClassno ]++;
- }
- calibratedScores.normalize();
- // calibrating probabilities should not affect our hard
- // decision for a specific class
- if ( verbose ) {
- if ( scores.maxElement() != calibratedScores.maxElement() )
- cerr << "Calibration of probabilities affected the hard decision, you should increase calibrated_probabilities_numsamples !!" << endl;
- }
- scores = calibratedScores;
- }
- }
- ClassificationResult r ( scores.maxElement(), scores );
- r.uncertainty = uncertainty;
- return r;
- }
- KCGPRegOneVsAll* KCGPRegOneVsAll::clone(void) const
- {
- KCGPRegOneVsAll *classifier = new KCGPRegOneVsAll( *this );
- return classifier;
- }
- void KCGPRegOneVsAll::clear()
- {
- // free all trained one-vs-all regressors
- for ( uint i = 0 ; i < classifiers.size() ; i++ )
- delete classifiers[i].second;
- classifiers.clear();
- }
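- /*
- * Persistence format used by restore/store below: maxClassNo,
- * computeUncertainty, calibrateProbabilities, optionally the Cholesky
- * matrix, the number of classifiers, and for each classifier its class
- * label followed by the serialized GP regression model.
- */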
- void KCGPRegOneVsAll::restore(std::istream& ifs, int type)
- {
- ifs.precision (numeric_limits<double>::digits10 + 1);
- ifs >> maxClassNo;
- ifs >> computeUncertainty;
- ifs >> calibrateProbabilities;
- if (calibrateProbabilities || computeUncertainty)
- {
- ifs >> choleskyMatrix;
- }
- int size;
- ifs >> size;
- for (int i = 0; i < size; i++)
- {
- int tmp;
- ifs >> tmp;
- RegGaussianProcess *classifier;
- classifier = prototype->clone();
- classifier->restore(ifs);
- classifiers.push_back ( pair<int, RegGaussianProcess*> (tmp, classifier) );
- }
- KernelClassifier::restore(ifs, type);
- }
- void KCGPRegOneVsAll::store(std::ostream& ofs, int type) const
- {
- ofs.precision (numeric_limits<double>::digits10 + 1);
- ofs << maxClassNo << endl;
- ofs << computeUncertainty << endl;
- ofs << calibrateProbabilities << endl;
- if (calibrateProbabilities || computeUncertainty)
- {
- ofs << choleskyMatrix << endl;
- }
- ofs << classifiers.size() << endl;
- for (uint i = 0; i < classifiers.size(); i++)
- {
- ofs << classifiers[i].first << endl;
- classifiers[i].second->store(ofs);
- ofs << endl;
- }
- KernelClassifier::store(ofs, type);
- }