|
@@ -1,4 +1,4 @@
|
|
|
-/**
|
|
|
+/**
|
|
|
* @file KCGPRegOneVsAll.cpp
|
|
|
* @brief One vs. All interface for kernel classifiers
|
|
|
* @author Erik Rodner
|
|
@@ -32,182 +32,182 @@ using namespace std;
|
|
|
using namespace OBJREC;
|
|
|
|
|
|
KCGPRegOneVsAll::KCGPRegOneVsAll( const Config *conf, Kernel *kernelFunction, const string & section )
|
|
|
- : KernelClassifier ( conf, kernelFunction )
|
|
|
+ : KernelClassifier ( conf, kernelFunction )
|
|
|
{
|
|
|
- this->maxClassNo = 0;
|
|
|
- this->verbose = conf->gB( section, "verbose", false );
|
|
|
- this->optimizeParameters = (kernelFunction == NULL) ? false : conf->gB( section, "optimize_parameters", true );
|
|
|
- this->maxIterations = conf->gI( section, "optimization_maxiterations", 500 );
|
|
|
- this->numSamplesCalibration = conf->gI( section, "calibrated_probabilities_numsamples", 2000 );
|
|
|
- this->calibrateProbabilities = conf->gB( section, "calibrated_probabilities", false );
|
|
|
- if ( verbose && this->calibrateProbabilities )
|
|
|
- cerr << "KCGPRegOneVsAll: probability calibration is turned on, this can result in massive additional load!" << endl;
|
|
|
-
|
|
|
- // we do joint optimization, therefore single classifiers (cloned from prototype)
|
|
|
- // are not allowed to do optimization themselves
|
|
|
- Config confNoOptimize ( *conf );
|
|
|
- confNoOptimize.sB( section, "optimize_parameters", false );
|
|
|
- this->prototype = new RegGaussianProcess ( &confNoOptimize, kernelFunction, section );
|
|
|
-
|
|
|
- // Do not use this option, unless you know what you are doing!
|
|
|
- // This function is just necessary for hyperparameter optimization with really large
|
|
|
- // kernel matrices of a kronecker structure!
|
|
|
- bool approximateTraceTerm = conf->gB(section, "approximate_trace_term", false);
|
|
|
- if ( approximateTraceTerm )
|
|
|
- traceApproximation = new TraceApproximation ( conf, section );
|
|
|
- else
|
|
|
- traceApproximation = NULL;
|
|
|
-
|
|
|
- // select final hyperparameters according to leave-one-out
|
|
|
- useLooParameters = conf->gB(section, "use_loo_parameters", false );
|
|
|
-
|
|
|
- // select the criterion
|
|
|
- modelselcrit = NULL;
|
|
|
- if ( useLooParameters ) {
|
|
|
- string modelselcrit_s = conf->gS(section, "loo_crit", "loo_pred_prob" );
|
|
|
- modelselcrit = GenericGPModelSelection::selectModelSel ( conf, modelselcrit_s );
|
|
|
- cerr << "KCGPRegOneVsAll: using additional model selection criterion " << modelselcrit_s << endl;
|
|
|
- }
|
|
|
-
|
|
|
- // do we want to compute the uncertainty of the estimate ?
|
|
|
- computeUncertainty = conf->gB(section, "compute_uncertainty", false );
|
|
|
+ this->maxClassNo = 0;
|
|
|
+ this->verbose = conf->gB( section, "verbose", false );
|
|
|
+ this->optimizeParameters = (kernelFunction == NULL) ? false : conf->gB( section, "optimize_parameters", true );
|
|
|
+ this->maxIterations = conf->gI( section, "optimization_maxiterations", 500 );
|
|
|
+ this->numSamplesCalibration = conf->gI( section, "calibrated_probabilities_numsamples", 2000 );
|
|
|
+ this->calibrateProbabilities = conf->gB( section, "calibrated_probabilities", false );
|
|
|
+ if ( verbose && this->calibrateProbabilities )
|
|
|
+ cerr << "KCGPRegOneVsAll: probability calibration is turned on, this can result in massive additional load!" << endl;
|
|
|
+
|
|
|
+ // we do joint optimization, therefore single classifiers (cloned from prototype)
|
|
|
+ // are not allowed to do optimization themselves
|
|
|
+ Config confNoOptimize ( *conf );
|
|
|
+ confNoOptimize.sB( section, "optimize_parameters", false );
|
|
|
+ this->prototype = new RegGaussianProcess ( &confNoOptimize, kernelFunction, section );
|
|
|
+
|
|
|
+ // Do not use this option, unless you know what you are doing!
|
|
|
+ // This function is just necessary for hyperparameter optimization with really large
|
|
|
+ // kernel matrices of a kronecker structure!
|
|
|
+ bool approximateTraceTerm = conf->gB(section, "approximate_trace_term", false);
|
|
|
+ if ( approximateTraceTerm )
|
|
|
+ traceApproximation = new TraceApproximation ( conf, section );
|
|
|
+ else
|
|
|
+ traceApproximation = NULL;
|
|
|
+
|
|
|
+ // select final hyperparameters according to leave-one-out
|
|
|
+ useLooParameters = conf->gB(section, "use_loo_parameters", false );
|
|
|
+
|
|
|
+ // select the criterion
|
|
|
+ modelselcrit = NULL;
|
|
|
+ if ( useLooParameters ) {
|
|
|
+ string modelselcrit_s = conf->gS(section, "loo_crit", "loo_pred_prob" );
|
|
|
+ modelselcrit = GenericGPModelSelection::selectModelSel ( conf, modelselcrit_s );
|
|
|
+ cerr << "KCGPRegOneVsAll: using additional model selection criterion " << modelselcrit_s << endl;
|
|
|
+ }
|
|
|
+
|
|
|
+ // do we want to compute the uncertainty of the estimate ?
|
|
|
+ computeUncertainty = conf->gB(section, "compute_uncertainty", false );
|
|
|
}
|
|
|
|
|
|
KCGPRegOneVsAll::KCGPRegOneVsAll( const KCGPRegOneVsAll &vcova ): KernelClassifier(vcova)
|
|
|
{
|
|
|
- prototype = vcova.prototype->clone();
|
|
|
- optimizeParameters = vcova.optimizeParameters;
|
|
|
- verbose = vcova.verbose;
|
|
|
- maxIterations = vcova.maxIterations;
|
|
|
- useLooParameters = vcova.useLooParameters;
|
|
|
- computeUncertainty = vcova.computeUncertainty;
|
|
|
- choleskyMatrix = vcova.choleskyMatrix;
|
|
|
- calibrateProbabilities = vcova.calibrateProbabilities;
|
|
|
- numSamplesCalibration = vcova.numSamplesCalibration;
|
|
|
-
|
|
|
- for(int i = 0; i < (int)vcova.classifiers.size(); i++)
|
|
|
- {
|
|
|
- classifiers.push_back(pair<int, RegGaussianProcess*>(vcova.classifiers[i].first,vcova.classifiers[i].second->clone()));
|
|
|
- }
|
|
|
-
|
|
|
- if ( vcova.traceApproximation == NULL )
|
|
|
- traceApproximation = NULL;
|
|
|
- else
|
|
|
- traceApproximation = new TraceApproximation(*vcova.traceApproximation);
|
|
|
-
|
|
|
- if ( vcova.modelselcrit == NULL )
|
|
|
- modelselcrit = NULL;
|
|
|
- else
|
|
|
- modelselcrit = new GPMSCLooLikelihoodRegression(*vcova.modelselcrit);
|
|
|
+ prototype = vcova.prototype->clone();
|
|
|
+ optimizeParameters = vcova.optimizeParameters;
|
|
|
+ verbose = vcova.verbose;
|
|
|
+ maxIterations = vcova.maxIterations;
|
|
|
+ useLooParameters = vcova.useLooParameters;
|
|
|
+ computeUncertainty = vcova.computeUncertainty;
|
|
|
+ choleskyMatrix = vcova.choleskyMatrix;
|
|
|
+ calibrateProbabilities = vcova.calibrateProbabilities;
|
|
|
+ numSamplesCalibration = vcova.numSamplesCalibration;
|
|
|
+
|
|
|
+ for (int i = 0; i < (int)vcova.classifiers.size(); i++)
|
|
|
+ {
|
|
|
+ classifiers.push_back(pair<int, RegGaussianProcess*>(vcova.classifiers[i].first, vcova.classifiers[i].second->clone()));
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( vcova.traceApproximation == NULL )
|
|
|
+ traceApproximation = NULL;
|
|
|
+ else
|
|
|
+ traceApproximation = new TraceApproximation(*vcova.traceApproximation);
|
|
|
+
|
|
|
+ if ( vcova.modelselcrit == NULL )
|
|
|
+ modelselcrit = NULL;
|
|
|
+ else
|
|
|
+ modelselcrit = new GPMSCLooLikelihoodRegression(*vcova.modelselcrit);
|
|
|
}
|
|
|
|
|
|
KCGPRegOneVsAll::~KCGPRegOneVsAll()
|
|
|
{
|
|
|
- if ( traceApproximation != NULL )
|
|
|
- delete traceApproximation;
|
|
|
- if ( modelselcrit != NULL )
|
|
|
- delete modelselcrit;
|
|
|
+ if ( traceApproximation != NULL )
|
|
|
+ delete traceApproximation;
|
|
|
+ if ( modelselcrit != NULL )
|
|
|
+ delete modelselcrit;
|
|
|
}
|
|
|
|
|
|
void KCGPRegOneVsAll::teach ( KernelData *kernelData, const NICE::Vector & y )
|
|
|
{
|
|
|
- maxClassNo = (int)y.Max();
|
|
|
-
|
|
|
- set<int> classesUsed;
|
|
|
- for ( uint i = 0 ; i < y.size(); i++ )
|
|
|
- classesUsed.insert ( (int)y[i] );
|
|
|
-
|
|
|
- classifiers.clear();
|
|
|
-
|
|
|
- VVector ySet;
|
|
|
- VVector ySetZeroMean;
|
|
|
- for ( set<int>::const_iterator it = classesUsed.begin();
|
|
|
- it != classesUsed.end(); it++)
|
|
|
- {
|
|
|
- int i = *it;
|
|
|
-
|
|
|
- NICE::Vector ySub ( y.size() );
|
|
|
- NICE::Vector ySubZeroMean ( y.size() );
|
|
|
- for ( size_t j = 0 ; j < ySub.size() ; j++ )
|
|
|
- {
|
|
|
- ySub[j] = ((int)y[j] == i) ? 1 : 0;
|
|
|
- ySubZeroMean[j] = ((int)y[j] == i) ? 1 : -1;
|
|
|
- }
|
|
|
- ySet.push_back ( ySub );
|
|
|
- ySetZeroMean.push_back ( ySubZeroMean );
|
|
|
- }
|
|
|
-
|
|
|
- // Hyperparameter optimization
|
|
|
- if ( optimizeParameters )
|
|
|
- {
|
|
|
- ParameterizedKernel *kernelPara = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
|
|
|
- if ( kernelPara == NULL ) {
|
|
|
- fthrow(Exception, "KCGPRegOneVsAll: you have to specify a parameterized kernel !");
|
|
|
- }
|
|
|
- GPRegressionOptimizationProblem gpopt ( kernelData, ySetZeroMean, kernelPara, verbose, modelselcrit, traceApproximation );
|
|
|
-
|
|
|
- // the trust region classifier is better for my large collection of one classification problem :)
|
|
|
- // FirstOrderRasmussen optimizer;
|
|
|
- FirstOrderTrustRegion optimizer;
|
|
|
- optimizer.setMaxIterations ( maxIterations );
|
|
|
- optimizer.setEpsilonG ( 0.01 );
|
|
|
-
|
|
|
- cout << "KCGPRegOneVsAll: Hyperparameter optimization ..." << endl;
|
|
|
- optimizer.optimizeFirst ( gpopt );
|
|
|
- cout << "KCGPRegOneVsAll: Hyperparameter optimization ...done" << endl;
|
|
|
-
|
|
|
- if ( useLooParameters )
|
|
|
- {
|
|
|
- cerr << "KCGPRegOneVsAll: using best loo parameters" << endl;
|
|
|
- gpopt.useLooParameters();
|
|
|
- }
|
|
|
-
|
|
|
- gpopt.update();
|
|
|
-
|
|
|
- Vector parameters;
|
|
|
- kernelPara->getParameters ( parameters );
|
|
|
- cout << "KCGPRegOneVsAll: Optimization finished: " << parameters << endl << endl;
|
|
|
- } else {
|
|
|
- kernelData->updateCholeskyFactorization();
|
|
|
- }
|
|
|
-
|
|
|
- //for binary problems
|
|
|
- if(classesUsed.size() == 2 && false)
|
|
|
- {
|
|
|
- set<int>::const_iterator it = classesUsed.begin();
|
|
|
- int classno = *it;
|
|
|
- it++;
|
|
|
- int classno2 = *it;
|
|
|
- const Vector & ySub = ySet[0];
|
|
|
- RegGaussianProcess *classifier;
|
|
|
- classifier = prototype->clone();
|
|
|
- if (verbose)
|
|
|
- fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> %d\n", classno, classno2 );
|
|
|
- classifier->teach ( kernelData, ySub );
|
|
|
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
|
|
|
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno2, classifier) );
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
- int i = 0;
|
|
|
- for ( set<int>::const_iterator it = classesUsed.begin(); it != classesUsed.end(); it++,i++)
|
|
|
- {
|
|
|
- int classno = *it;
|
|
|
- const Vector & ySub = ySet[i];
|
|
|
- RegGaussianProcess *classifier;
|
|
|
- classifier = prototype->clone();
|
|
|
-
|
|
|
- if (verbose)
|
|
|
- fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> remainder\n", classno );
|
|
|
-
|
|
|
- classifier->teach ( kernelData, ySub );
|
|
|
-
|
|
|
- classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if ( computeUncertainty || calibrateProbabilities )
|
|
|
- choleskyMatrix = kernelData->getCholeskyMatrix();
|
|
|
+ maxClassNo = (int)y.Max();
|
|
|
+
|
|
|
+ set<int> classesUsed;
|
|
|
+ for ( uint i = 0 ; i < y.size(); i++ )
|
|
|
+ classesUsed.insert ( (int)y[i] );
|
|
|
+
|
|
|
+ classifiers.clear();
|
|
|
+
|
|
|
+ VVector ySet;
|
|
|
+ VVector ySetZeroMean;
|
|
|
+ for ( set<int>::const_iterator it = classesUsed.begin();
|
|
|
+ it != classesUsed.end(); it++)
|
|
|
+ {
|
|
|
+ int i = *it;
|
|
|
+
|
|
|
+ NICE::Vector ySub ( y.size() );
|
|
|
+ NICE::Vector ySubZeroMean ( y.size() );
|
|
|
+ for ( size_t j = 0 ; j < ySub.size() ; j++ )
|
|
|
+ {
|
|
|
+ ySub[j] = ((int)y[j] == i) ? 1 : 0;
|
|
|
+ ySubZeroMean[j] = ((int)y[j] == i) ? 1 : -1;
|
|
|
+ }
|
|
|
+ ySet.push_back ( ySub );
|
|
|
+ ySetZeroMean.push_back ( ySubZeroMean );
|
|
|
+ }
|
|
|
+
|
|
|
+ // Hyperparameter optimization
|
|
|
+ if ( optimizeParameters )
|
|
|
+ {
|
|
|
+ ParameterizedKernel *kernelPara = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
|
|
|
+ if ( kernelPara == NULL ) {
|
|
|
+ fthrow(Exception, "KCGPRegOneVsAll: you have to specify a parameterized kernel !");
|
|
|
+ }
|
|
|
+ GPRegressionOptimizationProblem gpopt ( kernelData, ySetZeroMean, kernelPara, verbose, modelselcrit, traceApproximation );
|
|
|
+
|
|
|
+ // the trust region classifier is better for my large collection of one classification problem :)
|
|
|
+ // FirstOrderRasmussen optimizer;
|
|
|
+ FirstOrderTrustRegion optimizer;
|
|
|
+ optimizer.setMaxIterations ( maxIterations );
|
|
|
+ optimizer.setEpsilonG ( 0.01 );
|
|
|
+
|
|
|
+ cout << "KCGPRegOneVsAll: Hyperparameter optimization ..." << endl;
|
|
|
+ optimizer.optimizeFirst ( gpopt );
|
|
|
+ cout << "KCGPRegOneVsAll: Hyperparameter optimization ...done" << endl;
|
|
|
+
|
|
|
+ if ( useLooParameters )
|
|
|
+ {
|
|
|
+ cerr << "KCGPRegOneVsAll: using best loo parameters" << endl;
|
|
|
+ gpopt.useLooParameters();
|
|
|
+ }
|
|
|
+
|
|
|
+ gpopt.update();
|
|
|
+
|
|
|
+ Vector parameters;
|
|
|
+ kernelPara->getParameters ( parameters );
|
|
|
+ cout << "KCGPRegOneVsAll: Optimization finished: " << parameters << endl << endl;
|
|
|
+ } else {
|
|
|
+ kernelData->updateCholeskyFactorization();
|
|
|
+ }
|
|
|
+
|
|
|
+ //for binary problems
|
|
|
+ if (classesUsed.size() == 2 && false)
|
|
|
+ {
|
|
|
+ set<int>::const_iterator it = classesUsed.begin();
|
|
|
+ int classno = *it;
|
|
|
+ it++;
|
|
|
+ int classno2 = *it;
|
|
|
+ const Vector & ySub = ySet[0];
|
|
|
+ RegGaussianProcess *classifier;
|
|
|
+ classifier = prototype->clone();
|
|
|
+ if (verbose)
|
|
|
+ fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> %d\n", classno, classno2 );
|
|
|
+ classifier->teach ( kernelData, ySub );
|
|
|
+ classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
|
|
|
+ classifiers.push_back ( pair<int, RegGaussianProcess*> (classno2, classifier) );
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ int i = 0;
|
|
|
+ for ( set<int>::const_iterator it = classesUsed.begin(); it != classesUsed.end(); it++, i++)
|
|
|
+ {
|
|
|
+ int classno = *it;
|
|
|
+ const Vector & ySubZeroMean = ySetZeroMean[i];
|
|
|
+ RegGaussianProcess *classifier;
|
|
|
+ classifier = prototype->clone();
|
|
|
+
|
|
|
+ if (verbose)
|
|
|
+ fprintf (stderr, "KCGPRegOneVsAll: training classifier class %d <-> remainder\n", classno );
|
|
|
+
|
|
|
+ classifier->teach ( kernelData, ySubZeroMean );
|
|
|
+
|
|
|
+ classifiers.push_back ( pair<int, RegGaussianProcess*> (classno, classifier) );
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( computeUncertainty || calibrateProbabilities )
|
|
|
+ choleskyMatrix = kernelData->getCholeskyMatrix();
|
|
|
|
|
|
}
|
|
|
|
|
@@ -218,180 +218,180 @@ void KCGPRegOneVsAll::teach ( KernelData *kernelData, const NICE::Vector & y )
|
|
|
*/
|
|
|
void KCGPRegOneVsAll::teach ( KernelData *kernelData, const std::vector<double> & y )
|
|
|
{
|
|
|
- // FIXME: Do we really need this function? (erik)
|
|
|
- Vector y_nice (y);
|
|
|
- teach ( kernelData, y_nice );
|
|
|
+ // FIXME: Do we really need this function? (erik)
|
|
|
+ Vector y_nice (y);
|
|
|
+ teach ( kernelData, y_nice );
|
|
|
}
|
|
|
|
|
|
|
|
|
ClassificationResult KCGPRegOneVsAll::classifyKernel ( const NICE::Vector & kernelVector, double kernelSelf ) const
|
|
|
{
|
|
|
- if ( classifiers.size() <= 0 )
|
|
|
- fthrow(Exception, "The classifier was not trained with training data (use teach(...))");
|
|
|
-
|
|
|
- FullVector scores ( maxClassNo+1 );
|
|
|
- scores.set(0);
|
|
|
-
|
|
|
- //for binary problems
|
|
|
- if(classifiers.size() == 2 && false)
|
|
|
- {
|
|
|
- int classno = classifiers[0].first;
|
|
|
- int classno2 = classifiers[1].first;
|
|
|
- RegGaussianProcess *classifier = classifiers[0].second;
|
|
|
- double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
|
|
|
- scores[classno] = yEstimate;
|
|
|
- scores[classno2] = 0;//1-yEstimate;
|
|
|
- //cout << "i: " << 0 << ": " << scores[classno] << endl;
|
|
|
- //cout << "i: " << 1 << ": " << scores[classno2] << endl;
|
|
|
- }
|
|
|
- else
|
|
|
- {
|
|
|
+ if ( classifiers.size() <= 0 )
|
|
|
+ fthrow(Exception, "The classifier was not trained with training data (use teach(...))");
|
|
|
+
|
|
|
+ FullVector scores ( maxClassNo + 1 );
|
|
|
+ scores.set(0);
|
|
|
+
|
|
|
+ //for binary problems
|
|
|
+ if (classifiers.size() == 2 && false)
|
|
|
+ {
|
|
|
+ int classno = classifiers[0].first;
|
|
|
+ int classno2 = classifiers[1].first;
|
|
|
+ RegGaussianProcess *classifier = classifiers[0].second;
|
|
|
+ double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
|
|
|
+ scores[classno] = yEstimate;
|
|
|
+ scores[classno2] = 0;//1-yEstimate;
|
|
|
+ //cout << "i: " << 0 << ": " << scores[classno] << endl;
|
|
|
+ //cout << "i: " << 1 << ": " << scores[classno2] << endl;
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
#pragma omp parallel for
|
|
|
- for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
|
|
|
- {
|
|
|
- int classno = classifiers[(uint)classifierIndex].first;
|
|
|
- RegGaussianProcess *classifier = classifiers[(uint)classifierIndex].second;
|
|
|
- double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
|
|
|
- scores[classno] += yEstimate;
|
|
|
- //cout << "i: " << classifierIndex << ": " << scores[classno] << endl;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- double uncertainty = 0.0;
|
|
|
- if ( computeUncertainty || calibrateProbabilities )
|
|
|
- {
|
|
|
- Vector tmp;
|
|
|
- choleskySolveLargeScale ( choleskyMatrix, kernelVector, tmp );
|
|
|
-
|
|
|
- // tmp = K^{-1} k*
|
|
|
- uncertainty = kernelSelf - kernelVector.scalarProduct ( tmp );
|
|
|
-
|
|
|
- /*if(uncertainty < 0.0)
|
|
|
-// uncertainty = 0.0;*/
|
|
|
-
|
|
|
- if ( calibrateProbabilities )
|
|
|
- {
|
|
|
- /*********************************************************************
|
|
|
- * Calibration of probabilities is based on the following
|
|
|
- * idea:
|
|
|
- *
|
|
|
- * The scores \mu_i (or scores[i]) and the uncertainty
|
|
|
- * r.uncertainty are the mean and the variance of the predictive
|
|
|
- * distribution p(y_i | ...). So actually we do not know the correct value for
|
|
|
- * y_i and therefore just taking the maximum score ignores this uncertainty
|
|
|
- * completely!
|
|
|
- * What we might want to have is the probability for each class k
|
|
|
- * p_k = p( k = argmax_i y_i )
|
|
|
- *
|
|
|
- * Calculating this probability for n>2 is intractable and we
|
|
|
- * have to do monte carlo estimation which is performed in the following code
|
|
|
- *
|
|
|
- * An alternative would be to approximate p_k with
|
|
|
- * p( mu_k >= max_{i != k} y_i ) = prod_{i != k} F_i(mu_k)
|
|
|
- * with F_i being the cumulative distribution of the normal distribution i
|
|
|
- *
|
|
|
- * Details: Erik Rodner, "Learning with Few Examples for Visual Recognition Problems", PhD thesis
|
|
|
- *
|
|
|
- ********************************************************************/
|
|
|
-
|
|
|
- double stddev = sqrt(uncertainty);
|
|
|
- FullVector calibratedScores ( maxClassNo + 1 );
|
|
|
- calibratedScores.set(0.0);
|
|
|
- for ( uint i = 0 ; i < numSamplesCalibration ; i++ )
|
|
|
- {
|
|
|
- uint cmaxClassno = 0;
|
|
|
- double cmax = - std::numeric_limits<double>::max();
|
|
|
- for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
|
|
|
- {
|
|
|
- int classno = classifiers[(uint)classifierIndex].first;
|
|
|
- double s = randGaussDouble ( stddev ) + scores[classno];
|
|
|
- if ( s > cmax )
|
|
|
- {
|
|
|
- cmax = s;
|
|
|
- cmaxClassno = classno;
|
|
|
- }
|
|
|
- }
|
|
|
- calibratedScores[ cmaxClassno ]++;
|
|
|
- }
|
|
|
- calibratedScores.normalize();
|
|
|
-
|
|
|
- // calibrating probabilities should not affect our hard
|
|
|
- // decision for a specific class
|
|
|
- if ( verbose ) {
|
|
|
- if ( scores.maxElement() != calibratedScores.maxElement() )
|
|
|
- cerr << "Calibration of probabilities affected the hard decision, you should increase calibrated_probabilities_numsamples !!" << endl;
|
|
|
- }
|
|
|
-
|
|
|
- scores = calibratedScores;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- ClassificationResult r ( scores.maxElement(), scores );
|
|
|
- r.uncertainty = uncertainty;
|
|
|
-
|
|
|
- return r;
|
|
|
+ for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
|
|
|
+ {
|
|
|
+ int classno = classifiers[(uint)classifierIndex].first;
|
|
|
+ RegGaussianProcess *classifier = classifiers[(uint)classifierIndex].second;
|
|
|
+ double yEstimate = classifier->predictKernel(kernelVector, kernelSelf);
|
|
|
+ scores[classno] += yEstimate;
|
|
|
+ //cout << "i: " << classifierIndex << ": " << scores[classno] << endl;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ double uncertainty = 0.0;
|
|
|
+ if ( computeUncertainty || calibrateProbabilities )
|
|
|
+ {
|
|
|
+ Vector tmp;
|
|
|
+ choleskySolveLargeScale ( choleskyMatrix, kernelVector, tmp );
|
|
|
+
|
|
|
+ // tmp = K^{-1} k*
|
|
|
+ uncertainty = kernelSelf - kernelVector.scalarProduct ( tmp );
|
|
|
+
|
|
|
+ /*if(uncertainty < 0.0)
|
|
|
+ // uncertainty = 0.0;*/
|
|
|
+
|
|
|
+ if ( calibrateProbabilities )
|
|
|
+ {
|
|
|
+ /*********************************************************************
|
|
|
+ * Calibration of probabilities is based on the following
|
|
|
+ * idea:
|
|
|
+ *
|
|
|
+ * The scores \mu_i (or scores[i]) and the uncertainty
|
|
|
+ * r.uncertainty are the mean and the variance of the predictive
|
|
|
+ * distribution p(y_i | ...). So actually we do not know the correct value for
|
|
|
+ * y_i and therefore just taking the maximum score ignores this uncertainty
|
|
|
+ * completely!
|
|
|
+ * What we might want to have is the probability for each class k
|
|
|
+ * p_k = p( k = argmax_i y_i )
|
|
|
+ *
|
|
|
+ * Calculating this probability for n>2 is intractable and we
|
|
|
+ * have to do monte carlo estimation which is performed in the following code
|
|
|
+ *
|
|
|
+ * An alternative would be to approximate p_k with
|
|
|
+ * p( mu_k >= max_{i != k} y_i ) = prod_{i != k} F_i(mu_k)
|
|
|
+ * with F_i being the cumulative distribution of the normal distribution i
|
|
|
+ *
|
|
|
+ * Details: Erik Rodner, "Learning with Few Examples for Visual Recognition Problems", PhD thesis
|
|
|
+ *
|
|
|
+ ********************************************************************/
|
|
|
+
|
|
|
+ double stddev = sqrt(uncertainty);
|
|
|
+ FullVector calibratedScores ( maxClassNo + 1 );
|
|
|
+ calibratedScores.set(0.0);
|
|
|
+ for ( uint i = 0 ; i < numSamplesCalibration ; i++ )
|
|
|
+ {
|
|
|
+ uint cmaxClassno = 0;
|
|
|
+ double cmax = - std::numeric_limits<double>::max();
|
|
|
+ for ( int classifierIndex = 0 ; classifierIndex < (int)classifiers.size(); classifierIndex++ )
|
|
|
+ {
|
|
|
+ int classno = classifiers[(uint)classifierIndex].first;
|
|
|
+ double s = randGaussDouble ( stddev ) + scores[classno];
|
|
|
+ if ( s > cmax )
|
|
|
+ {
|
|
|
+ cmax = s;
|
|
|
+ cmaxClassno = classno;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ calibratedScores[ cmaxClassno ]++;
|
|
|
+ }
|
|
|
+ calibratedScores.normalize();
|
|
|
+
|
|
|
+ // calibrating probabilities should not affect our hard
|
|
|
+ // decision for a specific class
|
|
|
+ if ( verbose ) {
|
|
|
+ if ( scores.maxElement() != calibratedScores.maxElement() )
|
|
|
+ cerr << "Calibration of probabilities affected the hard decision, you should increase calibrated_probabilities_numsamples !!" << endl;
|
|
|
+ }
|
|
|
+
|
|
|
+ scores = calibratedScores;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ ClassificationResult r ( scores.maxElement(), scores );
|
|
|
+ r.uncertainty = uncertainty;
|
|
|
+
|
|
|
+ return r;
|
|
|
}
|
|
|
|
|
|
KCGPRegOneVsAll* KCGPRegOneVsAll::clone(void) const
|
|
|
{
|
|
|
- KCGPRegOneVsAll *classifier = new KCGPRegOneVsAll( *this );
|
|
|
- return classifier;
|
|
|
+ KCGPRegOneVsAll *classifier = new KCGPRegOneVsAll( *this );
|
|
|
+ return classifier;
|
|
|
}
|
|
|
|
|
|
void KCGPRegOneVsAll::clear()
|
|
|
{
|
|
|
- //nothing to clear
|
|
|
+ //nothing to clear
|
|
|
}
|
|
|
|
|
|
void KCGPRegOneVsAll::restore(std::istream& ifs, int type)
|
|
|
{
|
|
|
- ifs.precision (numeric_limits<double>::digits10 + 1);
|
|
|
- ifs >> maxClassNo;
|
|
|
- ifs >> computeUncertainty;
|
|
|
- ifs >> calibrateProbabilities;
|
|
|
-
|
|
|
- if(calibrateProbabilities || computeUncertainty)
|
|
|
- {
|
|
|
- ifs >> choleskyMatrix;
|
|
|
- }
|
|
|
-
|
|
|
- int size;
|
|
|
- ifs >> size;
|
|
|
-
|
|
|
- for(int i = 0; i < size; i++)
|
|
|
- {
|
|
|
- int tmp;
|
|
|
- ifs >> tmp;
|
|
|
- RegGaussianProcess *classifier;
|
|
|
- classifier = prototype->clone();
|
|
|
- classifier->restore(ifs);
|
|
|
- classifiers.push_back ( pair<int, RegGaussianProcess*> (tmp, classifier) );
|
|
|
- }
|
|
|
-
|
|
|
- KernelClassifier::restore(ifs,type);
|
|
|
+ ifs.precision (numeric_limits<double>::digits10 + 1);
|
|
|
+ ifs >> maxClassNo;
|
|
|
+ ifs >> computeUncertainty;
|
|
|
+ ifs >> calibrateProbabilities;
|
|
|
+
|
|
|
+ if (calibrateProbabilities || computeUncertainty)
|
|
|
+ {
|
|
|
+ ifs >> choleskyMatrix;
|
|
|
+ }
|
|
|
+
|
|
|
+ int size;
|
|
|
+ ifs >> size;
|
|
|
+
|
|
|
+ for (int i = 0; i < size; i++)
|
|
|
+ {
|
|
|
+ int tmp;
|
|
|
+ ifs >> tmp;
|
|
|
+ RegGaussianProcess *classifier;
|
|
|
+ classifier = prototype->clone();
|
|
|
+ classifier->restore(ifs);
|
|
|
+ classifiers.push_back ( pair<int, RegGaussianProcess*> (tmp, classifier) );
|
|
|
+ }
|
|
|
+
|
|
|
+ KernelClassifier::restore(ifs, type);
|
|
|
}
|
|
|
|
|
|
void KCGPRegOneVsAll::store(std::ostream& ofs, int type) const
|
|
|
{
|
|
|
- ofs.precision (numeric_limits<double>::digits10 + 1);
|
|
|
-
|
|
|
- ofs << maxClassNo << endl;
|
|
|
- ofs << computeUncertainty << endl;
|
|
|
- ofs << calibrateProbabilities << endl;
|
|
|
-
|
|
|
-
|
|
|
- if(calibrateProbabilities || computeUncertainty)
|
|
|
- {
|
|
|
- ofs << choleskyMatrix << endl;
|
|
|
- }
|
|
|
-
|
|
|
- ofs << classifiers.size() << endl;
|
|
|
- for(uint i = 0; i < classifiers.size(); i++)
|
|
|
- {
|
|
|
- ofs << classifiers[i].first << endl;
|
|
|
- classifiers[i].second->store(ofs);
|
|
|
- ofs << endl;
|
|
|
- }
|
|
|
- KernelClassifier::store(ofs,type);
|
|
|
+ ofs.precision (numeric_limits<double>::digits10 + 1);
|
|
|
+
|
|
|
+ ofs << maxClassNo << endl;
|
|
|
+ ofs << computeUncertainty << endl;
|
|
|
+ ofs << calibrateProbabilities << endl;
|
|
|
+
|
|
|
+
|
|
|
+ if (calibrateProbabilities || computeUncertainty)
|
|
|
+ {
|
|
|
+ ofs << choleskyMatrix << endl;
|
|
|
+ }
|
|
|
+
|
|
|
+ ofs << classifiers.size() << endl;
|
|
|
+ for (uint i = 0; i < classifiers.size(); i++)
|
|
|
+ {
|
|
|
+ ofs << classifiers[i].first << endl;
|
|
|
+ classifiers[i].second->store(ofs);
|
|
|
+ ofs << endl;
|
|
|
+ }
|
|
|
+ KernelClassifier::store(ofs, type);
|
|
|
}
|
|
|
|