/**
* @file KCGPLaplaceOneVsAll.cpp
* @brief One vs. All interface for kernel classifiers
* @author Erik Rodner
* @date 12/10/2009
*/
#include <iostream>
#include <sstream>
#include "core/vector/Algorithms.h"
#include "core/optimization/gradientBased/OptimizationAlgorithmFirst.h"
#include "core/optimization/gradientBased/FirstOrderTrustRegion.h"
#include "core/optimization/gradientBased/FirstOrderRasmussen.h"
#include "vislearning/classifier/kernelclassifier/GPLaplaceOptimizationProblem.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/CholeskyRobustAuto.h"
#include "KCGPLaplaceOneVsAll.h"
#include "LHCumulativeGauss.h"
using namespace std;
using namespace NICE;
using namespace OBJREC;
/** Constructor: reads all settings from the given config section and
 *  allocates the (cumulative Gaussian) likelihood function.
 *  @param conf configuration object (copied internally)
 *  @param kernelFunction kernel to be used; may be NULL, which disables
 *         hyperparameter optimization
 *  @param section config section to read the settings from
 */
KCGPLaplaceOneVsAll::KCGPLaplaceOneVsAll( const Config *conf, Kernel *kernelFunction, const string & section )
    : KernelClassifier ( conf, kernelFunction ),
      // keep a private copy of the config: the Laplace approximations
      // created later in teach() are initialized from it
      confCopy(*conf),
      confSection(section)
{
  maxClassNo = 0;
  verbose = conf->gB ( section, "verbose", false );
  // hyperparameter optimization only makes sense with a kernel object
  optimizeParameters = ( kernelFunction != NULL ) && conf->gB ( section, "optimize_parameters", true );
  maxIterations = conf->gI ( section, "optimization_maxiterations", 500 );
  // the cumulative Gaussian is currently the only supported likelihood
  likelihoodFunction = new LHCumulativeGauss ( conf->gD ( section, "likelihood_lengthscale", sqrt(2.0f) ) );
  useLooParameters = conf->gB ( section, "use_loo_parameters", false );
}
/** Destructor: releases the likelihood function and all per-class
 *  Laplace approximations allocated in teach(). */
KCGPLaplaceOneVsAll::~KCGPLaplaceOneVsAll()
{
  // deleting a NULL pointer is a well-defined no-op, so no guard is needed
  delete likelihoodFunction;

  // an empty vector simply yields zero iterations, so no size check either
  for ( uint i = 0 ; i < laplaceApproximations.size(); i++ )
    delete laplaceApproximations[i];
  laplaceApproximations.clear();
}
/** Train one binary GP-Laplace classifier per class (one-vs-all).
 *  @param kernelData kernel matrix wrapper of the training set
 *  @param y training labels, assumed to be integers in 0..maxClassNo
 *
 *  Optionally performs gradient-based hyperparameter optimization if
 *  "optimize_parameters" was enabled and the kernel is parameterized.
 */
void KCGPLaplaceOneVsAll::teach ( KernelData *kernelData, const NICE::Vector & y )
{
  maxClassNo = (int)y.Max();

  // FIXME: This code is still not suitable for settings
  // with missing classes between 0..maxClassNo

  // BUGFIX: clear the binary label vectors of a possible previous teach()
  // call; otherwise repeated training accumulates stale entries and leaves
  // ySetZeroMean out of sync with laplaceApproximations (which IS rebuilt)
  ySetZeroMean.clear();

  classnos.resize(maxClassNo+1);
  // build one +1/-1 label vector per class for the one-vs-all scheme
  for ( int i = 0 ; i <= maxClassNo ; i++ )
  {
    NICE::Vector ySubZeroMean ( y.size() );
    for ( size_t j = 0 ; j < y.size() ; j++ )
      ySubZeroMean[j] = ((int)y[j] == i) ? 1 : -1;
    ySetZeroMean.push_back ( ySubZeroMean );
    classnos[i] = i;
  }

  // dispose Laplace approximations of a previous training run
  for ( uint i = 0 ; i < laplaceApproximations.size(); i++ )
    delete laplaceApproximations[i];
  laplaceApproximations.clear();

  // one fresh Laplace approximation per binary sub-problem
  for ( uint k = 0 ; k < ySetZeroMean.size(); k++ )
    laplaceApproximations.push_back ( new LaplaceApproximation ( &confCopy, confSection ) );

  // Hyperparameter optimization
  if ( optimizeParameters )
  {
    ParameterizedKernel *kernelPara = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
    if ( kernelPara == NULL ) {
      fthrow(Exception, "KCGPLaplaceOneVsAll: you have to specify a parameterized kernel !");
    }
    GPLaplaceOptimizationProblem gpopt ( kernelData, ySetZeroMean, kernelPara, likelihoodFunction, laplaceApproximations, verbose );

    // the trust region classifier is better for my large collection of one classification problem :)
    // FirstOrderRasmussen optimizer;
    FirstOrderTrustRegion optimizer;
    optimizer.setMaxIterations ( maxIterations );
    optimizer.setEpsilonG ( 0.01 );

    cout << "KCGPLaplaceOneVsAll: Hyperparameter optimization ..." << endl;
    optimizer.optimizeFirst ( gpopt );
    cout << "KCGPLaplaceOneVsAll: Hyperparameter optimization ...done" << endl;

    if ( useLooParameters )
    {
      cerr << "KCGPLaplaceOneVsAll: using best loo parameters" << endl;
      gpopt.useLooParameters();
    }

    // re-run the Laplace approximations with the final hyperparameters
    gpopt.update();

    Vector parameters;
    kernelPara->getParameters ( parameters );
    cout << "KCGPLaplaceOneVsAll: Optimization finished: " << parameters << endl << endl;
  } else {
    // fixed hyperparameters: factorize once, then approximate each sub-problem
    kernelData->updateCholeskyFactorization();
    for ( uint i = 0 ; i < ySetZeroMean.size() ; i++ )
    {
      const Vector & ySubZeroMean = ySetZeroMean[i];
      fprintf (stderr, "KCGPLaplaceOneVsAll: training classifier class %d <-> remainder\n", i );
      laplaceApproximations[i]->approximate ( kernelData, ySubZeroMean, likelihoodFunction );
    }
  }
}
/** Classify an example given its kernel values with the training set.
 *  @param kernelVector kernel evaluations between the example and all
 *         training examples
 *  @param kernelSelf kernel self-evaluation k(x,x) of the example
 *  @return classification result with averaged probabilistic scores
 *  @throws Exception if teach() has not been called before
 */
ClassificationResult KCGPLaplaceOneVsAll::classifyKernel ( const NICE::Vector & kernelVector, double kernelSelf ) const
{
  // classification is only possible after training
  if ( laplaceApproximations.size() == 0 )
    fthrow(Exception, "The classifier was not trained with training data (use teach(...))");

  FullVector scores ( maxClassNo+1 );
  scores.set(0);

  // accumulate the votes of every binary one-vs-all classifier
  for ( uint k = 0 ; k < laplaceApproximations.size() ; k++ )
  {
    const int positiveClass = classnos[k];
    // predictive estimate for "example belongs to the positive class"
    const double yEstimate = laplaceApproximations[k]->predict ( kernelVector, kernelSelf, ySetZeroMean[k], likelihoodFunction );
    // the positive class collects yEstimate, every other class 1 - yEstimate
    for ( uint j = 0 ; j < classnos.size() ; j++ )
      scores[ classnos[j] ] += ( classnos[j] == positiveClass ) ? yEstimate : ( 1.0 - yEstimate );
  }

  // average the scores over the number of binary classifiers
  scores.multiply ( 1.0 / laplaceApproximations.size() );

  return ClassificationResult ( scores.maxElement(), scores );
}