testImageNetBinaryBruteForce.cpp 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336
  1. /**
  2. * @file testImageNetBinaryBruteForce.cpp
  3. * @brief perform ImageNet tests with binary tasks for OCC using GP mean and variance, sophisticated approximations of both, Parzen Density Estimation and SVDD
  4. * @author Alexander Lütz
  5. * @date 23-05-2012 (dd-mm-yyyy)
  6. */
  7. #include <ctime>
  8. #include <time.h>
  9. #include "core/basics/Config.h"
  10. #include "core/basics/Timer.h"
  11. #include "core/algebra/CholeskyRobust.h"
  12. #include "core/algebra/DiagonalMatrixApprox.h"
  13. #include "core/vector/Algorithms.h"
  14. #include "core/vector/SparseVectorT.h"
  15. #include "vislearning/cbaselib/ClassificationResults.h"
  16. #include "vislearning/baselib/ProgressBar.h"
  17. #include "vislearning/classifier/kernelclassifier/KCMinimumEnclosingBall.h"
  18. #include "fast-hik/tools.h"
  19. #include "fast-hik/MatFileIO.h"
  20. #include "fast-hik/ImageNetData.h"
  21. using namespace std;
  22. using namespace NICE;
  23. using namespace OBJREC;
// --------------- THE KERNEL FUNCTION ( exponential kernel with Euclidean distance ) ----------------------
  25. double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
  26. {
  27. double inner_sum(0.0);
  28. double d;
  29. //new version, where we needed on average 0.001707 s for each test sample
  30. NICE::SparseVector::const_iterator aIt = a.begin();
  31. NICE::SparseVector::const_iterator bIt = b.begin();
  32. //compute the euclidian distance between both feature vectores (given as SparseVectors)
  33. while ( (aIt != a.end()) && (bIt != b.end()) )
  34. {
  35. if (aIt->first == bIt->first)
  36. {
  37. d = ( aIt->second - bIt->second );
  38. inner_sum += d * d;
  39. aIt++;
  40. bIt++;
  41. }
  42. else if ( aIt->first < bIt->first)
  43. {
  44. inner_sum += aIt->second * aIt->second;
  45. aIt++;
  46. }
  47. else
  48. {
  49. inner_sum += bIt->second * bIt->second;
  50. bIt++;
  51. }
  52. }
  53. //compute remaining values, if b reached the end but not a
  54. while (aIt != a.end())
  55. {
  56. inner_sum += aIt->second * aIt->second;
  57. aIt++;
  58. }
  59. //compute remaining values, if a reached the end but not b
  60. while (bIt != b.end())
  61. {
  62. inner_sum += bIt->second * bIt->second;
  63. bIt++;
  64. }
  65. //normalization of the exponent
  66. inner_sum /= (2.0*sigma*sigma);
  67. //finally, compute the RBF-kernel score (RBF = radial basis function)
  68. return exp(-inner_sum);
  69. }
  70. // --------------- INPUT METHOD ----------------------
  71. void readParameters(string & filename, const int & size, NICE::Vector & parameterVector)
  72. {
  73. //we read the parameters which are given from a Matlab-Script (each line contains a single number, which is the optimal parameter for this class)
  74. parameterVector.resize(size);
  75. parameterVector.set(0.0);
  76. ifstream is(filename.c_str());
  77. if ( !is.good() )
  78. fthrow(IOException, "Unable to read parameters.");
  79. //
  80. string tmp;
  81. int cnt(0);
  82. while (! is.eof())
  83. {
  84. is >> tmp;
  85. parameterVector[cnt] = atof(tmp.c_str());
  86. cnt++;
  87. }
  88. //
  89. is.close();
  90. }
  91. //------------------- TRAINING METHODS --------------------
  92. void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
  93. {
  94. Timer tTrainPrecise;
  95. tTrainPrecise.start();
  96. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  97. {
  98. CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
  99. NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
  100. //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
  101. cr.robustChol ( kernelMatrix, choleskyMatrix );
  102. GPMeanRightPart.resize(nrOfExamplesPerClass);
  103. GPMeanRightPart.set(0.0);
  104. NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
  105. // pre-compute K^{-1} \cdot y, which is the same for every new test sample
  106. choleskySolveLargeScale ( choleskyMatrix, y, GPMeanRightPart );
  107. }
  108. tTrainPrecise.stop();
  109. std::cerr << "Precise time used for GPMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  110. }
  111. void inline trainGPVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
  112. {
  113. Timer tTrainPrecise;
  114. tTrainPrecise.start();
  115. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  116. {
  117. CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
  118. choleskyMatrix.resize(nrOfExamplesPerClass, nrOfExamplesPerClass);
  119. choleskyMatrix.set(0.0);
  120. //compute the cholesky decomposition of K in order to compute K^{-1} \cdot k_* for new test samples
  121. cr.robustChol ( kernelMatrix, choleskyMatrix );
  122. }
  123. tTrainPrecise.stop();
  124. std::cerr << "Precise time used for GPVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  125. }
  126. void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
  127. {
  128. Timer tTrainPrecise;
  129. tTrainPrecise.start();
  130. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  131. {
  132. NICE::Vector matrixDInv(nrOfExamplesPerClass,0.0);
  133. //compute D
  134. //start with adding some noise, if necessary
  135. if (noise != 0.0)
  136. matrixDInv.set(noise);
  137. else
  138. matrixDInv.set(0.0);
  139. // the approximation creates a diagonal matrix (which is easy to invert)
  140. // with entries equal the row sums of the original kernel matrix
  141. for (int i = 0; i < nrOfExamplesPerClass; i++)
  142. {
  143. for (int j = i; j < nrOfExamplesPerClass; j++)
  144. {
  145. matrixDInv[i] += kernelMatrix(i,j);
  146. if (i != j)
  147. matrixDInv[j] += kernelMatrix(i,j);
  148. }
  149. }
  150. //compute its inverse (and multiply every element with the label vector, which contains only one-entries and therefore be skipped...)
  151. GPMeanApproxRightPart.resize(nrOfExamplesPerClass);
  152. for (int i = 0; i < nrOfExamplesPerClass; i++)
  153. {
  154. GPMeanApproxRightPart[i] = 1.0 / matrixDInv[i];
  155. }
  156. }
  157. tTrainPrecise.stop();
  158. std::cerr << "Precise time used for GPMeanApprox training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  159. }
  160. void inline trainGPVarApprox(NICE::Vector & matrixDInv, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
  161. {
  162. std::cerr << "nrOfExamplesPerClass : " << nrOfExamplesPerClass << std::endl;
  163. Timer tTrainPreciseTimer;
  164. tTrainPreciseTimer.start();
  165. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  166. {
  167. matrixDInv.resize(nrOfExamplesPerClass);
  168. matrixDInv.set(0.0);
  169. //compute D
  170. //start with adding some noise, if necessary
  171. if (noise != 0.0)
  172. matrixDInv.set(noise);
  173. else
  174. matrixDInv.set(0.0);
  175. // the approximation creates a diagonal matrix (which is easy to invert)
  176. // with entries equal the row sums of the original kernel matrix
  177. for (int i = 0; i < nrOfExamplesPerClass; i++)
  178. {
  179. for (int j = i; j < nrOfExamplesPerClass; j++)
  180. {
  181. matrixDInv[i] += kernelMatrix(i,j);
  182. if (i != j)
  183. matrixDInv[j] += kernelMatrix(i,j);
  184. }
  185. }
  186. //compute its inverse
  187. for (int i = 0; i < nrOfExamplesPerClass; i++)
  188. {
  189. matrixDInv[i] = 1.0 / matrixDInv[i];
  190. }
  191. }
  192. tTrainPreciseTimer.stop();
  193. std::cerr << "Precise time used for GPVarApprox training class " << classNumber << ": " << tTrainPreciseTimer.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  194. }
  195. // GP subset of regressors
  196. void inline trainGPSRMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
  197. {
  198. std::vector<int> examplesToChoose;
  199. indicesOfChosenExamples.clear();
  200. //add all examples for possible choice
  201. for (int i = 0; i < nrOfExamplesPerClass; i++)
  202. {
  203. examplesToChoose.push_back(i);
  204. }
  205. //now chose randomly some examples as active subset
  206. int index;
  207. for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  208. {
  209. index = rand() % examplesToChoose.size();
  210. indicesOfChosenExamples.push_back(examplesToChoose[index]);
  211. examplesToChoose.erase(examplesToChoose.begin() + index);
  212. }
  213. NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  214. int rowCnt(0);
  215. //set every row
  216. for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  217. {
  218. //set every element of this row
  219. NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
  220. for (int j = 0; j < nrOfExamplesPerClass; j++)
  221. {
  222. Kmn(rowCnt,j) = col(j);
  223. }
  224. }
  225. //we could speed this up if we would order the indices
  226. NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  227. double tmp(0.0);
  228. for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  229. {
  230. for (int j = i; j < indicesOfChosenExamples.size(); j++ )
  231. {
  232. tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
  233. Kmm(i,j) = tmp;
  234. if (i != j)
  235. Kmm(j,i) = tmp;
  236. }
  237. }
  238. Timer tTrainPrecise;
  239. tTrainPrecise.start();
  240. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  241. {
  242. NICE::Matrix innerMatrix;
  243. innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
  244. innerMatrix.addScaledMatrix( noise, Kmm );
  245. NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
  246. NICE::Vector projectedLabels;
  247. projectedLabels.multiply(Kmn,y);
  248. CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
  249. NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
  250. //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
  251. cr.robustChol ( innerMatrix, choleskyMatrix );
  252. GPMeanRightPart.resize(indicesOfChosenExamples.size());
  253. GPMeanRightPart.set(0.0);
  254. // pre-compute K^{-1} \cdot y, which is the same for every new test sample
  255. choleskySolveLargeScale ( choleskyMatrix, projectedLabels, GPMeanRightPart );
  256. }
  257. tTrainPrecise.stop();
  258. std::cerr << "Precise time used for GPSRMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  259. }
  260. // GP subset of regressors
  261. void inline trainGPSRVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
  262. {
  263. std::vector<int> examplesToChoose;
  264. indicesOfChosenExamples.clear();
  265. //add all examples for possible choice
  266. for (int i = 0; i < nrOfExamplesPerClass; i++)
  267. {
  268. examplesToChoose.push_back(i);
  269. }
  270. //now chose randomly some examples as active subset
  271. int index;
  272. for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  273. {
  274. index = rand() % examplesToChoose.size();
  275. indicesOfChosenExamples.push_back(examplesToChoose[index]);
  276. examplesToChoose.erase(examplesToChoose.begin() + index);
  277. }
  278. NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  279. int rowCnt(0);
  280. //set every row
  281. for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  282. {
  283. //set every element of this row
  284. NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
  285. for (int j = 0; j < nrOfExamplesPerClass; j++)
  286. {
  287. Kmn(rowCnt,j) = col(j);
  288. }
  289. }
  290. //we could speed this up if we would order the indices
  291. NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  292. double tmp(0.0);
  293. for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  294. {
  295. for (int j = i; j < indicesOfChosenExamples.size(); j++ )
  296. {
  297. tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
  298. Kmm(i,j) = tmp;
  299. if (i != j)
  300. Kmm(j,i) = tmp;
  301. }
  302. }
  303. Timer tTrainPrecise;
  304. tTrainPrecise.start();
  305. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  306. {
  307. NICE::Matrix innerMatrix;
  308. innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
  309. innerMatrix.addScaledMatrix( noise, Kmm );
  310. CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
  311. choleskyMatrix.resize( nrOfExamplesPerClass, nrOfExamplesPerClass );
  312. choleskyMatrix.set( 0.0 );
  313. //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
  314. cr.robustChol ( innerMatrix, choleskyMatrix );
  315. }
  316. tTrainPrecise.stop();
  317. std::cerr << "Precise time used for GPSRVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  318. }
/**
 * @brief Training for the optimized-diagonal GP mean approximation: fit a
 *        diagonal matrix to the kernel matrix (via DiagonalMatrixApprox) and
 *        keep the element-wise inverse of its entries. Multiplying with the
 *        label vector is skipped since all labels are one in the OCC setting.
 * @param rightPartGPOptMean output: element-wise inverse of the fitted diagonal
 * @param noise NOTE(review): unused inside this routine -- presumably kept for
 *        a signature consistent with the other training methods; confirm
 * @param kernelMatrix kernel matrix of the training examples
 * @param nrOfExamplesPerClass number of training examples
 * @param classNumber class index, only used for logging
 * @param runsPerClassToAverageTraining number of repetitions for stable timings
 */
void inline trainGPOptMean(NICE::Vector & rightPartGPOptMean, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
  DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
  rightPartGPOptMean.resize(nrOfExamplesPerClass);

  Timer tTrainPrecise;
  tTrainPrecise.start();
  for (int run = 0; run < runsPerClassToAverageTraining; run++)
  {
    rightPartGPOptMean.set(0.0);
    //compute optimal diagonal matrix
    diagApprox.approx ( kernelMatrix, rightPartGPOptMean );
    // invert entries (zero entries stay zero to avoid division by zero)
    // by theory we should multiply these entries with the labels
    // but we are in an OCC setting with labels equal to one
    for (int i = 0; i < nrOfExamplesPerClass; i++)
    {
      if (rightPartGPOptMean[i] != 0.0)
        rightPartGPOptMean[i] = 1.0 / rightPartGPOptMean[i];
    }
  }
  tTrainPrecise.stop();
  std::cerr << "Precise time used for GPOptMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
}
  342. void inline trainGPOptVar(NICE::Vector & DiagGPOptVar, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
  343. {
  344. DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
  345. DiagGPOptVar.resize(nrOfExamplesPerClass);
  346. Timer tTrainPrecise;
  347. tTrainPrecise.start();
  348. for (int run = 0; run < runsPerClassToAverageTraining; run++)
  349. {
  350. DiagGPOptVar.set(0.0);
  351. //compute optimal diagonal matrix
  352. diagApprox.approx ( kernelMatrix, DiagGPOptVar );
  353. //invert entries
  354. for (int i = 0; i < nrOfExamplesPerClass; i++)
  355. {
  356. if (DiagGPOptVar[i] != 0.0)
  357. DiagGPOptVar[i] = 1.0 / DiagGPOptVar[i];
  358. }
  359. }
  360. tTrainPrecise.stop();
  361. std::cerr << "Precise time used for GPOptVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;
  362. }
/**
 * @brief Train a Support Vector Data Description (SVDD) classifier, realized
 *        as a minimum enclosing ball in kernel feature space.
 * @param noise NOTE(review): unused inside this routine -- presumably kept for
 *        a signature consistent with the other training methods; confirm
 * @param kernelMatrix kernel matrix of the training examples (deliberately
 *        passed by value, see the comment on the local KernelData copy below)
 * @param nrOfExamplesPerClass number of training examples
 * @param classNumber class index, only used for logging
 * @param runsPerClassToAverageTraining number of repetitions for stable timings
 * @return heap-allocated trained classifier -- ownership passes to the caller,
 *         who is responsible for deleting it
 */
KCMinimumEnclosingBall *trainSVDD( const double & noise, const NICE::Matrix kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
  Config conf;
  // set the outlier ratio (Paul optimized this parameter FIXME)
  conf.sD( "SVDD", "outlier_fraction", 0.1 );
  conf.sB( "SVDD", "verbose", false );
  KCMinimumEnclosingBall *svdd = new KCMinimumEnclosingBall ( &conf, NULL /* no kernel function */, "SVDD" /* config section */);
  KernelData kernelData ( &conf, kernelMatrix, "Kernel" , false /* update cholesky */ );

  Timer tTrainPrecise;
  tTrainPrecise.start();
  for (int run = 0; run < runsPerClassToAverageTraining; run++)
  {
    NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
    // KCMinimumEnclosingBall does not store the kernel data object, therefore, we are safe with passing a local copy
    svdd->teach ( &kernelData, y );
  }
  tTrainPrecise.stop();
  std::cerr << "Precise time used for SVDD training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;

  return svdd;
}
  383. // ------------- EVALUATION METHODS ---------------------
  384. void inline evaluateGPVarApprox(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  385. {
  386. double uncertainty;
  387. Timer tTestSingle;
  388. tTestSingle.start();
  389. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  390. {
  391. // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_* where D is our nice approximation of K
  392. NICE::Vector rightPart (kernelVector.size());
  393. for (int j = 0; j < kernelVector.size(); j++)
  394. {
  395. rightPart[j] = kernelVector[j] * matrixDInv[j];
  396. }
  397. uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
  398. }
  399. tTestSingle.stop();
  400. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  401. FullVector scores ( 2 );
  402. scores[0] = 0.0;
  403. scores[1] = 1.0 - uncertainty;
  404. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  405. }
  406. void inline evaluateGPVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  407. {
  408. double uncertainty;
  409. Timer tTestSingle;
  410. tTestSingle.start();
  411. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  412. {
  413. // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*
  414. NICE::Vector rightPart (kernelVector.size(),0.0);
  415. choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
  416. uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
  417. }
  418. tTestSingle.stop();
  419. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  420. FullVector scores ( 2 );
  421. scores[0] = 0.0;
  422. scores[1] = 1.0 - uncertainty;
  423. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  424. }
  425. void inline evaluateGPMeanApprox(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  426. {
  427. double mean;
  428. Timer tTestSingle;
  429. tTestSingle.start();
  430. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  431. {
  432. // \mean = \k_*^T \cdot D^{-1} \cdot y where D is our nice approximation of K
  433. mean = kernelVector.scalarProduct ( rightPart );
  434. }
  435. tTestSingle.stop();
  436. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  437. FullVector scores ( 2 );
  438. scores[0] = 0.0;
  439. scores[1] = mean;
  440. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  441. }
  442. void inline evaluateGPMean(const NICE::Vector & kernelVector, const NICE::Vector & GPMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  443. {
  444. double mean;
  445. Timer tTestSingle;
  446. tTestSingle.start();
  447. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  448. {
  449. // \mean = \k_*^T \cdot K^{-1} \cdot y
  450. mean = kernelVector.scalarProduct ( GPMeanRightPart );
  451. }
  452. tTestSingle.stop();
  453. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  454. FullVector scores ( 2 );
  455. scores[0] = 0.0;
  456. scores[1] = mean;
  457. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  458. }
  459. void inline evaluateGPSRMean(const NICE::Vector & kernelVector, const NICE::Vector & GPSRMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
  460. {
  461. double mean;
  462. //grep the entries corresponding to the active set
  463. NICE::Vector kernelVectorM;
  464. kernelVectorM.resize(nrOfRegressors);
  465. for (int i = 0; i < nrOfRegressors; i++)
  466. {
  467. kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
  468. }
  469. Timer tTestSingle;
  470. tTestSingle.start();
  471. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  472. {
  473. // \mean = \k_*^T \cdot K^{-1} \cdot y
  474. mean = kernelVectorM.scalarProduct ( GPSRMeanRightPart );
  475. }
  476. tTestSingle.stop();
  477. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  478. FullVector scores ( 2 );
  479. scores[0] = 0.0;
  480. scores[1] = mean;
  481. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  482. }
  483. void inline evaluateGPSRVar(const NICE::Vector & kernelVector, const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
  484. {
  485. double uncertainty;
  486. //grep the entries corresponding to the active set
  487. NICE::Vector kernelVectorM;
  488. kernelVectorM.resize(nrOfRegressors);
  489. for (int i = 0; i < nrOfRegressors; i++)
  490. {
  491. kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
  492. }
  493. Timer tTestSingle;
  494. tTestSingle.start();
  495. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  496. {
  497. NICE::Vector rightPart (nrOfRegressors,0.0);
  498. choleskySolveLargeScale ( choleskyMatrix, kernelVectorM, rightPart );
  499. uncertainty = noise*kernelVectorM.scalarProduct ( rightPart );
  500. }
  501. tTestSingle.stop();
  502. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  503. FullVector scores ( 2 );
  504. scores[0] = 0.0;
  505. scores[1] = 1.0 - uncertainty;
  506. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  507. }
  508. //this method is completely the same as evaluateGPMeanApprox, but for convenience, it is its own method
  509. void inline evaluateGPOptMean(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  510. {
  511. double mean;
  512. Timer tTestSingle;
  513. tTestSingle.start();
  514. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  515. {
  516. // \mean = \k_*^T \cdot D^{-1} \cdot y where D is our nice approximation of K
  517. mean = kernelVector.scalarProduct ( rightPart );
  518. }
  519. tTestSingle.stop();
  520. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  521. FullVector scores ( 2 );
  522. scores[0] = 0.0;
  523. scores[1] = mean;
  524. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  525. }
  526. //this method is completely the same as evaluateGPVarApprox, but for convenience, it is its own method
  527. void inline evaluateGPOptVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  528. {
  529. double uncertainty;
  530. Timer tTestSingle;
  531. tTestSingle.start();
  532. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  533. {
  534. // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_* where D is our nice approximation of K
  535. NICE::Vector rightPart (kernelVector.size());
  536. for (int j = 0; j < kernelVector.size(); j++)
  537. {
  538. rightPart[j] = kernelVector[j] * matrixDInv[j];
  539. }
  540. uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
  541. }
  542. tTestSingle.stop();
  543. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  544. FullVector scores ( 2 );
  545. scores[0] = 0.0;
  546. scores[1] = 1.0 - uncertainty;
  547. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  548. }
  549. void inline evaluateParzen(const NICE::Vector & kernelVector, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  550. {
  551. double score;
  552. Timer tTestSingle;
  553. tTestSingle.start();
  554. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  555. {
  556. //the Parzen score is nothing but the averaged similarity to every training sample
  557. score = kernelVector.Sum() / (double) kernelVector.size(); //maybe we could directly call kernelVector.Mean() here
  558. }
  559. tTestSingle.stop();
  560. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  561. FullVector scores ( 2 );
  562. scores[0] = 0.0;
  563. scores[1] = score;
  564. r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );
  565. }
  566. void inline evaluateSVDD( KCMinimumEnclosingBall *svdd, const NICE::Vector & kernelVector, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
  567. {
  568. Timer tTestSingle;
  569. tTestSingle.start();
  570. for (int run = 0; run < runsPerClassToAverageTesting; run++)
  571. {
  572. // In the following, we assume that we are using a Gaussian kernel
  573. r = svdd->classifyKernel ( kernelVector, 1.0 /* kernel self */ );
  574. }
  575. tTestSingle.stop();
  576. timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;
  577. }
/**
perform binary one-class-classification (OCC) tests on ImageNet using GP mean and variance, their approximations, Parzen density estimation and SVDD
*/
  581. int main (int argc, char **argv)
  582. {
  583. std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  584. Config conf ( argc, argv );
  585. string resultsfile = conf.gS("main", "results", "results.txt" );
  586. int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  587. nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  588. //which classes to considere? we assume consecutive class numers
  589. int indexOfFirstClass = conf.gI("main", "indexOfFirstClass", 0);
  590. indexOfFirstClass = std::max(indexOfFirstClass, 0); //we do not have less than 0 classes
  591. int indexOfLastClass = conf.gI("main", "indexOfLastClass", 999);
  592. indexOfLastClass = std::min(indexOfLastClass, 999); //we do not have more than 1000 classes
  593. int nrOfClassesToConcidere = (indexOfLastClass - indexOfLastClass)+1;
  594. //repetitions for every class to achieve reliable time evalutions
  595. int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 );
  596. int runsPerClassToAverageTesting = conf.gI( "main", "runsPerClassToAverageTesting", 1 );
  597. // share parameters among methods and classes?
  598. bool shareParameters = conf.gB("main" , "shareParameters", true);
  599. //which methods do we want to use?
  600. bool GPMeanApprox = conf.gB( "main", "GPMeanApprox", false);
  601. bool GPVarApprox = conf.gB( "main", "GPVarApprox", false);
  602. bool GPMean = conf.gB( "main", "GPMean", false);
  603. bool GPVar = conf.gB( "main", "GPVar", false);
  604. bool GPSRMean = conf.gB( "main", "GPSRMean", false);
  605. bool GPSRVar = conf.gB( "main", "GPSRVar", false);
  606. bool GPOptMean = conf.gB( "main", "GPOptMean", false);
  607. bool GPOptVar = conf.gB( "main", "GPOptVar", false);
  608. bool Parzen = conf.gB( "main", "Parzen", false);
  609. bool SVDD = conf.gB( "main", "SVDD", false);
  610. if (GPMeanApprox)
  611. std::cerr << "GPMeanApprox used" << std::endl;
  612. else
  613. std::cerr << "GPMeanApprox not used" << std::endl;
  614. if (GPVarApprox)
  615. std::cerr << "GPVarApprox used" << std::endl;
  616. else
  617. std::cerr << "GPVarApprox not used" << std::endl;
  618. if (GPMean)
  619. std::cerr << "GPMean used" << std::endl;
  620. else
  621. std::cerr << "GPMean not used" << std::endl;
  622. if (GPVar)
  623. std::cerr << "GPVar used" << std::endl;
  624. else
  625. std::cerr << "GPVar not used" << std::endl;
  626. if (GPSRMean)
  627. std::cerr << "GPSRMean used" << std::endl;
  628. else
  629. std::cerr << "GPSRMean not used" << std::endl;
  630. if (GPSRVar)
  631. std::cerr << "GPSRVar used" << std::endl;
  632. else
  633. std::cerr << "GPSRVar not used" << std::endl;
  634. if (GPOptMean)
  635. std::cerr << "GPOptMean used" << std::endl;
  636. else
  637. std::cerr << "GPOptMean not used" << std::endl;
  638. if (GPOptVar)
  639. std::cerr << "GPOptVar used" << std::endl;
  640. else
  641. std::cerr << "GPOptVar not used" << std::endl;
  642. if (Parzen)
  643. std::cerr << "Parzen used" << std::endl;
  644. else
  645. std::cerr << "Parzen not used" << std::endl;
  646. if (SVDD)
  647. std::cerr << "SVDD used" << std::endl;
  648. else
  649. std::cerr << "SVDD not used" << std::endl;
  650. // GP variance approximation
  651. NICE::Vector sigmaGPVarApproxParas(nrOfClassesToConcidere,0.0);
  652. NICE::Vector noiseGPVarApproxParas(nrOfClassesToConcidere,0.0);
  653. // GP variance
  654. NICE::Vector sigmaGPVarParas(nrOfClassesToConcidere,0.0);
  655. NICE::Vector noiseGPVarParas(nrOfClassesToConcidere,0.0);
  656. //GP mean approximation
  657. NICE::Vector sigmaGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  658. NICE::Vector noiseGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  659. //GP mean
  660. NICE::Vector sigmaGPMeanParas(nrOfClassesToConcidere,0.0);
  661. NICE::Vector noiseGPMeanParas(nrOfClassesToConcidere,0.0);
  662. //GP SR mean
  663. NICE::Vector sigmaGPSRMeanParas(nrOfClassesToConcidere,0.0);
  664. NICE::Vector noiseGPSRMeanParas(nrOfClassesToConcidere,0.0);
  665. //GP SR var
  666. NICE::Vector sigmaGPSRVarParas(nrOfClassesToConcidere,0.0);
  667. NICE::Vector noiseGPSRVarParas(nrOfClassesToConcidere,0.0);
  668. //GP Opt mean
  669. NICE::Vector sigmaGPOptMeanParas(nrOfClassesToConcidere,0.0);
  670. NICE::Vector noiseGPOptMeanParas(nrOfClassesToConcidere,0.0);
  671. //GP Opt var
  672. NICE::Vector sigmaGPOptVarParas(nrOfClassesToConcidere,0.0);
  673. NICE::Vector noiseGPOptVarParas(nrOfClassesToConcidere,0.0);
  674. //Parzen
  675. NICE::Vector sigmaParzenParas(nrOfClassesToConcidere,0.0);
  676. NICE::Vector noiseParzenParas(nrOfClassesToConcidere,0.0);
  677. //SVDD
  678. NICE::Vector sigmaSVDDParas(nrOfClassesToConcidere,0.0);
  679. NICE::Vector noiseSVDDParas(nrOfClassesToConcidere,0.0);
  680. if (!shareParameters)
  681. {
  682. //read the optimal parameters for the different methods
  683. // GP variance approximation
  684. string sigmaGPVarApproxFile = conf.gS("main", "sigmaGPVarApproxFile", "approxVarSigma.txt");
  685. string noiseGPVarApproxFile = conf.gS("main", "noiseGPVarApproxFile", "approxVarNoise.txt");
  686. // GP variance
  687. string sigmaGPVarFile = conf.gS("main", "sigmaGPVarFile", "approxVarSigma.txt");
  688. string noiseGPVarFile = conf.gS("main", "noiseGPVarFile", "approxVarNoise.txt");
  689. //GP mean approximation
  690. string sigmaGPMeanApproxFile = conf.gS("main", "sigmaGPMeanApproxFile", "approxVarSigma.txt");
  691. string noiseGPMeanApproxFile = conf.gS("main", "noiseGPMeanApproxFile", "approxVarNoise.txt");
  692. //GP mean
  693. string sigmaGPMeanFile = conf.gS("main", "sigmaGPMeanFile", "approxVarSigma.txt");
  694. string noiseGPMeanFile = conf.gS("main", "noiseGPMeanFile", "approxVarNoise.txt");
  695. //Parzen
  696. string sigmaParzenFile = conf.gS("main", "sigmaParzenFile", "approxVarSigma.txt");
  697. string noiseParzenFile = conf.gS("main", "noiseParzenFile", "approxVarNoise.txt");
  698. //SVDD
  699. string sigmaSVDDFile = conf.gS("main", "sigmaSVDDFile", "approxVarSigma.txt");
  700. string noiseSVDDFile = conf.gS("main", "noiseSVDDFile", "approxVarNoise.txt");
  701. // GP variance approximation
  702. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarApproxParas);
  703. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarApproxParas);
  704. // GP variance
  705. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarParas);
  706. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarParas);
  707. //GP mean approximation
  708. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanApproxParas);
  709. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanApproxParas);
  710. //GP mean
  711. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanParas);
  712. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanParas);
  713. //GP SR mean
  714. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRMeanParas);
  715. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRMeanParas);
  716. //GP SR var
  717. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRVarParas);
  718. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRVarParas);
  719. //GP Opt mean
  720. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptMeanParas);
  721. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptMeanParas);
  722. //GP Opt var
  723. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptVarParas);
  724. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptVarParas);
  725. //Parzen
  726. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaParzenParas);
  727. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseParzenParas);
  728. //SVDD
  729. readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaSVDDParas);
  730. readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseSVDDParas);
  731. }
  732. else
  733. {
  734. //use static variables for all methods and classis
  735. double noise = conf.gD( "main", "noise", 0.01 );
  736. double sigma = conf.gD( "main", "sigma", 1.0 );
  737. sigmaGPVarApproxParas.set(sigma);
  738. noiseGPVarApproxParas.set(noise);
  739. // GP variance
  740. sigmaGPVarParas.set(sigma);
  741. noiseGPVarParas.set(noise);
  742. //GP mean approximation
  743. sigmaGPMeanApproxParas.set(sigma);
  744. noiseGPMeanApproxParas.set(noise);
  745. //GP mean
  746. sigmaGPMeanParas.set(sigma);
  747. noiseGPMeanParas.set(noise);
  748. //GP SR mean
  749. sigmaGPSRMeanParas.set(sigma);
  750. noiseGPSRMeanParas.set(noise);
  751. //GP SR var
  752. sigmaGPSRVarParas.set(sigma);
  753. noiseGPSRVarParas.set(noise);
  754. //GP Opt mean
  755. sigmaGPOptMeanParas.set(sigma);
  756. noiseGPOptMeanParas.set(noise);
  757. //GP Opt var
  758. sigmaGPOptVarParas.set(sigma);
  759. noiseGPOptVarParas.set(noise);
  760. //Parzen
  761. sigmaParzenParas.set(sigma);
  762. noiseParzenParas.set(noise);
  763. //SVDD
  764. sigmaSVDDParas.set(sigma);
  765. noiseSVDDParas.set(noise);
  766. }
  767. // -------- optimal parameters read --------------
  768. std::vector<SparseVector> trainingData;
  769. NICE::Vector y;
  770. std::cerr << "Reading ImageNet data ..." << std::endl;
  771. bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  772. string imageNetPath;
  773. if (imageNetLocal)
  774. imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  775. else
  776. imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";
  777. ImageNetData imageNetTrain ( imageNetPath + "demo/" );
  778. imageNetTrain.preloadData( "train", "training" );
  779. trainingData = imageNetTrain.getPreloadedData();
  780. y = imageNetTrain.getPreloadedLabels();
  781. std::cerr << "Reading of training data finished" << std::endl;
  782. std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  783. std::cerr << "y.size(): " << y.size() << std::endl;
  784. std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  785. ImageNetData imageNetTest ( imageNetPath + "demo/" );
  786. imageNetTest.preloadData ( "val", "testing" );
  787. imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );
  788. double OverallPerformanceGPVarApprox(0.0);
  789. double OverallPerformanceGPVar(0.0);
  790. double OverallPerformanceGPMeanApprox(0.0);
  791. double OverallPerformanceGPMean(0.0);
  792. double OverallPerformanceGPSRMean(0.0);
  793. double OverallPerformanceGPSRVar(0.0);
  794. double OverallPerformanceGPOptMean(0.0);
  795. double OverallPerformanceGPOptVar(0.0);
  796. double OverallPerformanceParzen(0.0);
  797. double OverallPerformanceSVDD(0.0);
  798. double kernelSigmaGPVarApprox;
  799. double kernelSigmaGPVar;
  800. double kernelSigmaGPMeanApprox;
  801. double kernelSigmaGPMean;
  802. double kernelSigmaGPSRMean;
  803. double kernelSigmaGPSRVar;
  804. double kernelSigmaGPOptMean;
  805. double kernelSigmaGPOptVar;
  806. double kernelSigmaParzen;
  807. double kernelSigmaSVDD;
  808. for (int cl = indexOfFirstClass; cl <= indexOfLastClass; cl++)
  809. {
  810. std::cerr << "run for class " << cl << std::endl;
  811. int positiveClass = cl+1; //labels are from 1 to 1000, but our indices from 0 to 999
  812. // ------------------------------ TRAINING ------------------------------
  813. kernelSigmaGPVarApprox = sigmaGPVarApproxParas[cl];
  814. kernelSigmaGPVar = sigmaGPVarParas[cl];
  815. kernelSigmaGPMeanApprox = sigmaGPMeanApproxParas[cl];
  816. kernelSigmaGPMean = sigmaGPMeanParas[cl];
  817. kernelSigmaGPSRMean = sigmaGPSRMeanParas[cl];
  818. kernelSigmaGPSRVar = sigmaGPSRVarParas[cl];
  819. kernelSigmaGPOptMean = sigmaGPOptMeanParas[cl];
  820. kernelSigmaGPOptVar = sigmaGPOptVarParas[cl];
  821. kernelSigmaParzen = sigmaParzenParas[cl];
  822. kernelSigmaSVDD = sigmaSVDDParas[cl];
  823. Timer tTrain;
  824. tTrain.start();
  825. //compute the kernel matrix, which will be shared among all methods in this scenario
  826. NICE::Matrix kernelMatrix(nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
  827. //NOTE in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
  828. // I'm sure, we can speed it up a bit and compute it only for every different parameter
  829. //nonetheless, it's not as nice as we originally thought (same matrix for every method)
  830. //NOTE Nonetheless, since we're only interested in runtimes, we can ignore this
  831. //now sum up all entries of each row in the original kernel matrix
  832. double kernelScore(0.0);
  833. for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
  834. {
  835. for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
  836. {
  837. kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigmaGPVarApprox);
  838. kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
  839. if (i != j)
  840. kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
  841. }
  842. }
  843. // now call the individual training methods
  844. //train GP Mean Approx
  845. NICE::Vector GPMeanApproxRightPart;
  846. if (GPMeanApprox)
  847. trainGPMeanApprox(GPMeanApproxRightPart, noiseGPMeanApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  848. //train GP Var Approx
  849. NICE::Vector matrixDInv;
  850. if (GPVarApprox)
  851. trainGPVarApprox(matrixDInv, noiseGPVarApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  852. //train GP Mean
  853. NICE::Vector GPMeanRightPart;
  854. if (GPMean)
  855. trainGPMean(GPMeanRightPart, noiseGPMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  856. //train GP Var
  857. NICE::Matrix GPVarCholesky;
  858. if (GPVar)
  859. trainGPVar(GPVarCholesky, noiseGPVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  860. //train GP SR Mean
  861. NICE::Vector GPSRMeanRightPart;
  862. std::vector<int> indicesOfChosenExamplesGPSRMean;
  863. int nrOfRegressors = conf.gI( "GPSR", "nrOfRegressors", nrOfExamplesPerClass/2);
  864. nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
  865. if (GPSRMean)
  866. trainGPSRMean(GPSRMeanRightPart, noiseGPSRMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRMean );
  867. //train GP SR Var
  868. NICE::Matrix GPSRVarCholesky;
  869. std::vector<int> indicesOfChosenExamplesGPSRVar;
  870. if (GPSRVar)
  871. trainGPSRVar(GPSRVarCholesky, noiseGPSRVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRVar );
  872. //train GP Opt Mean
  873. NICE::Vector GPOptMeanRightPart;
  874. if (GPOptMean)
  875. trainGPOptMean(GPOptMeanRightPart, noiseGPOptMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  876. //train GP Opt Var
  877. NICE::Vector DiagGPOptVar;
  878. if (GPOptVar)
  879. trainGPOptVar(DiagGPOptVar, noiseGPOptVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  880. //train Parzen
  881. //nothing to do :)
  882. //train SVDD
  883. KCMinimumEnclosingBall *svdd;
  884. if (SVDD)
  885. svdd = trainSVDD(noiseSVDDParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  886. tTrain.stop();
  887. std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;
  888. std::cerr << "training done - now perform the evaluation" << std::endl;
  889. // ------------------------------ TESTING ------------------------------
  890. std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
  891. ClassificationResults resultsGPVarApprox;
  892. ClassificationResults resultsGPVar;
  893. ClassificationResults resultsGPMeanApprox;
  894. ClassificationResults resultsGPMean;
  895. ClassificationResults resultsGPSRMean;
  896. ClassificationResults resultsGPSRVar;
  897. ClassificationResults resultsGPOptMean;
  898. ClassificationResults resultsGPOptVar;
  899. ClassificationResults resultsParzen;
  900. ClassificationResults resultsSVDD;
  901. ProgressBar pb;
  902. Timer tTest;
  903. tTest.start();
  904. Timer tTestSingle;
  905. double timeForSingleExamplesGPVarApprox(0.0);
  906. double timeForSingleExamplesGPVar(0.0);
  907. double timeForSingleExamplesGPMeanApprox(0.0);
  908. double timeForSingleExamplesGPMean(0.0);
  909. double timeForSingleExamplesGPSRMean(0.0);
  910. double timeForSingleExamplesGPSRVar(0.0);
  911. double timeForSingleExamplesGPOptMean(0.0);
  912. double timeForSingleExamplesGPOptVar(0.0);
  913. double timeForSingleExamplesParzen(0.0);
  914. double timeForSingleExamplesSVDD(0.0);
  915. for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  916. {
  917. pb.update ( imageNetTest.getNumPreloadedExamples() );
  918. const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
  919. //NOTE: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
  920. //compute (self) similarities
  921. double kernelSelf (measureDistance(svec,svec, kernelSigmaGPVarApprox) );
  922. NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
  923. for (int j = 0; j < nrOfExamplesPerClass; j++)
  924. {
  925. kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigmaGPVarApprox);
  926. }
  927. //call the individual test-methods
  928. //evaluate GP Var Approx
  929. ClassificationResult rGPVarApprox;
  930. if (GPVarApprox)
  931. evaluateGPVarApprox( kernelVector, kernelSelf, matrixDInv, rGPVarApprox, timeForSingleExamplesGPVarApprox, runsPerClassToAverageTesting );
  932. //evaluate GP Var
  933. ClassificationResult rGPVar;
  934. if (GPVar)
  935. evaluateGPVar( kernelVector, kernelSelf, GPVarCholesky, rGPVar, timeForSingleExamplesGPVar, runsPerClassToAverageTesting );
  936. //evaluate GP Mean Approx
  937. ClassificationResult rGPMeanApprox;
  938. if (GPMeanApprox)
  939. evaluateGPMeanApprox( kernelVector, matrixDInv, rGPMeanApprox, timeForSingleExamplesGPMeanApprox, runsPerClassToAverageTesting );
  940. //evaluate GP Mean
  941. ClassificationResult rGPMean;
  942. if (GPMean)
  943. evaluateGPMean( kernelVector, GPMeanRightPart, rGPMean, timeForSingleExamplesGPMean, runsPerClassToAverageTesting );
  944. //evaluate GP SR Mean
  945. ClassificationResult rGPSRMean;
  946. if (GPSRMean)
  947. evaluateGPSRMean( kernelVector, GPSRMeanRightPart, rGPSRMean, timeForSingleExamplesGPSRMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRMean );
  948. //evaluate GP SR Var
  949. ClassificationResult rGPSRVar;
  950. if (GPSRVar)
  951. evaluateGPSRVar( kernelVector, GPSRVarCholesky, rGPSRVar, timeForSingleExamplesGPSRVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRVar, noiseGPSRVarParas[cl] );
  952. //evaluate GP Opt Mean
  953. ClassificationResult rGPOptMean;
  954. if (GPOptMean)
  955. evaluateGPOptMean( kernelVector, GPOptMeanRightPart, rGPOptMean, timeForSingleExamplesGPOptMean, runsPerClassToAverageTesting );
  956. //evaluate GP Opt Var
  957. ClassificationResult rGPOptVar;
  958. if (GPOptVar)
  959. evaluateGPOptVar( kernelVector, kernelSelf, DiagGPOptVar, rGPOptVar, timeForSingleExamplesGPOptVar, runsPerClassToAverageTesting );
  960. //evaluate Parzen
  961. ClassificationResult rParzen;
  962. if (Parzen)
  963. evaluateParzen( kernelVector, rParzen, timeForSingleExamplesParzen, runsPerClassToAverageTesting );
  964. //evaluate SVDD
  965. ClassificationResult rSVDD;
  966. if (SVDD)
  967. evaluateSVDD( svdd, kernelVector, rSVDD, timeForSingleExamplesSVDD, runsPerClassToAverageTesting );
  968. // set ground truth label
  969. rGPVarApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  970. rGPVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  971. rGPMeanApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  972. rGPMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  973. rGPSRMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  974. rGPSRVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  975. rGPOptMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  976. rGPOptVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  977. rParzen.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  978. rSVDD.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  979. //remember the results for the evaluation lateron
  980. resultsGPVarApprox.push_back ( rGPVarApprox );
  981. resultsGPVar.push_back ( rGPVar );
  982. resultsGPMeanApprox.push_back ( rGPMeanApprox );
  983. resultsGPMean.push_back ( rGPMean );
  984. resultsGPSRMean.push_back ( rGPSRMean );
  985. resultsGPSRVar.push_back ( rGPSRVar );
  986. resultsGPOptMean.push_back ( rGPOptMean );
  987. resultsGPOptVar.push_back ( rGPOptVar );
  988. resultsParzen.push_back ( rParzen );
  989. resultsSVDD.push_back ( rSVDD );
  990. }
  991. tTest.stop();
  992. std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;
  993. timeForSingleExamplesGPVarApprox/= imageNetTest.getNumPreloadedExamples();
  994. timeForSingleExamplesGPVar/= imageNetTest.getNumPreloadedExamples();
  995. timeForSingleExamplesGPMeanApprox/= imageNetTest.getNumPreloadedExamples();
  996. timeForSingleExamplesGPMean/= imageNetTest.getNumPreloadedExamples();
  997. timeForSingleExamplesGPSRMean/= imageNetTest.getNumPreloadedExamples();
  998. timeForSingleExamplesGPSRVar/= imageNetTest.getNumPreloadedExamples();
  999. timeForSingleExamplesGPOptMean/= imageNetTest.getNumPreloadedExamples();
  1000. timeForSingleExamplesGPOptVar/= imageNetTest.getNumPreloadedExamples();
  1001. timeForSingleExamplesParzen/= imageNetTest.getNumPreloadedExamples();
  1002. timeForSingleExamplesSVDD/= imageNetTest.getNumPreloadedExamples();
  1003. std::cerr << "GPVarApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVarApprox << std::endl;
  1004. std::cerr << "GPVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVar << std::endl;
  1005. std::cerr << "GPMeanApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMeanApprox << std::endl;
  1006. std::cerr << "GPMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMean << std::endl;
  1007. std::cerr << "GPSRMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRMean << std::endl;
  1008. std::cerr << "GPSRVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRVar << std::endl;
  1009. std::cerr << "GPOptMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptMean << std::endl;
  1010. std::cerr << "GPOptVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptVar << std::endl;
  1011. std::cerr << "Parzen -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesParzen << std::endl;
  1012. std::cerr << "SVDD -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesSVDD << std::endl;
  1013. // run the AUC-evaluation
  1014. double perfvalueGPVarApprox( 0.0 );
  1015. double perfvalueGPVar( 0.0 );
  1016. double perfvalueGPMeanApprox( 0.0 );
  1017. double perfvalueGPMean( 0.0 );
  1018. double perfvalueGPSRMean( 0.0 );
  1019. double perfvalueGPSRVar( 0.0 );
  1020. double perfvalueGPOptMean( 0.0 );
  1021. double perfvalueGPOptVar( 0.0 );
  1022. double perfvalueParzen( 0.0 );
  1023. double perfvalueSVDD( 0.0 );
  1024. if (GPVarApprox)
  1025. perfvalueGPVarApprox = resultsGPVarApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1026. if (GPVar)
  1027. perfvalueGPVar = resultsGPVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1028. if (GPMeanApprox)
  1029. perfvalueGPMeanApprox = resultsGPMeanApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1030. if (GPMean)
  1031. perfvalueGPMean = resultsGPMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1032. if (GPSRMean)
  1033. perfvalueGPSRMean = resultsGPSRMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1034. if (GPSRVar)
  1035. perfvalueGPSRVar = resultsGPSRVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1036. if (GPOptMean)
  1037. perfvalueGPOptMean = resultsGPOptMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1038. if (GPOptVar)
  1039. perfvalueGPOptVar = resultsGPOptVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1040. if (Parzen)
  1041. perfvalueParzen = resultsParzen.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1042. if (SVDD)
  1043. perfvalueSVDD = resultsSVDD.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  1044. std::cerr << "Performance GPVarApprox: " << perfvalueGPVarApprox << std::endl;
  1045. std::cerr << "Performance GPVar: " << perfvalueGPVar << std::endl;
  1046. std::cerr << "Performance GPMeanApprox: " << perfvalueGPMeanApprox << std::endl;
  1047. std::cerr << "Performance GPMean: " << perfvalueGPMean << std::endl;
  1048. std::cerr << "Performance GPSRMean: " << perfvalueGPSRMean << std::endl;
  1049. std::cerr << "Performance GPSRVar: " << perfvalueGPSRVar << std::endl;
  1050. std::cerr << "Performance GPOptMean: " << perfvalueGPOptMean << std::endl;
  1051. std::cerr << "Performance GPOptVar: " << perfvalueGPOptVar << std::endl;
  1052. std::cerr << "Performance Parzen: " << perfvalueParzen << std::endl;
  1053. std::cerr << "Performance SVDD: " << perfvalueSVDD << std::endl;
  1054. OverallPerformanceGPVarApprox += perfvalueGPVar;
  1055. OverallPerformanceGPVar += perfvalueGPVarApprox;
  1056. OverallPerformanceGPMeanApprox += perfvalueGPMeanApprox;
  1057. OverallPerformanceGPMean += perfvalueGPMean;
  1058. OverallPerformanceGPSRMean += perfvalueGPSRMean;
  1059. OverallPerformanceGPSRVar += perfvalueGPSRVar;
  1060. OverallPerformanceGPOptMean += perfvalueGPOptMean;
  1061. OverallPerformanceGPOptVar += perfvalueGPOptVar;
  1062. OverallPerformanceParzen += perfvalueParzen;
  1063. OverallPerformanceSVDD += perfvalueSVDD;
  1064. // clean up memory used by SVDD
  1065. if (SVDD)
  1066. delete svdd;
  1067. }
  1068. OverallPerformanceGPVarApprox /= nrOfClassesToConcidere;
  1069. OverallPerformanceGPVar /= nrOfClassesToConcidere;
  1070. OverallPerformanceGPMeanApprox /= nrOfClassesToConcidere;
  1071. OverallPerformanceGPMean /= nrOfClassesToConcidere;
  1072. OverallPerformanceGPSRMean /= nrOfClassesToConcidere;
  1073. OverallPerformanceGPSRVar /= nrOfClassesToConcidere;
  1074. OverallPerformanceGPOptMean /= nrOfClassesToConcidere;
  1075. OverallPerformanceGPOptVar /= nrOfClassesToConcidere;
  1076. OverallPerformanceParzen /= nrOfClassesToConcidere;
  1077. OverallPerformanceSVDD /= nrOfClassesToConcidere;
  1078. std::cerr << "overall performance GPVarApprox: " << OverallPerformanceGPVarApprox << std::endl;
  1079. std::cerr << "overall performance GPVar: " << OverallPerformanceGPVar << std::endl;
  1080. std::cerr << "overall performance GPMeanApprox: " << OverallPerformanceGPMeanApprox << std::endl;
  1081. std::cerr << "overall performance GPMean: " << OverallPerformanceGPMean << std::endl;
  1082. std::cerr << "overall performance GPSRMean: " << OverallPerformanceGPSRMean << std::endl;
  1083. std::cerr << "overall performance GPSRVar: " << OverallPerformanceGPSRVar << std::endl;
  1084. std::cerr << "overall performance GPOptMean: " << OverallPerformanceGPOptMean << std::endl;
  1085. std::cerr << "overall performance GPOptVar: " << OverallPerformanceGPOptVar << std::endl;
  1086. std::cerr << "overall performance Parzen: " << OverallPerformanceParzen << std::endl;
  1087. std::cerr << "overall performance SVDD: " << OverallPerformanceSVDD << std::endl;
  1088. return 0;
  1089. }