ComputerVisionJena
/
NICE_VisLearning


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329
							/** 
* @file testImageNetBinaryBruteForce.cpp
* @brief perform ImageNet tests with binary tasks for OCC using GP mean and variance, sophisticated approximations of both, Parzen Density Estimation and SVDD
* @author Alexander Lütz
* @date 23-05-2012 (dd-mm-yyyy)
*/

#include <ctime>
#include <time.h>

#include "core/basics/Config.h"
#include "core/basics/Timer.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/DiagonalMatrixApprox.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"


#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"
#include "vislearning/classifier/kernelclassifier/KCMinimumEnclosingBall.h"

#include "fast-hik/tools.h"
#include "fast-hik/MatFileIO.h"
#include "fast-hik/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

// --------------- THE KERNEL FUNCTION ( exponential kernel with euclidian distance ) ----------------------
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double inner_sum(0.0);

  double d;      
  
  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();
   
  //compute the euclidian distance between both feature vectores (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      d = ( aIt->second - bIt->second );      
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;      
    }
    else
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;       
    }
  }
  
  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++; 
  }
  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++; 
  }  

  //normalization of the exponent
  inner_sum /= (2.0*sigma*sigma);
  
  //finally, compute the RBF-kernel score (RBF = radial basis function)
  return exp(-inner_sum);
}

// --------------- INPUT METHOD ----------------------
void readParameters(string & filename, const int & size, NICE::Vector & parameterVector)
{
  //we read the parameters which are given from a Matlab-Script (each line contains a single number, which is the optimal parameter for this class)
  
  parameterVector.resize(size);
  parameterVector.set(0.0);
  
  ifstream is(filename.c_str());

  if ( !is.good() )
      fthrow(IOException, "Unable to read parameters.");  
//
  string tmp;
  int cnt(0);
  while (! is.eof())
  {
    is >> tmp;
    parameterVector[cnt] = atof(tmp.c_str());
    cnt++;
  }
//   
  is.close(); 
}

//------------------- TRAINING METHODS --------------------

void inline trainGPVarApprox(NICE::Vector & matrixDInv, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    std::cerr << "nrOfExamplesPerClass : " << nrOfExamplesPerClass << std::endl;
  
    Timer tTrainPreciseTimer;
    tTrainPreciseTimer.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {
      matrixDInv.resize(nrOfExamplesPerClass);
      matrixDInv.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        matrixDInv[i] = 1.0 / matrixDInv[i];
      }
    }
    
    tTrainPreciseTimer.stop(); 
    std::cerr << "Precise time used for GPVarApprox training class " << classNumber << ": " << tTrainPreciseTimer.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize(nrOfExamplesPerClass, nrOfExamplesPerClass);
      choleskyMatrix.set(0.0);      
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot k_* for new test samples
      cr.robustChol ( kernelMatrix, choleskyMatrix );   
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Vector matrixDInv(nrOfExamplesPerClass,0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse (and multiply every element with the label vector, which contains only one-entries and therefore be skipped...)
      GPMeanApproxRightPart.resize(nrOfExamplesPerClass);    
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        GPMeanApproxRightPart[i] = 1.0 / matrixDInv[i];
      } 
    }
    
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMeanApprox training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}
    
void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
    
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( kernelMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(nrOfExamplesPerClass);
      GPMeanRightPart.set(0.0);
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, y, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}    

// GP subset of regressors
void inline trainGPSRMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
      NICE::Vector projectedLabels;
      projectedLabels.multiply(Kmn,y);
      
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(indicesOfChosenExamples.size());
      GPMeanRightPart.set(0.0);
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, projectedLabels, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP subset of regressors
void inline trainGPSRVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
           
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize( nrOfExamplesPerClass, nrOfExamplesPerClass );
      choleskyMatrix.set( 0.0 );
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
     }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptMean(NICE::Vector & rightPartGPOptMean, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
    
    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, rightPartGPOptMean );
      
      // invert entries
      // by theory we should multiply these entries with the labels
      // but we are in an OCC setting with labels equal to one
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        if (rightPartGPOptMean[i] != 0.0)
          rightPartGPOptMean[i] = 1.0 / rightPartGPOptMean[i]; 
      }
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptVar(NICE::Vector & DiagGPOptVar, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
    
    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, DiagGPOptVar );
      
      //invert entries
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        if (DiagGPOptVar[i] != 0.0)
          DiagGPOptVar[i] = 1.0 / DiagGPOptVar[i];
      }
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

KCMinimumEnclosingBall *trainSVDD( const double & noise, const NICE::Matrix kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
 
    Config conf;
    // set the outlier ratio (Paul optimized this paramter FIXME)
    conf.sD( "SVDD", "outlier_fraction", 0.1 );
    conf.sB( "SVDD", "verbose", false );
    KCMinimumEnclosingBall *svdd = new KCMinimumEnclosingBall ( &conf, NULL /* no kernel function */, "SVDD" /* config section */);
    KernelData kernelData ( &conf, kernelMatrix, "Kernel" , false /* update cholesky */ );
 
    Timer tTrainPrecise;
    tTrainPrecise.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {     
    
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      // KCMinimumEnclosingBall does not store the kernel data object, therefore, we are save with passing a local copy
      svdd->teach ( &kernelData, y );
    }
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for SVDD training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;        

    return svdd;
}

// ------------- EVALUATION METHODS ---------------------
void inline evaluateGPVarApprox(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (int j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*       
      
      NICE::Vector rightPart (kernelVector.size(),0.0);      
      choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
      
      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMeanApprox(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y      
      mean = kernelVector.scalarProduct ( GPMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPSRMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
{
    double mean;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    }

    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y    
      mean = kernelVectorM.scalarProduct ( GPSRMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRVar(const NICE::Vector & kernelVector,  const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
{
    double uncertainty;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    } 
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      NICE::Vector rightPart (nrOfRegressors,0.0);      
      
      choleskySolveLargeScale ( choleskyMatrix, kernelVectorM, rightPart );
      
      uncertainty = noise*kernelVectorM.scalarProduct ( rightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPMeanApprox, but for convenience, it is its own method
void inline evaluateGPOptMean(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPVarApprox, but for convenience, it is its own method
void inline evaluateGPOptVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (int j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateParzen(const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double score;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {      
      //the Parzen score is nothing but the averaged similarity to every training sample
      score = kernelVector.Sum() / (double) kernelVector.size(); //maybe we could directly call kernelVector.Mean() here
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = score;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateSVDD( KCMinimumEnclosingBall *svdd, const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {        
      // In the following, we assume that we are using a Gaussian kernel
      r = svdd->classifyKernel ( kernelVector, 1.0 /* kernel self */ );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
}

/** 
    test the basic functionality of fast-hik hyperparameter optimization 
*/
int main (int argc, char **argv)
{   
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  
  //which classes to considere? we assume consecutive class numers
  int indexOfFirstClass = conf.gI("main", "indexOfFirstClass", 0);
  indexOfFirstClass = std::max(indexOfFirstClass, 0); //we do not have less than 0 classes
  int indexOfLastClass = conf.gI("main", "indexOfLastClass", 999);
  indexOfLastClass = std::min(indexOfLastClass, 999); //we do not have more than 1000 classes
  
  int nrOfClassesToConcidere =  (indexOfLastClass - indexOfLastClass)+1;
  
  //repetitions for every class to achieve reliable time evalutions
  int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 ); 
  int runsPerClassToAverageTesting = conf.gI( "main", "runsPerClassToAverageTesting", 1 );
  
  // share parameters among methods and classes?
  bool shareParameters = conf.gB("main" , "shareParameters", true);
  
  //which methods do we want to use?
  bool GPMeanApprox = conf.gB( "main", "GPMeanApprox", false);
  bool GPVarApprox = conf.gB( "main", "GPVarApprox", false);  
  bool GPMean = conf.gB( "main", "GPMean", false);
  bool GPVar = conf.gB( "main", "GPVar", false);
  bool GPSRMean = conf.gB( "main", "GPSRMean", false);
  bool GPSRVar = conf.gB( "main", "GPSRVar", false);  
  bool GPOptMean = conf.gB( "main", "GPOptMean", false);
  bool GPOptVar = conf.gB( "main", "GPOptVar", false);    
  bool Parzen = conf.gB( "main", "Parzen", false);
  bool SVDD = conf.gB( "main", "SVDD", false);  
  
  if (GPMeanApprox)
    std::cerr << "GPMeanApprox used" << std::endl;
  else 
    std::cerr << "GPMeanApprox not used" << std::endl;
  if (GPVarApprox)
    std::cerr << "GPVarApprox used" << std::endl;
  else 
    std::cerr << "GPVarApprox not used" << std::endl;
  if (GPMean)
    std::cerr << "GPMean used" << std::endl;
  else 
    std::cerr << "GPMean not used" << std::endl;
  if (GPVar)
    std::cerr << "GPVar used" << std::endl;
  else 
    std::cerr << "GPVar not used" << std::endl;
  if (GPSRMean)
    std::cerr << "GPSRMean used" << std::endl;
  else 
    std::cerr << "GPSRMean not used" << std::endl;
  if (GPSRVar)
    std::cerr << "GPSRVar used" << std::endl;
  else 
    std::cerr << "GPSRVar not used" << std::endl;
  if (GPOptMean)
    std::cerr << "GPOptMean used" << std::endl;
  else 
    std::cerr << "GPOptMean not used" << std::endl;
  if (GPOptVar)
    std::cerr << "GPOptVar used" << std::endl;
  else 
    std::cerr << "GPOptVar not used" << std::endl;  
  if (Parzen)
    std::cerr << "Parzen used" << std::endl;
  else 
    std::cerr << "Parzen not used" << std::endl;
  if (SVDD)
    std::cerr << "SVDD used" << std::endl;
  else 
    std::cerr << "SVDD not used" << std::endl;

  
  // GP variance approximation  
  NICE::Vector sigmaGPVarApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarApproxParas(nrOfClassesToConcidere,0.0);
  // GP variance  
  NICE::Vector sigmaGPVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarParas(nrOfClassesToConcidere,0.0);
  //GP mean approximation  
  NICE::Vector sigmaGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  //GP mean  
  NICE::Vector sigmaGPMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR mean  
  NICE::Vector sigmaGPSRMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR var
  NICE::Vector sigmaGPSRVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRVarParas(nrOfClassesToConcidere,0.0);
  //GP Opt mean  
  NICE::Vector sigmaGPOptMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptMeanParas(nrOfClassesToConcidere,0.0);
  //GP Opt var
  NICE::Vector sigmaGPOptVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptVarParas(nrOfClassesToConcidere,0.0);  
  //Parzen  
  NICE::Vector sigmaParzenParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseParzenParas(nrOfClassesToConcidere,0.0);
  //SVDD  
  NICE::Vector sigmaSVDDParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseSVDDParas(nrOfClassesToConcidere,0.0);
    
  if (!shareParameters)
  {
    //read the optimal parameters for the different methods
    
    // GP variance approximation
    string sigmaGPVarApproxFile = conf.gS("main", "sigmaGPVarApproxFile", "approxVarSigma.txt");  
    string noiseGPVarApproxFile = conf.gS("main", "noiseGPVarApproxFile", "approxVarNoise.txt");   
    // GP variance
    string sigmaGPVarFile = conf.gS("main", "sigmaGPVarFile", "approxVarSigma.txt");  
    string noiseGPVarFile = conf.gS("main", "noiseGPVarFile", "approxVarNoise.txt");  
    //GP mean approximation
    string sigmaGPMeanApproxFile = conf.gS("main", "sigmaGPMeanApproxFile", "approxVarSigma.txt");  
    string noiseGPMeanApproxFile = conf.gS("main", "noiseGPMeanApproxFile", "approxVarNoise.txt");    
    //GP mean
    string sigmaGPMeanFile = conf.gS("main", "sigmaGPMeanFile", "approxVarSigma.txt");  
    string noiseGPMeanFile = conf.gS("main", "noiseGPMeanFile", "approxVarNoise.txt");      
    //Parzen
    string sigmaParzenFile = conf.gS("main", "sigmaParzenFile", "approxVarSigma.txt");  
    string noiseParzenFile = conf.gS("main", "noiseParzenFile", "approxVarNoise.txt");    
    //SVDD
    string sigmaSVDDFile = conf.gS("main", "sigmaSVDDFile", "approxVarSigma.txt");  
    string noiseSVDDFile = conf.gS("main", "noiseSVDDFile", "approxVarNoise.txt");      

    // GP variance approximation    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarApproxParas);  
    // GP variance    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarParas);  
    //GP mean approximation   
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanApproxParas);  
    //GP mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanParas); 
    //GP SR mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRMeanParas);
    //GP SR var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRVarParas); 
    //GP Opt mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptMeanParas);
    //GP Opt var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptVarParas);     
    //Parzen    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaParzenParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseParzenParas);  
    //SVDD    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaSVDDParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseSVDDParas);   
  }
  else
  {
    //use static variables for all methods and classis
    double noise = conf.gD( "main", "noise", 0.01 );
    double sigma = conf.gD( "main", "sigma", 1.0 );
    
    sigmaGPVarApproxParas.set(sigma);
    noiseGPVarApproxParas.set(noise);
    // GP variance  
    sigmaGPVarParas.set(sigma);
    noiseGPVarParas.set(noise);
    //GP mean approximation  
    sigmaGPMeanApproxParas.set(sigma);
    noiseGPMeanApproxParas.set(noise);
    //GP mean  
    sigmaGPMeanParas.set(sigma);
    noiseGPMeanParas.set(noise);
    //GP SR mean  
    sigmaGPSRMeanParas.set(sigma);
    noiseGPSRMeanParas.set(noise);
    //GP SR var  
    sigmaGPSRVarParas.set(sigma);
    noiseGPSRVarParas.set(noise); 
    //GP Opt mean  
    sigmaGPOptMeanParas.set(sigma);
    noiseGPOptMeanParas.set(noise);
    //GP Opt var  
    sigmaGPOptVarParas.set(sigma);
    noiseGPOptVarParas.set(noise);    
    //Parzen  
    sigmaParzenParas.set(sigma);
    noiseParzenParas.set(noise);
    //SVDD  
    sigmaSVDDParas.set(sigma);
    noiseSVDDParas.set(noise);    
  }
  
  
  // -------- optimal parameters read --------------  
  
  std::vector<SparseVector> trainingData;
  NICE::Vector y;
  
  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();
    
  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;
  
  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );  
  
  double OverallPerformanceGPVarApprox(0.0);
  double OverallPerformanceGPVar(0.0);
  double OverallPerformanceGPMeanApprox(0.0);
  double OverallPerformanceGPMean(0.0);
  double OverallPerformanceGPSRMean(0.0);
  double OverallPerformanceGPSRVar(0.0);  
  double OverallPerformanceGPOptMean(0.0);
  double OverallPerformanceGPOptVar(0.0);   
  double OverallPerformanceParzen(0.0);
  double OverallPerformanceSVDD(0.0);

  
  double kernelSigmaGPVarApprox;
  double kernelSigmaGPVar;
  double kernelSigmaGPMeanApprox;
  double kernelSigmaGPMean;
  double kernelSigmaGPSRMean;
  double kernelSigmaGPSRVar;
  double kernelSigmaGPOptMean;
  double kernelSigmaGPOptVar;  
  double kernelSigmaParzen;
  double kernelSigmaSVDD;
  
  for (int cl = indexOfFirstClass; cl <= indexOfLastClass; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;
    int positiveClass = cl+1; //labels are from 1 to 1000, but our indices from 0 to 999
    // ------------------------------ TRAINING ------------------------------
  
    kernelSigmaGPVarApprox = sigmaGPVarApproxParas[cl];
    kernelSigmaGPVar = sigmaGPVarParas[cl];
    kernelSigmaGPMeanApprox = sigmaGPMeanApproxParas[cl];
    kernelSigmaGPMean = sigmaGPMeanParas[cl];
    kernelSigmaGPSRMean = sigmaGPSRMeanParas[cl];
    kernelSigmaGPSRVar = sigmaGPSRVarParas[cl];
    kernelSigmaGPOptMean = sigmaGPOptMeanParas[cl];
    kernelSigmaGPOptVar = sigmaGPOptVarParas[cl];    
    kernelSigmaParzen = sigmaParzenParas[cl];
    kernelSigmaSVDD = sigmaSVDDParas[cl];
    
    Timer tTrain;
    tTrain.start();
    
    //compute the kernel matrix, which will be shared among all methods in this scenario       
    NICE::Matrix kernelMatrix(nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
    
    //NOTE in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
    // I'm sure, we can speed it up a bit and compute it only for every different parameter
    //nonetheless, it's not as nice as we originally thought (same matrix for every method) 
    
    //NOTE Nonetheless, since we're only interested in runtimes, we can ignore this
           
    //now sum up all entries of each row in the original kernel matrix
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
        kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigmaGPVarApprox);
        kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
        
        if (i != j)
            kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
      }
    }  
    
    // now call the individual training methods
    
    //train GP Var Approx    
    NICE::Vector matrixDInv;
    if (GPVarApprox)
      trainGPVarApprox(matrixDInv, noiseGPVarApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
    
    //train GP Var
    NICE::Matrix GPVarCholesky;
    if (GPVar)
      trainGPVar(GPVarCholesky, noiseGPVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP Mean Approx
    NICE::Vector GPMeanApproxRightPart;
    if (GPMeanApprox)
      trainGPMeanApprox(GPMeanApproxRightPart, noiseGPMeanApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
    
    //train GP Mean
    NICE::Vector GPMeanRightPart;
    if (GPMean)
      trainGPMean(GPMeanRightPart, noiseGPMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP SR Mean
    NICE::Vector GPSRMeanRightPart;
    std::vector<int> indicesOfChosenExamplesGPSRMean;
    int nrOfRegressors = conf.gI( "GPSR", "nrOfRegressors", nrOfExamplesPerClass/2);
    nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
    if (GPSRMean)
      trainGPSRMean(GPSRMeanRightPart, noiseGPSRMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRMean );        
    
    //train GP SR Var
    NICE::Matrix GPSRVarCholesky;   
    std::vector<int> indicesOfChosenExamplesGPSRVar;
    if (GPSRVar)
      trainGPSRVar(GPSRVarCholesky, noiseGPSRVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRVar );  
    
    //train GP Opt Mean
    NICE::Vector GPOptMeanRightPart;
    if (GPOptMean)
      trainGPMeanApprox(GPOptMeanRightPart, noiseGPOptMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
    
    //train GP Opt Var
    NICE::Vector DiagGPOptVar;
    if (GPOptVar)
      trainGPMean(DiagGPOptVar, noiseGPOptVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );     
    
    //train Parzen 
    //nothing to do :)
    
    //train SVDD
    KCMinimumEnclosingBall *svdd;
    if (SVDD)
      svdd = trainSVDD(noiseSVDDParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  
    tTrain.stop();
    std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;      
       
    std::cerr << "training done - now perform the evaluation" << std::endl;


    // ------------------------------ TESTING ------------------------------
   
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    
    ClassificationResults resultsGPVarApprox;
    ClassificationResults resultsGPVar;
    ClassificationResults resultsGPMeanApprox;
    ClassificationResults resultsGPMean;
    ClassificationResults resultsGPSRMean;
    ClassificationResults resultsGPSRVar;    
    ClassificationResults resultsGPOptMean;
    ClassificationResults resultsGPOptVar;    
    ClassificationResults resultsParzen;
    ClassificationResults resultsSVDD;       
    
    ProgressBar pb;
    Timer tTest;
    tTest.start();    
    Timer tTestSingle;
    
    double timeForSingleExamplesGPVarApprox(0.0);    
    double timeForSingleExamplesGPVar(0.0);
    double timeForSingleExamplesGPMeanApprox(0.0);    
    double timeForSingleExamplesGPMean(0.0);    
    double timeForSingleExamplesGPSRMean(0.0);
    double timeForSingleExamplesGPSRVar(0.0);
    double timeForSingleExamplesGPOptMean(0.0);
    double timeForSingleExamplesGPOptVar(0.0);    
    double timeForSingleExamplesParzen(0.0);    
    double timeForSingleExamplesSVDD(0.0);    
    
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

      //NOTE: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
      
      //compute (self) similarities
      double kernelSelf (measureDistance(svec,svec, kernelSigmaGPVarApprox) );
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
      
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigmaGPVarApprox);
      }     
      
      //call the individual test-methods
      
      //evaluate GP Var Approx
      ClassificationResult rGPVarApprox;      
      if (GPVarApprox)
        evaluateGPVarApprox( kernelVector, kernelSelf, matrixDInv, rGPVarApprox, timeForSingleExamplesGPVarApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Var
      ClassificationResult rGPVar;
      if (GPVar)
        evaluateGPVar( kernelVector, kernelSelf, GPVarCholesky, rGPVar, timeForSingleExamplesGPVar, runsPerClassToAverageTesting );      
      
      //evaluate GP Mean Approx
      ClassificationResult rGPMeanApprox;      
      if (GPMeanApprox)
        evaluateGPMeanApprox( kernelVector, matrixDInv, rGPMeanApprox, timeForSingleExamplesGPMeanApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Mean
      ClassificationResult rGPMean;
      if (GPMean)
        evaluateGPMean( kernelVector, GPMeanRightPart, rGPMean, timeForSingleExamplesGPMean, runsPerClassToAverageTesting );       
       
      //evaluate GP SR Mean
      ClassificationResult rGPSRMean;
      if (GPSRMean)
        evaluateGPSRMean( kernelVector, GPSRMeanRightPart, rGPSRMean, timeForSingleExamplesGPSRMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRMean );       
      
      //evaluate GP SR Var
      ClassificationResult rGPSRVar;
      if (GPSRVar)
        evaluateGPSRVar( kernelVector, GPSRVarCholesky, rGPSRVar, timeForSingleExamplesGPSRVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRVar, noiseGPSRVarParas[cl] );       
      
      //evaluate GP Opt Mean
      ClassificationResult rGPOptMean;
      if (GPOptMean)
        evaluateGPOptMean( kernelVector, GPOptMeanRightPart, rGPOptMean, timeForSingleExamplesGPOptMean, runsPerClassToAverageTesting );       
      
      //evaluate GP Opt Var
      ClassificationResult rGPOptVar;
      if (GPOptVar)
        evaluateGPOptVar( kernelVector, kernelSelf, DiagGPOptVar, rGPOptVar, timeForSingleExamplesGPOptVar, runsPerClassToAverageTesting );   
      
      //evaluate Parzen
      ClassificationResult rParzen;
      if (Parzen)
        evaluateParzen( kernelVector, rParzen, timeForSingleExamplesParzen, runsPerClassToAverageTesting ); 
      
      //evaluate SVDD
      ClassificationResult rSVDD;
      if (SVDD)
        evaluateSVDD( svdd, kernelVector, rSVDD, timeForSingleExamplesSVDD, runsPerClassToAverageTesting );       

      
      // set ground truth label
      rGPVarApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMeanApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;      
      rParzen.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rSVDD.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;

      //remember the results for the evaluation lateron
      resultsGPVarApprox.push_back ( rGPVarApprox );
      resultsGPVar.push_back ( rGPVar );
      resultsGPMeanApprox.push_back ( rGPMeanApprox );
      resultsGPMean.push_back ( rGPMean );
      resultsGPSRMean.push_back ( rGPSRMean );
      resultsGPSRVar.push_back ( rGPSRVar );
      resultsGPOptMean.push_back ( rGPOptMean );
      resultsGPOptVar.push_back ( rGPOptVar );      
      resultsParzen.push_back ( rParzen );
      resultsSVDD.push_back ( rSVDD );      
    }
    
    tTest.stop();
    std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;       
    
    timeForSingleExamplesGPVarApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMeanApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptVar/= imageNetTest.getNumPreloadedExamples();    
    timeForSingleExamplesParzen/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesSVDD/= imageNetTest.getNumPreloadedExamples();
    
    std::cerr << "GPVarApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVarApprox << std::endl;    
    std::cerr << "GPVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVar << std::endl;    
    std::cerr << "GPMeanApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMeanApprox << std::endl;    
    std::cerr << "GPMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMean << std::endl;    
    std::cerr << "GPSRMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRMean << std::endl;    
    std::cerr << "GPSRVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRVar << std::endl;    
    std::cerr << "GPOptMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptMean << std::endl;    
    std::cerr << "GPOptVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptVar << std::endl;    
    std::cerr << "Parzen -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesParzen << std::endl;    
    std::cerr << "SVDD -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesSVDD << std::endl;    

    // run the AUC-evaluation
    double perfvalueGPVarApprox( 0.0 );
    double perfvalueGPVar( 0.0 );
    double perfvalueGPMeanApprox( 0.0 );
    double perfvalueGPMean( 0.0 );
    double perfvalueGPSRMean( 0.0 );
    double perfvalueGPSRVar( 0.0 );
    double perfvalueGPOptMean( 0.0 );
    double perfvalueGPOptVar( 0.0 );    
    double perfvalueParzen( 0.0 );
    double perfvalueSVDD( 0.0 );     

    if (GPVarApprox)
      perfvalueGPVarApprox = resultsGPVarApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (GPVar)
      perfvalueGPVar = resultsGPVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMeanApprox)
      perfvalueGPMeanApprox = resultsGPMeanApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMean)
      perfvalueGPMean = resultsGPMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRMean)
      perfvalueGPSRMean = resultsGPSRMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRVar)
      perfvalueGPSRVar = resultsGPSRVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPOptMean)
      perfvalueGPOptMean = resultsGPOptMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPOptVar)
      perfvalueGPOptVar = resultsGPOptVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (Parzen)
      perfvalueParzen = resultsParzen.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (SVDD)
      perfvalueSVDD = resultsSVDD.getBinaryClassPerformance( ClassificationResults::PERF_AUC );  


    std::cerr << "Performance GPVarApprox: " << perfvalueGPVarApprox << std::endl;
    std::cerr << "Performance GPVar: " << perfvalueGPVar << std::endl;
    std::cerr << "Performance GPMeanApprox: " << perfvalueGPMeanApprox << std::endl;
    std::cerr << "Performance GPMean: " << perfvalueGPMean << std::endl;
    std::cerr << "Performance GPSRMean: " << perfvalueGPSRMean << std::endl;
    std::cerr << "Performance GPSRVar: " << perfvalueGPSRVar << std::endl;
    std::cerr << "Performance GPOptMean: " << perfvalueGPOptMean << std::endl;
    std::cerr << "Performance GPOptVar: " << perfvalueGPOptVar << std::endl;    
    std::cerr << "Performance Parzen: " << perfvalueParzen << std::endl;
    std::cerr << "Performance SVDD: " << perfvalueSVDD << std::endl;    
    
    OverallPerformanceGPVarApprox += perfvalueGPVar;    
    OverallPerformanceGPVar += perfvalueGPVarApprox;
    OverallPerformanceGPMeanApprox += perfvalueGPMeanApprox;
    OverallPerformanceGPMean += perfvalueGPMean;
    OverallPerformanceGPSRMean += perfvalueGPSRMean;
    OverallPerformanceGPSRVar += perfvalueGPSRVar;
    OverallPerformanceGPOptMean += perfvalueGPOptMean;
    OverallPerformanceGPOptVar += perfvalueGPOptVar;    
    OverallPerformanceParzen += perfvalueParzen;
    OverallPerformanceSVDD += perfvalueSVDD;   

    // clean up memory used by SVDD
    if (SVDD)    
      delete svdd;
  }
  
  OverallPerformanceGPVarApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPVar /= nrOfClassesToConcidere;
  OverallPerformanceGPMeanApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRVar /= nrOfClassesToConcidere;
  OverallPerformanceGPOptMean /= nrOfClassesToConcidere;
  OverallPerformanceGPOptVar /= nrOfClassesToConcidere;  
  OverallPerformanceParzen /= nrOfClassesToConcidere;
  OverallPerformanceSVDD /= nrOfClassesToConcidere;  
  
  std::cerr << "overall performance GPVarApprox: " << OverallPerformanceGPVarApprox << std::endl;
  std::cerr << "overall performance GPVar: " << OverallPerformanceGPVar << std::endl;
  std::cerr << "overall performance GPMeanApprox: " << OverallPerformanceGPMeanApprox << std::endl;
  std::cerr << "overall performance GPMean: " << OverallPerformanceGPMean << std::endl;
  std::cerr << "overall performance GPSRMean: " << OverallPerformanceGPSRMean << std::endl;
  std::cerr << "overall performance GPSRVar: " << OverallPerformanceGPSRVar << std::endl;
  std::cerr << "overall performance GPOptMean: " << OverallPerformanceGPOptMean << std::endl;
  std::cerr << "overall performance GPOptVar: " << OverallPerformanceGPOptVar << std::endl;  
  std::cerr << "overall performance Parzen: " << OverallPerformanceParzen << std::endl;
  std::cerr << "overall performance SVDD: " << OverallPerformanceSVDD << std::endl;  
  
  return 0;
}