ComputerVisionJena
/
NICE_VisLearning


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388
							/** 
* @file testImageNetBinaryBruteForce.cpp
* @brief perform ImageNet tests with binary tasks for OCC using GP mean and variance, sophisticated approximations of both, Parzen Density Estimation and SVDD
* @author Alexander Lütz
* @date 23-05-2012 (dd-mm-yyyy)
*/

#include <ctime>
#include <time.h>

#include "core/basics/Config.h"
#include "core/basics/Timer.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/DiagonalMatrixApprox.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"


#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"
#include "vislearning/classifier/kernelclassifier/KCMinimumEnclosingBall.h"

#include "fast-hik/tools.h"
#include "fast-hik/MatFileIO.h"
#include "fast-hik/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

// --------------- THE KERNEL FUNCTION ( exponential kernel with euclidian distance ) ----------------------
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double inner_sum(0.0);

  double d;      
  
  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();
   
  //compute the euclidian distance between both feature vectores (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      d = ( aIt->second - bIt->second );      
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;      
    }
    else
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;       
    }
  }
  
  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++; 
  }
  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++; 
  }  

  //normalization of the exponent
  inner_sum /= (2.0*sigma*sigma);
  
  //finally, compute the RBF-kernel score (RBF = radial basis function)
  return exp(-inner_sum);
}

// --------------- INPUT METHOD ----------------------
void readParameters(string & filename, const int & size, NICE::Vector & parameterVector)
{
  //we read the parameters which are given from a Matlab-Script (each line contains a single number, which is the optimal parameter for this class)
  
  parameterVector.resize(size);
  parameterVector.set(0.0);
  
  ifstream is(filename.c_str());

  if ( !is.good() )
      fthrow(IOException, "Unable to read parameters.");  
//
  string tmp;
  int cnt(0);
  while (! is.eof())
  {
    is >> tmp;
    parameterVector[cnt] = atof(tmp.c_str());
    cnt++;
  }
//   
  is.close(); 
}

//------------------- TRAINING METHODS --------------------

void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
    
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( kernelMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(nrOfExamplesPerClass);
      GPMeanRightPart.set(0.0);
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, y, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}  

void inline trainGPVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize(nrOfExamplesPerClass, nrOfExamplesPerClass);
      choleskyMatrix.set(0.0);      
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot k_* for new test samples
      cr.robustChol ( kernelMatrix, choleskyMatrix );   
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Vector matrixDInv(nrOfExamplesPerClass,0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse (and multiply every element with the label vector, which contains only one-entries and therefore be skipped...)
      GPMeanApproxRightPart.resize(nrOfExamplesPerClass);    
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        GPMeanApproxRightPart[i] = 1.0 / matrixDInv[i];
      } 
    }
    
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMeanApprox training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPVarApprox(NICE::Vector & matrixDInv, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    std::cerr << "nrOfExamplesPerClass : " << nrOfExamplesPerClass << std::endl;
  
    Timer tTrainPreciseTimer;
    tTrainPreciseTimer.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {
      matrixDInv.resize(nrOfExamplesPerClass);
      matrixDInv.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        matrixDInv[i] = 1.0 / matrixDInv[i];
      }
    }
    
    tTrainPreciseTimer.stop(); 
    std::cerr << "Precise time used for GPVarApprox training class " << classNumber << ": " << tTrainPreciseTimer.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP subset of regressors
void inline trainGPSRMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
      NICE::Vector projectedLabels;
      projectedLabels.multiply(Kmn,y);
      
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(indicesOfChosenExamples.size());
      GPMeanRightPart.set(0.0);
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, projectedLabels, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP subset of regressors
void inline trainGPSRVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
           
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize( nrOfExamplesPerClass, nrOfExamplesPerClass );
      choleskyMatrix.set( 0.0 );
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
     }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptMean(NICE::Vector & rightPartGPOptMean, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
//     rightPartGPOptMean.resize(nrOfExamplesPerClass);
    
    NICE::Matrix kInv( nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0 );
    CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      cr.robustCholInv ( kernelMatrix, kInv );
      
      //we initialize the D-Matrix with the approximation we use in other methods (row sums of kernel matrix)
      rightPartGPOptMean.resize(nrOfExamplesPerClass);
      rightPartGPOptMean.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        rightPartGPOptMean.set(noise);
      else
        rightPartGPOptMean.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          rightPartGPOptMean[i] += kernelMatrix(i,j);
          if (i != j)
            rightPartGPOptMean[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        rightPartGPOptMean[i] = 1.0 / rightPartGPOptMean[i];
      }      
      
//       rightPartGPOptMean.set(0.0);
      
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, rightPartGPOptMean );
      
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptVar(NICE::Vector & DiagGPOptVar, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
    DiagGPOptVar.resize(nrOfExamplesPerClass);
    
    NICE::Matrix kInv( nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0 );
    CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);    
    
    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      cr.robustCholInv ( kernelMatrix, kInv );
      
//       DiagGPOptVar.set(0.0);
      
      //we initialize the D-Matrix with the approximation we use in other methods (row sums of kernel matrix)
      DiagGPOptVar.resize(nrOfExamplesPerClass);
      DiagGPOptVar.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        DiagGPOptVar.set(noise);
      else
        DiagGPOptVar.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          DiagGPOptVar[i] += kernelMatrix(i,j);
          if (i != j)
            DiagGPOptVar[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        DiagGPOptVar[i] = 1.0 / DiagGPOptVar[i];
      }         
      
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, DiagGPOptVar );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

KCMinimumEnclosingBall *trainSVDD( const double & noise, const NICE::Matrix kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
 
    Config conf;
    // set the outlier ratio (Paul optimized this paramter FIXME)
    conf.sD( "SVDD", "outlier_fraction", 0.1 );
    conf.sB( "SVDD", "verbose", false );
    KCMinimumEnclosingBall *svdd = new KCMinimumEnclosingBall ( &conf, NULL /* no kernel function */, "SVDD" /* config section */);
    KernelData kernelData ( &conf, kernelMatrix, "Kernel" , false /* update cholesky */ );
 
    Timer tTrainPrecise;
    tTrainPrecise.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {     
    
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      // KCMinimumEnclosingBall does not store the kernel data object, therefore, we are save with passing a local copy
      svdd->teach ( &kernelData, y );
    }
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for SVDD training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;        

    return svdd;
}

// ------------- EVALUATION METHODS ---------------------
void inline evaluateGPVarApprox(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (int j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*       
      
      NICE::Vector rightPart (kernelVector.size(),0.0);      
      choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
      
      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMeanApprox(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y      
      mean = kernelVector.scalarProduct ( GPMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPSRMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
{
    double mean;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    }

    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y    
      mean = kernelVectorM.scalarProduct ( GPSRMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRVar(const NICE::Vector & kernelVector,  const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
{
    double uncertainty;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    } 
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      NICE::Vector rightPart (nrOfRegressors,0.0);      
      
      choleskySolveLargeScale ( choleskyMatrix, kernelVectorM, rightPart );
      
      uncertainty = noise*kernelVectorM.scalarProduct ( rightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPMeanApprox, but for convenience, it is its own method
void inline evaluateGPOptMean(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPVarApprox, but for convenience, it is its own method
void inline evaluateGPOptVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (int j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateParzen(const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double score;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {      
      //the Parzen score is nothing but the averaged similarity to every training sample
      score = kernelVector.Sum() / (double) kernelVector.size(); //maybe we could directly call kernelVector.Mean() here
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = score;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateSVDD( KCMinimumEnclosingBall *svdd, const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {        
      // In the following, we assume that we are using a Gaussian kernel
      r = svdd->classifyKernel ( kernelVector, 1.0 /* kernel self */ );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
}

/** 
    test the basic functionality of fast-hik hyperparameter optimization 
*/
int main (int argc, char **argv)
{   
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  
  //which classes to considere? we assume consecutive class numers
  int indexOfFirstClass = conf.gI("main", "indexOfFirstClass", 0);
  indexOfFirstClass = std::max(indexOfFirstClass, 0); //we do not have less than 0 classes
  int indexOfLastClass = conf.gI("main", "indexOfLastClass", 999);
  indexOfLastClass = std::min(indexOfLastClass, 999); //we do not have more than 1000 classes
  
  int nrOfClassesToConcidere =  (indexOfLastClass - indexOfLastClass)+1;
  
  //repetitions for every class to achieve reliable time evalutions
  int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 ); 
  int runsPerClassToAverageTesting = conf.gI( "main", "runsPerClassToAverageTesting", 1 );
  
  // share parameters among methods and classes?
  bool shareParameters = conf.gB("main" , "shareParameters", true);
  
  //which methods do we want to use?
  bool GPMeanApprox = conf.gB( "main", "GPMeanApprox", false);
  bool GPVarApprox = conf.gB( "main", "GPVarApprox", false);  
  bool GPMean = conf.gB( "main", "GPMean", false);
  bool GPVar = conf.gB( "main", "GPVar", false);
  bool GPSRMean = conf.gB( "main", "GPSRMean", false);
  bool GPSRVar = conf.gB( "main", "GPSRVar", false);  
  bool GPOptMean = conf.gB( "main", "GPOptMean", false);
  bool GPOptVar = conf.gB( "main", "GPOptVar", false);    
  bool Parzen = conf.gB( "main", "Parzen", false);
  bool SVDD = conf.gB( "main", "SVDD", false);  
  
  if (GPMeanApprox)
    std::cerr << "GPMeanApprox used" << std::endl;
  else 
    std::cerr << "GPMeanApprox not used" << std::endl;
  if (GPVarApprox)
    std::cerr << "GPVarApprox used" << std::endl;
  else 
    std::cerr << "GPVarApprox not used" << std::endl;
  if (GPMean)
    std::cerr << "GPMean used" << std::endl;
  else 
    std::cerr << "GPMean not used" << std::endl;
  if (GPVar)
    std::cerr << "GPVar used" << std::endl;
  else 
    std::cerr << "GPVar not used" << std::endl;
  if (GPSRMean)
    std::cerr << "GPSRMean used" << std::endl;
  else 
    std::cerr << "GPSRMean not used" << std::endl;
  if (GPSRVar)
    std::cerr << "GPSRVar used" << std::endl;
  else 
    std::cerr << "GPSRVar not used" << std::endl;
  if (GPOptMean)
    std::cerr << "GPOptMean used" << std::endl;
  else 
    std::cerr << "GPOptMean not used" << std::endl;
  if (GPOptVar)
    std::cerr << "GPOptVar used" << std::endl;
  else 
    std::cerr << "GPOptVar not used" << std::endl;  
  if (Parzen)
    std::cerr << "Parzen used" << std::endl;
  else 
    std::cerr << "Parzen not used" << std::endl;
  if (SVDD)
    std::cerr << "SVDD used" << std::endl;
  else 
    std::cerr << "SVDD not used" << std::endl;

  
  // GP variance approximation  
  NICE::Vector sigmaGPVarApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarApproxParas(nrOfClassesToConcidere,0.0);
  // GP variance  
  NICE::Vector sigmaGPVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarParas(nrOfClassesToConcidere,0.0);
  //GP mean approximation  
  NICE::Vector sigmaGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  //GP mean  
  NICE::Vector sigmaGPMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR mean  
  NICE::Vector sigmaGPSRMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR var
  NICE::Vector sigmaGPSRVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRVarParas(nrOfClassesToConcidere,0.0);
  //GP Opt mean  
  NICE::Vector sigmaGPOptMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptMeanParas(nrOfClassesToConcidere,0.0);
  //GP Opt var
  NICE::Vector sigmaGPOptVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptVarParas(nrOfClassesToConcidere,0.0);  
  //Parzen  
  NICE::Vector sigmaParzenParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseParzenParas(nrOfClassesToConcidere,0.0);
  //SVDD  
  NICE::Vector sigmaSVDDParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseSVDDParas(nrOfClassesToConcidere,0.0);
    
  if (!shareParameters)
  {
    //read the optimal parameters for the different methods
    
    // GP variance approximation
    string sigmaGPVarApproxFile = conf.gS("main", "sigmaGPVarApproxFile", "approxVarSigma.txt");  
    string noiseGPVarApproxFile = conf.gS("main", "noiseGPVarApproxFile", "approxVarNoise.txt");   
    // GP variance
    string sigmaGPVarFile = conf.gS("main", "sigmaGPVarFile", "approxVarSigma.txt");  
    string noiseGPVarFile = conf.gS("main", "noiseGPVarFile", "approxVarNoise.txt");  
    //GP mean approximation
    string sigmaGPMeanApproxFile = conf.gS("main", "sigmaGPMeanApproxFile", "approxVarSigma.txt");  
    string noiseGPMeanApproxFile = conf.gS("main", "noiseGPMeanApproxFile", "approxVarNoise.txt");    
    //GP mean
    string sigmaGPMeanFile = conf.gS("main", "sigmaGPMeanFile", "approxVarSigma.txt");  
    string noiseGPMeanFile = conf.gS("main", "noiseGPMeanFile", "approxVarNoise.txt");      
    //Parzen
    string sigmaParzenFile = conf.gS("main", "sigmaParzenFile", "approxVarSigma.txt");  
    string noiseParzenFile = conf.gS("main", "noiseParzenFile", "approxVarNoise.txt");    
    //SVDD
    string sigmaSVDDFile = conf.gS("main", "sigmaSVDDFile", "approxVarSigma.txt");  
    string noiseSVDDFile = conf.gS("main", "noiseSVDDFile", "approxVarNoise.txt");      

    // GP variance approximation    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarApproxParas);  
    // GP variance    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarParas);  
    //GP mean approximation   
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanApproxParas);  
    //GP mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanParas); 
    //GP SR mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRMeanParas);
    //GP SR var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRVarParas); 
    //GP Opt mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptMeanParas);
    //GP Opt var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptVarParas);     
    //Parzen    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaParzenParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseParzenParas);  
    //SVDD    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaSVDDParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseSVDDParas);   
  }
  else
  {
    //use static variables for all methods and classis
    double noise = conf.gD( "main", "noise", 0.01 );
    double sigma = conf.gD( "main", "sigma", 1.0 );
    
    sigmaGPVarApproxParas.set(sigma);
    noiseGPVarApproxParas.set(noise);
    // GP variance  
    sigmaGPVarParas.set(sigma);
    noiseGPVarParas.set(noise);
    //GP mean approximation  
    sigmaGPMeanApproxParas.set(sigma);
    noiseGPMeanApproxParas.set(noise);
    //GP mean  
    sigmaGPMeanParas.set(sigma);
    noiseGPMeanParas.set(noise);
    //GP SR mean  
    sigmaGPSRMeanParas.set(sigma);
    noiseGPSRMeanParas.set(noise);
    //GP SR var  
    sigmaGPSRVarParas.set(sigma);
    noiseGPSRVarParas.set(noise); 
    //GP Opt mean  
    sigmaGPOptMeanParas.set(sigma);
    noiseGPOptMeanParas.set(noise);
    //GP Opt var  
    sigmaGPOptVarParas.set(sigma);
    noiseGPOptVarParas.set(noise);    
    //Parzen  
    sigmaParzenParas.set(sigma);
    noiseParzenParas.set(noise);
    //SVDD  
    sigmaSVDDParas.set(sigma);
    noiseSVDDParas.set(noise);    
  }
  
  
  // -------- optimal parameters read --------------  
  
  std::vector<SparseVector> trainingData;
  NICE::Vector y;
  
  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();
    
  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;
  
  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );  
  
  double OverallPerformanceGPVarApprox(0.0);
  double OverallPerformanceGPVar(0.0);
  double OverallPerformanceGPMeanApprox(0.0);
  double OverallPerformanceGPMean(0.0);
  double OverallPerformanceGPSRMean(0.0);
  double OverallPerformanceGPSRVar(0.0);  
  double OverallPerformanceGPOptMean(0.0);
  double OverallPerformanceGPOptVar(0.0);   
  double OverallPerformanceParzen(0.0);
  double OverallPerformanceSVDD(0.0);

  
  double kernelSigmaGPVarApprox;
  double kernelSigmaGPVar;
  double kernelSigmaGPMeanApprox;
  double kernelSigmaGPMean;
  double kernelSigmaGPSRMean;
  double kernelSigmaGPSRVar;
  double kernelSigmaGPOptMean;
  double kernelSigmaGPOptVar;  
  double kernelSigmaParzen;
  double kernelSigmaSVDD;
  
  for (int cl = indexOfFirstClass; cl <= indexOfLastClass; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;
    int positiveClass = cl+1; //labels are from 1 to 1000, but our indices from 0 to 999
    // ------------------------------ TRAINING ------------------------------
  
    kernelSigmaGPVarApprox = sigmaGPVarApproxParas[cl];
    kernelSigmaGPVar = sigmaGPVarParas[cl];
    kernelSigmaGPMeanApprox = sigmaGPMeanApproxParas[cl];
    kernelSigmaGPMean = sigmaGPMeanParas[cl];
    kernelSigmaGPSRMean = sigmaGPSRMeanParas[cl];
    kernelSigmaGPSRVar = sigmaGPSRVarParas[cl];
    kernelSigmaGPOptMean = sigmaGPOptMeanParas[cl];
    kernelSigmaGPOptVar = sigmaGPOptVarParas[cl];    
    kernelSigmaParzen = sigmaParzenParas[cl];
    kernelSigmaSVDD = sigmaSVDDParas[cl];
    
    Timer tTrain;
    tTrain.start();
    
    //compute the kernel matrix, which will be shared among all methods in this scenario       
    NICE::Matrix kernelMatrix(nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
    
    //NOTE in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
    // I'm sure, we can speed it up a bit and compute it only for every different parameter
    //nonetheless, it's not as nice as we originally thought (same matrix for every method) 
    
    //NOTE Nonetheless, since we're only interested in runtimes, we can ignore this
           
    //now sum up all entries of each row in the original kernel matrix
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
        kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigmaGPVarApprox);
        kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
        
        if (i != j)
            kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
      }
    }  
    
    // now call the individual training methods
    
    //train GP Mean Approx
    NICE::Vector GPMeanApproxRightPart;
    if (GPMeanApprox)
      trainGPMeanApprox(GPMeanApproxRightPart, noiseGPMeanApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP Var Approx    
    NICE::Vector matrixDInv;
    if (GPVarApprox)
      trainGPVarApprox(matrixDInv, noiseGPVarApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
         
    //train GP Mean
    NICE::Vector GPMeanRightPart;
    if (GPMean)
      trainGPMean(GPMeanRightPart, noiseGPMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP Var
    NICE::Matrix GPVarCholesky;
    if (GPVar)
      trainGPVar(GPVarCholesky, noiseGPVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );       
    
    //train GP SR Mean
    NICE::Vector GPSRMeanRightPart;
    std::vector<int> indicesOfChosenExamplesGPSRMean;
    int nrOfRegressors = conf.gI( "GPSR", "nrOfRegressors", nrOfExamplesPerClass/2);
    nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
    if (GPSRMean)
      trainGPSRMean(GPSRMeanRightPart, noiseGPSRMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRMean );        
    
    //train GP SR Var
    NICE::Matrix GPSRVarCholesky;   
    std::vector<int> indicesOfChosenExamplesGPSRVar;
    if (GPSRVar)
      trainGPSRVar(GPSRVarCholesky, noiseGPSRVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRVar );  
    
    //train GP Opt Mean
    NICE::Vector GPOptMeanRightPart;
    if (GPOptMean)
      trainGPOptMean(GPOptMeanRightPart, noiseGPOptMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
    std::cerr << "GPOptMeanRightPart: " << std::endl; std::cerr << GPOptMeanRightPart << std::endl;
    
    //train GP Opt Var
    NICE::Vector DiagGPOptVar;
    if (GPOptVar)
      trainGPOptVar(DiagGPOptVar, noiseGPOptVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );     
    std::cerr << "DiagGPOptVar: " << std::endl; std::cerr << DiagGPOptVar << std::endl;
    
    //train Parzen 
    //nothing to do :)
    
    //train SVDD
    KCMinimumEnclosingBall *svdd;
    if (SVDD)
      svdd = trainSVDD(noiseSVDDParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  
    tTrain.stop();
    std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;      
       
    std::cerr << "training done - now perform the evaluation" << std::endl;


    // ------------------------------ TESTING ------------------------------
   
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    
    ClassificationResults resultsGPVarApprox;
    ClassificationResults resultsGPVar;
    ClassificationResults resultsGPMeanApprox;
    ClassificationResults resultsGPMean;
    ClassificationResults resultsGPSRMean;
    ClassificationResults resultsGPSRVar;    
    ClassificationResults resultsGPOptMean;
    ClassificationResults resultsGPOptVar;    
    ClassificationResults resultsParzen;
    ClassificationResults resultsSVDD;       
    
    ProgressBar pb;
    Timer tTest;
    tTest.start();    
    Timer tTestSingle;
    
    double timeForSingleExamplesGPVarApprox(0.0);    
    double timeForSingleExamplesGPVar(0.0);
    double timeForSingleExamplesGPMeanApprox(0.0);    
    double timeForSingleExamplesGPMean(0.0);    
    double timeForSingleExamplesGPSRMean(0.0);
    double timeForSingleExamplesGPSRVar(0.0);
    double timeForSingleExamplesGPOptMean(0.0);
    double timeForSingleExamplesGPOptVar(0.0);    
    double timeForSingleExamplesParzen(0.0);    
    double timeForSingleExamplesSVDD(0.0);    
    
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

      //NOTE: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
      
      //compute (self) similarities
      double kernelSelf (measureDistance(svec,svec, kernelSigmaGPVarApprox) );
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
      
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigmaGPVarApprox);
      }     
      
      //call the individual test-methods
      
      //evaluate GP Var Approx
      ClassificationResult rGPVarApprox;      
      if (GPVarApprox)
        evaluateGPVarApprox( kernelVector, kernelSelf, matrixDInv, rGPVarApprox, timeForSingleExamplesGPVarApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Var
      ClassificationResult rGPVar;
      if (GPVar)
        evaluateGPVar( kernelVector, kernelSelf, GPVarCholesky, rGPVar, timeForSingleExamplesGPVar, runsPerClassToAverageTesting );      
      
      //evaluate GP Mean Approx
      ClassificationResult rGPMeanApprox;      
      if (GPMeanApprox)
        evaluateGPMeanApprox( kernelVector, matrixDInv, rGPMeanApprox, timeForSingleExamplesGPMeanApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Mean
      ClassificationResult rGPMean;
      if (GPMean)
        evaluateGPMean( kernelVector, GPMeanRightPart, rGPMean, timeForSingleExamplesGPMean, runsPerClassToAverageTesting );       
       
      //evaluate GP SR Mean
      ClassificationResult rGPSRMean;
      if (GPSRMean)
        evaluateGPSRMean( kernelVector, GPSRMeanRightPart, rGPSRMean, timeForSingleExamplesGPSRMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRMean );       
      
      //evaluate GP SR Var
      ClassificationResult rGPSRVar;
      if (GPSRVar)
        evaluateGPSRVar( kernelVector, GPSRVarCholesky, rGPSRVar, timeForSingleExamplesGPSRVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRVar, noiseGPSRVarParas[cl] );       
      
      //evaluate GP Opt Mean
      ClassificationResult rGPOptMean;
      if (GPOptMean)
        evaluateGPOptMean( kernelVector, GPOptMeanRightPart, rGPOptMean, timeForSingleExamplesGPOptMean, runsPerClassToAverageTesting );       
      
      //evaluate GP Opt Var
      ClassificationResult rGPOptVar;
      if (GPOptVar)
        evaluateGPOptVar( kernelVector, kernelSelf, DiagGPOptVar, rGPOptVar, timeForSingleExamplesGPOptVar, runsPerClassToAverageTesting );   
      
      //evaluate Parzen
      ClassificationResult rParzen;
      if (Parzen)
        evaluateParzen( kernelVector, rParzen, timeForSingleExamplesParzen, runsPerClassToAverageTesting ); 
      
      //evaluate SVDD
      ClassificationResult rSVDD;
      if (SVDD)
        evaluateSVDD( svdd, kernelVector, rSVDD, timeForSingleExamplesSVDD, runsPerClassToAverageTesting );       

      
      // set ground truth label
      rGPVarApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMeanApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;      
      rParzen.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rSVDD.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;

      //remember the results for the evaluation lateron
      resultsGPVarApprox.push_back ( rGPVarApprox );
      resultsGPVar.push_back ( rGPVar );
      resultsGPMeanApprox.push_back ( rGPMeanApprox );
      resultsGPMean.push_back ( rGPMean );
      resultsGPSRMean.push_back ( rGPSRMean );
      resultsGPSRVar.push_back ( rGPSRVar );
      resultsGPOptMean.push_back ( rGPOptMean );
      resultsGPOptVar.push_back ( rGPOptVar );      
      resultsParzen.push_back ( rParzen );
      resultsSVDD.push_back ( rSVDD );      
    }
    
    tTest.stop();
    std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;       
    
    timeForSingleExamplesGPVarApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMeanApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptVar/= imageNetTest.getNumPreloadedExamples();    
    timeForSingleExamplesParzen/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesSVDD/= imageNetTest.getNumPreloadedExamples();
    
    std::cerr << "GPVarApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVarApprox << std::endl;    
    std::cerr << "GPVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVar << std::endl;    
    std::cerr << "GPMeanApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMeanApprox << std::endl;    
    std::cerr << "GPMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMean << std::endl;    
    std::cerr << "GPSRMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRMean << std::endl;    
    std::cerr << "GPSRVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRVar << std::endl;    
    std::cerr << "GPOptMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptMean << std::endl;    
    std::cerr << "GPOptVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptVar << std::endl;    
    std::cerr << "Parzen -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesParzen << std::endl;    
    std::cerr << "SVDD -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesSVDD << std::endl;    

    // run the AUC-evaluation
    double perfvalueGPVarApprox( 0.0 );
    double perfvalueGPVar( 0.0 );
    double perfvalueGPMeanApprox( 0.0 );
    double perfvalueGPMean( 0.0 );
    double perfvalueGPSRMean( 0.0 );
    double perfvalueGPSRVar( 0.0 );
    double perfvalueGPOptMean( 0.0 );
    double perfvalueGPOptVar( 0.0 );    
    double perfvalueParzen( 0.0 );
    double perfvalueSVDD( 0.0 );     

    if (GPVarApprox)
      perfvalueGPVarApprox = resultsGPVarApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (GPVar)
      perfvalueGPVar = resultsGPVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMeanApprox)
      perfvalueGPMeanApprox = resultsGPMeanApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMean)
      perfvalueGPMean = resultsGPMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRMean)
      perfvalueGPSRMean = resultsGPSRMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRVar)
      perfvalueGPSRVar = resultsGPSRVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPOptMean)
      perfvalueGPOptMean = resultsGPOptMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPOptVar)
      perfvalueGPOptVar = resultsGPOptVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (Parzen)
      perfvalueParzen = resultsParzen.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (SVDD)
      perfvalueSVDD = resultsSVDD.getBinaryClassPerformance( ClassificationResults::PERF_AUC );  


    std::cerr << "Performance GPVarApprox: " << perfvalueGPVarApprox << std::endl;
    std::cerr << "Performance GPVar: " << perfvalueGPVar << std::endl;
    std::cerr << "Performance GPMeanApprox: " << perfvalueGPMeanApprox << std::endl;
    std::cerr << "Performance GPMean: " << perfvalueGPMean << std::endl;
    std::cerr << "Performance GPSRMean: " << perfvalueGPSRMean << std::endl;
    std::cerr << "Performance GPSRVar: " << perfvalueGPSRVar << std::endl;
    std::cerr << "Performance GPOptMean: " << perfvalueGPOptMean << std::endl;
    std::cerr << "Performance GPOptVar: " << perfvalueGPOptVar << std::endl;    
    std::cerr << "Performance Parzen: " << perfvalueParzen << std::endl;
    std::cerr << "Performance SVDD: " << perfvalueSVDD << std::endl;    
    
    OverallPerformanceGPVarApprox += perfvalueGPVar;    
    OverallPerformanceGPVar += perfvalueGPVarApprox;
    OverallPerformanceGPMeanApprox += perfvalueGPMeanApprox;
    OverallPerformanceGPMean += perfvalueGPMean;
    OverallPerformanceGPSRMean += perfvalueGPSRMean;
    OverallPerformanceGPSRVar += perfvalueGPSRVar;
    OverallPerformanceGPOptMean += perfvalueGPOptMean;
    OverallPerformanceGPOptVar += perfvalueGPOptVar;    
    OverallPerformanceParzen += perfvalueParzen;
    OverallPerformanceSVDD += perfvalueSVDD;   

    // clean up memory used by SVDD
    if (SVDD)    
      delete svdd;
  }
  
  OverallPerformanceGPVarApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPVar /= nrOfClassesToConcidere;
  OverallPerformanceGPMeanApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRVar /= nrOfClassesToConcidere;
  OverallPerformanceGPOptMean /= nrOfClassesToConcidere;
  OverallPerformanceGPOptVar /= nrOfClassesToConcidere;  
  OverallPerformanceParzen /= nrOfClassesToConcidere;
  OverallPerformanceSVDD /= nrOfClassesToConcidere;  
  
  std::cerr << "overall performance GPVarApprox: " << OverallPerformanceGPVarApprox << std::endl;
  std::cerr << "overall performance GPVar: " << OverallPerformanceGPVar << std::endl;
  std::cerr << "overall performance GPMeanApprox: " << OverallPerformanceGPMeanApprox << std::endl;
  std::cerr << "overall performance GPMean: " << OverallPerformanceGPMean << std::endl;
  std::cerr << "overall performance GPSRMean: " << OverallPerformanceGPSRMean << std::endl;
  std::cerr << "overall performance GPSRVar: " << OverallPerformanceGPSRVar << std::endl;
  std::cerr << "overall performance GPOptMean: " << OverallPerformanceGPOptMean << std::endl;
  std::cerr << "overall performance GPOptVar: " << OverallPerformanceGPOptVar << std::endl;  
  std::cerr << "overall performance Parzen: " << OverallPerformanceParzen << std::endl;
  std::cerr << "overall performance SVDD: " << OverallPerformanceSVDD << std::endl;  
  
  return 0;
}