/** 
* @file testImageNetBinaryBruteForce.cpp
* @brief perform ImageNet tests with binary tasks for OCC using GP mean and variance, sophisticated approximations of both, Parzen Density Estimation and SVDD
* @author Alexander Lütz
* @date 23-05-2012 (dd-mm-yyyy)
*/

#ifdef NICE_USELIB_MATIO

#include <ctime>
#include <time.h>

#include "core/basics/Config.h"
#include "core/basics/Timer.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/DiagonalMatrixApprox.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"


#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"
#include "vislearning/classifier/kernelclassifier/KCMinimumEnclosingBall.h"

#include "core/matlabAccess/MatFileIO.h"
#include "vislearning/matlabAccessHighLevel/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

// --------------- THE KERNEL FUNCTION ( exponential kernel with euclidian distance ) ----------------------
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double inner_sum(0.0);

  double d;      
  
  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();
   
  //compute the euclidian distance between both feature vectores (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      d = ( aIt->second - bIt->second );      
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;      
    }
    else
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;       
    }
  }
  
  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++; 
  }
  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++; 
  }  

  //normalization of the exponent
  inner_sum /= (2.0*sigma*sigma);
  
  //finally, compute the RBF-kernel score (RBF = radial basis function)
  return exp(-inner_sum);
}

// --------------- INPUT METHOD ----------------------
void readParameters(string & filename, const int & size, NICE::Vector & parameterVector)
{
  //we read the parameters which are given from a Matlab-Script (each line contains a single number, which is the optimal parameter for this class)
  
  parameterVector.resize(size);
  parameterVector.set(0.0);
  
  ifstream is(filename.c_str());

  if ( !is.good() )
      fthrow(IOException, "Unable to read parameters.");  
//
  string tmp;
  int cnt(0);
  while (! is.eof())
  {
    is >> tmp;
    parameterVector[cnt] = atof(tmp.c_str());
    cnt++;
  }
//   
  is.close(); 
}

//------------------- TRAINING METHODS --------------------

void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
    
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( kernelMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(nrOfExamplesPerClass);
      GPMeanRightPart.set(0.0);
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, y, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}  

void inline trainGPVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize(nrOfExamplesPerClass, nrOfExamplesPerClass);
      choleskyMatrix.set(0.0);      
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot k_* for new test samples
      cr.robustChol ( kernelMatrix, choleskyMatrix );   
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Vector matrixDInv(nrOfExamplesPerClass,0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse (and multiply every element with the label vector, which contains only one-entries and therefore be skipped...)
      GPMeanApproxRightPart.resize(nrOfExamplesPerClass);    
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        GPMeanApproxRightPart[i] = 1.0 / matrixDInv[i];
      } 
    }
    
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPMeanApprox training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPVarApprox(NICE::Vector & matrixDInv, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{

    std::cerr << "nrOfExamplesPerClass : " << nrOfExamplesPerClass << std::endl;
  
    Timer tTrainPreciseTimer;
    tTrainPreciseTimer.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {
      matrixDInv.resize(nrOfExamplesPerClass);
      matrixDInv.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        matrixDInv.set(noise);
      else
        matrixDInv.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          matrixDInv[i] += kernelMatrix(i,j);
          if (i != j)
            matrixDInv[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        matrixDInv[i] = 1.0 / matrixDInv[i];
      }
    }
    
    tTrainPreciseTimer.stop(); 
    std::cerr << "Precise time used for GPVarApprox training class " << classNumber << ": " << tTrainPreciseTimer.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP subset of regressors
void inline trainGPSRMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (uint j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
      
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
      NICE::Vector projectedLabels;
      projectedLabels.multiply(Kmn,y);
      
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
      
      GPMeanRightPart.resize(indicesOfChosenExamples.size());
      GPMeanRightPart.set(0.0);
      
      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
      choleskySolveLargeScale ( choleskyMatrix, projectedLabels, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP subset of regressors
void inline trainGPSRVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Kmn(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (uint j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kmm(i,j) = tmp;
      if (i != j)
        Kmm(j,i) = tmp;
    }
  }
  

    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {  
      NICE::Matrix innerMatrix;
      innerMatrix.multiply(Kmn, Kmn, false /* tranpose first matrix*/, true /* transpose second matrix*/);
      
      innerMatrix.addScaledMatrix( noise, Kmm );
           
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      choleskyMatrix.resize( nrOfExamplesPerClass, nrOfExamplesPerClass );
      choleskyMatrix.set( 0.0 );
      
      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
      cr.robustChol ( innerMatrix, choleskyMatrix );  
     }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPSRVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP FITC approx
void inline trainGPFITCMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Vector diagK (nrOfExamplesPerClass, 0.0);
  //set every element
  for (int i = 0; i < nrOfExamplesPerClass; i++ )
  {
    diagK(i) = kernelMatrix(i,i);
  }
  
  NICE::Matrix Ku (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Ku(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kuu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (uint j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kuu(i,j) = tmp;
      if (i != j)
        Kuu(j,i) = tmp;
    }
  }
  
  NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
  
    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {       
      
//       NICE::Vector projectedLabels;
//       projectedLabels.multiply(Kmn,y);
      
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix Luu (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      std::cerr << "mean 1) cr.robustChol ( Kuu, Luu )" << std::endl;
      cr.robustChol ( Kuu, Luu );  
      
      NICE::Matrix V (Ku);
      std::cerr << "mean 2) choleskySolveMatrixLargeScale( Luu, V)" << std::endl;
      choleskySolveMatrixLargeScale( Luu, V);
      
      NICE::Vector dg (diagK);
      NICE::Vector sumV (diagK.size(),0.0);
      for (uint i=0; i<V.cols(); i++)
      {
        for (uint j=0; j<V.rows(); j++)
        {
          sumV(i) += V(j,i)*V(j,i);
        }
        sumV(i) += noise;
      }
      dg += sumV;
      
      for (uint i=0; i<V.cols(); i++)
      {
        for (uint j=0; j<V.rows(); j++)
        {
          V(j,i) /= sqrt(dg(i));
        }
      }     
      
      NICE::Matrix Lu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      NICE::Matrix tmpVV (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      tmpVV.multiply(V,V,false,true);
      tmpVV.addIdentity(1.0);
      std::cerr << "mean 3) cr.robustChol ( tmpVV, Lu );" << std::endl;
      cr.robustChol ( tmpVV, Lu );
      
      NICE::Vector r (dg);
      for (uint i=0; i<r.size(); i++)
      {
        r(i) = 1.0/sqrt(r(i));
      }
      
      NICE::Vector be (indicesOfChosenExamples.size(), 0.0);
      std::cerr << "mean 4) choleskySolveLargeScale (Lu, V*r, be)" << std::endl;
      choleskySolveLargeScale (Lu, V*r, be);
      std::cerr << "mean 5) choleskySolveLargeScale (Lu.transpose(), be, be)" << std::endl;
      choleskySolveLargeScale (Lu.transpose(), be, be);
        
      GPMeanRightPart.resize(indicesOfChosenExamples.size());
      GPMeanRightPart.set(0.0);
      
      std::cerr << "mean 6) choleskySolveLargeScale ( Luu.transpose(), be, GPMeanRightPart )" << std::endl;
      choleskySolveLargeScale ( Luu.transpose(), be, GPMeanRightPart );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPFITCMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

// GP FITC approx
void inline trainGPFITCVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
{
  std::vector<int> examplesToChoose;
  indicesOfChosenExamples.clear();
  
  //add all examples for possible choice
  for (int i = 0; i < nrOfExamplesPerClass; i++)
  {
    examplesToChoose.push_back(i);
  }
  
  //now chose randomly some examples as active subset
  int index;
  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
  {
    index = rand() % examplesToChoose.size();
    indicesOfChosenExamples.push_back(examplesToChoose[index]);
    examplesToChoose.erase(examplesToChoose.begin() + index);
  }
  
  NICE::Vector diagK (nrOfExamplesPerClass, 0.0);
  //set every element
  for (int i = 0; i < nrOfExamplesPerClass; i++ )
  {
    diagK(i) = kernelMatrix(i,i);
  }
  
  NICE::Matrix Ku (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
  int rowCnt(0);
  //set every row
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
  {
    //set every element of this row
    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
    for (int j = 0; j < nrOfExamplesPerClass; j++)
    {
      Ku(rowCnt,j) = col(j);
    }
  }
  
  //we could speed this up if we would order the indices
  NICE::Matrix Kuu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
  double tmp(0.0);
  for (uint i = 0; i < indicesOfChosenExamples.size(); i++ )
  {
    for (uint j = i; j < indicesOfChosenExamples.size(); j++ )
    {
      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
      Kuu(i,j) = tmp;
      if (i != j)
        Kuu(j,i) = tmp;
    }
  }
  
  NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
  
    Timer tTrainPrecise;
    tTrainPrecise.start();      

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {       
      
//       NICE::Vector projectedLabels;
//       projectedLabels.multiply(Kmn,y);
      
      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
      
      NICE::Matrix Luu (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
      std::cerr << "var 1) cr.robustChol ( Kuu, Luu )" << std::endl;
      cr.robustChol ( Kuu, Luu );  
      
      NICE::Matrix V (Ku);
      std::cerr << "var 2) choleskySolveMatrixLargeScale( Luu, V)" << std::endl;
      choleskySolveMatrixLargeScale( Luu, V);
      
      NICE::Vector dg (diagK);
      NICE::Vector sumV (diagK.size(),0.0);
      for (uint i=0; i<V.cols(); i++)
      {
        for (uint j=0; j<V.rows(); j++)
        {
          sumV(i) += V(j,i)*V(j,i);
        }
        sumV(i) += noise;
      }
      dg += sumV;
      
      for (uint i=0; i<V.cols(); i++)
      {
        for (uint j=0; j<V.rows(); j++)
        {
          V(j,i) /= sqrt(dg(i));
        }
      }     
      
      NICE::Matrix Lu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      NICE::Matrix tmpVV (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      tmpVV.multiply(V,V,false,true);
      tmpVV.addIdentity(1.0);
      std::cerr << "var 3) cr.robustChol ( tmpVV, Lu )" << std::endl;     
      cr.robustChol ( tmpVV, Lu );
      
      NICE::Matrix iKuu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      iKuu.addIdentity(1.0);
      std::cerr << "var 4) choleskySolveMatrixLargeScale ( Luu.transpose(), iKuu )" << std::endl;     
      choleskySolveMatrixLargeScale ( Luu.transpose(), iKuu );
      std::cerr << "var 5) choleskySolveMatrixLargeScale ( Luu, iKuu )" << std::endl;     
      choleskySolveMatrixLargeScale ( Luu, iKuu );
      
      NICE::Matrix LuLuu (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
      LuLuu.multiply(Lu,Luu);
      
      choleskyMatrix.resize( indicesOfChosenExamples.size(), indicesOfChosenExamples.size() );
      choleskyMatrix.set( 0.0 );
      choleskyMatrix.setIdentity();
      
      std::cerr << "var 6) choleskySolveMatrixLargeScale ( LuLuu.transpose(), choleskyMatrix)" << std::endl;     
      choleskySolveMatrixLargeScale ( LuLuu.transpose(), choleskyMatrix);
      std::cerr << "var 7) choleskySolveMatrixLargeScale ( LuLuu, choleskyMatrix)" << std::endl;     
      choleskySolveMatrixLargeScale ( LuLuu, choleskyMatrix);
      
      choleskyMatrix -= iKuu;
    }
 
    tTrainPrecise.stop();  
    std::cerr << "Precise time used for GPFITCVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptMean(NICE::Vector & rightPartGPOptMean, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
//     rightPartGPOptMean.resize(nrOfExamplesPerClass);
    
    NICE::Matrix kInv( nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0 );
    CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);

    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      cr.robustCholInv ( kernelMatrix, kInv );
      
      //we initialize the D-Matrix with the approximation we use in other methods (row sums of kernel matrix)
      rightPartGPOptMean.resize(nrOfExamplesPerClass);
      rightPartGPOptMean.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        rightPartGPOptMean.set(noise);
      else
        rightPartGPOptMean.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          rightPartGPOptMean[i] += kernelMatrix(i,j);
          if (i != j)
            rightPartGPOptMean[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        rightPartGPOptMean[i] = 1.0 / rightPartGPOptMean[i];
      }      
      
//       rightPartGPOptMean.set(0.0);
      
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, rightPartGPOptMean );
      
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

void inline trainGPOptVar(NICE::Vector & DiagGPOptVar, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
    DiagonalMatrixApprox diagApprox ( true /*verbose*/ );
    DiagGPOptVar.resize(nrOfExamplesPerClass);
    
    NICE::Matrix kInv( nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0 );
    CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);    
    
    Timer tTrainPrecise;
    tTrainPrecise.start();     
    
    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    { 
      cr.robustCholInv ( kernelMatrix, kInv );
      
//       DiagGPOptVar.set(0.0);
      
      //we initialize the D-Matrix with the approximation we use in other methods (row sums of kernel matrix)
      DiagGPOptVar.resize(nrOfExamplesPerClass);
      DiagGPOptVar.set(0.0);
      //compute D 
      //start with adding some noise, if necessary
      if (noise != 0.0)
        DiagGPOptVar.set(noise);
      else
        DiagGPOptVar.set(0.0);    
      
      // the approximation creates a diagonal matrix (which is easy to invert)
      // with entries equal the row sums of the original kernel matrix      
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        for (int j = i; j < nrOfExamplesPerClass; j++)
        {
          DiagGPOptVar[i] += kernelMatrix(i,j);
          if (i != j)
            DiagGPOptVar[j] += kernelMatrix(i,j);
        }
      }
      
      //compute its inverse
      for (int i = 0; i < nrOfExamplesPerClass; i++)
      {
        DiagGPOptVar[i] = 1.0 / DiagGPOptVar[i];
      }         
      
      //compute optimal diagonal matrix
      diagApprox.approx ( kernelMatrix, DiagGPOptVar );
    }
 
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for GPOptVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
}

KCMinimumEnclosingBall *trainSVDD( const double & noise, const NICE::Matrix kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
{
 
    Config conf;
    // set the outlier ratio (Paul optimized this paramter FIXME)
    conf.sD( "SVDD", "outlier_fraction", 0.1 );
    conf.sB( "SVDD", "verbose", false );
    KCMinimumEnclosingBall *svdd = new KCMinimumEnclosingBall ( &conf, NULL /* no kernel function */, "SVDD" /* config section */);
    KernelData kernelData ( &conf, kernelMatrix, "Kernel" , false /* update cholesky */ );
 
    Timer tTrainPrecise;
    tTrainPrecise.start();     

    for (int run = 0; run < runsPerClassToAverageTraining; run++)
    {     
    
      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
      // KCMinimumEnclosingBall does not store the kernel data object, therefore, we are save with passing a local copy
      svdd->teach ( &kernelData, y );
    }
    
    tTrainPrecise.stop(); 
    std::cerr << "Precise time used for SVDD training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;        

    return svdd;
}

// ------------- EVALUATION METHODS ---------------------
void inline evaluateGPVarApprox(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (uint j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*       
      
      NICE::Vector rightPart (kernelVector.size(),0.0);      
      choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
      
      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMeanApprox(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y      
      mean = kernelVector.scalarProduct ( GPMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPSRMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
{
    double mean;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    }

    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y    
      mean = kernelVectorM.scalarProduct ( GPSRMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPSRVar(const NICE::Vector & kernelVector,  const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
{
    double uncertainty;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    } 
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      NICE::Vector rightPart (nrOfRegressors,0.0);      
      
      choleskySolveLargeScale ( choleskyMatrix, kernelVectorM, rightPart );
      
      uncertainty = noise*kernelVectorM.scalarProduct ( rightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPFITCMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPFITCMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
{
    double mean;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    }

    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      // \mean = \k_*^T \cdot K^{-1} \cdot y    
      mean = kernelVectorM.scalarProduct ( GPFITCMeanRightPart );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateGPFITCVar(const NICE::Vector & kernelVector,  const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
{
    double uncertainty;
    
    //grep the entries corresponding to the active set
    NICE::Vector kernelVectorM;
    kernelVectorM.resize(nrOfRegressors);
    for (int i = 0; i < nrOfRegressors; i++)
    {
      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
    } 
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {
      NICE::Vector tmp (nrOfRegressors,0.0);  
      tmp = choleskyMatrix*kernelVectorM;
      tmp *= kernelVectorM;
           
      uncertainty = 1.0 + tmp.Sum();
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPMeanApprox, but for convenience, it is its own method
void inline evaluateGPOptMean(const NICE::Vector & kernelVector, const NICE::Vector & rightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double mean;
  
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    { 
      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
      mean = kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = mean;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

//this method is completely the same as evaluateGPVarApprox, but for convenience, it is its own method
void inline evaluateGPOptVar(const NICE::Vector & kernelVector, const double & kernelSelf, const NICE::Vector & matrixDInv, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double uncertainty;
    
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {       
      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
      
      NICE::Vector rightPart (kernelVector.size());
      for (uint j = 0; j < kernelVector.size(); j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = 1.0 - uncertainty;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateParzen(const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    double score;
    
    Timer tTestSingle;
    tTestSingle.start();
    
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {      
      //the Parzen score is nothing but the averaged similarity to every training sample
      score = kernelVector.Sum() / (double) kernelVector.size(); //maybe we could directly call kernelVector.Mean() here
    }
      
    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
    
    FullVector scores ( 2 );
    scores[0] = 0.0;
    scores[1] = score;

    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
}

void inline evaluateSVDD( KCMinimumEnclosingBall *svdd, const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
{
    Timer tTestSingle;
    tTestSingle.start();
      
    for (int run = 0; run < runsPerClassToAverageTesting; run++)
    {        
      // In the following, we assume that we are using a Gaussian kernel
      r = svdd->classifyKernel ( kernelVector, 1.0 /* kernel self */ );
    }

    tTestSingle.stop();
    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
}

/** 
    test the basic functionality of fast-hik hyperparameter optimization 
*/
int main (int argc, char **argv)
{   
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  
  //which classes to considere? we assume consecutive class numers
  int indexOfFirstClass = conf.gI("main", "indexOfFirstClass", 0);
  indexOfFirstClass = std::max(indexOfFirstClass, 0); //we do not have less than 0 classes
  int indexOfLastClass = conf.gI("main", "indexOfLastClass", 999);
  indexOfLastClass = std::min(indexOfLastClass, 999); //we do not have more than 1000 classes
  
  int nrOfClassesToConcidere =  (indexOfLastClass - indexOfLastClass)+1;
  
  //repetitions for every class to achieve reliable time evalutions
  int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 ); 
  int runsPerClassToAverageTesting = conf.gI( "main", "runsPerClassToAverageTesting", 1 );
  
  // share parameters among methods and classes?
  bool shareParameters = conf.gB("main" , "shareParameters", true);
  
  //which methods do we want to use?
  bool GPMeanApprox = conf.gB( "main", "GPMeanApprox", false);
  bool GPVarApprox = conf.gB( "main", "GPVarApprox", false);  
  bool GPMean = conf.gB( "main", "GPMean", false);
  bool GPVar = conf.gB( "main", "GPVar", false);
  bool GPSRMean = conf.gB( "main", "GPSRMean", false);
  bool GPSRVar = conf.gB( "main", "GPSRVar", false);  
  bool GPFITCMean = conf.gB( "main", "GPFITCMean", false);
  bool GPFITCVar = conf.gB( "main", "GPFITCVar", false); 
  bool GPOptMean = conf.gB( "main", "GPOptMean", false);
  bool GPOptVar = conf.gB( "main", "GPOptVar", false);    
  bool Parzen = conf.gB( "main", "Parzen", false);
  bool SVDD = conf.gB( "main", "SVDD", false);  
  
  if (GPMeanApprox)
    std::cerr << "GPMeanApprox used" << std::endl;
  else 
    std::cerr << "GPMeanApprox not used" << std::endl;
  if (GPVarApprox)
    std::cerr << "GPVarApprox used" << std::endl;
  else 
    std::cerr << "GPVarApprox not used" << std::endl;
  if (GPMean)
    std::cerr << "GPMean used" << std::endl;
  else 
    std::cerr << "GPMean not used" << std::endl;
  if (GPVar)
    std::cerr << "GPVar used" << std::endl;
  else 
    std::cerr << "GPVar not used" << std::endl;
  if (GPSRMean)
    std::cerr << "GPSRMean used" << std::endl;
  else 
    std::cerr << "GPSRMean not used" << std::endl;
  if (GPSRVar)
    std::cerr << "GPSRVar used" << std::endl;
  else 
    std::cerr << "GPSRVar not used" << std::endl;
  if (GPFITCMean)
    std::cerr << "GPFITCMean used" << std::endl;
  else 
    std::cerr << "GPFITCMean not used" << std::endl;
  if (GPFITCVar)
    std::cerr << "GPFITCVar used" << std::endl;
  else 
    std::cerr << "GPFITCVar not used" << std::endl; 
  if (GPOptMean)
    std::cerr << "GPOptMean used" << std::endl;
  else 
    std::cerr << "GPOptMean not used" << std::endl;
  if (GPOptVar)
    std::cerr << "GPOptVar used" << std::endl;
  else 
    std::cerr << "GPOptVar not used" << std::endl;  
  if (Parzen)
    std::cerr << "Parzen used" << std::endl;
  else 
    std::cerr << "Parzen not used" << std::endl;
  if (SVDD)
    std::cerr << "SVDD used" << std::endl;
  else 
    std::cerr << "SVDD not used" << std::endl;

  
  // GP variance approximation  
  NICE::Vector sigmaGPVarApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarApproxParas(nrOfClassesToConcidere,0.0);
  // GP variance  
  NICE::Vector sigmaGPVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPVarParas(nrOfClassesToConcidere,0.0);
  //GP mean approximation  
  NICE::Vector sigmaGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanApproxParas(nrOfClassesToConcidere,0.0);
  //GP mean  
  NICE::Vector sigmaGPMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR mean  
  NICE::Vector sigmaGPSRMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRMeanParas(nrOfClassesToConcidere,0.0);
  //GP SR var
  NICE::Vector sigmaGPSRVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPSRVarParas(nrOfClassesToConcidere,0.0);
  //GP FITC mean  
  NICE::Vector sigmaGPFITCMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPFITCMeanParas(nrOfClassesToConcidere,0.0);
  //GP FITC var
  NICE::Vector sigmaGPFITCVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPFITCVarParas(nrOfClassesToConcidere,0.0);  
  //GP Opt mean  
  NICE::Vector sigmaGPOptMeanParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptMeanParas(nrOfClassesToConcidere,0.0);
  //GP Opt var
  NICE::Vector sigmaGPOptVarParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseGPOptVarParas(nrOfClassesToConcidere,0.0);  
  //Parzen  
  NICE::Vector sigmaParzenParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseParzenParas(nrOfClassesToConcidere,0.0);
  //SVDD  
  NICE::Vector sigmaSVDDParas(nrOfClassesToConcidere,0.0);
  NICE::Vector noiseSVDDParas(nrOfClassesToConcidere,0.0);
    
  if (!shareParameters)
  {
    //read the optimal parameters for the different methods
    
    // GP variance approximation
    string sigmaGPVarApproxFile = conf.gS("main", "sigmaGPVarApproxFile", "approxVarSigma.txt");  
    string noiseGPVarApproxFile = conf.gS("main", "noiseGPVarApproxFile", "approxVarNoise.txt");   
    // GP variance
    string sigmaGPVarFile = conf.gS("main", "sigmaGPVarFile", "approxVarSigma.txt");  
    string noiseGPVarFile = conf.gS("main", "noiseGPVarFile", "approxVarNoise.txt");  
    //GP mean approximation
    string sigmaGPMeanApproxFile = conf.gS("main", "sigmaGPMeanApproxFile", "approxVarSigma.txt");  
    string noiseGPMeanApproxFile = conf.gS("main", "noiseGPMeanApproxFile", "approxVarNoise.txt");    
    //GP mean
    string sigmaGPMeanFile = conf.gS("main", "sigmaGPMeanFile", "approxVarSigma.txt");  
    string noiseGPMeanFile = conf.gS("main", "noiseGPMeanFile", "approxVarNoise.txt");      
    //Parzen
    string sigmaParzenFile = conf.gS("main", "sigmaParzenFile", "approxVarSigma.txt");  
    string noiseParzenFile = conf.gS("main", "noiseParzenFile", "approxVarNoise.txt");    
    //SVDD
    string sigmaSVDDFile = conf.gS("main", "sigmaSVDDFile", "approxVarSigma.txt");  
    string noiseSVDDFile = conf.gS("main", "noiseSVDDFile", "approxVarNoise.txt");      

    // GP variance approximation    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarApproxParas);  
    // GP variance    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPVarParas);  
    //GP mean approximation   
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanApproxParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanApproxParas);  
    //GP mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanParas); 
    //GP SR mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRMeanParas);
    //GP SR var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRVarParas); 
    //GP FITC mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPFITCMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPFITCMeanParas);
    //GP FITC var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPFITCVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPFITCVarParas);      
    //GP Opt mean  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptMeanParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptMeanParas);
    //GP Opt var  
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPOptVarParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPOptVarParas);     
    //Parzen    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaParzenParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseParzenParas);  
    //SVDD    
    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaSVDDParas);
    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseSVDDParas);   
  }
  else
  {
    //use static variables for all methods and classis
    double noise = conf.gD( "main", "noise", 0.01 );
    double sigma = conf.gD( "main", "sigma", 1.0 );
    
    sigmaGPVarApproxParas.set(sigma);
    noiseGPVarApproxParas.set(noise);
    // GP variance  
    sigmaGPVarParas.set(sigma);
    noiseGPVarParas.set(noise);
    //GP mean approximation  
    sigmaGPMeanApproxParas.set(sigma);
    noiseGPMeanApproxParas.set(noise);
    //GP mean  
    sigmaGPMeanParas.set(sigma);
    noiseGPMeanParas.set(noise);
    //GP SR mean  
    sigmaGPSRMeanParas.set(sigma);
    noiseGPSRMeanParas.set(noise);
    //GP SR var  
    sigmaGPSRVarParas.set(sigma);
    noiseGPSRVarParas.set(noise); 
    //GP FITC mean  
    sigmaGPFITCMeanParas.set(sigma);
    noiseGPFITCMeanParas.set(noise);
    //GP FITC var  
    sigmaGPFITCVarParas.set(sigma);
    noiseGPFITCVarParas.set(noise);
    //GP Opt mean  
    sigmaGPOptMeanParas.set(sigma);
    noiseGPOptMeanParas.set(noise);
    //GP Opt var  
    sigmaGPOptVarParas.set(sigma);
    noiseGPOptVarParas.set(noise);    
    //Parzen  
    sigmaParzenParas.set(sigma);
    noiseParzenParas.set(noise);
    //SVDD  
    sigmaSVDDParas.set(sigma);
    noiseSVDDParas.set(noise);    
  }
  
  
  // -------- optimal parameters read --------------  
  
  std::vector<SparseVector> trainingData;
  NICE::Vector y;
  
  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();
    
  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;
  
  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );  
  
  double OverallPerformanceGPVarApprox(0.0);
  double OverallPerformanceGPVar(0.0);
  double OverallPerformanceGPMeanApprox(0.0);
  double OverallPerformanceGPMean(0.0);
  double OverallPerformanceGPSRMean(0.0);
  double OverallPerformanceGPSRVar(0.0);  
  double OverallPerformanceGPFITCMean(0.0);
  double OverallPerformanceGPFITCVar(0.0); 
  double OverallPerformanceGPOptMean(0.0);
  double OverallPerformanceGPOptVar(0.0);   
  double OverallPerformanceParzen(0.0);
  double OverallPerformanceSVDD(0.0);

  
  double kernelSigmaGPVarApprox;
  double kernelSigmaGPVar;
  double kernelSigmaGPMeanApprox;
  double kernelSigmaGPMean;
  double kernelSigmaGPSRMean;
  double kernelSigmaGPSRVar;
  double kernelSigmaGPFITCMean;
  double kernelSigmaGPFITCVar; 
  double kernelSigmaGPOptMean;
  double kernelSigmaGPOptVar;  
  double kernelSigmaParzen;
  double kernelSigmaSVDD;
  
  for (int cl = indexOfFirstClass; cl <= indexOfLastClass; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;
    int positiveClass = cl+1; //labels are from 1 to 1000, but our indices from 0 to 999
    // ------------------------------ TRAINING ------------------------------
  
    kernelSigmaGPVarApprox = sigmaGPVarApproxParas[cl];
    kernelSigmaGPVar = sigmaGPVarParas[cl];
    kernelSigmaGPMeanApprox = sigmaGPMeanApproxParas[cl];
    kernelSigmaGPMean = sigmaGPMeanParas[cl];
    kernelSigmaGPSRMean = sigmaGPSRMeanParas[cl];
    kernelSigmaGPSRVar = sigmaGPSRVarParas[cl];
    kernelSigmaGPFITCMean = sigmaGPFITCMeanParas[cl];
    kernelSigmaGPFITCVar = sigmaGPFITCVarParas[cl];    
    kernelSigmaGPOptMean = sigmaGPOptMeanParas[cl];
    kernelSigmaGPOptVar = sigmaGPOptVarParas[cl];    
    kernelSigmaParzen = sigmaParzenParas[cl];
    kernelSigmaSVDD = sigmaSVDDParas[cl];
    
    Timer tTrain;
    tTrain.start();
    
    //compute the kernel matrix, which will be shared among all methods in this scenario       
    NICE::Matrix kernelMatrix(nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
    
    //NOTE in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
    // I'm sure, we can speed it up a bit and compute it only for every different parameter
    //nonetheless, it's not as nice as we originally thought (same matrix for every method) 
    
    //NOTE Nonetheless, since we're only interested in runtimes, we can ignore this
           
    //now sum up all entries of each row in the original kernel matrix
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
        kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigmaGPVarApprox);
        kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
        
        if (i != j)
            kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
      }
    }  
    
    // now call the individual training methods
    
    //train GP Mean Approx
    NICE::Vector GPMeanApproxRightPart;
    if (GPMeanApprox)
      trainGPMeanApprox(GPMeanApproxRightPart, noiseGPMeanApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP Var Approx    
    NICE::Vector matrixDInv;
    if (GPVarApprox)
      trainGPVarApprox(matrixDInv, noiseGPVarApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
         
    //train GP Mean
    NICE::Vector GPMeanRightPart;
    if (GPMean)
      trainGPMean(GPMeanRightPart, noiseGPMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
    
    //train GP Var
    NICE::Matrix GPVarCholesky;
    if (GPVar)
      trainGPVar(GPVarCholesky, noiseGPVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );       
    
    int nrOfRegressors (0);
    //train GP SR Mean
    NICE::Vector GPSRMeanRightPart;
    std::vector<int> indicesOfChosenExamplesGPSRMean;
    nrOfRegressors = conf.gI( "GPSR", "nrOfRegressors", nrOfExamplesPerClass/2);
    nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
    if (GPSRMean)
      trainGPSRMean(GPSRMeanRightPart, noiseGPSRMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRMean );        
    
    //train GP SR Var
    NICE::Matrix GPSRVarCholesky;   
    std::vector<int> indicesOfChosenExamplesGPSRVar;
    if (GPSRVar)
      trainGPSRVar(GPSRVarCholesky, noiseGPSRVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRVar );  
    
    //train GP FITC Mean
    NICE::Vector GPFITCMeanRightPart;
    std::vector<int> indicesOfChosenExamplesGPFITCMean;
    nrOfRegressors = conf.gI( "GPFITC", "nrOfRegressors", nrOfExamplesPerClass/5);
    nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
    if (GPFITCMean)
      trainGPFITCMean(GPFITCMeanRightPart, noiseGPFITCMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPFITCMean ); 

     //train GP FITC Var
    NICE::Matrix GPFITCVarCholesky;   
    std::vector<int> indicesOfChosenExamplesGPFITCVar;
    if (GPFITCVar)
      trainGPFITCVar(GPFITCVarCholesky, noiseGPFITCVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPFITCVar );    
    
    //train GP Opt Mean
    NICE::Vector GPOptMeanRightPart;
    if (GPOptMean)
      trainGPOptMean(GPOptMeanRightPart, noiseGPOptMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
    std::cerr << "GPOptMeanRightPart: " << std::endl; std::cerr << GPOptMeanRightPart << std::endl;
    
    //train GP Opt Var
    NICE::Vector DiagGPOptVar;
    if (GPOptVar)
      trainGPOptVar(DiagGPOptVar, noiseGPOptVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );     
    std::cerr << "DiagGPOptVar: " << std::endl; std::cerr << DiagGPOptVar << std::endl;
    
    //train Parzen 
    //nothing to do :)
    
    //train SVDD
    KCMinimumEnclosingBall *svdd;
    if (SVDD)
      svdd = trainSVDD(noiseSVDDParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
  
    tTrain.stop();
    std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;      
       
    std::cerr << "training done - now perform the evaluation" << std::endl;


    // ------------------------------ TESTING ------------------------------
   
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    
    ClassificationResults resultsGPVarApprox;
    ClassificationResults resultsGPVar;
    ClassificationResults resultsGPMeanApprox;
    ClassificationResults resultsGPMean;
    ClassificationResults resultsGPSRMean;
    ClassificationResults resultsGPSRVar;  
    ClassificationResults resultsGPFITCMean;
    ClassificationResults resultsGPFITCVar;
    ClassificationResults resultsGPOptMean;
    ClassificationResults resultsGPOptVar;    
    ClassificationResults resultsParzen;
    ClassificationResults resultsSVDD;       
    
    ProgressBar pb;
    Timer tTest;
    tTest.start();    
    Timer tTestSingle;
    
    double timeForSingleExamplesGPVarApprox(0.0);    
    double timeForSingleExamplesGPVar(0.0);
    double timeForSingleExamplesGPMeanApprox(0.0);    
    double timeForSingleExamplesGPMean(0.0);    
    double timeForSingleExamplesGPSRMean(0.0);
    double timeForSingleExamplesGPSRVar(0.0);
    double timeForSingleExamplesGPFITCMean(0.0);
    double timeForSingleExamplesGPFITCVar(0.0);
    double timeForSingleExamplesGPOptMean(0.0);
    double timeForSingleExamplesGPOptVar(0.0);    
    double timeForSingleExamplesParzen(0.0);    
    double timeForSingleExamplesSVDD(0.0);    
    
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

      //NOTE: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
      
      //compute (self) similarities
      double kernelSelf (measureDistance(svec,svec, kernelSigmaGPVarApprox) );
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
      
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigmaGPVarApprox);
      }     
      
      //call the individual test-methods
      
      //evaluate GP Var Approx
      ClassificationResult rGPVarApprox;      
      if (GPVarApprox)
        evaluateGPVarApprox( kernelVector, kernelSelf, matrixDInv, rGPVarApprox, timeForSingleExamplesGPVarApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Var
      ClassificationResult rGPVar;
      if (GPVar)
        evaluateGPVar( kernelVector, kernelSelf, GPVarCholesky, rGPVar, timeForSingleExamplesGPVar, runsPerClassToAverageTesting );      
      
      //evaluate GP Mean Approx
      ClassificationResult rGPMeanApprox;      
      if (GPMeanApprox)
        evaluateGPMeanApprox( kernelVector, matrixDInv, rGPMeanApprox, timeForSingleExamplesGPMeanApprox, runsPerClassToAverageTesting );
      
      //evaluate GP Mean
      ClassificationResult rGPMean;
      if (GPMean)
        evaluateGPMean( kernelVector, GPMeanRightPart, rGPMean, timeForSingleExamplesGPMean, runsPerClassToAverageTesting );       
       
      //evaluate GP SR Mean
      ClassificationResult rGPSRMean;
      if (GPSRMean)
        evaluateGPSRMean( kernelVector, GPSRMeanRightPart, rGPSRMean, timeForSingleExamplesGPSRMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRMean );       
      
      //evaluate GP SR Var
      ClassificationResult rGPSRVar;
      if (GPSRVar)
        evaluateGPSRVar( kernelVector, GPSRVarCholesky, rGPSRVar, timeForSingleExamplesGPSRVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRVar, noiseGPSRVarParas[cl] );  
      
      //evaluate GP FITC Mean
      ClassificationResult rGPFITCMean;
      if (GPFITCMean)
        evaluateGPFITCMean( kernelVector, GPFITCMeanRightPart, rGPFITCMean, timeForSingleExamplesGPFITCMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPFITCMean );       
      
      //evaluate GP FITC Var
      ClassificationResult rGPFITCVar;
      if (GPFITCVar)
        evaluateGPFITCVar( kernelVector, GPFITCVarCholesky, rGPFITCVar, timeForSingleExamplesGPFITCVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPFITCVar, noiseGPFITCVarParas[cl] );             
      
      //evaluate GP Opt Mean
      ClassificationResult rGPOptMean;
      if (GPOptMean)
        evaluateGPOptMean( kernelVector, GPOptMeanRightPart, rGPOptMean, timeForSingleExamplesGPOptMean, runsPerClassToAverageTesting );       
      
      //evaluate GP Opt Var
      ClassificationResult rGPOptVar;
      if (GPOptVar)
        evaluateGPOptVar( kernelVector, kernelSelf, DiagGPOptVar, rGPOptVar, timeForSingleExamplesGPOptVar, runsPerClassToAverageTesting );   
      
      //evaluate Parzen
      ClassificationResult rParzen;
      if (Parzen)
        evaluateParzen( kernelVector, rParzen, timeForSingleExamplesParzen, runsPerClassToAverageTesting ); 
      
      //evaluate SVDD
      ClassificationResult rSVDD;
      if (SVDD)
        evaluateSVDD( svdd, kernelVector, rSVDD, timeForSingleExamplesSVDD, runsPerClassToAverageTesting );       

      
      // set ground truth label
      rGPVarApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMeanApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPSRVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPFITCMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPFITCVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rGPOptVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;      
      rParzen.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
      rSVDD.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;

      //remember the results for the evaluation lateron
      resultsGPVarApprox.push_back ( rGPVarApprox );
      resultsGPVar.push_back ( rGPVar );
      resultsGPMeanApprox.push_back ( rGPMeanApprox );
      resultsGPMean.push_back ( rGPMean );
      resultsGPSRMean.push_back ( rGPSRMean );
      resultsGPSRVar.push_back ( rGPSRVar );
      resultsGPFITCMean.push_back ( rGPFITCMean );
      resultsGPFITCVar.push_back ( rGPFITCVar );
      resultsGPOptMean.push_back ( rGPOptMean );
      resultsGPOptVar.push_back ( rGPOptVar );      
      resultsParzen.push_back ( rParzen );
      resultsSVDD.push_back ( rSVDD );      
    }
    
    tTest.stop();
    std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;       
    
    timeForSingleExamplesGPVarApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMeanApprox/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPSRVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPFITCMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPFITCVar/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptMean/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesGPOptVar/= imageNetTest.getNumPreloadedExamples();    
    timeForSingleExamplesParzen/= imageNetTest.getNumPreloadedExamples();
    timeForSingleExamplesSVDD/= imageNetTest.getNumPreloadedExamples();
    
    std::cerr.precision(10);
    std::cerr << "GPVarApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVarApprox << std::endl;    
    std::cerr << "GPVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVar << std::endl;    
    std::cerr << "GPMeanApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMeanApprox << std::endl;    
    std::cerr << "GPMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMean << std::endl;    
    std::cerr << "GPSRMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRMean << std::endl;    
    std::cerr << "GPSRVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRVar << std::endl;    
    std::cerr << "GPFITCMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPFITCMean << std::endl;    
    std::cerr << "GPFITCVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPFITCVar << std::endl; 
    std::cerr << "GPOptMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptMean << std::endl;    
    std::cerr << "GPOptVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPOptVar << std::endl;    
    std::cerr << "Parzen -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesParzen << std::endl;    
    std::cerr << "SVDD -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesSVDD << std::endl;    

    // run the AUC-evaluation
    double perfvalueGPVarApprox( 0.0 );
    double perfvalueGPVar( 0.0 );
    double perfvalueGPMeanApprox( 0.0 );
    double perfvalueGPMean( 0.0 );
    double perfvalueGPSRMean( 0.0 );
    double perfvalueGPSRVar( 0.0 );
    double perfvalueGPFITCMean( 0.0 );
    double perfvalueGPFITCVar( 0.0 );    
    double perfvalueGPOptMean( 0.0 );
    double perfvalueGPOptVar( 0.0 );    
    double perfvalueParzen( 0.0 );
    double perfvalueSVDD( 0.0 );     

    if (GPVarApprox)
      perfvalueGPVarApprox = resultsGPVarApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (GPVar)
      perfvalueGPVar = resultsGPVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMeanApprox)
      perfvalueGPMeanApprox = resultsGPMeanApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPMean)
      perfvalueGPMean = resultsGPMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRMean)
      perfvalueGPSRMean = resultsGPSRMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPSRVar)
      perfvalueGPSRVar = resultsGPSRVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPFITCMean)
      perfvalueGPFITCMean = resultsGPFITCMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPFITCVar)
      perfvalueGPFITCVar = resultsGPFITCVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (GPOptMean)
      perfvalueGPOptMean = resultsGPOptMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (GPOptVar)
      perfvalueGPOptVar = resultsGPOptVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
    if (Parzen)
      perfvalueParzen = resultsParzen.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    if (SVDD)
      perfvalueSVDD = resultsSVDD.getBinaryClassPerformance( ClassificationResults::PERF_AUC );  


    std::cerr << "Performance GPVarApprox: " << perfvalueGPVarApprox << std::endl;
    std::cerr << "Performance GPVar: " << perfvalueGPVar << std::endl;
    std::cerr << "Performance GPMeanApprox: " << perfvalueGPMeanApprox << std::endl;
    std::cerr << "Performance GPMean: " << perfvalueGPMean << std::endl;
    std::cerr << "Performance GPSRMean: " << perfvalueGPSRMean << std::endl;
    std::cerr << "Performance GPSRVar: " << perfvalueGPSRVar << std::endl;
    std::cerr << "Performance GPFITCMean: " << perfvalueGPFITCMean << std::endl;
    std::cerr << "Performance GPFITCVar: " << perfvalueGPFITCVar << std::endl;    
    std::cerr << "Performance GPOptMean: " << perfvalueGPOptMean << std::endl;
    std::cerr << "Performance GPOptVar: " << perfvalueGPOptVar << std::endl;    
    std::cerr << "Performance Parzen: " << perfvalueParzen << std::endl;
    std::cerr << "Performance SVDD: " << perfvalueSVDD << std::endl;    
    
    OverallPerformanceGPVarApprox += perfvalueGPVar;    
    OverallPerformanceGPVar += perfvalueGPVarApprox;
    OverallPerformanceGPMeanApprox += perfvalueGPMeanApprox;
    OverallPerformanceGPMean += perfvalueGPMean;
    OverallPerformanceGPSRMean += perfvalueGPSRMean;
    OverallPerformanceGPSRVar += perfvalueGPSRVar;
    OverallPerformanceGPFITCMean += perfvalueGPFITCMean;
    OverallPerformanceGPFITCVar += perfvalueGPFITCVar;   
    OverallPerformanceGPOptMean += perfvalueGPOptMean;
    OverallPerformanceGPOptVar += perfvalueGPOptVar;    
    OverallPerformanceParzen += perfvalueParzen;
    OverallPerformanceSVDD += perfvalueSVDD;   

    // clean up memory used by SVDD
    if (SVDD)    
      delete svdd;
  }
  
  OverallPerformanceGPVarApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPVar /= nrOfClassesToConcidere;
  OverallPerformanceGPMeanApprox /= nrOfClassesToConcidere;
  OverallPerformanceGPMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRMean /= nrOfClassesToConcidere;
  OverallPerformanceGPSRVar /= nrOfClassesToConcidere;
  OverallPerformanceGPFITCMean /= nrOfClassesToConcidere;
  OverallPerformanceGPFITCVar /= nrOfClassesToConcidere;
  OverallPerformanceGPOptMean /= nrOfClassesToConcidere;
  OverallPerformanceGPOptVar /= nrOfClassesToConcidere;  
  OverallPerformanceParzen /= nrOfClassesToConcidere;
  OverallPerformanceSVDD /= nrOfClassesToConcidere;  
  
  std::cerr << "overall performance GPVarApprox: " << OverallPerformanceGPVarApprox << std::endl;
  std::cerr << "overall performance GPVar: " << OverallPerformanceGPVar << std::endl;
  std::cerr << "overall performance GPMeanApprox: " << OverallPerformanceGPMeanApprox << std::endl;
  std::cerr << "overall performance GPMean: " << OverallPerformanceGPMean << std::endl;
  std::cerr << "overall performance GPSRMean: " << OverallPerformanceGPSRMean << std::endl;
  std::cerr << "overall performance GPSRVar: " << OverallPerformanceGPSRVar << std::endl;
  std::cerr << "overall performance GPFITCMean: " << OverallPerformanceGPFITCMean << std::endl;
  std::cerr << "overall performance GPFITCVar: " << OverallPerformanceGPFITCVar << std::endl;
  std::cerr << "overall performance GPOptMean: " << OverallPerformanceGPOptMean << std::endl;
  std::cerr << "overall performance GPOptVar: " << OverallPerformanceGPOptVar << std::endl;  
  std::cerr << "overall performance Parzen: " << OverallPerformanceParzen << std::endl;
  std::cerr << "overall performance SVDD: " << OverallPerformanceSVDD << std::endl;  
  
  return 0;
}

#endif