浏览代码

Sparse GP using subset of regressors is implemented

Alexander Luetz 13 年之前
父节点
当前提交
fc16dcdebf

+ 1 - 1
optimization/quadprog/QuadProg++.cpp

@@ -728,7 +728,7 @@ void cholesky_decomposition(Matrix<double>& A)
 	}
 	NICE::Matrix L;
 	L.resize(n,n);
-	NICE::CholeskyRobust *cra = new NICE::CholeskyRobustAuto(true);
+	NICE::CholeskyRobust *cra = new NICE::CholeskyRobustAuto(false);
 	cra->robustChol ( M, L );
 	//copy back
 	for (i = 0; i < n; i++)

+ 4 - 4
progs/ImagenetBinary.conf

@@ -2,7 +2,7 @@
 # whether to use eriks folder (only works on dionysos)
 imageNetLocal = false
 shareParameters = true
-noise = 0.01
+noise = 0.1
 sigma = 1.0
 
 #GP variance approximation
@@ -26,7 +26,7 @@ noiseGPVarApproxFile = /home/luetz/code/nice/vislearning/progs/gpvarNoise.txt
 
 indexOfFirstClass = 0
 indexOfLastClass = 0
-runsPerClassToAverageTraining = 10
-runsPerClassToAverageTraining = 10
+runsPerClassToAverageTraining = 100000
+runsPerClassToAverageTesting = 100000
 
-nrOfExamplesPerClass = 50
+nrOfExamplesPerClass = 100

+ 336 - 36
progs/testImageNetBinaryBruteForce.cpp

@@ -28,7 +28,7 @@ using namespace NICE;
 using namespace OBJREC;
 
 // --------------- THE KERNEL FUNCTION ( exponential kernel with euclidian distance ) ----------------------
-double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)//, const bool & verbose = false)
+double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
 {
   double inner_sum(0.0);
 
@@ -38,6 +38,7 @@ double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector
   NICE::SparseVector::const_iterator aIt = a.begin();
   NICE::SparseVector::const_iterator bIt = b.begin();
    
+  //compute the Euclidean distance between both feature vectors (given as SparseVectors)
   while ( (aIt != a.end()) && (bIt != b.end()) )
   {
     if (aIt->first == bIt->first)
@@ -72,13 +73,18 @@ double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector
     bIt++; 
   }  
 
+  //normalization of the exponent
   inner_sum /= (2.0*sigma*sigma);
   
+  //finally, compute the RBF-kernel score (RBF = radial basis function)
   return exp(-inner_sum);
 }
 
+// --------------- INPUT METHOD ----------------------
 void readParameters(string & filename, const int & size, NICE::Vector & parameterVector)
 {
+  //we read the parameters which are given from a Matlab-Script (each line contains a single number, which is the optimal parameter for this class)
+  
   parameterVector.resize(size);
   parameterVector.set(0.0);
   
@@ -120,6 +126,8 @@ void inline trainGPVarApprox(NICE::Vector & matrixDInv, const double & noise, co
       else
         matrixDInv.set(0.0);    
       
+      // the approximation creates a diagonal matrix (which is easy to invert)
+      // with entries equal the row sums of the original kernel matrix      
       for (int i = 0; i < nrOfExamplesPerClass; i++)
       {
         for (int j = i; j < nrOfExamplesPerClass; j++)
@@ -149,11 +157,12 @@ void inline trainGPVar(NICE::Matrix & choleskyMatrix, const double & noise, cons
     
     for (int run = 0; run < runsPerClassToAverageTraining; run++)
     {  
-    
       CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
       
       choleskyMatrix.resize(nrOfExamplesPerClass, nrOfExamplesPerClass);
       choleskyMatrix.set(0.0);      
+      
+      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot k_* for new test samples
       cr.robustChol ( kernelMatrix, choleskyMatrix );   
     }
  
@@ -177,6 +186,8 @@ void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double
       else
         matrixDInv.set(0.0);    
       
+      // the approximation creates a diagonal matrix (which is easy to invert)
+      // with entries equal the row sums of the original kernel matrix
       for (int i = 0; i < nrOfExamplesPerClass; i++)
       {
         for (int j = i; j < nrOfExamplesPerClass; j++)
@@ -187,7 +198,7 @@ void inline trainGPMeanApprox(NICE::Vector & GPMeanApproxRightPart, const double
         }
       }
       
-      //compute its inverse (and multiply every element with the label vector, which contains only one-entries...)
+      //compute its inverse (and multiply every element with the label vector, which contains only one-entries and therefore be skipped...)
       GPMeanApproxRightPart.resize(nrOfExamplesPerClass);    
       for (int i = 0; i < nrOfExamplesPerClass; i++)
       {
@@ -212,12 +223,16 @@ void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, co
       CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
       
       NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
+      
+      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
       cr.robustChol ( kernelMatrix, choleskyMatrix );  
       
       GPMeanRightPart.resize(nrOfExamplesPerClass);
       GPMeanRightPart.set(0.0);
       
       NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :)
+      
+      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
       choleskySolveLargeScale ( choleskyMatrix, y, GPMeanRightPart );
     }
  
@@ -225,14 +240,167 @@ void inline trainGPMean(NICE::Vector & GPMeanRightPart, const double & noise, co
     std::cerr << "Precise time used for GPMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
 }    
 
+// GP subset of regressors
+void inline trainGPSRMean(NICE::Vector & GPMeanRightPart, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
+{
+  std::vector<int> examplesToChoose;
+  indicesOfChosenExamples.clear();
+  
+  //add all examples for possible choice
+  for (int i = 0; i < nrOfExamplesPerClass; i++)
+  {
+    examplesToChoose.push_back(i);
+  }
+  
+  //now chose randomly some examples as active subset
+  int index;
+  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
+  {
+    index = rand() % examplesToChoose.size();
+    indicesOfChosenExamples.push_back(examplesToChoose[index]);
+    examplesToChoose.erase(examplesToChoose.begin() + index);
+  }
+  
+  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
+  int rowCnt(0);
+  //set every row
+  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
+  {
+    //set every element of this row
+    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
+    for (int j = 0; j < nrOfExamplesPerClass; j++)
+    {
+      Kmn(rowCnt,j) = col(j);
+    }
+  }
+  
+  //we could speed this up if we would order the indices
+  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
+  double tmp(0.0);
+  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
+  {
+    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
+    {
+      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
+      Kmm(i,j) = tmp;
+      if (i != j)
+        Kmm(j,i) = tmp;
+    }
+  }
+  
+
+    Timer tTrainPrecise;
+    tTrainPrecise.start();      
+
+    for (int run = 0; run < runsPerClassToAverageTraining; run++)
+    {  
+      NICE::Matrix innerMatrix;
+      innerMatrix.multiply(Kmn, Kmn, false /* transpose first matrix*/, true /* transpose second matrix*/); // Kmn * Kmn^T gives the m x m matrix matching Kmm and projectedLabels
+      
+      innerMatrix.addScaledMatrix( noise, Kmm );
+      
+      NICE::Vector y(nrOfExamplesPerClass,1.0); //OCC setting :) 
+      NICE::Vector projectedLabels;
+      projectedLabels.multiply(Kmn,y);
+      
+      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
+      
+      NICE::Matrix choleskyMatrix (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
+      
+      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
+      cr.robustChol ( innerMatrix, choleskyMatrix );  
+      
+      GPMeanRightPart.resize(indicesOfChosenExamples.size());
+      GPMeanRightPart.set(0.0);
+      
+      // pre-compute K^{-1} \cdot y, which is the same for every new test sample
+      choleskySolveLargeScale ( choleskyMatrix, projectedLabels, GPMeanRightPart );
+    }
+ 
+    tTrainPrecise.stop(); 
+    std::cerr << "Precise time used for GPSRMean training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
+}
+
+// GP subset of regressors
+void inline trainGPSRVar(NICE::Matrix & choleskyMatrix, const double & noise, const NICE::Matrix & kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples )
+{
+  std::vector<int> examplesToChoose;
+  indicesOfChosenExamples.clear();
+  
+  //add all examples for possible choice
+  for (int i = 0; i < nrOfExamplesPerClass; i++)
+  {
+    examplesToChoose.push_back(i);
+  }
+  
+  //now chose randomly some examples as active subset
+  int index;
+  for (int i = 0; i < std::min(nrOfRegressors,nrOfExamplesPerClass); i++)
+  {
+    index = rand() % examplesToChoose.size();
+    indicesOfChosenExamples.push_back(examplesToChoose[index]);
+    examplesToChoose.erase(examplesToChoose.begin() + index);
+  }
+  
+  NICE::Matrix Kmn (indicesOfChosenExamples.size(), nrOfExamplesPerClass, 0.0);
+  int rowCnt(0);
+  //set every row
+  for (int i = 0; i < indicesOfChosenExamples.size(); i++, rowCnt++ )
+  {
+    //set every element of this row
+    NICE::Vector col = kernelMatrix.getRow(indicesOfChosenExamples[i]);
+    for (int j = 0; j < nrOfExamplesPerClass; j++)
+    {
+      Kmn(rowCnt,j) = col(j);
+    }
+  }
+  
+  //we could speed this up if we would order the indices
+  NICE::Matrix Kmm (indicesOfChosenExamples.size(), indicesOfChosenExamples.size(), 0.0);
+  double tmp(0.0);
+  for (int i = 0; i < indicesOfChosenExamples.size(); i++ )
+  {
+    for (int j = i; j < indicesOfChosenExamples.size(); j++ )
+    {
+      tmp = kernelMatrix(indicesOfChosenExamples[i], indicesOfChosenExamples[j]);
+      Kmm(i,j) = tmp;
+      if (i != j)
+        Kmm(j,i) = tmp;
+    }
+  }
+  
+
+    Timer tTrainPrecise;
+    tTrainPrecise.start();      
+
+    for (int run = 0; run < runsPerClassToAverageTraining; run++)
+    {  
+      NICE::Matrix innerMatrix;
+      innerMatrix.multiply(Kmn, Kmn, false /* transpose first matrix*/, true /* transpose second matrix*/); // Kmn * Kmn^T gives the m x m matrix matching Kmm
+      
+      innerMatrix.addScaledMatrix( noise, Kmm );
+           
+      CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
+      
+      choleskyMatrix.resize( indicesOfChosenExamples.size(), indicesOfChosenExamples.size() );
+      choleskyMatrix.set( 0.0 );
+      
+      //compute the cholesky decomposition of K in order to compute K^{-1} \cdot y
+      cr.robustChol ( innerMatrix, choleskyMatrix );  
+     }
+ 
+    tTrainPrecise.stop(); 
+    std::cerr << "Precise time used for GPSRVar training class " << classNumber << ": " << tTrainPrecise.getLast()/(double)runsPerClassToAverageTraining << std::endl;    
+}
+
 KCMinimumEnclosingBall *trainSVDD( const double & noise, const NICE::Matrix kernelMatrix, const int & nrOfExamplesPerClass, const int & classNumber, const int & runsPerClassToAverageTraining )
 {
  
     Config conf;
     // set the outlier ratio (Paul optimized this paramter FIXME)
     conf.sD( "SVDD", "outlier_fraction", 0.1 );
+    conf.sB( "SVDD", "verbose", false );
     KCMinimumEnclosingBall *svdd = new KCMinimumEnclosingBall ( &conf, NULL /* no kernel function */, "SVDD" /* config section */);
-
     KernelData kernelData ( &conf, kernelMatrix, "Kernel" , false /* update cholesky */ );
  
     Timer tTrainPrecise;
@@ -262,6 +430,8 @@ void inline evaluateGPVarApprox(const NICE::Vector & kernelVector, const double
       
     for (int run = 0; run < runsPerClassToAverageTesting; run++)
     {       
+      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*  where D is our nice approximation of K
+      
       NICE::Vector rightPart (kernelVector.size());
       for (int j = 0; j < kernelVector.size(); j++)
       {
@@ -290,8 +460,9 @@ void inline evaluateGPVar(const NICE::Vector & kernelVector, const double & kern
       
     for (int run = 0; run < runsPerClassToAverageTesting; run++)
     {       
-      NICE::Vector rightPart (kernelVector.size(),0.0);
+      // uncertainty = k{**} - \k_*^T \cdot D^{-1} \cdot k_*       
       
+      NICE::Vector rightPart (kernelVector.size(),0.0);      
       choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
       
       uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
@@ -315,7 +486,8 @@ void inline evaluateGPMeanApprox(const NICE::Vector & kernelVector, const NICE::
     tTestSingle.start();
       
     for (int run = 0; run < runsPerClassToAverageTesting; run++)
-    {           
+    { 
+      // \mean = \k_*^T \cdot D^{-1} \cdot y  where D is our nice approximation of K    
       mean = kernelVector.scalarProduct ( rightPart );
     }
       
@@ -335,8 +507,10 @@ void inline evaluateGPMean(const NICE::Vector & kernelVector,  const NICE::Vecto
     
     Timer tTestSingle;
     tTestSingle.start();
+    
     for (int run = 0; run < runsPerClassToAverageTesting; run++)
-    {       
+    {
+      // \mean = \k_*^T \cdot K^{-1} \cdot y      
       mean = kernelVector.scalarProduct ( GPMeanRightPart );
     }
 
@@ -350,6 +524,70 @@ void inline evaluateGPMean(const NICE::Vector & kernelVector,  const NICE::Vecto
     r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
 }
 
+void inline evaluateGPSRMean(const NICE::Vector & kernelVector,  const NICE::Vector & GPSRMeanRightPart, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, const std::vector<int> & indicesOfChosenExamples)
+{
+    double mean;
+    
+    //grep the entries corresponding to the active set
+    NICE::Vector kernelVectorM;
+    kernelVectorM.resize(nrOfRegressors);
+    for (int i = 0; i < nrOfRegressors; i++)
+    {
+      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
+    }
+    
+    Timer tTestSingle;
+    tTestSingle.start();
+    
+    for (int run = 0; run < runsPerClassToAverageTesting; run++)
+    {
+      // \mean = \k_*^T \cdot K^{-1} \cdot y      
+      mean = kernelVectorM.scalarProduct ( GPSRMeanRightPart );
+    }
+
+    tTestSingle.stop();
+    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
+    
+    FullVector scores ( 2 );
+    scores[0] = 0.0;
+    scores[1] = mean;
+
+    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
+}
+
+void inline evaluateGPSRVar(const NICE::Vector & kernelVector,  const NICE::Matrix & choleskyMatrix, ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting, const int & nrOfRegressors, std::vector<int> & indicesOfChosenExamples, const double & noise)
+{
+    double uncertainty;
+    
+    //grep the entries corresponding to the active set
+    NICE::Vector kernelVectorM;
+    kernelVectorM.resize(nrOfRegressors);
+    for (int i = 0; i < nrOfRegressors; i++)
+    {
+      kernelVectorM[i] = kernelVector[indicesOfChosenExamples[i]];
+    }    
+    
+    Timer tTestSingle;
+    tTestSingle.start();
+    
+    for (int run = 0; run < runsPerClassToAverageTesting; run++)
+    {
+      NICE::Vector rightPart (nrOfRegressors,0.0);      
+      choleskySolveLargeScale ( choleskyMatrix, kernelVectorM, rightPart );
+      
+      uncertainty = noise*kernelVectorM.scalarProduct ( rightPart );
+    }
+
+    tTestSingle.stop();
+    timeForSingleExamples += tTestSingle.getLast()/(double)runsPerClassToAverageTesting;      
+    
+    FullVector scores ( 2 );
+    scores[0] = 0.0;
+    scores[1] = 1.0 - uncertainty;
+
+    r = ClassificationResult ( scores[1]<0.5 ? 0 : 1, scores );    
+}
+
 void inline evaluateParzen(const NICE::Vector & kernelVector,  ClassificationResult & r, double & timeForSingleExamples, const int & runsPerClassToAverageTesting)
 {
     double score;
@@ -358,8 +596,9 @@ void inline evaluateParzen(const NICE::Vector & kernelVector,  ClassificationRes
     tTestSingle.start();
     
     for (int run = 0; run < runsPerClassToAverageTesting; run++)
-    {       
-      double score( kernelVector.Sum() / (double) kernelVector.size() ); //maybe we could directly call kernelVector.Mean()
+    {      
+      //the Parzen score is nothing but the averaged similarity to every training sample
+      score = kernelVector.Sum() / (double) kernelVector.size(); //maybe we could directly call kernelVector.Mean() here
     }
       
     tTestSingle.stop();
@@ -399,6 +638,7 @@ int main (int argc, char **argv)
   int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
   nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
   
+  //which classes to consider? we assume consecutive class numbers
   int indexOfFirstClass = conf.gI("main", "indexOfFirstClass", 0);
   indexOfFirstClass = std::max(indexOfFirstClass, 0); //we do not have less than 0 classes
   int indexOfLastClass = conf.gI("main", "indexOfLastClass", 999);
@@ -406,9 +646,11 @@ int main (int argc, char **argv)
   
   int nrOfClassesToConcidere =  (indexOfLastClass - indexOfLastClass)+1;
   
-  int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 );
+  //repetitions for every class to achieve reliable time evaluations
+  int runsPerClassToAverageTraining = conf.gI( "main", "runsPerClassToAverageTraining", 1 ); 
   int runsPerClassToAverageTesting = conf.gI( "main", "runsPerClassToAverageTesting", 1 );
   
+  // share parameters among methods and classes?
   bool shareParameters = conf.gB("main" , "shareParameters", true);
 
   
@@ -424,6 +666,12 @@ int main (int argc, char **argv)
   //GP mean  
   NICE::Vector sigmaGPMeanParas(nrOfClassesToConcidere,0.0);
   NICE::Vector noiseGPMeanParas(nrOfClassesToConcidere,0.0);
+  //GP SR mean  
+  NICE::Vector sigmaGPSRMeanParas(nrOfClassesToConcidere,0.0);
+  NICE::Vector noiseGPSRMeanParas(nrOfClassesToConcidere,0.0);
+  //GP SR var
+  NICE::Vector sigmaGPSRVarParas(nrOfClassesToConcidere,0.0);
+  NICE::Vector noiseGPSRVarParas(nrOfClassesToConcidere,0.0);
   //Parzen  
   NICE::Vector sigmaParzenParas(nrOfClassesToConcidere,0.0);
   NICE::Vector noiseParzenParas(nrOfClassesToConcidere,0.0);
@@ -466,6 +714,12 @@ int main (int argc, char **argv)
     //GP mean  
     readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPMeanParas);
     readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPMeanParas); 
+    //GP SR mean  
+    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRMeanParas);
+    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRMeanParas);
+    //GP SR var  
+    readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaGPSRVarParas);
+    readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseGPSRVarParas);    
     //Parzen    
     readParameters(sigmaGPVarApproxFile,nrOfClassesToConcidere, sigmaParzenParas);
     readParameters(noiseGPVarApproxFile,nrOfClassesToConcidere, noiseParzenParas);  
@@ -475,6 +729,7 @@ int main (int argc, char **argv)
   }
   else
   {
+    //use static variables for all methods and classes
     double noise = conf.gD( "main", "noise", 0.01 );
     double sigma = conf.gD( "main", "sigma", 1.0 );
     
@@ -489,6 +744,12 @@ int main (int argc, char **argv)
     //GP mean  
     sigmaGPMeanParas.set(sigma);
     noiseGPMeanParas.set(noise);
+    //GP SR mean  
+    sigmaGPSRMeanParas.set(sigma);
+    noiseGPSRMeanParas.set(noise);
+    //GP SR var  
+    sigmaGPSRVarParas.set(sigma);
+    noiseGPSRVarParas.set(noise);    
     //Parzen  
     sigmaParzenParas.set(sigma);
     noiseParzenParas.set(noise);
@@ -530,6 +791,8 @@ int main (int argc, char **argv)
   double OverallPerformanceGPVar(0.0);
   double OverallPerformanceGPMeanApprox(0.0);
   double OverallPerformanceGPMean(0.0);
+  double OverallPerformanceGPSRMean(0.0);
+  double OverallPerformanceGPSRVar(0.0);  
   double OverallPerformanceParzen(0.0);
   double OverallPerformanceSVDD(0.0);
 
@@ -538,6 +801,8 @@ int main (int argc, char **argv)
   double kernelSigmaGPVar;
   double kernelSigmaGPMeanApprox;
   double kernelSigmaGPMean;
+  double kernelSigmaGPSRMean;
+  double kernelSigmaGPSRVar;
   double kernelSigmaParzen;
   double kernelSigmaSVDD;
   
@@ -551,30 +816,23 @@ int main (int argc, char **argv)
     kernelSigmaGPVar = sigmaGPVarParas[cl];
     kernelSigmaGPMeanApprox = sigmaGPMeanApproxParas[cl];
     kernelSigmaGPMean = sigmaGPMeanParas[cl];
+    kernelSigmaGPSRMean = sigmaGPSRMeanParas[cl];
+    kernelSigmaGPSRVar = sigmaGPSRVarParas[cl];
     kernelSigmaParzen = sigmaParzenParas[cl];
     kernelSigmaSVDD = sigmaSVDDParas[cl];
     
     Timer tTrain;
     tTrain.start();
-       
+    
+    //compute the kernel matrix, which will be shared among all methods in this scenario       
     NICE::Matrix kernelMatrix(nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
     
-    //TODO in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
+    //NOTE in theory we have to compute a single kernel Matrix for every method, since every method may have its own optimal parameter
     // I'm sure, we can speed it up a bit and compute it only for every different parameter
     //nonetheless, it's not as nice as we originally thought (same matrix for every method) 
     
-    //NOTE since we're only interested in runtimes, we can ignore this (and still do some further code optimization...) //TODO
-    
-/*    //adding some noise, if necessary
-    if (noiseParas[cl] != 0.0)
-    {
-      kernelMatrix.addIdentity(noiseParas[cl]);
-    }
-    else
-    {
-      //zero was already set
-    } */     
-       
+    //NOTE Nonetheless, since we're only interested in runtimes, we can ignore this
+           
     //now sum up all entries of each row in the original kernel matrix
     double kernelScore(0.0);
     for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
@@ -589,10 +847,10 @@ int main (int argc, char **argv)
       }
     }  
     
-    //train GP Var Approx
+    // now call the individual training methods
     
+    //train GP Var Approx    
     NICE::Vector matrixDInv;
-    for (int i = 0; i < runsPerClassToAverageTraining; i++)
     trainGPVarApprox(matrixDInv, noiseGPVarApproxParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
     
     //train GP Var
@@ -607,11 +865,22 @@ int main (int argc, char **argv)
     NICE::Vector GPMeanRightPart;
     trainGPMean(GPMeanRightPart, noiseGPMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );    
     
+    //train GP SR Mean
+    NICE::Vector GPSRMeanRightPart;
+    std::vector<int> indicesOfChosenExamplesGPSRMean;
+    int nrOfRegressors = conf.gI( "GPSR", "nrOfRegressors", nrOfExamplesPerClass/2);
+    nrOfRegressors = std::min( nrOfRegressors, nrOfExamplesPerClass );
+    trainGPSRMean(GPSRMeanRightPart, noiseGPSRMeanParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRMean );        
+    
+    //train GP SR Var
+    NICE::Matrix GPSRVarCholesky;   
+    std::vector<int> indicesOfChosenExamplesGPSRVar;
+    trainGPSRVar(GPSRVarCholesky, noiseGPSRVarParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining, nrOfRegressors, indicesOfChosenExamplesGPSRVar );      
+    
     //train Parzen 
     //nothing to do :)
     
     //train SVDD
-    //TODO what do we need here?
     KCMinimumEnclosingBall *svdd = trainSVDD(noiseSVDDParas[cl], kernelMatrix, nrOfExamplesPerClass, cl, runsPerClassToAverageTraining );
   
     tTrain.stop();
@@ -627,7 +896,9 @@ int main (int argc, char **argv)
     ClassificationResults resultsGPVarApprox;
     ClassificationResults resultsGPVar;
     ClassificationResults resultsGPMeanApprox;
-    ClassificationResults resultsGPMean;    
+    ClassificationResults resultsGPMean;
+    ClassificationResults resultsGPSRMean;
+    ClassificationResults resultsGPSRVar;    
     ClassificationResults resultsParzen;
     ClassificationResults resultsSVDD;       
     
@@ -640,6 +911,8 @@ int main (int argc, char **argv)
     double timeForSingleExamplesGPVar(0.0);
     double timeForSingleExamplesGPMeanApprox(0.0);    
     double timeForSingleExamplesGPMean(0.0);    
+    double timeForSingleExamplesGPSRMean(0.0);    
+    double timeForSingleExamplesGPSRVar(0.0);    
     double timeForSingleExamplesParzen(0.0);    
     double timeForSingleExamplesSVDD(0.0);    
     
@@ -649,7 +922,9 @@ int main (int argc, char **argv)
 
       const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
 
-      //TODO: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
+      //NOTE: again we should use method-specific optimal parameters. If we're only interested in the runtimes, this doesn't matter
+      
+      //compute (self) similarities
       double kernelSelf (measureDistance(svec,svec, kernelSigmaGPVarApprox) );
       NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
       
@@ -658,6 +933,8 @@ int main (int argc, char **argv)
         kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigmaGPVarApprox);
       }     
       
+      //call the individual test-methods
+      
       //evaluate GP Var Approx
       ClassificationResult rGPVarApprox;      
       evaluateGPVarApprox( kernelVector, kernelSelf, matrixDInv, rGPVarApprox, timeForSingleExamplesGPVarApprox, runsPerClassToAverageTesting );
@@ -673,6 +950,15 @@ int main (int argc, char **argv)
       //evaluate GP Mean
       ClassificationResult rGPMean;
       evaluateGPMean( kernelVector, GPMeanRightPart, rGPMean, timeForSingleExamplesGPMean, runsPerClassToAverageTesting );       
+       
+      //evaluate GP SR Mean
+      ClassificationResult rGPSRMean;
+      evaluateGPSRMean( kernelVector, GPSRMeanRightPart, rGPSRMean, timeForSingleExamplesGPSRMean, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRMean );       
+      
+      //evaluate GP SR Var
+      ClassificationResult rGPSRVar;
+      evaluateGPSRVar( kernelVector, GPSRVarCholesky, rGPSRVar, timeForSingleExamplesGPSRVar, runsPerClassToAverageTesting, nrOfRegressors, indicesOfChosenExamplesGPSRVar, noiseGPSRVarParas[cl] );       
+
       
       //evaluate Parzen
       ClassificationResult rParzen;
@@ -688,17 +974,18 @@ int main (int argc, char **argv)
       rGPVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
       rGPMeanApprox.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
       rGPMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
+      rGPSRMean.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
+      rGPSRVar.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
       rParzen.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
       rSVDD.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
-      
-//       std::cerr << "scores: " << std::endl;
-//       scores >> std::cerr;
-//       std::cerr << "gt: " <<  r.classno_groundtruth << " -- " << r.classno << std::endl;
-      
+
+      //remember the results for the evaluation lateron
       resultsGPVarApprox.push_back ( rGPVarApprox );
       resultsGPVar.push_back ( rGPVar );
       resultsGPMeanApprox.push_back ( rGPMeanApprox );
       resultsGPMean.push_back ( rGPMean );
+      resultsGPSRMean.push_back ( rGPSRMean );
+      resultsGPSRVar.push_back ( rGPSRVar );
       resultsParzen.push_back ( rParzen );
       resultsSVDD.push_back ( rSVDD );      
     }
@@ -710,6 +997,8 @@ int main (int argc, char **argv)
     timeForSingleExamplesGPVar/= imageNetTest.getNumPreloadedExamples();
     timeForSingleExamplesGPMeanApprox/= imageNetTest.getNumPreloadedExamples();
     timeForSingleExamplesGPMean/= imageNetTest.getNumPreloadedExamples();
+    timeForSingleExamplesGPSRMean/= imageNetTest.getNumPreloadedExamples();
+    timeForSingleExamplesGPSRVar/= imageNetTest.getNumPreloadedExamples();
     timeForSingleExamplesParzen/= imageNetTest.getNumPreloadedExamples();
     timeForSingleExamplesSVDD/= imageNetTest.getNumPreloadedExamples();
     
@@ -717,15 +1006,18 @@ int main (int argc, char **argv)
     std::cerr << "GPVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPVar << std::endl;    
     std::cerr << "GPMeanApprox -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMeanApprox << std::endl;    
     std::cerr << "GPMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPMean << std::endl;    
+    std::cerr << "GPSRMean -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRMean << std::endl;    
+    std::cerr << "GPSRVar -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesGPSRVar << std::endl;    
     std::cerr << "Parzen -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesParzen << std::endl;    
     std::cerr << "SVDD -- time used for evaluation single elements of class " << cl << " : " << timeForSingleExamplesSVDD << std::endl;    
 
-//     std::cerr << "Writing results to " << resultsfile << std::endl;
-//     results.writeWEKA ( resultsfile, 1 );
+    // run the AUC-evaluation
     double perfvalueGPVarApprox = resultsGPVarApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
     double perfvalueGPVar = resultsGPVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
     double perfvalueGPMeanApprox = resultsGPMeanApprox.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
     double perfvalueGPMean = resultsGPMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
+    double perfvalueGPSRMean = resultsGPSRMean.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
+    double perfvalueGPSRVar = resultsGPSRVar.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
     double perfvalueParzen = resultsParzen.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
     double perfvalueSVDD = resultsSVDD.getBinaryClassPerformance( ClassificationResults::PERF_AUC );    
 
@@ -733,6 +1025,8 @@ int main (int argc, char **argv)
     std::cerr << "Performance GPVar: " << perfvalueGPVar << std::endl;
     std::cerr << "Performance GPMeanApprox: " << perfvalueGPMeanApprox << std::endl;
     std::cerr << "Performance GPMean: " << perfvalueGPMean << std::endl;
+    std::cerr << "Performance GPSRMean: " << perfvalueGPSRMean << std::endl;
+    std::cerr << "Performance GPSRVar: " << perfvalueGPSRVar << std::endl;
     std::cerr << "Performance Parzen: " << perfvalueParzen << std::endl;
     std::cerr << "Performance SVDD: " << perfvalueSVDD << std::endl;    
     
@@ -740,6 +1034,8 @@ int main (int argc, char **argv)
    OverallPerformanceGPVar += perfvalueGPVar;
     OverallPerformanceGPMeanApprox += perfvalueGPMeanApprox;
     OverallPerformanceGPMean += perfvalueGPMean;
+    OverallPerformanceGPSRMean += perfvalueGPSRMean;
+    OverallPerformanceGPSRVar += perfvalueGPSRVar;
     OverallPerformanceParzen += perfvalueParzen;
     OverallPerformanceSVDD += perfvalueSVDD;   
 
@@ -751,6 +1047,8 @@ int main (int argc, char **argv)
   OverallPerformanceGPVar /= nrOfClassesToConcidere;
   OverallPerformanceGPMeanApprox /= nrOfClassesToConcidere;
   OverallPerformanceGPMean /= nrOfClassesToConcidere;
+  OverallPerformanceGPSRMean /= nrOfClassesToConcidere;
+  OverallPerformanceGPSRVar /= nrOfClassesToConcidere;
   OverallPerformanceParzen /= nrOfClassesToConcidere;
   OverallPerformanceSVDD /= nrOfClassesToConcidere;  
   
@@ -758,6 +1056,8 @@ int main (int argc, char **argv)
   std::cerr << "overall performance GPVar: " << OverallPerformanceGPVar << std::endl;
   std::cerr << "overall performance GPMeanApprox: " << OverallPerformanceGPMeanApprox << std::endl;
   std::cerr << "overall performance GPMean: " << OverallPerformanceGPMean << std::endl;
+  std::cerr << "overall performance GPSRMean: " << OverallPerformanceGPSRMean << std::endl;
+  std::cerr << "overall performance GPSRVar: " << OverallPerformanceGPSRVar << std::endl;
   std::cerr << "overall performance Parzen: " << OverallPerformanceParzen << std::endl;
   std::cerr << "overall performance SVDD: " << OverallPerformanceSVDD << std::endl;  
   

+ 12 - 14
progs/testImageNetBinaryGPBaseline.cpp

@@ -114,10 +114,7 @@ int main (int argc, char **argv)
   NICE::Vector sigmaParas(nrOfClassesToConcidere,kernelSigma);
   NICE::Vector noiseParas(nrOfClassesToConcidere,0.0);
   
-  std::cerr << "try to read optimal sigmas from " << sigmaFile << std::endl;
   readParameters(sigmaFile,nrOfClassesToConcidere, sigmaParas);
-  //------------
-  std::cerr << "try to read optimal noises from " << noiseFile << std::endl;
   readParameters(noiseFile,nrOfClassesToConcidere, noiseParas);
   
   std::vector<SparseVector> trainingData;
@@ -186,23 +183,16 @@ int main (int argc, char **argv)
    
     //compute its inverse
     //noise is already added :)
-/*    Timer tTrainPrecise;
-    tTrainPrecise.start();  */   
-    
-    //tic tTrainPrecise
-    time_t  tTrainPreciseStart = clock(); 
-    
+    Timer tTrainPrecise;
+    tTrainPrecise.start();     
     
     CholeskyRobust cr  ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
     
     NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);      
     cr.robustChol ( kernelMatrix, choleskyMatrix );    
     
-//     tTrainPrecise.stop(); 
-//     std::cerr << "Precise time used for training class " << cl << ": " << tTrainPrecise.getLast() << std::endl;    
-    //toc tTrainPrecise
-    float tTrainPrecise = (float) (clock() - tTrainPreciseStart);
-    std::cerr << "Time for HIK preparation of alpha multiplications: " << tTrainPrecise/CLOCKS_PER_SEC << std::endl;       
+    tTrainPrecise.stop(); 
+    std::cerr << "Precise time used for training class " << cl << ": " << tTrainPrecise.getLast() << std::endl;    
     
     tTrain.stop();
     std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;    
@@ -223,8 +213,10 @@ int main (int argc, char **argv)
     {
       pb.update ( imageNetTest.getNumPreloadedExamples() );
 
+      //get the precomputed features
       const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
       
+      //compute (self-)similarities
       double kernelSelf (measureDistance(svec,svec, kernelSigma) );
       NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
       
@@ -233,6 +225,7 @@ int main (int argc, char **argv)
         kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigma);
       }     
       
+      //compute the resulting score
       tTestSingle.start();
       NICE::Vector rightPart (nrOfExamplesPerClass);
       choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
@@ -241,6 +234,7 @@ int main (int argc, char **argv)
       tTestSingle.stop();
       timeForSingleExamples += tTestSingle.getLast();
       
+      //this is the standard score-object needed for the evaluation
       FullVector scores ( 2 );
       scores[0] = 0.0;
       scores[1] = 1.0 - uncertainty;
@@ -250,8 +244,10 @@ int main (int argc, char **argv)
       // set ground truth label
       r.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
       
+      //we could write the resulting score on the command line
 //       std::cerr << "scores: " << std::endl;
 //       scores >> std::cerr;
+      //as well as the ground truth label
 //       std::cerr << "gt: " <<  r.classno_groundtruth << " -- " << r.classno << std::endl;
       
       results.push_back ( r );
@@ -264,6 +260,8 @@ int main (int argc, char **argv)
     std::cerr << "Time used for evaluation single elements of class " << cl << " : " << timeForSingleExamples << std::endl;
     
 
+    // we could also write the results to an external file. Note that this file will be overwritten in every iteration,
+    // so if you want to store all results, you should add a suffix with the class number
 //     std::cerr << "Writing results to " << resultsfile << std::endl;
 //     results.writeWEKA ( resultsfile, 1 );
     double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );