/** 
* @file testNullSpace.cpp
* @brief test function for class KCNullSpace
* @author Paul Bodesheim
* @date 28-11-2012 (dd-mm-yyyy)
*/

#include <ctime>
#include <time.h>

#include <iostream>

#include "core/basics/Config.h"

#ifdef NICE_USELIB_MATIO

#include "core/basics/Timer.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"

#include "vislearning/classifier/kernelclassifier/KCNullSpace.h"
#include "vislearning/math/kernels/KernelData.h"
#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "core/matlabAccess/MatFileIO.h"
#include "vislearning/matlabAccessHighLevel/ImageNetData.h"

// #include <iostream>
// #include <fstream>

using namespace std;
using namespace NICE;
using namespace OBJREC;

// --------------- THE KERNEL FUNCTION ( exponential kernel with euclidian distance ) ----------------------
/**
 * @brief RBF kernel score exp( -||a-b||^2 / (2*sigma^2) ) of two sparse feature vectors
 * @param a first feature vector (sparse)
 * @param b second feature vector (sparse)
 * @param sigma bandwidth of the RBF kernel (default 2.0)
 * @return kernel value in (0,1]
 */
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double squaredDist(0.0);

  NICE::SparseVector::const_iterator itA = a.begin();
  NICE::SparseVector::const_iterator itB = b.begin();

  // merge-walk over both sorted sparse vectors to accumulate the
  // squared euclidian distance; a dimension missing in one vector
  // contributes the squared entry of the other one
  while ( itA != a.end() && itB != b.end() )
  {
    if ( itA->first < itB->first )
    {
      squaredDist += itA->second * itA->second;
      ++itA;
    }
    else if ( itB->first < itA->first )
    {
      squaredDist += itB->second * itB->second;
      ++itB;
    }
    else
    {
      const double diff = itA->second - itB->second;
      squaredDist += diff * diff;
      ++itA;
      ++itB;
    }
  }

  // leftover entries of a (b is exhausted)
  for ( ; itA != a.end(); ++itA )
    squaredDist += itA->second * itA->second;

  // leftover entries of b (a is exhausted)
  for ( ; itB != b.end(); ++itB )
    squaredDist += itB->second * itB->second;

  // finally, compute the RBF-kernel score (RBF = radial basis function),
  // normalizing the exponent with 2*sigma^2
  return exp( -squaredDist / (2.0*sigma*sigma) );
}

// --------------- THE KERNEL FUNCTION ( HIK ) ----------------------
/**
 * @brief histogram intersection kernel (HIK): sum of element-wise minima of two sparse vectors
 * @param a first feature vector (sparse)
 * @param b second feature vector (sparse)
 * @return HIK score; only dimensions present in both vectors contribute (min with an absent entry is 0)
 */
double minimumDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b )
{
  double hikScore(0.0);

  NICE::SparseVector::const_iterator itA = a.begin();
  NICE::SparseVector::const_iterator itB = b.begin();

  // merge-walk over both sorted sparse vectors; skip dimensions that
  // appear in only one of them, since their minimum is zero
  while ( itA != a.end() && itB != b.end() )
  {
    if ( itA->first < itB->first )
    {
      ++itA;
    }
    else if ( itB->first < itA->first )
    {
      ++itB;
    }
    else
    {
      hikScore += std::min( itA->second, itB->second );
      ++itA;
      ++itB;
    }
  }

  return hikScore;
}

/** 
    test the basic functionality of the KCNullSpace classifier (KNFST):
    multi-class novelty detection, multi-class classification and
    one-class classification (OCC) on the ImageNet demo data
*/
int main (int argc, char **argv)
{   
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 100);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  int maxKnownClass = conf.gI("KCNullSpace", "maxKnownClass", 5);
  int OCCsingleClassLabel = conf.gI("KCNullSpace", "OCCsingleClassLabel", 1);
  bool testVerbose = conf.gB("KCNullSpace", "verbose", false);
  
  std::cerr << "conf verbose: " << testVerbose << std::endl;
  
  // -------- read ImageNet data --------------  
  std::vector<SparseVector> trainingData;
  NICE::Vector y;
  NICE::Vector yTest;
  
  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  imageNetTrain.normalizeData("L1");
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();
  
  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;
  
  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.normalizeData("L1");
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );  
  yTest = imageNetTest.getPreloadedLabels();

  // ---------- SELECT TRAINING SET FOR MULTICLASS NOVELTY DETECTION AND COMPUTE KERNEL MATRIX ------------------------
  // known classes are simply the labels 1..maxKnownClass
  NICE::Vector knownClassLabels(maxKnownClass,0.0);
  for (int k=1; k<=maxKnownClass; k++)
    knownClassLabels(k-1) = k;
  
  std::vector<SparseVector> currentTrainingData;
  currentTrainingData.clear();
  NICE::Vector currentTrainingLabels(nrOfExamplesPerClass*knownClassLabels.size(),0);
  
  // select at most nrOfExamplesPerClass examples per known class.
  // NOTE: the per-class cap is essential — without it, configuring
  // nrOfExamplesPerClass < 100 would overrun currentTrainingLabels,
  // which is allocated with exactly nrOfExamplesPerClass entries per class.
  std::vector<int> selectedPerClass(knownClassLabels.size(), 0);
  int kk(0);
  for (size_t i = 0; i < y.size(); i++)
  {
    for (size_t j=0; j<knownClassLabels.size(); j++)
    {
      if ( y[i] == knownClassLabels[j] )
      {
        if ( selectedPerClass[j] < nrOfExamplesPerClass )
        {
          currentTrainingLabels(kk) = knownClassLabels[j];
          currentTrainingData.push_back(trainingData[i]);
          selectedPerClass[j]++;
          kk++;
        }
        break;
      }  
    }
    
  }
  
  // sanity check: the kernel matrix below assumes exactly
  // nrOfExamplesPerClass examples per known class were found
  if ( kk != (int)currentTrainingLabels.size() )
  {
    std::cerr << "Error: collected only " << kk << " of " << currentTrainingLabels.size()
              << " requested training examples - check nrOfExamplesPerClass and the data set." << std::endl;
    return -1;
  }
  
  Timer tTrain;
  tTrain.start();
    
  // compute the (symmetric) HIK kernel matrix of the training set;
  // only the upper triangle is computed, the lower one is mirrored
  NICE::Matrix kernelMatrix(nrOfExamplesPerClass*knownClassLabels.size(), nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
  double kernelScore(0.0);
  
  for (size_t i = 0; i < kernelMatrix.rows(); i++)
  {
    for (size_t j = i; j < kernelMatrix.cols(); j++)
    {
      kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]);
      kernelMatrix(i,j) = kernelScore;
        
      if (i != j)
          kernelMatrix(j,i) = kernelScore;
    }
  }    
  
  KernelData kernelData( &conf, kernelMatrix, "Kernel", false );
  KCNullSpace knfst( &conf);
  
  // train the model
  knfst.teach(&kernelData, currentTrainingLabels);
  
  tTrain.stop();
  std::cerr << "Time used for training " << ": " << tTrain.getLast() << std::endl;  
  
  // some outputs of training
  std::cerr << "training set statistic: " << std::endl;
  for (std::map<int,int>::iterator it = knfst.getTrainingSetStatistic()->begin(); it != knfst.getTrainingSetStatistic()->end(); it++)
  {
    std::cerr << "class label: " << (*it).first << "  number of class samples: " << (*it).second << std::endl;
  }
  std::cerr << "one-class setting?: " << knfst.isOneClass() << std::endl;
  std::cerr << "null space dimension: "<< knfst.getNullSpaceDimension() << std::endl;
  std::cerr << "target points: " << std::endl;
  for (std::map<int,NICE::Vector>::iterator it = knfst.getTargetPoints()->begin(); it != knfst.getTargetPoints()->end(); it++)
    std::cerr << (*it).second << std::endl;    
       
  std::cerr << "training done - now perform the evaluation" << std::endl;

  // --------- TESTING MULTICLASS NOVELTY DETECTION ------------------------------
   
  std::cerr << "Multi-class novelty detection... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    
  ClassificationResults results;

  ProgressBar pb;
  Timer tTest;
  tTest.start();    

  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    // NOTE(review): the argument looks like it should be the total number of
    // update steps - verify against ProgressBar::update semantics
    if ( (i % 100)==0)
      pb.update ( imageNetTest.getNumPreloadedExamples()/100 );

    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
     
    // compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
      
    for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }     
      
    ClassificationResult r;      
    r = knfst.noveltyDetection( kernelVector, kernelSelf);  
      
    // set ground truth label: 1 if the test example belongs to a known class, 0 otherwise
    r.classno_groundtruth = 0;
    for (size_t j=0; j<knownClassLabels.size(); j++)
    {
      if ( yTest[i] == knownClassLabels[j] )
      {
        r.classno_groundtruth = 1;
        break;
      }
    }

    // remember the results for the evaluation lateron
    results.push_back ( r );
  }
    
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;       
    
  double timeForSingleExample(0.0);
  timeForSingleExample = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExample << std::endl;      
  
  // run the AUC-evaluation
  double perfvalue( 0.0 );   
  perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

  std::cerr << " novelty detection performance: " << perfvalue << std::endl;
  
  // --------- TESTING MULTICLASS CLASSIFICATION ------------------------------
  results.clear();
  tTest.start();    
    
  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    // only use samples of known classes
    if ( yTest[i] > maxKnownClass) 
    {
      continue;
    }
    
    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
     
    // compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
      
    for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }     
      
    ClassificationResult r;      
    r = knfst.classifyKernel( kernelVector, kernelSelf);  
      
    // set ground truth label (index of the known class; a match is
    // guaranteed by the yTest[i] > maxKnownClass filter above)
    for (uint j=0; j < knownClassLabels.size(); j++)
    {
      if (yTest[i] == knownClassLabels[j])
      {
        r.classno_groundtruth = j;
        break;
      }
    }
    // remember the results for the evaluation lateron
    results.push_back ( r );
  }
    
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;       
    
  timeForSingleExample = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExample << std::endl;      
  
  // run the multi-class evaluation (average recognition rate)
  perfvalue = results.getAverageRecognitionRate();

  std::cerr << " classification performance: " << perfvalue << std::endl; 
  
  // ---------- SELECT TRAINING SET FOR ONECLASS CLASSIFICATION AND COMPUTE KERNEL MATRIX ------------------------ 
  currentTrainingData.clear();
  currentTrainingLabels.clear();
  
  for (size_t i = 0; i < y.size(); i++)
  {
    if ( y[i] == OCCsingleClassLabel )
    {
      currentTrainingLabels.append(OCCsingleClassLabel);
      currentTrainingData.push_back(trainingData[i]);
    }  
  }
      
  tTrain.start();
    
  //compute the kernel matrix
  NICE::Matrix kernelMatrixOCC(currentTrainingData.size(), currentTrainingData.size(), 0.0);
  
  std::cerr << "OCC Kernel Matrix: " << kernelMatrixOCC.rows() << " x " << kernelMatrixOCC.cols() << std::endl;
  
  for (size_t i = 0; i < kernelMatrixOCC.rows(); i++)
  {
    for (size_t j = i; j < kernelMatrixOCC.cols(); j++)
    {
      kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]);
      kernelMatrixOCC(i,j) = kernelScore;
        
      if (i != j)
          kernelMatrixOCC(j,i) = kernelScore;
    }
  }    
  
  // dump the OCC kernel matrix for debugging; the target file is configurable
  // instead of being hard-coded to a user-specific home directory (the old
  // hard-coded location is kept as default for backward compatibility)
  string kernelMatrixOCCFile = conf.gS("main", "kernelMatrixOCCFile", "/home/bodesheim/experiments/kernelMatrixOCC.txt");
  filebuf fb;
  fb.open(kernelMatrixOCCFile.c_str(),ios::out);
  if ( fb.is_open() )
  {
    ostream os (&fb);
    os << kernelMatrixOCC;
    fb.close();
  }
  else
  {
    std::cerr << "Warning: could not open " << kernelMatrixOCCFile << " for writing the OCC kernel matrix." << std::endl;
  }
  
  KernelData kernelDataOCC( &conf, kernelMatrixOCC, "Kernel", false );
  
  // train the model
  std::cerr << "Train OCC model... " << std::endl;  
  knfst.teach(&kernelDataOCC, currentTrainingLabels);
  
  tTrain.stop();
  std::cerr << "Time used for training " << ": " << tTrain.getLast() << std::endl;  
  
  // some outputs of training
  std::cerr << "training set statistic: " << std::endl;
  for (std::map<int,int>::iterator itt = knfst.getTrainingSetStatistic()->begin(); itt != knfst.getTrainingSetStatistic()->end(); itt++)
  {
    std::cerr << "class label: " << (*itt).first << "  number of class samples: " << (*itt).second << std::endl;
  }
  std::cerr << "one-class setting?: " << knfst.isOneClass() << std::endl;
  std::cerr << "null space dimension: "<< knfst.getNullSpaceDimension() << std::endl;
  std::cerr << "target points: " << std::endl;
  for (std::map<int,NICE::Vector>::const_iterator it = knfst.getTargetPoints()->begin(); it != knfst.getTargetPoints()->end(); it++)
    std::cerr << (*it).second << std::endl;    
       
  std::cerr << "training done - now perform the evaluation" << std::endl;

  // --------- TESTING OCC ------------------------------
   
  std::cerr << "OCC... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    
  results.clear();
     
  tTest.start();    
  ProgressBar pb3;
  
  std::cerr << "start for loop" << std::endl;
    
  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    if ( (i % 100)==0)
      pb3.update ( imageNetTest.getNumPreloadedExamples()/100 );

    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
     
    //compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (currentTrainingData.size(), 0.0);
      
    for (uint j = 0; j < currentTrainingData.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }     
      
    ClassificationResult r;
    r = knfst.noveltyDetection( kernelVector, kernelSelf);  
      
    // set ground truth label: 1 for the single target class, 0 otherwise
    r.classno_groundtruth = ( yTest[i] == OCCsingleClassLabel ) ? 1 : 0;

    //remember the results for the evaluation lateron
    results.push_back ( r );
  }
    
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;       
    
  double timeForSingleExampleOCC = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExampleOCC << std::endl;      
  
  // run the AUC-evaluation
  double perfvalueOCC = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

  std::cerr << " occ performance: " << perfvalueOCC << std::endl;
  
  return 0;
}
#else
/**
 * Fallback entry point used when the MatIO library is not available:
 * prints a diagnostic and exits successfully without doing any work.
 */
int main (int argc, char **argv)
{
  std::cerr << "MatIO library is missing in your system - this program will have no effect. " << std::endl;  
  return 0; // explicit success status (previously relied on main's implicit return 0)
}

#endif