/**
* @file testLinsolvers.cpp
* @brief compare iterative linear solvers (conjugate gradients, GBCD, quantized HIK solver) on a binary ImageNet classification task
* @author Erik Rodner
* @date 01/04/2012
*/
#include "core/basics/Config.h"

#ifdef NICE_USELIB_MATIO

#include <cstring>

#include "core/algebra/IterativeLinearSolver.h"
#include "core/algebra/PartialGenericMatrix.h"
#include "core/algebra/GBCDSolver.h"
#include "core/algebra/ILSConjugateGradients.h"

#include <core/matlabAccess/MatFileIO.h>

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"
#include <vislearning/matlabAccessHighLevel/ImageNetData.h>

#include <gp-hik-core/kernels/IntersectionKernelFunction.h>
#include <gp-hik-core/tools.h>
#include <gp-hik-core/GMHIKernel.h>

using namespace std;
using namespace NICE;
using namespace OBJREC;
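
/** select all examples of the positive class and at most "nneg" negative examples
    from every other class; fills the map from global example index to consecutive
    local index and the corresponding binary label vector yb (+1 / -1) */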
void selectExamples ( const Config *conf, const Vector & y, map<int, int> & examples, Vector & yb )
{
  int positiveClass = conf->gI("main", "positive_class");

  map< int, set<int> > mysets;
  int n = y.size();
  set<int> positives;
  set<int> negatives;

  for ( int i = 0 ; i < n; i++ )
    mysets[ y[i] ].insert ( i );

  if ( mysets[ positiveClass ].size() == 0 )
    fthrow(Exception, "Class " << positiveClass << " is not available.");

  // add all examples of the positive class
  for ( set<int>::const_iterator i = mysets[positiveClass].begin(); i != mysets[positiveClass].end(); i++ )
    positives.insert ( *i );

  // add at most Nneg negative examples from every other class
  int Nneg = conf->gI("main", "nneg", 1 );
  for ( map<int, set<int> >::const_iterator k = mysets.begin(); k != mysets.end(); k++ )
  {
    int classno = k->first;
    if ( classno == positiveClass )
      continue;
    const set<int> & s = k->second;
    int ind = 0;
    for ( set<int>::const_iterator i = s.begin(); (i != s.end() && ind < Nneg); i++, ind++ )
      negatives.insert ( *i );
  }
  cerr << "Number of positive examples: " << positives.size() << endl;
  cerr << "Number of negative examples: " << negatives.size() << endl;

  // build the binary label vector and the global-to-local index map
  yb.resize(y.size());
  int ind = 0;
  for ( uint i = 0 ; i < y.size(); i++ )
  {
    if ( positives.find(i) != positives.end() ) {
      yb[ examples.size() ] = 1.0;
      examples.insert( pair<int, int> ( i, ind ) );
      ind++;
    } else if ( negatives.find(i) != negatives.end() ) {
      yb[ examples.size() ] = -1.0;
      examples.insert( pair<int, int> ( i, ind ) );
      ind++;
    }
  }
  yb.resize( examples.size() );

  cerr << "Examples: " << examples.size() << endl;
}
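
/** PartialGenericMatrix implementation that evaluates sub-blocks of the regularized
    histogram intersection kernel matrix (K + noise*I) on the fly from a dense,
    row-major feature array; used by the greedy block coordinate descent solver */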
class BlockHIK : public PartialGenericMatrix
{
  protected:
    const double *data;
    int n;
    int d;
    double noise;
    Vector diag;

  public:

    BlockHIK ( const double *data, int n, int d, double noise ) {
      this->data = data;
      this->n = n;
      this->d = d;
      this->noise = noise;

      // precompute the diagonal of the HIK matrix: k(x,x) = sum_dim x_dim
      diag.resize(n);
      for ( int i = 0 ; i < n ; i++ )
      {
        double sum = 0.0;
        for ( int dim = 0 ; dim < d ; dim++ )
          sum += data[i * d + dim];
        diag[i] = sum;
      }
    }

    /** multiply a sub-matrix with a given vector: Asub * xsub = ysub */
    virtual void multiply ( const SetType & rowSet, const SetType & columnSet, NICE::Vector & y, const NICE::Vector & x ) const
    {
      Matrix K;

      if ( rowSet.size() == 0 || columnSet.size() == 0 )
        fthrow(Exception, "Sets are zero ... weird");

      K.resize(rowSet.size(), columnSet.size());
      K.set(0.0);

      // run over every dimension and add the corresponding min-values to the entries of the kernel matrix
      int dimension = d;
      for (int dim = 0; dim < dimension; dim++)
      {
        int indi = 0;
        for ( SetType::const_iterator i = rowSet.begin(); i != rowSet.end(); i++, indi++ )
        {
          int indj = 0;
          int myi = *i;
          double vali = data[ myi * d + dim ];
          for ( SetType::const_iterator j = columnSet.begin(); j != columnSet.end(); j++, indj++ )
          {
            int myj = *j;
            double valj = data[ myj * d + dim ];
            double val = std::min ( valj, vali );

            if ( indi >= (int)K.rows() || indj >= (int)K.cols() )
              fthrow(Exception, "... weird indices!!");

            K(indi, indj) += val;

            // distribute the noise term over all dimensions, so the diagonal receives noise in total
            if ( myi == myj )
              K(indi, indj) += noise / dimension;
          }
        }
      } // dim loop

      y.resize( rowSet.size() );
      y = K * x;
    }

    /** multiply with a vector: A*x = y */
    virtual void multiply ( NICE::Vector & y, const NICE::Vector & x ) const
    {
      fthrow(Exception, "You do not really want to compute kernel matrices as big as this one!");
    }

    virtual double getDiagonalElement ( uint i ) const
    {
      return diag[i] + noise;
    }

    virtual uint rows() const
    {
      return n;
    }

    virtual uint cols() const
    {
      return n;
    }
};
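
/** expand the sparse FeatureMatrix of gp-hik-core into a dense row-major array
    of size n x d (missing entries become explicit zeros); the caller owns the returned memory */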
double *createFlatData ( const FeatureMatrix & f )
{
  int n = f.get_n();
  int d = f.get_d();
  double *data = new double [ n * d ];
  memset ( data, 0, n * d * sizeof(double) );

  for (int dim = 0; dim < d; dim++)
  {
    const multimap< double, SortedVectorSparse<double>::dataelement > & nonzeroElements = f.getFeatureValues(dim).nonzeroElements();
    int nrZeroIndices = f.getNumberOfZeroElementsPerDimension(dim);
    if ( nrZeroIndices == n ) continue;

    for ( multimap< double, SortedVectorSparse<double>::dataelement >::const_iterator i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
    {
      const SortedVectorSparse<double>::dataelement & de = i->second;
      uint feat = de.first;
      double fval = de.second;
      data[ feat * d + dim ] = fval;
    }
  }
  return data;
}
/**
    compare the runtime and behavior of different iterative solvers (conjugate gradients,
    the randomized solver built into FastMinKernel, and greedy block coordinate descent)
    for the linear system (K + noise*I) * alpha = y with the histogram intersection kernel
*/
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");
  cerr << "Positive class is " << positiveClass << endl;

  sparse_t data;
  NICE::Vector y;
  cerr << "Reading ImageNet data ..." << endl;

  bool imageNetLocal = conf.gB("main", "imageNetLocal", false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNet ( imageNetPath + "demo/" );
  imageNet.getBatchData ( data, y, "train", "training" );

  map<int, int> examples;
  Vector yb;
  selectExamples ( &conf, y, examples, yb );
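
  // set up the fast HIK data structures: FastMinKernel stores the selected examples in a
  // sorted sparse format, and GMHIKernel exposes fast multiplications with the regularized kernel matrix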
  double noise = conf.gD("main", "noise", 10);
  int dimension = conf.gI("main", "dimension", 1000);
  int numBins = conf.gI("main", "num_bins", 100);
  Quantization q ( numBins );

  FastMinKernel fmk ( data, noise, examples, dimension );
  GMHIKernel gmk ( &fmk );
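
  // baseline: solve (K + noise*I) * sol = yb with conjugate gradients, using the
  // implicit (exact) kernel-vector products provided by GMHIKernel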
  bool verbose = true;
  int max_iterations = 500;

  vector< IterativeLinearSolver * > methods;

  ILSConjugateGradients *m = new ILSConjugateGradients(verbose, max_iterations);
  m->setTimeAnalysis ( true );
  methods.push_back ( m );

  for ( vector< IterativeLinearSolver * >::const_iterator i = methods.begin();
        i != methods.end(); i++ )
  {
    IterativeLinearSolver *method = *i;
    Vector sol (gmk.cols(), 0.0);
    method->solveLin ( gmk, yb, sol );
  }
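
  // solver built into FastMinKernel: iterative updates on random subsets of the examples,
  // returning the solution together with a quantized lookup table (Tlookup) built with q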
  Vector sol ( gmk.cols(), 0.0 );
  double *Tlookup = fmk.solveLin ( yb, sol, q, NULL, true /* useRandomSubsets */, 100 /* max iterations */, -1, 0.0, true );
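
  // greedy block coordinate descent: operates on explicit kernel sub-blocks, so the
  // features are first expanded into a dense array and wrapped by BlockHIK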
  int randomSetSize = conf.gI("main", "random_set_size", 60);
  int stepComponents = conf.gI("main", "step_components", 50);
  GBCDSolver gbcd ( randomSetSize, stepComponents, true );
  gbcd.setTimeAnalysis ( true );

  Vector sol_gbcd;
  double *cdata = createFlatData ( fmk.featureMatrix() );
  BlockHIK bhik ( cdata, fmk.get_n(), fmk.get_d(), noise );
  gbcd.solveLin ( bhik, yb, sol_gbcd );
  delete [] cdata;

  // clean up solver objects and the lookup table
  for ( vector< IterativeLinearSolver * >::const_iterator i = methods.begin(); i != methods.end(); i++ )
    delete *i;
  delete [] Tlookup;

  return 0;
}
#else

int main (int argc, char **argv)
{
  std::cerr << "MatIO library is missing in your system - this program will have no effect." << std::endl;
  return 0;
}

#endif