123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- /**
- * @file testImageNetBinaryGPBaseline.cpp
- * @brief perform ImageNet tests with binary classification
- * @author Erik Rodner
- * @date 01/04/2012
- */
- #include <core/basics/Config.h>
- #ifdef NICE_USELIB_MATIO
- #include <core/basics/Timer.h>
- #include <core/matlabAccess/MatFileIO.h>
- //----------
- #include <vislearning/baselib/ProgressBar.h>
- #include <vislearning/cbaselib/ClassificationResults.h>
- #include "vislearning/classifier/classifierbase/KernelClassifier.h"
- #include "vislearning/classifier/kernelclassifier/KCGPRegression.h"
- #include <vislearning/matlabAccessHighLevel/ImageNetData.h>
- //----------
- #include <gp-hik-core/tools.h>
- #include <gp-hik-core/kernels/IntersectionKernelFunction.h>
- using namespace std;
- using namespace NICE;
- using namespace OBJREC;
- /**
- test the basic functionality of fast-hik hyperparameter optimization
- */
- int main (int argc, char **argv)
- {
- std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
- Config conf ( argc, argv );
- string resultsfile = conf.gS("main", "results", "results.txt" );
- int positiveClass = conf.gI("main", "positive_class");
- cerr << "Positive class is " << positiveClass << endl;
- sparse_t data;
- NICE::Vector y;
- cerr << "Reading ImageNet data ..." << endl;
- bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
- string imageNetPath;
- if (imageNetLocal)
- imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
- else
- imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";
- ImageNetData imageNet ( imageNetPath + "demo/" );
- imageNet.getBatchData ( data, y, "train", "training" );
- uint n = y.size();
- //noise will be
- double noise(0.0);
-
- set<int> positives;
- set<int> negatives;
- map< int, set<int> > mysets;
- for ( uint i = 0 ; i < n; i++ )
- mysets[ y[i] ].insert ( i );
- if ( mysets[ positiveClass ].size() == 0 )
- fthrow(Exception, "Class " << positiveClass << " is not available.");
- // add our positive examples
- for ( set<int>::const_iterator i = mysets[positiveClass].begin(); i != mysets[positiveClass].end(); i++ )
- positives.insert ( *i );
- int Nneg = conf.gI("main", "nneg", 1 );
- for ( map<int, set<int> >::const_iterator k = mysets.begin(); k != mysets.end(); k++ )
- {
- int classno = k->first;
- if ( classno == positiveClass )
- continue;
- const set<int> & s = k->second;
- uint ind = 0;
- for ( set<int>::const_iterator i = s.begin(); (i != s.end() && ind < Nneg); i++,ind++ )
- negatives.insert ( *i );
- }
- cerr << "Number of positive examples: " << positives.size() << endl;
- cerr << "Number of negative examples: " << negatives.size() << endl;
-
- int nrExamplesForTraining(positives.size()+negatives.size());
-
- std::vector<NICE::SparseVector> dataMatrixSparse;
- dataMatrixSparse.resize(nrExamplesForTraining);
-
- std::cerr << "data matrix prepared" << std::endl;
-
- int dim(data.njc-1);
-
- NICE::Vector labelsTrain(nrExamplesForTraining,0);
-
- std::map<int,int> indices; // orig index, new index
-
- int counter(0);
- for ( int i = 0; i < dim; i++ ) //walk over dimensions
- {
- for ( int j = data.jc[i]; j < data.jc[i+1] && j < data.ndata; j++ ) //walk over single features, which are sparsely represented
- {
- int example_index = data.ir[ j];
- std::set<int>::const_iterator itPos = positives.find(example_index);
- std::set<int>::const_iterator itNeg = negatives.find(example_index);
- if ( itPos != positives.end() )
- {
- std::map<int,int>::const_iterator newPosition = indices.find(example_index);
-
- //feature already known from a different dimension
- if (newPosition != indices.end())
- dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
- //new feature, previous dimension where sparse for it
- else
- {
- indices.insert(pair<int,int>(example_index,counter));
- dataMatrixSparse[counter].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
-
- //set the label-vector to +1 for this feature
- labelsTrain[counter] = 1;
- counter++;
- }
-
- }
- else if ( itNeg != negatives.end())
- {
- std::map<int,int>::const_iterator newPosition = indices.find(example_index);
-
- //feature already known from a different dimension
- if (newPosition != indices.end())
- dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
- //new feature, previous dimension where sparse for it
- else
- {
- indices.insert(pair<int,int>(example_index,counter));
- dataMatrixSparse[counter].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
- //label vector already contains -1
- counter++;
- }
- }
- }
- }
-
- std::cerr << "data read completely" << std::endl;
-
- for (int i = 0; i < dataMatrixSparse.size(); i++)
- {
- dataMatrixSparse[i].setDim(dim);
- }
-
- std::cerr << "preparations done, start timing experiments" << std::endl;
-
- Timer t;
- t.start();
- //standard training comes here
- NICE::IntersectionKernelFunction<double> hik;
-
- std::cerr << "compute kernel matrix will be called" << std::endl;
- NICE::Matrix K (hik.computeKernelMatrix(dataMatrixSparse, noise));
- std::cerr << "kernel matrix succesfully computed" << std::endl;
-
- OBJREC::KCGPRegression classifier ( &conf);
- std::cerr << "start teaching" << std::endl;
-
- classifier.teach ( new KernelData ( &conf, K ), labelsTrain );
-
- t.stop();
- cerr << "Time used for training: " << t.getLast() << endl;
-
- //end of standard training
-
- // ------------------------------ TESTING ------------------------------
-
- cerr << "Reading ImageNet test data files (takes some seconds)..." << endl;
- imageNet.preloadData ( "val", "testing" );
- imageNet.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );
-
- ClassificationResults results;
- cerr << "Classification step ... with " << imageNet.getNumPreloadedExamples() << " examples" << endl;
- ProgressBar pb;
-
- NICE::Matrix confMat(2,2,0.0);
-
- for ( uint i = 0 ; i < (uint)imageNet.getNumPreloadedExamples(); i++ )
- {
- pb.update ( imageNet.getNumPreloadedExamples() );
- const SparseVector & svec = imageNet.getPreloadedExample ( i );
- t.start();
- // classification step
- Vector kernelVector = hik.computeKernelVector(dataMatrixSparse,svec);
- double kernelSelf = hik.measureDistance(svec, svec);
- ClassificationResult r = classifier.classifyKernel ( kernelVector, kernelSelf );
-
- t.stop();
- // cerr << i << " / " << (uint)imageNet.getNumPreloadedExamples() << " Time used for classifying a single example: " << t.getLast() << endl;
-
- // set ground truth label
- r.classno_groundtruth = (((int)imageNet.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
- results.push_back ( r );
-
- confMat( r.classno_groundtruth, r.classno ) += 1;
- }
-
- confMat.normalizeRowsL1();
- std::cerr << "confMat: " << confMat << std::endl;
- cerr << "average recognition rate: " << confMat.trace()/confMat.rows() << endl;
- cerr << "Writing results to " << resultsfile << endl;
- results.writeWEKA ( resultsfile, 0 );
- double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
- cerr << "Performance: " << perfvalue << endl;
- return 0;
- }
- #else
/**
 * Fallback entry point, compiled when NICE_USELIB_MATIO is not defined.
 * It only prints a notice to stderr and exits successfully.
 */
int main (int argc, char **argv)
{
  static const char * const missingLibraryNotice =
    "MatIO library is missing in your system - this program will have no effect. ";

  std::cerr << missingLibraryNotice << std::endl;
  return 0;
}
- #endif
|