/** 
* @file testImageNetBinaryGPBaseline.cpp
* @brief perform ImageNet tests with binary classification
* @author Erik Rodner
* @date 01/04/2012
*/

#include <core/basics/Config.h>
#include <core/basics/Timer.h>
#include <core/matlabAccess/MatFileIO.h>
//----------
#include <vislearning/cbaselib/ClassificationResults.h>
#include <vislearning/baselib/ProgressBar.h>
#include "vislearning/classifier/classifierbase/KernelClassifier.h"
#include "vislearning/classifier/kernelclassifier/KCGPRegression.h"
#include <vislearning/matlabAccessHighLevel/ImageNetData.h>
//----------
#include <gp-hik-core/tools.h>
#include <gp-hik-core/kernels/IntersectionKernelFunction.h>

using namespace std;
using namespace NICE;
using namespace OBJREC;

/** test a binary GP regression baseline with an explicitly computed histogram intersection kernel */
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");

  cerr << "Positive class is " << positiveClass << endl;

  sparse_t data;
  NICE::Vector y;
  cerr << "Reading ImageNet data ..." << endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal", false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNet ( imageNetPath + "demo/" );
  imageNet.getBatchData ( data, y, "train", "training" );

  uint n = y.size();

  // regularization noise added to the diagonal of the kernel matrix
  double noise(0.0);

  set<int> positives;
  set<int> negatives;

  // group the training example indices by their class label
  map< int, set<int> > mysets;
  for ( uint i = 0 ; i < n; i++ )
    mysets[ y[i] ].insert ( i );

  if ( mysets[ positiveClass ].size() == 0 )
    fthrow(Exception, "Class " << positiveClass << " is not available.");

  // add our positive examples
  for ( set<int>::const_iterator i = mysets[positiveClass].begin(); i != mysets[positiveClass].end(); i++ )
    positives.insert ( *i );

  // take at most Nneg negative examples from every other class
  int Nneg = conf.gI("main", "nneg", 1 );
  for ( map< int, set<int> >::const_iterator k = mysets.begin(); k != mysets.end(); k++ )
  {
    int classno = k->first;
    if ( classno == positiveClass )
      continue;
    const set<int> & s = k->second;
    uint ind = 0;
    for ( set<int>::const_iterator i = s.begin(); (i != s.end()) && (ind < (uint)Nneg); i++, ind++ )
      negatives.insert ( *i );
  }
  cerr << "Number of positive examples: " << positives.size() << endl;
  cerr << "Number of negative examples: " << negatives.size() << endl;

  int nrExamplesForTraining(positives.size()+negatives.size());

  std::vector<NICE::SparseVector> dataMatrixSparse;
  dataMatrixSparse.resize(nrExamplesForTraining);

  std::cerr << "data matrix prepared" << std::endl;

  int dim(data.njc-1);

  NICE::Vector labelsTrain(nrExamplesForTraining,0);

  std::map<int,int> indices; // orig index, new index

  int counter(0);
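  // The ImageNet training features come in MATLAB's compressed sparse column
  // (CSC) layout (sparse_t): entries jc[i] .. jc[i+1]-1 belong to dimension i,
  // ir[j] is the example (row) index of non-zero j, and ((double*)data.data)[j]
  // is its value. The loop below therefore walks dimension by dimension and
  // scatters each non-zero of a selected example into its NICE::SparseVector.
  //
  // A minimal consistency check (an added sketch, not part of the original
  // program; it assumes the usual CSC invariant that the last column pointer
  // equals the number of stored non-zeros):
  if ( (long)data.jc[ data.njc - 1 ] != (long)data.ndata )
    cerr << "Warning: CSC column pointers disagree with ndata." << endl;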
  for ( int i = 0; i < dim; i++ ) //walk over dimensions
  {
    for ( int j = data.jc[i]; j < data.jc[i+1] && j < data.ndata; j++ ) //walk over single features, which are sparsely represented
    {
      int example_index = data.ir[j];
      std::set<int>::const_iterator itPos = positives.find(example_index);
      std::set<int>::const_iterator itNeg = negatives.find(example_index);
      if ( itPos != positives.end() )
      {
        std::map<int,int>::const_iterator newPosition = indices.find(example_index);

        // example already known from a different dimension
        if ( newPosition != indices.end() )
          dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i, ((double*)data.data)[j]));
        // new example, all previous dimensions were sparse for it
        else
        {
          indices.insert(pair<int,int>(example_index,counter));
          dataMatrixSparse[counter].insert(pair<short,double>((short)i, ((double*)data.data)[j]));
          // set the label vector to +1 for this example
          labelsTrain[counter] = 1;
          counter++;
        }
      }
      else if ( itNeg != negatives.end() )
      {
        std::map<int,int>::const_iterator newPosition = indices.find(example_index);

        // example already known from a different dimension
        if ( newPosition != indices.end() )
          dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i, ((double*)data.data)[j]));
        // new example, all previous dimensions were sparse for it
        else
        {
          indices.insert(pair<int,int>(example_index,counter));
          dataMatrixSparse[counter].insert(pair<short,double>((short)i, ((double*)data.data)[j]));
          // the label vector entry keeps its initial value (0) for negative examples
          counter++;
        }
      }
    }
  }

  std::cerr << "data read completely" << std::endl;

  for ( uint i = 0; i < dataMatrixSparse.size(); i++ )
  {
    dataMatrixSparse[i].setDim(dim);
  }

  std::cerr << "preparations done, start timing experiments" << std::endl;

  Timer t;
  t.start();
  // standard training comes here
  NICE::IntersectionKernelFunction<double> hik;

  std::cerr << "compute kernel matrix will be called" << std::endl;
  NICE::Matrix K (hik.computeKernelMatrix(dataMatrixSparse, noise));
  std::cerr << "kernel matrix successfully computed" << std::endl;

  OBJREC::KCGPRegression classifier ( &conf );

  std::cerr << "start teaching" << std::endl;
  classifier.teach ( new KernelData ( &conf, K ), labelsTrain );

  t.stop();
  cerr << "Time used for training: " << t.getLast() << endl;
  // end of standard training

  // ------------------------------ TESTING ------------------------------

  cerr << "Reading ImageNet test data files (takes some seconds)..." << endl;
  imageNet.preloadData ( "val", "testing" );
  imageNet.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );

  ClassificationResults results;
  cerr << "Classification step ... with " << imageNet.getNumPreloadedExamples() << " examples" << endl;
  ProgressBar pb;

  NICE::Matrix confMat(2,2,0.0);

  for ( uint i = 0 ; i < (uint)imageNet.getNumPreloadedExamples(); i++ )
  {
    pb.update ( imageNet.getNumPreloadedExamples() );
    const SparseVector & svec = imageNet.getPreloadedExample ( i );

    t.start();
    // classification step: kernel values between the test example and all
    // training examples, plus the self-similarity k(x,x)
    Vector kernelVector = hik.computeKernelVector(dataMatrixSparse, svec);
    double kernelSelf = hik.measureDistance(svec, svec);
    ClassificationResult r = classifier.classifyKernel ( kernelVector, kernelSelf );
    t.stop();
    // cerr << i << " / " << (uint)imageNet.getNumPreloadedExamples() << " Time used for classifying a single example: " << t.getLast() << endl;

    // set the ground truth label
    r.classno_groundtruth = (((int)imageNet.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
    results.push_back ( r );
    confMat( r.classno_groundtruth, r.classno ) += 1;
  }

  confMat.normalizeRowsL1();
  std::cerr << "confMat: " << confMat << std::endl;
  cerr << "average recognition rate: " << confMat.trace()/confMat.rows() << endl;

  cerr << "Writing results to " << resultsfile << endl;
  results.writeWEKA ( resultsfile, 0 );
  double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  cerr << "Performance: " << perfvalue << endl;

  return 0;
}
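/*
  Usage sketch (an illustration, not part of the original file; it assumes the
  NICE::Config convention of passing "--<section>:<key> <value>" on the command
  line, and the class id and file names below are placeholders):

    ./testImageNetBinaryGPBaseline --main:positive_class 1 \
                                   --main:nneg 1 \
                                   --main:results results.txt

  Training computes the full histogram-intersection kernel matrix explicitly
  and hands it to OBJREC::KCGPRegression; testing reports the row-normalized
  confusion matrix, the average recognition rate, and the AUC obtained via
  ClassificationResults::getBinaryClassPerformance.
*/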