/**
* @file testImageNetBinaryGPBaseline.cpp
* @brief perform ImageNet tests with binary tasks for OCC using the baseline GP
* @author Alexander Lütz
* @date 29-05-2012 (dd-mm-yyyy)
*/
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "core/basics/Config.h"
#include "core/basics/Timer.h"
#include "core/vector/SparseVectorT.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/vector/Algorithms.h"

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "fast-hik/tools.h"
#include "fast-hik/MatFileIO.h"
#include "fast-hik/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

/**
* @brief Evaluate the Gaussian (RBF) kernel between two sparse feature vectors.
*
* Computes exp( -||a-b||^2 / (2*sigma^2) ). The squared Euclidean distance is
* accumulated with a merge-join over the two index-sorted sparse vectors, so
* each stored entry is visited exactly once.
*
* @param a     first sparse feature vector
* @param b     second sparse feature vector
* @param sigma kernel bandwidth (default 2.0)
* @return kernel value in (0, 1]
*/
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)//, const bool & verbose = false)
{
  double inner_sum(0.0);

  double d;

  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  // merge-join: both iterators advance in index order
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      // index present in both vectors: accumulate the squared difference
      d = ( aIt->second - bIt->second );
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      // index only in a: b is implicitly zero there
      inner_sum += aIt->second * aIt->second;
      aIt++;
    }
    else
    {
      // index only in b: a is implicitly zero there
      inner_sum += bIt->second * bIt->second;
      bIt++;
    }
  }

  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++;
  }
  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++;
  }

  inner_sum /= (2.0*sigma*sigma);

  return exp(-inner_sum); //expValue;
}

/**
* @brief Read up to @p size whitespace-separated numbers from a text file
*        into @p parameterVector.
*
* The vector is resized to @p size and zero-initialized first, so a file with
* fewer than @p size values leaves the remaining entries at 0.0.
*
* @param filename        path of the parameter file
* @param size            number of parameters expected (vector dimension)
* @param parameterVector output vector receiving the parsed values
* @throws IOException if the file cannot be opened
*/
void readParameters(const string & filename, const int & size, NICE::Vector & parameterVector)
{
  parameterVector.resize(size);
  parameterVector.set(0.0);

  ifstream is(filename.c_str());
  if ( !is.good() )
    fthrow(IOException, "Unable to read parameters.");
  //
  string tmp;
  int cnt(0);
  // NOTE: the previous loop condition "while (!is.eof())" processed the last
  // token twice and did not bound cnt, which could write past the end of
  // parameterVector. Testing the extraction result and limiting cnt to size
  // fixes both problems.
  while ( (cnt < size) && (is >> tmp) )
  {
    parameterVector[cnt] = atof(tmp.c_str());
    cnt++;
  }
  //
  is.close();
}

/**
    test the basic functionality of fast-hik hyperparameter optimization
*/
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  double kernelSigma = conf.gD("main", "kernelSigma", 2.0);
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  int nrOfClassesToConcidere = conf.gI("main", "nrOfClassesToConcidere", 1000);
  nrOfClassesToConcidere = std::min(nrOfClassesToConcidere, 1000); //we do not have more than 1000 classes

  // per-class kernel bandwidths and noise levels, read from text files
  string sigmaFile = conf.gS("main", "sigmaFile", "approxVarSigma.txt");
  string noiseFile = conf.gS("main", "noiseFile", "approxVarNoise.txt");

  NICE::Vector sigmaParas(nrOfClassesToConcidere,kernelSigma);
  NICE::Vector noiseParas(nrOfClassesToConcidere,0.0);

  readParameters(sigmaFile,nrOfClassesToConcidere, sigmaParas);
  readParameters(noiseFile,nrOfClassesToConcidere, noiseParas);

  // NOTE: the element type was lost in the original text ("std::vector trainingData;"
  // does not compile); it is restored here from the usage in measureDistance.
  std::vector<NICE::SparseVector> trainingData;
  NICE::Vector y;

  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();

  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;

  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );

  double OverallPerformance(0.0);

  // one-class-vs-rest: train and evaluate a separate GP baseline per class
  for (int cl = 0; cl < nrOfClassesToConcidere; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;
    int positiveClass = cl+1; // ImageNet ground-truth labels are 1-based

    // ------------------------------ TRAINING ------------------------------

    kernelSigma = sigmaParas[cl];

    std::cerr << "using sigma: " << kernelSigma << " and noise " << noiseParas[cl] << std::endl;
    Timer tTrain;
    tTrain.start();
    NICE::Matrix kernelMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);

    //now compute the kernelScores for every element
    // training examples of class cl occupy indices [cl*100, cl*100+nrOfExamplesPerClass);
    // only the upper triangle is computed and mirrored (kernel is symmetric)
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
        kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigma);//optimalParameters[cl]);
        kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
        if (i != j)
          kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
      }
    }

    //adding some noise, if necessary
    if (noiseParas[cl] != 0.0)
    {
      kernelMatrix.addIdentity(noiseParas[cl]);
    }
    else
    {
      //zero was already set
    }

    //compute its inverse
    //noise is already added :)
    Timer tTrainPrecise;
    tTrainPrecise.start();

    CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);

    NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
    cr.robustChol ( kernelMatrix, choleskyMatrix );

    tTrainPrecise.stop();
    std::cerr << "Precise time used for training class " << cl << ": " << tTrainPrecise.getLast() << std::endl;

    tTrain.stop();
    std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;

    std::cerr << "training done - now perform the evaluation" << std::endl;

    // ------------------------------ TESTING ------------------------------

    ClassificationResults results;
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
    ProgressBar pb;
    Timer tTest;
    tTest.start();
    Timer tTestSingle;
    double timeForSingleExamples(0.0);
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      //get the precomputed features
      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

      //compute (self-)similarities
      double kernelSelf (measureDistance(svec,svec, kernelSigma) );
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);

      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigma);
      }

      //compute the resulting score
      // predictive variance: k(x,x) - k_x^T K^{-1} k_x, solved via the
      // precomputed Cholesky factor
      tTestSingle.start();
      NICE::Vector rightPart (nrOfExamplesPerClass);
      choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
      double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
      tTestSingle.stop();
      timeForSingleExamples += tTestSingle.getLast();

      //this is the standard score-object needed for the evaluation
      FullVector scores ( 2 );
      scores[0] = 0.0;
      scores[1] = 1.0 - uncertainty; // low predictive variance => high membership score

      ClassificationResult r ( scores[1]<0.5 ? 0 : 1, scores );

      // set ground truth label
      r.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;

      //we could write the resulting score on the command line
//       std::cerr << "scores: " << std::endl;
//       scores >> std::cerr;
      //as well as the ground truth label
//       std::cerr << "gt: " << r.classno_groundtruth << " -- " << r.classno << std::endl;
      results.push_back ( r );
    }

    tTest.stop();
    std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;

    timeForSingleExamples/= imageNetTest.getNumPreloadedExamples();
    std::cerr << "Time used for evaluation single elements of class " << cl << " : " << timeForSingleExamples << std::endl;

    // we could also write the results to an external file. Note, that this file will be overwritten in every iteration
    // so if you want to store all results, you should add a suffix with the class number
//     std::cerr << "Writing results to " << resultsfile << std::endl;
//     results.writeWEKA ( resultsfile, 1 );
    double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

    std::cerr << "Performance: " << perfvalue << std::endl;

    OverallPerformance += perfvalue;
  }

  OverallPerformance /= nrOfClassesToConcidere;

  std::cerr << "overall performance: " << OverallPerformance << std::endl;

  return 0;
}