/** 
* @file testImageNetBinaryBruteForce.cpp
* @brief perform ImageNet tests with binary tasks for OCC
* @author Alexander Lütz
* @date 23-05-2012 (dd-mm-yyyy)
*/

#include <algorithm> // std::min
#include <cmath>     // exp, fabs
#include <exception> // std::set_terminate

#include "core/basics/Config.h"
#include "core/vector/SparseVectorT.h"

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "fast-hik/tools.h"
#include "fast-hik/MatFileIO.h"
#include "fast-hik/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

/**
* Despite its name, this function computes a Gaussian (RBF) kernel value
* k(a,b) = exp( -||a-b||^2 / (2*sigma^2) ), i.e., a similarity, not a distance.
*/
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0, const bool & verbose = false)
{
  double inner_sum(0.0);
  double d;

  // A first version iterated over a with lookups into b (via b.get) and then
  // swept b for the indices not already handled; it needed on average
  // 0.017988 s per test sample. The merge-based version below exploits that
  // both sparse vectors store their indices in sorted order and needs on
  // average only 0.001707 s per test sample.
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  // single merge pass over both sorted index lists to accumulate ||a-b||^2
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first) // index present in both vectors
    {
      d = ( aIt->second - bIt->second );
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first) // index only present in a
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;
    }
    else // index only present in b
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;
    }
  }

  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++;
  }
  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++;
  }

  if (verbose)
    std::cerr << "inner_sum before /= (2.0*sigma*sigma) " << inner_sum << std::endl;

  inner_sum /= (2.0*sigma*sigma);

  if (verbose)
    std::cerr << "inner_sum after /= (2.0*sigma*sigma) " << inner_sum << std::endl;

  double expValue = exp(-inner_sum);

  if (verbose)
    std::cerr << "resulting expValue " << expValue << std::endl;

  return expValue;
}
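/* Minimal sanity-check sketch for measureDistance (not part of the original
 * program; it assumes NICE::SparseVector exposes the std::map-style interface
 * that the iterator usage above implies). For sigma = 2.0 and the disjoint
 * vectors a = {0: 1.0}, b = {1: 1.0}, the exponent is
 * (1.0 + 1.0) / (2 * 2^2) = 0.25, so the kernel value is exp(-0.25) ~ 0.7788;
 * for any x, measureDistance(x, x) must return exp(0) = 1.
 */
void sanityCheckMeasureDistance ()
{
  NICE::SparseVector a;
  NICE::SparseVector b;
  a.insert( std::make_pair( 0, 1.0 ) ); // a = {0: 1.0}
  b.insert( std::make_pair( 1, 1.0 ) ); // b = {1: 1.0}

  std::cerr << "k(a,b) = " << measureDistance(a, b) << " (expected ~0.7788)" << std::endl;
  std::cerr << "k(a,a) = " << measureDistance(a, a) << " (expected 1.0)" << std::endl;
}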
/** 
    perform brute-force binary OCC tests on ImageNet (see file header)
*/
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");
  double noise = conf.gD("main", "noise", 0.01);
  double kernelSigma = conf.gD("main", "kernelSigma", 2.0);
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  int nrOfClassesToConcidere = conf.gI("main", "nrOfClassesToConcidere", 1000);
  nrOfClassesToConcidere = std::min(nrOfClassesToConcidere, 1000); // we do not have more than 1000 classes

  std::cerr << "Positive class is " << positiveClass << std::endl;

  std::vector<NICE::SparseVector> trainingData;
  NICE::Vector y;

  std::cerr << "Reading ImageNet data ..." << std::endl;

  bool imageNetLocal = conf.gB("main", "imageNetLocal", false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();

  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;

  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );

  double OverallPerformance(0.0);
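  /* For each class, a one-class model is built in a brute-force fashion:
   * the kernel matrix K of the class examples (Gaussian kernel, see
   * measureDistance) is approximated by the diagonal matrix D with
   *   D_ii = noise + sum_j K_ij   (row sums of K),
   * which is trivially invertible. A test example x* is then scored with
   *   score(x*) = 1 - ( k(x*,x*) - k*^T D^{-1} k* ),
   * where k* holds the kernel values between x* and the class examples; the
   * term in parentheses mimics a GP predictive variance with K replaced by D.
   * This reading follows directly from the code below.
   */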
  for (int cl = 0; cl < nrOfClassesToConcidere; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;

    // ------------------------------ TRAINING ------------------------------

    NICE::Vector matrixDInv (nrOfExamplesPerClass, 0.0);
    //compute D, starting with the noise term on the diagonal
    //(a no-op if noise is 0.0, since the vector is zero-initialized)
    matrixDInv.set(noise);

    std::cerr << "set matrixDInv to noise - now compute the scores for this special type of matrix" << std::endl;

    if ( cl == 0 )
    {
      std::cerr << "print first training example of class zero: " << std::endl;
      trainingData[0] >> std::cerr;
    }

    //now sum up all entries of each row in the original kernel matrix;
    //the training examples of class cl occupy the index block [cl*100, cl*100+nrOfExamplesPerClass)
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
      // if ( (i % 50) == 0) std::cerr << i << " / " << nrOfExamplesPerClass << std::endl;
      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
        // std::cerr << j << " / " << nrOfExamplesPerClass << std::endl;
        if ( (cl == 0) && (i == 0) )
          kernelScore = measureDistance(trainingData[i], trainingData[j], kernelSigma, true /*verbose*/);
        else
          kernelScore = measureDistance(trainingData[i], trainingData[j], kernelSigma);

        if (kernelScore == 0.0)
          std::cerr << "score of zero for examples " << i << " and " << j << std::endl;

        //exploit the symmetry of the kernel matrix: K_ij contributes to row i and row j
        matrixDInv[i-cl*100] += kernelScore;
        if (i != j)
          matrixDInv[j-cl*100] += kernelScore;
      }
    }

    std::cerr << "invert the main diagonal" << std::endl;

    //compute its inverse
    for (int i = 0; i < nrOfExamplesPerClass; i++)
    {
      matrixDInv[i] = 1.0 / matrixDInv[i];
    }

    std::cerr << "resulting D-Vector (or matrix :) ) " << std::endl;
    std::cerr << matrixDInv << std::endl;

    std::cerr << "training done - now perform the evaluation" << std::endl;
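    /* Complexity note (read off the loops above and below): training evaluates
     * the kernel for all pairs of the n = nrOfExamplesPerClass class examples,
     * i.e. n*(n+1)/2 calls to measureDistance per class; testing needs n kernel
     * evaluations per test example, plus one self-kernel value.
     */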
    // ------------------------------ TESTING ------------------------------

    ClassificationResults results;
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;

    ProgressBar pb;
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
      // SparseVector svec = imageNetTest.getPreloadedExample ( i );

      if ( i == 0 )
      {
        std::cerr << "print first test example: " << std::endl;
        std::cerr << "this is of class " << (int)imageNetTest.getPreloadedLabel ( i ) << std::endl;
        // svec >> std::cerr;
        svec.store(std::cerr);
      }

      //self-kernel value; for the Gaussian kernel this is always exp(0) = 1
      double kernelSelf ( measureDistance(svec, svec, kernelSigma) );

      //kernel values between the test example and all training examples of class cl
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100], svec, kernelSigma);
      }

      if ( i == 0 )
      {
        std::cerr << "print first kernel vector: " << kernelVector << std::endl;
      }

      //rightPart = D^{-1} k*
      NICE::Vector rightPart (nrOfExamplesPerClass);
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }

      //approximate predictive variance: k(x*,x*) - k*^T D^{-1} k*
      double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );

      FullVector scores ( 2 );
      scores[0] = 0.0;
      scores[1] = 1.0 - uncertainty;

      ClassificationResult r ( scores[1] < 0.5 ? 0 : 1, scores );

      //set ground truth label; note that this compares against the fixed
      //positiveClass from the config, not against the current class cl
      r.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;

      // std::cerr << "scores: " << std::endl;
      // scores >> std::cerr;
      // std::cerr << "gt: " << r.classno_groundtruth << " -- " << r.classno << std::endl;

      results.push_back ( r );
    }

    // std::cerr << "Writing results to " << resultsfile << std::endl;
    // results.writeWEKA ( resultsfile, 1 );

    double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
    std::cerr << "Performance: " << perfvalue << std::endl;

    OverallPerformance += perfvalue;
  }

  OverallPerformance /= nrOfClassesToConcidere;

  std::cerr << "overall performance: " << OverallPerformance << std::endl;

  return 0;
}
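/* Example configuration for this program (illustrative; the option names are
 * taken from the conf.gS/gI/gD/gB calls in main, the values shown are the
 * in-code defaults or placeholders -- positive_class has no default and must
 * be supplied):
 *
 * [main]
 * positive_class = 0
 * noise = 0.01
 * kernelSigma = 2.0
 * nrOfExamplesPerClass = 50
 * nrOfClassesToConcidere = 1000
 * imageNetLocal = false
 * results = results.txt
 */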