/** 
* @file testNullSpace.cpp
* @brief test function for class KCNullSpace
* @author Paul Bodesheim
* @date 28-11-2012 (dd-mm-yyyy)
*/

#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <fstream>
#include <exception>

#include "core/basics/Config.h"
#include "core/basics/Timer.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"

#include "vislearning/classifier/kernelclassifier/KCNullSpace.h"
#include "vislearning/math/kernels/KernelData.h"
#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "core/matlabAccess/MatFileIO.h"
#include "vislearning/matlabAccessHighLevel/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

// --------------- THE KERNEL FUNCTION ( exponential kernel with Euclidean distance ) ----------------------
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double inner_sum(0.0);
  double d;

  // new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  // compute the squared Euclidean distance between both feature vectors (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      d = ( aIt->second - bIt->second );
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;
    }
    else
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;
    }
  }

  // compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++;
  }

  // compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++;
  }

  // normalization of the exponent
  inner_sum /= (2.0*sigma*sigma);

  // finally, compute the RBF-kernel score (RBF = radial basis function)
  return exp(-inner_sum);
}

// --------------- THE KERNEL FUNCTION ( HIK ) ----------------------
double minimumDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b )
{
  double inner_sum(0.0);

  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  // compute the histogram intersection (sum of element-wise minima) of both feature vectors (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      inner_sum += std::min( aIt->second , bIt->second );
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      aIt++;
    }
    else
    {
      bIt++;
    }
  }

  return inner_sum;
}

/** test the basic functionality of the KCNullSpace classifier: multi-class novelty detection,
    multi-class classification, and one-class classification (OCC) on ImageNet demo data */
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 100);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class

  int maxKnownClass = conf.gI("KCNullSpace", "maxKnownClass", 5);
  int OCCsingleClassLabel = conf.gI("KCNullSpace", "OCCsingleClassLabel", 1);
  bool testVerbose = conf.gB("KCNullSpace", "verbose", false);
  std::cerr << "conf verbose: " << testVerbose << std::endl;
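
  // --- Illustrative toy check (not part of the original test) ---------------------------
  // A minimal sketch of how the two kernel functions above behave, assuming that
  // NICE::SparseVector provides the std::map-style element access its iterator interface
  // suggests. For an L1-normalized vector the HIK with itself equals 1.0; the indices and
  // values below are made up solely for illustration.
  if ( testVerbose )
  {
    NICE::SparseVector toyA;
    toyA[0] = 0.5;   // map-style access (assumption, see note above)
    toyA[3] = 0.5;

    NICE::SparseVector toyB;
    toyB[0] = 0.25;
    toyB[7] = 0.75;

    std::cerr << "toy HIK(a,a): " << minimumDistance ( toyA, toyA )    // sum of minima: 1.0
              << ", toy HIK(a,b): " << minimumDistance ( toyA, toyB )  // only index 0 overlaps: 0.25
              << ", toy RBF(a,b): " << measureDistance ( toyA, toyB )  // exp( -||a-b||^2 / (2*sigma^2) )
              << std::endl;
  }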

  // -------- read ImageNet data --------------
  std::vector<NICE::SparseVector> trainingData;
  NICE::Vector y;
  NICE::Vector yTest;

  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  imageNetTrain.normalizeData("L1");
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();

  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;

  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.normalizeData("L1");
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );
  yTest = imageNetTest.getPreloadedLabels();

  // ---------- SELECT TRAINING SET FOR MULTICLASS NOVELTY DETECTION AND COMPUTE KERNEL MATRIX ------------------------
  NICE::Vector knownClassLabels(maxKnownClass,0.0);
  for (int k=1; k<=maxKnownClass; k++)
    knownClassLabels(k-1) = k;

  std::vector<NICE::SparseVector> currentTrainingData;
  currentTrainingData.clear();
  NICE::Vector currentTrainingLabels(nrOfExamplesPerClass*knownClassLabels.size(),0);

  int kk(0);
  for (size_t i = 0; i < y.size(); i++)
  {
    for (size_t j = 0; j < knownClassLabels.size(); j++)
    {
      if ( (y[i] == knownClassLabels[j]) && (kk < (int)currentTrainingLabels.size()) )
      {
        currentTrainingLabels(kk) = knownClassLabels[j];
        currentTrainingData.push_back(trainingData[i]);
        kk++;
        break;
      }
    }
  }

  Timer tTrain;
  tTrain.start();

  // compute the kernel matrix of the training examples (HIK scores)
  NICE::Matrix kernelMatrix(nrOfExamplesPerClass*knownClassLabels.size(), nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
  double kernelScore(0.0);
  for (size_t i = 0; i < kernelMatrix.rows(); i++)
  {
    for (size_t j = i; j < kernelMatrix.cols(); j++)
    {
      kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]);
      kernelMatrix(i,j) = kernelScore;
      if (i != j)
        kernelMatrix(j,i) = kernelScore;
    }
  }
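
  // --- Alternative kernel (illustrative sketch, not used) --------------------------------
  // measureDistance() above implements an RBF kernel on the Euclidean distance, but this
  // test only ever uses the HIK (minimumDistance). If one wanted to try the RBF kernel
  // instead, the entry computation above could be swapped for something like
  //
  //   kernelScore = measureDistance(currentTrainingData[i], currentTrainingData[j], 2.0);
  //
  // (the sigma value is chosen arbitrarily here). The same kernel would then also have to
  // be used at test time below for kernelVector and kernelSelf, since KernelData and
  // KCNullSpace only see the resulting kernel values.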
with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl; ClassificationResults results; ProgressBar pb; Timer tTest; tTest.start(); for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ ) { if ( (i % 100)==0) pb.update ( imageNetTest.getNumPreloadedExamples()/100 ); const SparseVector & svec = imageNetTest.getPreloadedExample ( i ); // compute (self) similarities double kernelSelf (minimumDistance(svec,svec) ); NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0); for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++) { kernelVector[j] = minimumDistance(currentTrainingData[j],svec); } ClassificationResult r; r = knfst.noveltyDetection( kernelVector, kernelSelf); // set ground truth label r.classno_groundtruth = 0; for (size_t j=0; j maxKnownClass) { continue; } const SparseVector & svec = imageNetTest.getPreloadedExample ( i ); // compute (self) similarities double kernelSelf (minimumDistance(svec,svec) ); NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0); for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++) { kernelVector[j] = minimumDistance(currentTrainingData[j],svec); } ClassificationResult r; r = knfst.classifyKernel( kernelVector, kernelSelf); // set ground truth label for (uint j=0; j < knownClassLabels.size(); j++) { if (yTest[i] == knownClassLabels[j]) { r.classno_groundtruth = j; break; } } // remember the results for the evaluation lateron results.push_back ( r ); } tTest.stop(); std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl; timeForSingleExample = tTest.getLast()/imageNetTest.getNumPreloadedExamples(); std::cerr.precision(10); std::cerr << "time used for evaluation of single elements: " << timeForSingleExample << std::endl; // run the AUC-evaluation perfvalue = results.getAverageRecognitionRate(); std::cerr << " classification performance: " << perfvalue << std::endl; // ---------- SELECT TRAINING SET FOR ONECLASS CLASSIFICATION AND COMPUTE KERNEL MATRIX ------------------------ currentTrainingData.clear(); currentTrainingLabels.clear(); for (size_t i = 0; i < y.size(); i++) { if ( y[i] == OCCsingleClassLabel ) { currentTrainingLabels.append(OCCsingleClassLabel); currentTrainingData.push_back(trainingData[i]); } } tTrain.start(); //compute the kernel matrix NICE::Matrix kernelMatrixOCC(currentTrainingData.size(), currentTrainingData.size(), 0.0); std::cerr << "OCC Kernel Matrix: " << kernelMatrixOCC.rows() << " x " << kernelMatrixOCC.cols() << std::endl; for (size_t i = 0; i < kernelMatrixOCC.rows(); i++) { for (size_t j = i; j < kernelMatrixOCC.cols(); j++) { kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]); kernelMatrixOCC(i,j) = kernelScore; if (i != j) kernelMatrixOCC(j,i) = kernelScore; } } filebuf fb; fb.open("/home/bodesheim/experiments/kernelMatrixOCC.txt",ios::out); ostream os (&fb); os << kernelMatrixOCC; fb.close(); KernelData kernelDataOCC( &conf, kernelMatrixOCC, "Kernel", false ); // train the model std::cerr << "Train OCC model... 
" << std::endl; knfst.teach(&kernelDataOCC, currentTrainingLabels); tTrain.stop(); std::cerr << "Time used for training " << ": " << tTrain.getLast() << std::endl; // some outputs of training std::cerr << "training set statistic: " << std::endl; for (std::map::iterator itt = knfst.getTrainingSetStatistic()->begin(); itt != knfst.getTrainingSetStatistic()->end(); itt++) { std::cerr << "class label: " << (*itt).first << " number of class samples: " << (*itt).second << std::endl; } std::cerr << "one-class setting?: " << knfst.isOneClass() << std::endl; std::cerr << "null space dimension: "<< knfst.getNullSpaceDimension() << std::endl; std::cerr << "target points: " << std::endl; for (std::map::const_iterator it = knfst.getTargetPoints()->begin(); it != knfst.getTargetPoints()->end(); it++) std::cerr << (*it).second << std::endl; std::cerr << "training done - now perform the evaluation" << std::endl; // --------- TESTING OCC ------------------------------ std::cerr << "OCC... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl; results.clear(); tTest.start(); ProgressBar pb3; std::cerr << "start for loop" << std::endl; for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ ) { if ( (i % 100)==0) pb3.update ( imageNetTest.getNumPreloadedExamples()/100 ); const SparseVector & svec = imageNetTest.getPreloadedExample ( i ); //compute (self) similarities double kernelSelf (minimumDistance(svec,svec) ); NICE::Vector kernelVector (currentTrainingData.size(), 0.0); for (uint j = 0; j < currentTrainingData.size(); j++) { kernelVector[j] = minimumDistance(currentTrainingData[j],svec); } ClassificationResult r; r = knfst.noveltyDetection( kernelVector, kernelSelf); // set ground truth label r.classno_groundtruth = 0; if ( yTest[i] == OCCsingleClassLabel ) { r.classno_groundtruth = 1; } else { r.classno_groundtruth = 0; } //remember the results for the evaluation lateron results.push_back ( r ); } tTest.stop(); std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl; double timeForSingleExampleOCC = tTest.getLast()/imageNetTest.getNumPreloadedExamples(); std::cerr.precision(10); std::cerr << "time used for evaluation of single elements: " << timeForSingleExampleOCC << std::endl; // run the AUC-evaluation double perfvalueOCC = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC ); std::cerr << " occ performance: " << perfvalueOCC << std::endl; return 0; }