/**
 * @file testImageNetBinary.cpp
 * @brief perform ImageNet tests with binary classification
 * @author Erik Rodner
 * @date 01/04/2012
 */
#include <algorithm>
#include <cstddef>
#include <exception>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "core/basics/Config.h"

#ifdef NICE_USELIB_MATIO

#include "core/algebra/IterativeLinearSolver.h"
#include "core/algebra/PartialGenericMatrix.h"
#include "core/algebra/GBCDSolver.h"
#include "core/algebra/ILSConjugateGradients.h"

// NOTE(review): five angle-bracket #include targets were lost from this copy
// of the file (only the bare "#include" tokens survived). The five project
// headers written with angle brackets below are reconstructed from the
// identifiers this file uses (sparse_t, ImageNetData, FastMinKernel,
// GMHIKernel, Quantization) -- confirm the exact paths against the build tree.
#include <core/matlabAccess/MatFileIO.h>

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include <vislearning/matlabAccessHighLevel/ImageNetData.h>
#include <gp-hik-core/FastMinKernel.h>
#include <gp-hik-core/GMHIKernel.h>
#include <gp-hik-core/Quantization.h>

using namespace std;
using namespace NICE;
using namespace OBJREC;

/**
 * @brief select a binary (one-vs-rest) subset of a multi-class problem
 *
 * All examples of the class given by the config value main:positive_class
 * become positives (label +1). From every other class at most main:nneg
 * (default 1) examples -- those with the smallest indices -- become negatives
 * (label -1).
 *
 * @param conf     configuration providing main:positive_class and main:nneg
 * @param y        multi-class labels, one entry per example
 * @param examples output: maps the index of each selected example in y to its
 *                 position in yb; expected to be empty on entry
 * @param yb       output: binary labels (+1 / -1) of the selected examples,
 *                 ordered by increasing example index
 *
 * @throws Exception if no example carries the positive class label
 */
void selectExamples ( const Config *conf, const Vector & y, map<int, int> & examples, Vector & yb )
{
  const int positiveClass = conf->gI("main", "positive_class");
  const int n = static_cast<int>( y.size() );

  // group the example indices by their class label (labels are truncated to int)
  map< int, set<int> > mysets;
  for ( int i = 0 ; i < n; i++ )
    mysets[ static_cast<int>( y[i] ) ].insert ( i );

  // find() instead of operator[] so that a missing class is not inserted as an empty set
  map< int, set<int> >::const_iterator posIt = mysets.find ( positiveClass );
  if ( posIt == mysets.end() || posIt->second.empty() )
    fthrow(Exception, "Class " << positiveClass << " is not available.");

  // all examples of the positive class are used
  const set<int> & positives = posIt->second;

  // from every other class take the first Nneg examples
  const int Nneg = conf->gI("main", "nneg", 1 );
  set<int> negatives;
  for ( map< int, set<int> >::const_iterator k = mysets.begin(); k != mysets.end(); k++ )
  {
    if ( k->first == positiveClass )
      continue;

    const set<int> & s = k->second;
    // The comparison is done unsigned, exactly as before: a negative nneg wraps
    // around to a large value and therefore keeps every example of the class.
    uint taken = 0;
    for ( set<int>::const_iterator i = s.begin(); i != s.end() && taken < static_cast<uint>(Nneg); i++, taken++ )
      negatives.insert ( *i );
  }

  cerr << "Number of positive examples: " << positives.size() << endl;
  cerr << "Number of negative examples: " << negatives.size() << endl;

  // yb is first made large enough for every example and shrunk afterwards
  yb.resize(y.size());
  int ind = 0;
  for ( int i = 0 ; i < n; i++ )
  {
    double label;
    if ( positives.find(i) != positives.end() )
      label = 1.0;
    else if ( negatives.find(i) != negatives.end() )
      label = -1.0;
    else
      continue;

    // the same counter is used for the position in yb and for the value stored in the map
    yb[ ind ] = label;
    examples.insert( pair<int, int> ( i, ind ) );
    ind++;
  }
  yb.resize( ind );

  cerr << "Examples: " << examples.size() << endl;
}

/**
 * @brief histogram intersection kernel matrix (plus noise on the diagonal)
 *        that is only ever evaluated block-wise
 *
 * The features are given as a flat row-major array: the value of dimension
 * dim of example i is stored at data[i * d + dim]. The array is not copied and
 * not owned; it has to outlive this object.
 */
class BlockHIK : public PartialGenericMatrix
{
  protected:
    const double *data;
    int n;
    int d;
    double noise;
    Vector diag;

    /** position of (example, dim) in the flat array, computed in size_t so that
        large n * d products do not overflow 32-bit arithmetic */
    size_t offset ( size_t example, size_t dim ) const
    {
      return example * static_cast<size_t>(d) + dim;
    }

  public:

    /**
     * @param data  flat row-major feature array with n * d entries
     * @param n     number of examples
     * @param d     number of feature dimensions
     * @param noise value added to the diagonal of the kernel matrix
     */
    BlockHIK ( const double *data, int n, int d, double noise )
    {
      this->data = data;
      this->n = n;
      this->d = d;
      this->noise = noise;

      // min(x,x) = x, hence the noise-free diagonal entry of example i is the sum of its features
      diag.resize(n);
      for ( int i = 0 ; i < n ; i++ )
      {
        double sum = 0.0;
        for ( int dim = 0 ; dim < d ; dim++ )
          sum += data[ offset(i, dim) ];
        diag[i] = sum;
      }
    }

    /** multiply a sub-matrix with a given vector: Asub * xsub = ysub
     *
     * @param rowSet    indices of the examples forming the rows of the block
     * @param columnSet indices of the examples forming the columns of the block
     * @param y         output, resized to rowSet.size()
     * @param x         vector the block is multiplied with
     *
     * @throws Exception if one of the two sets is empty
     */
    virtual void multiply ( const SetType & rowSet, const SetType & columnSet, NICE::Vector & y, const NICE::Vector & x) const
    {
      if ( rowSet.size() == 0 || columnSet.size() == 0 )
        fthrow(Exception, "Sets are zero ...weird" );

      Matrix K;
      K.resize(rowSet.size(), columnSet.size());
      K.set(0.0);

      const int dimension = d;
      // the noise is spread evenly over the dimensions and added once per dimension
      const double noisePerDimension = noise / dimension;

      // Each block entry sums the min-values over all dimensions. The dimension
      // loop is innermost so that both feature rows are read contiguously; the
      // order of the additions per entry is the same as with an outer dimension loop.
      int indi = 0;
      for ( SetType::const_iterator i = rowSet.begin(); i != rowSet.end(); i++, indi++ )
      {
        const int myi = *i;
        int indj = 0;
        for ( SetType::const_iterator j = columnSet.begin(); j != columnSet.end(); j++, indj++ )
        {
          const int myj = *j;
          // diagonal entries are recognised by equal example indices, not by equal block positions
          const bool onDiagonal = ( myi == myj );

          double entry = 0.0;
          for ( int dim = 0; dim < dimension; dim++ )
          {
            entry += std::min ( data[ offset(myj, dim) ], data[ offset(myi, dim) ] );
            if ( onDiagonal )
              entry += noisePerDimension;
          }
          K(indi, indj) += entry;
        }
      }

      y.resize( rowSet.size() );
      y = K*x;
    }

    /** multiply with a vector: A*x = y (deliberately unsupported, always throws) */
    virtual void multiply (NICE::Vector & y, const NICE::Vector & x) const
    {
      fthrow(Exception, "You do not really want to compute kernel matrices as big as this one!");
    }

    /** diagonal element i of the kernel matrix including the noise term */
    virtual double getDiagonalElement ( uint i ) const
    {
      return diag[i] + noise;
    }

    virtual uint rows() const
    {
      return n;
    }

    virtual uint cols() const
    {
      return n;
    }
};

/**
 * @brief convert a feature matrix into a dense, flat row-major array
 *
 * @param f feature matrix with f.get_n() examples and f.get_d() dimensions
 * @return newly allocated array with n * d entries in which the value of
 *         dimension dim of example i is stored at [i * d + dim]; entries without
 *         a non-zero element are 0. The caller has to release it with delete [].
 */
double *createFlatData ( const FeatureMatrix & f )
{
  const int n = f.get_n();
  const int d = f.get_d();
  const size_t stride = static_cast<size_t>(d);

  // size computed in size_t; the trailing () value-initializes every entry to 0.0
  double *data = new double [ static_cast<size_t>(n) * stride ]();

  for (int dim = 0; dim < d; dim++)
  {
    const multimap< double, SortedVectorSparse<double>::dataelement> & nonzeroElements = f.getFeatureValues(dim).nonzeroElements();
    int nrZeroIndices = f.getNumberOfZeroElementsPerDimension(dim);

    // nothing to copy if every example is zero in this dimension
    if ( nrZeroIndices == n )
      continue;

    for ( multimap< double, SortedVectorSparse<double>::dataelement>::const_iterator i = nonzeroElements.begin(); i != nonzeroElements.end(); i++)
    {
      const SortedVectorSparse<double>::dataelement & de = i->second;
      // de.first is used as the example index, de.second as the feature value
      const size_t feat = static_cast<size_t>( de.first );
      const double fval = de.second;
      data[ feat * stride + static_cast<size_t>(dim) ] = fval;
    }
  }

  return data;
}

/**
    test the basic functionality of fast-hik hyperparameter optimization
*/
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");

  cerr << "Positive class is " << positiveClass << endl;

  sparse_t data;
  NICE::Vector y;

  cerr << "Reading ImageNet data ..." << endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNet ( imageNetPath + "demo/" );

  imageNet.getBatchData ( data, y, "train", "training" );

  // reduce the multi-class problem to positive class vs. a few negatives per class
  map<int, int> examples;
  Vector yb;
  selectExamples ( &conf, y, examples, yb );

  double noise = conf.gD("main", "noise", 10);
  int dimension = conf.gI("main", "dimension", 1000);
  int numBins = conf.gI("main", "num_bins", 100);
  Quantization q ( numBins );
  FastMinKernel fmk ( data, noise, examples, dimension );

  GMHIKernel gmk ( &fmk );

  // (1) generic iterative solvers working on the implicit kernel matrix
  bool verbose = true;
  int max_iterations = 500;
  vector< IterativeLinearSolver * > methods;

  ILSConjugateGradients *m = new ILSConjugateGradients(verbose, max_iterations);
  m->setTimeAnalysis ( true );
  methods.push_back ( m );

  for ( vector< IterativeLinearSolver * >::const_iterator i = methods.begin();
        i != methods.end(); i++ )
  {
    IterativeLinearSolver *method = *i;
    Vector sol (gmk.cols(), 0.0);
    method->solveLin ( gmk, yb, sol );
  }

  // the solver was allocated with new above and is not used any further;
  // it is released through its concrete type
  methods.clear();
  delete m;

  // (2) solver built into the fast min kernel
  Vector sol ( gmk.cols(), 0.0 );
  // NOTE(review): the returned table is never used or released here. If
  // solveLin hands ownership to the caller it leaks -- confirm against the
  // FastMinKernel documentation and add the matching delete [].
  double *Tlookup = fmk.solveLin( yb, sol, q, NULL, true /* useRandomSubsets */, 100 /* max iterations */, -1, 0.0, true);

  // (3) greedy block coordinate descent on an explicit flat copy of the features
  int randomSetSize = conf.gI("main", "random_set_size", 60);
  int stepComponents = conf.gI("main", "step_components", 50);
  GBCDSolver gbcd ( randomSetSize, stepComponents, true );
  gbcd.setTimeAnalysis(true);
  Vector sol_gbcd;
  double *cdata = createFlatData ( fmk.featureMatrix() );
  BlockHIK bhik ( cdata, fmk.get_n(), fmk.get_d(), noise );
  gbcd.solveLin ( bhik, yb, sol_gbcd );

  delete [] cdata;

  return 0;
}

#else

int main (int argc, char **argv)
{
  std::cerr << "MatIO library is missing in your system - this program will have no effect. " << std::endl;
  return 0;
}

#endif