// testImageNetBinaryGPBaseline.cpp
  1. /**
  2. * @file testImageNetBinaryGPBaseline.cpp
  3. * @brief perform ImageNet tests with binary classification
  4. * @author Erik Rodner
  5. * @date 01/04/2012
  6. */
  7. #include <core/basics/Config.h>
  8. #include <core/basics/Timer.h>
  9. #include <core/matlabAccess/MatFileIO.h>
  10. //----------
  11. #include <vislearning/baselib/ProgressBar.h>
  12. #include <vislearning/cbaselib/ClassificationResults.h>
  13. #include "vislearning/classifier/classifierbase/KernelClassifier.h"
  14. #include "vislearning/classifier/kernelclassifier/KCGPRegression.h"
  15. #include <vislearning/matlabAccessHighLevel/ImageNetData.h>
  16. //----------
  17. #include <gp-hik-core/tools.h>
  18. #include <gp-hik-core/kernels/IntersectionKernelFunction.h>
  19. using namespace std;
  20. using namespace NICE;
  21. using namespace OBJREC;
/**
 * Baseline experiment: binary one-vs-rest classification on ImageNet using
 * GP regression with an explicitly computed intersection (HIK) kernel matrix
 * (i.e. without the fast-hik approximations).
 */
/**
 * Entry point: trains a binary GP-regression classifier on a subset of
 * ImageNet (one positive class vs. sampled negatives) using an explicitly
 * computed intersection-kernel matrix, then evaluates on the validation
 * split and reports a confusion matrix, recognition rate, and AUC.
 *
 * Config options read (section "main"):
 *   results        - output file for WEKA-style results (default "results.txt")
 *   positive_class - ImageNet class id treated as the positive class (required)
 *   imageNetLocal  - if true, read the dataset from the local user path
 *   nneg           - number of negative examples taken per remaining class (default 1)
 */
int main (int argc, char **argv)
{
  // Print a verbose message (type + what()) instead of a bare abort when an
  // exception escapes; GNU libstdc++-specific handler.
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");

  cerr << "Positive class is " << positiveClass << endl;

  sparse_t data;
  NICE::Vector y;

  cerr << "Reading ImageNet data ..." << endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  // Load the training batch; `data` is a Matlab-style sparse matrix
  // (jc = column pointers, ir = row indices, data = values) and `y` holds
  // the class label of each example.
  ImageNetData imageNet ( imageNetPath + "demo/" );
  imageNet.getBatchData ( data, y, "train", "training" );
  uint n = y.size();

  // Noise added to the kernel-matrix diagonal (regularization); none here.
  double noise(0.0);

  set<int> positives;
  set<int> negatives;

  // Group the original example indices by class label.
  map< int, set<int> > mysets;
  for ( uint i = 0 ; i < n; i++ )
    mysets[ y[i] ].insert ( i );

  if ( mysets[ positiveClass ].size() == 0 )
    fthrow(Exception, "Class " << positiveClass << " is not available.");

  // add our positive examples
  for ( set<int>::const_iterator i = mysets[positiveClass].begin(); i != mysets[positiveClass].end(); i++ )
    positives.insert ( *i );

  // Take the first nneg examples of every other class as negatives.
  int Nneg = conf.gI("main", "nneg", 1 );
  for ( map<int, set<int> >::const_iterator k = mysets.begin(); k != mysets.end(); k++ )
  {
    int classno = k->first;
    if ( classno == positiveClass )
      continue;
    const set<int> & s = k->second;
    uint ind = 0;
    for ( set<int>::const_iterator i = s.begin(); (i != s.end() && ind < Nneg); i++,ind++ )
      negatives.insert ( *i );
  }
  cerr << "Number of positive examples: " << positives.size() << endl;
  cerr << "Number of negative examples: " << negatives.size() << endl;

  int nrExamplesForTraining(positives.size()+negatives.size());

  // One sparse feature vector per selected training example.
  std::vector<NICE::SparseVector> dataMatrixSparse;
  dataMatrixSparse.resize(nrExamplesForTraining);

  std::cerr << "data matrix prepared" << std::endl;

  // Number of feature dimensions: jc has dim+1 column pointers.
  int dim(data.njc-1);

  // Initialized to 0 (negative); entries of selected positives are set to 1
  // in the loop below.
  NICE::Vector labelsTrain(nrExamplesForTraining,0);

  std::map<int,int> indices; // orig index, new index
  int counter(0);
  // Walk the sparse matrix column-by-column (i.e. dimension-by-dimension) and
  // copy the entries belonging to selected examples into dataMatrixSparse,
  // remapping each original example index to a compact training index the
  // first time it is seen.
  for ( int i = 0; i < dim; i++ ) //walk over dimensions
  {
    for ( int j = data.jc[i]; j < data.jc[i+1] && j < data.ndata; j++ ) //walk over single features, which are sparsely represented
    {
      int example_index = data.ir[ j];
      std::set<int>::const_iterator itPos = positives.find(example_index);
      std::set<int>::const_iterator itNeg = negatives.find(example_index);
      if ( itPos != positives.end() )
      {
        std::map<int,int>::const_iterator newPosition = indices.find(example_index);
        //example already known from a different dimension -> append this entry
        if (newPosition != indices.end())
          dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
        //new example: all previous dimensions were (sparse) zero for it
        else
        {
          indices.insert(pair<int,int>(example_index,counter));
          dataMatrixSparse[counter].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
          //set the label-vector to +1 for this example
          labelsTrain[counter] = 1;
          counter++;
        }
      }
      else if ( itNeg != negatives.end())
      {
        std::map<int,int>::const_iterator newPosition = indices.find(example_index);
        //example already known from a different dimension -> append this entry
        if (newPosition != indices.end())
          dataMatrixSparse[newPosition->second].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
        //new example: all previous dimensions were (sparse) zero for it
        else
        {
          indices.insert(pair<int,int>(example_index,counter));
          dataMatrixSparse[counter].insert(pair<short,double>((short)i , ((double*)data.data)[j]));
          //label vector already contains -1
          counter++;
        }
      }
    }
  }
  std::cerr << "data read completely" << std::endl;

  // Fix the nominal dimensionality of every sparse vector (entries beyond the
  // last non-zero would otherwise be unknown to the kernel computation).
  for (int i = 0; i < dataMatrixSparse.size(); i++)
  {
    dataMatrixSparse[i].setDim(dim);
  }

  std::cerr << "preparations done, start timing experiments" << std::endl;

  Timer t;
  t.start();
  //standard training comes here
  NICE::IntersectionKernelFunction<double> hik;

  std::cerr << "compute kernel matrix will be called" << std::endl;
  // Dense n x n HIK kernel matrix with `noise` added on the diagonal.
  NICE::Matrix K (hik.computeKernelMatrix(dataMatrixSparse, noise));
  std::cerr << "kernel matrix succesfully computed" << std::endl;

  OBJREC::KCGPRegression classifier ( &conf);
  std::cerr << "start teaching" << std::endl;
  // NOTE(review): KernelData is handed over as a raw owning pointer;
  // presumably the classifier takes ownership -- verify, otherwise this leaks.
  classifier.teach ( new KernelData ( &conf, K ), labelsTrain );
  t.stop();
  cerr << "Time used for training: " << t.getLast() << endl;
  //end of standard training

  // ------------------------------ TESTING ------------------------------
  cerr << "Reading ImageNet test data files (takes some seconds)..." << endl;
  imageNet.preloadData ( "val", "testing" );
  imageNet.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );

  ClassificationResults results;
  cerr << "Classification step ... with " << imageNet.getNumPreloadedExamples() << " examples" << endl;
  ProgressBar pb;
  // 2x2 confusion matrix: rows = ground truth, columns = prediction.
  NICE::Matrix confMat(2,2,0.0);
  for ( uint i = 0 ; i < (uint)imageNet.getNumPreloadedExamples(); i++ )
  {
    pb.update ( imageNet.getNumPreloadedExamples() );
    const SparseVector & svec = imageNet.getPreloadedExample ( i );
    t.start();

    // classification step: GP regression only needs the kernel values between
    // the test example and all training examples plus its self-similarity.
    Vector kernelVector = hik.computeKernelVector(dataMatrixSparse,svec);
    double kernelSelf = hik.measureDistance(svec, svec);
    ClassificationResult r = classifier.classifyKernel ( kernelVector, kernelSelf );
    t.stop();
    // cerr << i << " / " << (uint)imageNet.getNumPreloadedExamples() << " Time used for classifying a single example: " << t.getLast() << endl;

    // set ground truth label (1 = positive class, 0 = everything else)
    r.classno_groundtruth = (((int)imageNet.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
    results.push_back ( r );
    confMat( r.classno_groundtruth, r.classno ) += 1;
  }
  // Row-normalize so diagonal entries become per-class recognition rates.
  confMat.normalizeRowsL1();
  std::cerr << "confMat: " << confMat << std::endl;
  cerr << "average recognition rate: " << confMat.trace()/confMat.rows() << endl;

  cerr << "Writing results to " << resultsfile << endl;
  results.writeWEKA ( resultsfile, 0 );
  double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  cerr << "Performance: " << perfvalue << endl;

  return 0;
}