testImageNetBinaryGPBaseline.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /**
  2. * @file testImageNetBinaryGPBaseline.cpp
  3. * @brief perform ImageNet tests with binary tasks for OCC using the baseline GP
  4. * @author Alexander Lütz
  5. * @date 29-05-2012 (dd-mm-yyyy)
  6. */
  7. #include <iostream>
  8. #include "core/basics/Config.h"
  9. #ifdef NICE_USELIB_MATIO
  10. #include "core/basics/Timer.h"
  11. #include "core/vector/SparseVectorT.h"
  12. #include "core/algebra/CholeskyRobust.h"
  13. #include "core/vector/Algorithms.h"
  14. #include "vislearning/cbaselib/ClassificationResults.h"
  15. #include "vislearning/baselib/ProgressBar.h"
  16. #include "core/matlabAccess/MatFileIO.h"
  17. #include "vislearning/matlabAccessHighLevel/ImageNetData.h"
  18. // #include "fast-hik/tools.h"
  19. using namespace std;
  20. using namespace NICE;
  21. using namespace OBJREC;
  22. double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)//, const bool & verbose = false)
  23. {
  24. double inner_sum(0.0);
  25. double d;
  26. //new version, where we needed on average 0.001707 s for each test sample
  27. NICE::SparseVector::const_iterator aIt = a.begin();
  28. NICE::SparseVector::const_iterator bIt = b.begin();
  29. while ( (aIt != a.end()) && (bIt != b.end()) )
  30. {
  31. if (aIt->first == bIt->first)
  32. {
  33. d = ( aIt->second - bIt->second );
  34. inner_sum += d * d;
  35. aIt++;
  36. bIt++;
  37. }
  38. else if ( aIt->first < bIt->first)
  39. {
  40. inner_sum += aIt->second * aIt->second;
  41. aIt++;
  42. }
  43. else
  44. {
  45. inner_sum += bIt->second * bIt->second;
  46. bIt++;
  47. }
  48. }
  49. //compute remaining values, if b reached the end but not a
  50. while (aIt != a.end())
  51. {
  52. inner_sum += aIt->second * aIt->second;
  53. aIt++;
  54. }
  55. //compute remaining values, if a reached the end but not b
  56. while (bIt != b.end())
  57. {
  58. inner_sum += bIt->second * bIt->second;
  59. bIt++;
  60. }
  61. inner_sum /= (2.0*sigma*sigma);
  62. return exp(-inner_sum); //expValue;
  63. }
  64. void readParameters(const string & filename, const int & size, NICE::Vector & parameterVector)
  65. {
  66. parameterVector.resize(size);
  67. parameterVector.set(0.0);
  68. ifstream is(filename.c_str());
  69. if ( !is.good() )
  70. fthrow(IOException, "Unable to read parameters.");
  71. //
  72. string tmp;
  73. int cnt(0);
  74. while (! is.eof())
  75. {
  76. is >> tmp;
  77. parameterVector[cnt] = atof(tmp.c_str());
  78. cnt++;
  79. }
  80. //
  81. is.close();
  82. }
  83. /**
  84. test the basic functionality of fast-hik hyperparameter optimization
  85. */
  86. int main (int argc, char **argv)
  87. {
  88. std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  89. Config conf ( argc, argv );
  90. string resultsfile = conf.gS("main", "results", "results.txt" );
  91. double kernelSigma = conf.gD("main", "kernelSigma", 2.0);
  92. int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  93. nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  94. int nrOfClassesToConcidere = conf.gI("main", "nrOfClassesToConcidere", 1000);
  95. nrOfClassesToConcidere = std::min(nrOfClassesToConcidere, 1000); //we do not have more than 1000 classes
  96. string sigmaFile = conf.gS("main", "sigmaFile", "approxVarSigma.txt");
  97. string noiseFile = conf.gS("main", "noiseFile", "approxVarNoise.txt");
  98. NICE::Vector sigmaParas(nrOfClassesToConcidere,kernelSigma);
  99. NICE::Vector noiseParas(nrOfClassesToConcidere,0.0);
  100. readParameters(sigmaFile,nrOfClassesToConcidere, sigmaParas);
  101. readParameters(noiseFile,nrOfClassesToConcidere, noiseParas);
  102. std::vector<SparseVector> trainingData;
  103. NICE::Vector y;
  104. std::cerr << "Reading ImageNet data ..." << std::endl;
  105. bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  106. string imageNetPath;
  107. if (imageNetLocal)
  108. imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  109. else
  110. imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";
  111. ImageNetData imageNetTrain ( imageNetPath + "demo/" );
  112. imageNetTrain.preloadData( "train", "training" );
  113. trainingData = imageNetTrain.getPreloadedData();
  114. y = imageNetTrain.getPreloadedLabels();
  115. std::cerr << "Reading of training data finished" << std::endl;
  116. std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  117. std::cerr << "y.size(): " << y.size() << std::endl;
  118. std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  119. ImageNetData imageNetTest ( imageNetPath + "demo/" );
  120. imageNetTest.preloadData ( "val", "testing" );
  121. imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );
  122. double OverallPerformance(0.0);
  123. for (int cl = 0; cl < nrOfClassesToConcidere; cl++)
  124. {
  125. std::cerr << "run for class " << cl << std::endl;
  126. int positiveClass = cl+1;
  127. // ------------------------------ TRAINING ------------------------------
  128. kernelSigma = sigmaParas[cl];
  129. std::cerr << "using sigma: " << kernelSigma << " and noise " << noiseParas[cl] << std::endl;
  130. Timer tTrain;
  131. tTrain.start();
  132. NICE::Matrix kernelMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
  133. //now compute the kernelScores for every element
  134. double kernelScore(0.0);
  135. for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
  136. {
  137. for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
  138. {
  139. kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigma);//optimalParameters[cl]);
  140. kernelMatrix(i-cl*100,j-cl*100) = kernelScore;
  141. if (i != j)
  142. kernelMatrix(j-cl*100,i-cl*100) = kernelScore;
  143. }
  144. }
  145. //adding some noise, if necessary
  146. if (noiseParas[cl] != 0.0)
  147. {
  148. kernelMatrix.addIdentity(noiseParas[cl]);
  149. }
  150. else
  151. {
  152. //zero was already set
  153. }
  154. //compute its inverse
  155. //noise is already added :)
  156. Timer tTrainPrecise;
  157. tTrainPrecise.start();
  158. CholeskyRobust cr ( false /* verbose*/, 0.0 /*noiseStep*/, false /* useCuda*/);
  159. NICE::Matrix choleskyMatrix (nrOfExamplesPerClass, nrOfExamplesPerClass, 0.0);
  160. cr.robustChol ( kernelMatrix, choleskyMatrix );
  161. tTrainPrecise.stop();
  162. std::cerr << "Precise time used for training class " << cl << ": " << tTrainPrecise.getLast() << std::endl;
  163. tTrain.stop();
  164. std::cerr << "Time used for training class " << cl << ": " << tTrain.getLast() << std::endl;
  165. std::cerr << "training done - now perform the evaluation" << std::endl;
  166. // ------------------------------ TESTING ------------------------------
  167. ClassificationResults results;
  168. std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
  169. ProgressBar pb;
  170. Timer tTest;
  171. tTest.start();
  172. Timer tTestSingle;
  173. double timeForSingleExamples(0.0);
  174. for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  175. {
  176. pb.update ( imageNetTest.getNumPreloadedExamples() );
  177. //get the precomputed features
  178. const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
  179. //compute (self-)similarities
  180. double kernelSelf (measureDistance(svec,svec, kernelSigma) );
  181. NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);
  182. for (int j = 0; j < nrOfExamplesPerClass; j++)
  183. {
  184. kernelVector[j] = measureDistance(trainingData[j+cl*100],svec, kernelSigma);
  185. }
  186. //compute the resulting score
  187. tTestSingle.start();
  188. NICE::Vector rightPart (nrOfExamplesPerClass);
  189. choleskySolveLargeScale ( choleskyMatrix, kernelVector, rightPart );
  190. double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );
  191. tTestSingle.stop();
  192. timeForSingleExamples += tTestSingle.getLast();
  193. //this is the standard score-object needed for the evaluation
  194. FullVector scores ( 2 );
  195. scores[0] = 0.0;
  196. scores[1] = 1.0 - uncertainty;
  197. ClassificationResult r ( scores[1]<0.5 ? 0 : 1, scores );
  198. // set ground truth label
  199. r.classno_groundtruth = (((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass) ? 1 : 0;
  200. //we could write the resulting score on the command line
  201. // std::cerr << "scores: " << std::endl;
  202. // scores >> std::cerr;
  203. //as well as the ground truth label
  204. // std::cerr << "gt: " << r.classno_groundtruth << " -- " << r.classno << std::endl;
  205. results.push_back ( r );
  206. }
  207. tTest.stop();
  208. std::cerr << "Time used for evaluating class " << cl << ": " << tTest.getLast() << std::endl;
  209. timeForSingleExamples/= imageNetTest.getNumPreloadedExamples();
  210. std::cerr << "Time used for evaluation single elements of class " << cl << " : " << timeForSingleExamples << std::endl;
  211. // we could also write the results to an external file. Note, that this file will be overwritten in every iteration
  212. // so if you want to store all results, you should add a suffix with the class number
  213. // std::cerr << "Writing results to " << resultsfile << std::endl;
  214. // results.writeWEKA ( resultsfile, 1 );
  215. double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  216. std::cerr << "Performance: " << perfvalue << std::endl;
  217. OverallPerformance += perfvalue;
  218. }
  219. OverallPerformance /= nrOfClassesToConcidere;
  220. std::cerr << "overall performance: " << OverallPerformance << std::endl;
  221. return 0;
  222. }
  223. #else
  224. int main (int argc, char **argv)
  225. {
  226. std::cerr << "MatIO library is missing in your system - this program will have no effect. " << std::endl;
  227. }
  228. #endif