testImageNetBinaryBruteForce.cpp
/**
 * @file testImageNetBinaryBruteForce.cpp
 * @brief perform ImageNet tests with binary tasks for OCC
 * @author Alexander Lütz
 * @date 23-05-2012 (dd-mm-yyyy)
 */

#include "core/basics/Config.h"
#include "core/vector/SparseVectorT.h"

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "fast-hik/tools.h"
#include "fast-hik/MatFileIO.h"
#include "fast-hik/ImageNetData.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
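
/**
 * @brief computes the RBF kernel value exp( -||a-b||^2 / (2*sigma^2) ) for two
 *        sparse vectors; the squared distance is accumulated by merging the
 *        sorted index sets of a and b, so only non-zero entries are touched
 * @note  despite its name, the function returns a kernel (similarity) value,
 *        not a distance
 */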
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0, const bool & verbose = false)
{
  double inner_sum(0.0);

  double s;
  double d;

  //this is the first version, where we needed on average 0.017988 s for each test sample
//   std::set<int> set_a;
//
//   for ( NICE::SparseVector::const_iterator i = a.begin(); i != a.end(); i++ )
//   {
//     double u (i->second);
//     double v (b.get(i->first));
//     s = ( u + v );
//     if ( fabs(s) < 10e-6 ) continue;
//     d = u-v;
//     inner_sum += d*d;
//     set_a.insert(i->first);
//   }
//
//   for ( NICE::SparseVector::const_iterator i = b.begin(); i != b.end(); i++ )
//   {
//     if (set_a.find(i->first) != set_a.end()) //already worked on in the first loop
//       continue;
//
//     double u (i->second);
//     if ( fabs(u) < 10e-6 ) continue;
//     inner_sum += u*u;
//   }

  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  //walk through both sorted index sets simultaneously
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
//     std::cerr << "a: " << aIt->first << " b: " << bIt->first << std::endl;
    if (aIt->first == bIt->first)
    {
      s = ( aIt->second + bIt->second );
//       if (! fabs(s) < 10e-6 ) //for numerical reasons
//       {
        d = ( aIt->second - bIt->second );
        inner_sum += d * d;
//       }
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
//       if (! fabs(aIt->second) < 10e-6 )
//       {
        inner_sum += aIt->second * aIt->second;
//       }
      aIt++;
    }
    else
    {
//       if (! fabs(bIt->second) < 10e-6 )
//       {
        inner_sum += bIt->second * bIt->second;
//       }
      bIt++;
    }
  }

  //compute the remaining values, if b reached its end but a did not
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++;
  }
  //compute the remaining values, if a reached its end but b did not
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++;
  }

  if (verbose)
    std::cerr << "inner_sum before /= (2.0*sigma*sigma): " << inner_sum << std::endl;

  inner_sum /= (2.0*sigma*sigma);

  if (verbose)
    std::cerr << "inner_sum after /= (2.0*sigma*sigma): " << inner_sum << std::endl;

  double expValue = exp(-inner_sum);

  if (verbose)
    std::cerr << "resulting expValue: " << expValue << std::endl;

  return expValue;
}

/**
    brute-force evaluation of binary ImageNet tasks for OCC using an RBF kernel
*/
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int positiveClass = conf.gI("main", "positive_class");

  double noise = conf.gD("main", "noise", 0.01);
  double kernelSigma = conf.gD("main", "kernelSigma", 2.0);
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 50);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class
  int nrOfClassesToConcidere = conf.gI("main", "nrOfClassesToConcidere", 1000);
  nrOfClassesToConcidere = std::min(nrOfClassesToConcidere, 1000); // we do not have more than 1000 classes

  std::cerr << "Positive class is " << positiveClass << std::endl;

  std::vector<SparseVector> trainingData;
  NICE::Vector y;

  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal", false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );

  imageNetTrain.preloadData( "train", "training" );
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();

  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;

  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );

  double OverallPerformance(0.0);
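
  // For every class considered, a one-class model is built from the training
  // examples of that class only: the kernel row sums (plus noise) serve as a
  // diagonal approximation D of the class-restricted kernel matrix. Each test
  // example is scored with 1 - ( k(x,x) - k_*^T D^{-1} k_* ), and the resulting
  // AUC values (ground truth given by positiveClass from the config) are
  // averaged over all classes.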
  for (int cl = 0; cl < nrOfClassesToConcidere; cl++)
  {
    std::cerr << "run for class " << cl << std::endl;

    // ------------------------------ TRAINING ------------------------------

    NICE::Vector matrixDInv (nrOfExamplesPerClass, 0.0);

    //compute D
    //start with adding some noise, if necessary
    if (noise != 0.0)
      matrixDInv.set(noise);
    else
      matrixDInv.set(0.0);

    std::cerr << "set matrixDInv to the noise value - now compute the kernel row sums" << std::endl;

    if ( cl == 0)
    {
      std::cerr << "print the first training example of class zero: " << std::endl;
      trainingData[0] >> std::cerr;
    }
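
    // The row sums D_ii = sum_j k(x_i, x_j) (plus the noise added above) act as
    // a cheap diagonal stand-in for the full kernel matrix; only the upper
    // triangle is evaluated, and symmetry is used to update both affected rows.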
    //now sum up all entries of each row in the original kernel matrix
    double kernelScore(0.0);
    for (int i = cl*100; i < cl*100+nrOfExamplesPerClass; i++)
    {
//       if ( (i % 50) == 0)
        std::cerr << i-cl*100 << " / " << nrOfExamplesPerClass << std::endl;

      for (int j = i; j < cl*100+nrOfExamplesPerClass; j++)
      {
//         std::cerr << j << " / " << nrOfExamplesPerClass << std::endl;
        if ( (cl == 0) && (i == 0) )
        {
          kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigma, true /*verbose*/);
        }
        else
          kernelScore = measureDistance(trainingData[i],trainingData[j], kernelSigma);

        if (kernelScore == 0.0)
          std::cerr << "score of zero for examples " << i << " and " << j << std::endl;

        matrixDInv[i-cl*100] += kernelScore;
        if (i != j)
          matrixDInv[j-cl*100] += kernelScore;
      }
    }

    std::cerr << "invert the main diagonal" << std::endl;

    //compute its inverse
    for (int i = 0; i < nrOfExamplesPerClass; i++)
    {
      matrixDInv[i] = 1.0 / matrixDInv[i];
    }

    std::cerr << "resulting D-Vector (or matrix :) ): " << std::endl;
    std::cerr << matrixDInv << std::endl;

    std::cerr << "training done - now perform the evaluation" << std::endl;

    // ------------------------------ TESTING ------------------------------

    ClassificationResults results;
    std::cerr << "Classification step ... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;

    ProgressBar pb;
    for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
    {
      pb.update ( imageNetTest.getNumPreloadedExamples() );

      const SparseVector & svec = imageNetTest.getPreloadedExample ( i );
//       SparseVector svec = imageNetTest.getPreloadedExample ( i );

      if ( i == 0)
      {
        std::cerr << "print first test example: " << std::endl;
        std::cerr << "this is of class " << (int)imageNetTest.getPreloadedLabel ( i ) << std::endl;
//         svec >> std::cerr;
        svec.store(std::cerr);
      }
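
      // kernel evaluations needed for the score of this test example:
      // kernelSelf   = k(x, x), the self-similarity (equals 1.0 for the RBF kernel)
      // kernelVector = k_*, the kernel values between x and the training
      //                examples of the current class cl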
      double kernelSelf ( measureDistance(svec, svec, kernelSigma) );
      NICE::Vector kernelVector (nrOfExamplesPerClass, 0.0);

      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        kernelVector[j] = measureDistance(trainingData[j+cl*100], svec, kernelSigma);
      }

      if ( i == 0)
      {
        std::cerr << "print first kernel vector: " << kernelVector << std::endl;
      }

      NICE::Vector rightPart (nrOfExamplesPerClass);
      for (int j = 0; j < nrOfExamplesPerClass; j++)
      {
        rightPart[j] = kernelVector[j] * matrixDInv[j];
      }
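
      // approximate predictive variance k(x,x) - k_*^T D^{-1} k_* with the
      // diagonal D^{-1} computed during training; a small value means the test
      // example is well explained by the training examples of class cl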
      double uncertainty = kernelSelf - kernelVector.scalarProduct ( rightPart );

      FullVector scores ( 2 );
      scores[0] = 0.0;
      scores[1] = 1.0 - uncertainty;

      ClassificationResult r ( scores[1] < 0.5 ? 0 : 1, scores );

      // set the ground truth label
      r.classno_groundtruth = ( ((int)imageNetTest.getPreloadedLabel ( i )) == positiveClass ) ? 1 : 0;

//       std::cerr << "scores: " << std::endl;
//       scores >> std::cerr;
//       std::cerr << "gt: " << r.classno_groundtruth << " -- " << r.classno << std::endl;

      results.push_back ( r );
    }

//     std::cerr << "Writing results to " << resultsfile << std::endl;
//     results.writeWEKA ( resultsfile, 1 );
    double perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

    std::cerr << "Performance: " << perfvalue << std::endl;

    OverallPerformance += perfvalue;
  }

  OverallPerformance /= nrOfClassesToConcidere;

  std::cerr << "overall performance: " << OverallPerformance << std::endl;

  return 0;
}