testNullSpace.cpp

/**
 * @file testNullSpace.cpp
 * @brief test function for class KCNullSpace
 * @author Paul Bodesheim
 * @date 28-11-2012 (dd-mm-yyyy)
 */
#include <ctime>
#include <time.h>

#include "core/basics/Config.h"

#ifdef NICE_USELIB_MATIO

#include "core/basics/Timer.h"
#include "core/vector/Algorithms.h"
#include "core/vector/SparseVectorT.h"

#include "vislearning/classifier/kernelclassifier/KCNullSpace.h"
#include "vislearning/math/kernels/KernelData.h"
#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/baselib/ProgressBar.h"

#include "core/matlabAccess/MatFileIO.h"
#include "vislearning/matlabAccessHighLevel/ImageNetData.h"

// #include <iostream>
// #include <fstream>

using namespace std;
using namespace NICE;
using namespace OBJREC;
// --------------- THE KERNEL FUNCTION ( exponential kernel with Euclidean distance ) ----------------------
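// Computes an RBF kernel on sparse feature vectors:
//   k(a,b) = exp( -||a - b||^2 / (2*sigma^2) )
// where the squared Euclidean distance ||a - b||^2 is accumulated by walking both
// sparse vectors in parallel over their non-zero entries.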
double measureDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b, const double & sigma = 2.0)
{
  double inner_sum(0.0);
  double d;

  //new version, where we needed on average 0.001707 s for each test sample
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  //compute the squared Euclidean distance between both feature vectors (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      d = ( aIt->second - bIt->second );
      inner_sum += d * d;
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      inner_sum += aIt->second * aIt->second;
      aIt++;
    }
    else
    {
      inner_sum += bIt->second * bIt->second;
      bIt++;
    }
  }

  //compute remaining values, if b reached the end but not a
  while (aIt != a.end())
  {
    inner_sum += aIt->second * aIt->second;
    aIt++;
  }

  //compute remaining values, if a reached the end but not b
  while (bIt != b.end())
  {
    inner_sum += bIt->second * bIt->second;
    bIt++;
  }

  //normalization of the exponent
  inner_sum /= (2.0*sigma*sigma);

  //finally, compute the RBF-kernel score (RBF = radial basis function)
  return exp(-inner_sum);
}
// --------------- THE KERNEL FUNCTION ( HIK ) ----------------------
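// Computes the histogram intersection kernel (HIK):
//   k(a,b) = sum_i min(a_i, b_i)
// Despite its name, minimumDistance returns a similarity score, not a distance.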
double minimumDistance ( const NICE::SparseVector & a, const NICE::SparseVector & b )
{
  double inner_sum(0.0);
  NICE::SparseVector::const_iterator aIt = a.begin();
  NICE::SparseVector::const_iterator bIt = b.begin();

  //accumulate the element-wise minima of both feature vectors (given as SparseVectors)
  while ( (aIt != a.end()) && (bIt != b.end()) )
  {
    if (aIt->first == bIt->first)
    {
      inner_sum += std::min( aIt->second , bIt->second );
      aIt++;
      bIt++;
    }
    else if ( aIt->first < bIt->first)
    {
      aIt++;
    }
    else
    {
      bIt++;
    }
  }
  return inner_sum;
}
/**
 * test the basic functionality of the KCNullSpace classifier:
 * multi-class novelty detection, multi-class classification, and one-class classification (OCC)
 */
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

  Config conf ( argc, argv );
  string resultsfile = conf.gS("main", "results", "results.txt" );
  int nrOfExamplesPerClass = conf.gI("main", "nrOfExamplesPerClass", 100);
  nrOfExamplesPerClass = std::min(nrOfExamplesPerClass, 100); // we do not have more than 100 examples per class

  int maxKnownClass = conf.gI("KCNullSpace", "maxKnownClass", 5);
  int OCCsingleClassLabel = conf.gI("KCNullSpace", "OCCsingleClassLabel", 1);
  bool testVerbose = conf.gB("KCNullSpace", "verbose", false);
  std::cerr << "conf verbose: " << testVerbose << std::endl;
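  // A hypothetical config file matching the keys read above could look like this
  // (INI-style sections assumed; all values shown are the defaults from the code):
  //   [main]
  //   results = results.txt
  //   nrOfExamplesPerClass = 100
  //   imageNetLocal = false
  //   [KCNullSpace]
  //   maxKnownClass = 5
  //   OCCsingleClassLabel = 1
  //   verbose = false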
  // -------- read ImageNet data --------------
  std::vector<SparseVector> trainingData;
  NICE::Vector y;
  NICE::Vector yTest;

  std::cerr << "Reading ImageNet data ..." << std::endl;
  bool imageNetLocal = conf.gB("main", "imageNetLocal" , false);
  string imageNetPath;
  if (imageNetLocal)
    imageNetPath = "/users2/rodner/data/imagenet/devkit-1.0/";
  else
    imageNetPath = "/home/dbv/bilder/imagenet/devkit-1.0/";

  ImageNetData imageNetTrain ( imageNetPath + "demo/" );
  imageNetTrain.preloadData( "train", "training" );
  imageNetTrain.normalizeData("L1");
  trainingData = imageNetTrain.getPreloadedData();
  y = imageNetTrain.getPreloadedLabels();

  std::cerr << "Reading of training data finished" << std::endl;
  std::cerr << "trainingData.size(): " << trainingData.size() << std::endl;
  std::cerr << "y.size(): " << y.size() << std::endl;

  std::cerr << "Reading ImageNet test data files (takes some seconds)..." << std::endl;
  ImageNetData imageNetTest ( imageNetPath + "demo/" );
  imageNetTest.preloadData ( "val", "testing" );
  imageNetTest.normalizeData("L1");
  imageNetTest.loadExternalLabels ( imageNetPath + "data/ILSVRC2010_validation_ground_truth.txt" );
  yTest = imageNetTest.getPreloadedLabels();
  // ---------- SELECT TRAINING SET FOR MULTICLASS NOVELTY DETECTION AND COMPUTE KERNEL MATRIX ------------------------
  NICE::Vector knownClassLabels(maxKnownClass,0.0);
  for (int k=1; k<=maxKnownClass; k++)
    knownClassLabels(k-1) = k;

  std::vector<SparseVector> currentTrainingData;
  currentTrainingData.clear();
  NICE::Vector currentTrainingLabels(nrOfExamplesPerClass*knownClassLabels.size(),0);
  int kk(0);
  for (size_t i = 0; i < y.size(); i++)
  {
    for (size_t j=0; j<knownClassLabels.size(); j++)
    {
      if ( y[i] == knownClassLabels[j] )
      {
        currentTrainingLabels(kk) = knownClassLabels[j];
        currentTrainingData.push_back(trainingData[i]);
        kk++;
        break;
      }
    }
  }
  Timer tTrain;
  tTrain.start();

  //compute the kernel matrix
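  // The HIK is symmetric, so only the upper triangle is computed explicitly
  // and then mirrored to the lower triangle.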
  NICE::Matrix kernelMatrix(nrOfExamplesPerClass*knownClassLabels.size(), nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
  double kernelScore(0.0);
  for (size_t i = 0; i < kernelMatrix.rows(); i++)
  {
    for (size_t j = i; j < kernelMatrix.cols(); j++)
    {
      kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]);
      kernelMatrix(i,j) = kernelScore;
      if (i != j)
        kernelMatrix(j,i) = kernelScore;
    }
  }

  KernelData kernelData( &conf, kernelMatrix, "Kernel", false );
  KCNullSpace knfst( &conf);

  // train the model
  knfst.teach(&kernelData, currentTrainingLabels);
  tTrain.stop();
  std::cerr << "Time used for training: " << tTrain.getLast() << std::endl;

  // some outputs of training
  std::cerr << "training set statistic: " << std::endl;
  for (std::map<int,int>::iterator it = knfst.getTrainingSetStatistic()->begin(); it != knfst.getTrainingSetStatistic()->end(); it++)
  {
    std::cerr << "class label: " << (*it).first << " number of class samples: " << (*it).second << std::endl;
  }
  std::cerr << "one-class setting?: " << knfst.isOneClass() << std::endl;
  std::cerr << "null space dimension: "<< knfst.getNullSpaceDimension() << std::endl;
  std::cerr << "target points: " << std::endl;
  for (std::map<int,NICE::Vector>::iterator it = knfst.getTargetPoints()->begin(); it != knfst.getTargetPoints()->end(); it++)
    std::cerr << (*it).second << std::endl;

  std::cerr << "training done - now perform the evaluation" << std::endl;
  // --------- TESTING MULTICLASS NOVELTY DETECTION ------------------------------
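  // The null space model maps all training samples of a known class onto a single
  // target point; the novelty score is presumably derived from the distance of a
  // test sample's projection to the closest target point (see KCNullSpace).
  // For the AUC evaluation below, test samples of the known classes are labeled 1
  // and all remaining (novel) classes are labeled 0.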
  std::cerr << "Multi-class novelty detection... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
  ClassificationResults results;
  ProgressBar pb;
  Timer tTest;
  tTest.start();
  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    if ( (i % 100)==0)
      pb.update ( imageNetTest.getNumPreloadedExamples()/100 );

    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

    // compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
    for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }

    ClassificationResult r;
    r = knfst.noveltyDetection( kernelVector, kernelSelf);

    // set ground truth label
    r.classno_groundtruth = 0;
    for (size_t j=0; j<knownClassLabels.size(); j++)
    {
      if ( yTest[i] == knownClassLabels[j] )
      {
        r.classno_groundtruth = 1;
        break;
      }
    }

    // remember the results for the evaluation later on
    results.push_back ( r );
  }
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;

  double timeForSingleExample(0.0);
  timeForSingleExample = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExample << std::endl;

  // run the AUC-evaluation
  double perfvalue( 0.0 );
  perfvalue = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  std::cerr << " novelty detection performance: " << perfvalue << std::endl;
  // --------- TESTING MULTICLASS CLASSIFICATION ------------------------------
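  // For multi-class classification, only test samples of the known classes are
  // evaluated (samples with labels beyond maxKnownClass are skipped); classifyKernel
  // presumably assigns the class whose target point is closest to the sample's
  // projection, and performance is measured via getAverageRecognitionRate().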
  results.clear();
  tTest.start();
  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    // only use samples of known classes
    if ( yTest[i] > maxKnownClass)
    {
      continue;
    }

    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

    // compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (nrOfExamplesPerClass*knownClassLabels.size(), 0.0);
    for (uint j = 0; j < nrOfExamplesPerClass*knownClassLabels.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }

    ClassificationResult r;
    r = knfst.classifyKernel( kernelVector, kernelSelf);

    // set ground truth label
    for (uint j=0; j < knownClassLabels.size(); j++)
    {
      if (yTest[i] == knownClassLabels[j])
      {
        r.classno_groundtruth = j;
        break;
      }
    }

    // remember the results for the evaluation later on
    results.push_back ( r );
  }
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;

  timeForSingleExample = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExample << std::endl;

  // run the multi-class evaluation (average recognition rate)
  perfvalue = results.getAverageRecognitionRate();
  std::cerr << " classification performance: " << perfvalue << std::endl;
  // ---------- SELECT TRAINING SET FOR ONECLASS CLASSIFICATION AND COMPUTE KERNEL MATRIX ------------------------
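  // One-class classification (OCC): the model is retrained on samples of a single
  // class (OCCsingleClassLabel) only; every other class acts as novel at test time.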
  currentTrainingData.clear();
  currentTrainingLabels.clear();
  for (size_t i = 0; i < y.size(); i++)
  {
    if ( y[i] == OCCsingleClassLabel )
    {
      currentTrainingLabels.append(OCCsingleClassLabel);
      currentTrainingData.push_back(trainingData[i]);
    }
  }

  tTrain.start();

  //compute the kernel matrix
  NICE::Matrix kernelMatrixOCC(currentTrainingData.size(), currentTrainingData.size(), 0.0);
  std::cerr << "OCC Kernel Matrix: " << kernelMatrixOCC.rows() << " x " << kernelMatrixOCC.cols() << std::endl;
  for (size_t i = 0; i < kernelMatrixOCC.rows(); i++)
  {
    for (size_t j = i; j < kernelMatrixOCC.cols(); j++)
    {
      kernelScore = minimumDistance(currentTrainingData[i],currentTrainingData[j]);
      kernelMatrixOCC(i,j) = kernelScore;
      if (i != j)
        kernelMatrixOCC(j,i) = kernelScore;
    }
  }

  // write the OCC kernel matrix to disk (hard-coded path)
  filebuf fb;
  fb.open("/home/bodesheim/experiments/kernelMatrixOCC.txt",ios::out);
  ostream os (&fb);
  os << kernelMatrixOCC;
  fb.close();

  KernelData kernelDataOCC( &conf, kernelMatrixOCC, "Kernel", false );

  // train the model
  std::cerr << "Train OCC model... " << std::endl;
  knfst.teach(&kernelDataOCC, currentTrainingLabels);
  tTrain.stop();
  std::cerr << "Time used for training: " << tTrain.getLast() << std::endl;

  // some outputs of training
  std::cerr << "training set statistic: " << std::endl;
  for (std::map<int,int>::iterator itt = knfst.getTrainingSetStatistic()->begin(); itt != knfst.getTrainingSetStatistic()->end(); itt++)
  {
    std::cerr << "class label: " << (*itt).first << " number of class samples: " << (*itt).second << std::endl;
  }
  std::cerr << "one-class setting?: " << knfst.isOneClass() << std::endl;
  std::cerr << "null space dimension: "<< knfst.getNullSpaceDimension() << std::endl;
  std::cerr << "target points: " << std::endl;
  for (std::map<int,NICE::Vector>::const_iterator it = knfst.getTargetPoints()->begin(); it != knfst.getTargetPoints()->end(); it++)
    std::cerr << (*it).second << std::endl;

  std::cerr << "training done - now perform the evaluation" << std::endl;
  // --------- TESTING OCC ------------------------------
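  // For the OCC AUC evaluation, test samples of OCCsingleClassLabel are labeled 1
  // (target class) and all other test samples are labeled 0 (novel).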
  std::cerr << "OCC... with " << imageNetTest.getNumPreloadedExamples() << " examples" << std::endl;
  results.clear();
  tTest.start();
  ProgressBar pb3;
  std::cerr << "start for loop" << std::endl;
  for ( uint i = 0 ; i < (uint)imageNetTest.getNumPreloadedExamples(); i++ )
  {
    if ( (i % 100)==0)
      pb3.update ( imageNetTest.getNumPreloadedExamples()/100 );

    const SparseVector & svec = imageNetTest.getPreloadedExample ( i );

    //compute (self) similarities
    double kernelSelf (minimumDistance(svec,svec) );
    NICE::Vector kernelVector (currentTrainingData.size(), 0.0);
    for (uint j = 0; j < currentTrainingData.size(); j++)
    {
      kernelVector[j] = minimumDistance(currentTrainingData[j],svec);
    }

    ClassificationResult r;
    r = knfst.noveltyDetection( kernelVector, kernelSelf);

    // set ground truth label
    if ( yTest[i] == OCCsingleClassLabel )
    {
      r.classno_groundtruth = 1;
    }
    else
    {
      r.classno_groundtruth = 0;
    }

    //remember the results for the evaluation later on
    results.push_back ( r );
  }
  tTest.stop();
  std::cerr << "Time used for evaluation: " << tTest.getLast() << std::endl;

  double timeForSingleExampleOCC = tTest.getLast()/imageNetTest.getNumPreloadedExamples();
  std::cerr.precision(10);
  std::cerr << "time used for evaluation of single elements: " << timeForSingleExampleOCC << std::endl;

  // run the AUC-evaluation
  double perfvalueOCC = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
  std::cerr << " occ performance: " << perfvalueOCC << std::endl;

  return 0;
}
#else

int main (int argc, char **argv)
{
  std::cerr << "MatIO library is missing in your system - this program will have no effect. " << std::endl;
  return 0;
}

#endif