/**
* @file eccv2012-synthetic.cpp
* @brief ECCV 2012 experiment with synthetic histograms to show the possibility of feature relevance selection
* @author Alexander Freytag
* @date 17-02-2012 (dd-mm-yyyy)
*/
  7. #include <vector>
  8. #include <fstream>
  9. #include <iostream>
  10. #include <sstream>
  11. #include <limits>
  12. #include <core/basics/vectorio.h>
  13. #include <core/basics/Config.h>
  14. #include <core/basics/numerictools.h>
  15. #include <core/basics/Timer.h>
  16. #include <core/image/Histogram.h>
  17. #include <core/vector/VectorT.h>
  18. #include <vislearning/cbaselib/ClassificationResults.h>
  19. #include <gp-hik-core/FastMinKernel.h>
  20. #include <gp-hik-core/FMKGPHyperparameterOptimization.h>
  21. #include <gp-hik-core/parameterizedFunctions/PFAbsExp.h>
  22. #include <gp-hik-core/parameterizedFunctions/PFExp.h>
  23. #include <gp-hik-core/parameterizedFunctions/PFWeightedDim.h>
  24. #include <gp-hik-core/tools.h>
  25. using namespace std;
  26. using namespace NICE;
  27. using namespace OBJREC;
  28. void sampleDataOneExample(std::vector<double> & trainData, const int & classNr)
  29. {
  30. double sum(0.0);
  31. double h1,h2,h3,h4,h5,h6,h7,h8;
  32. if (classNr == 1)
  33. {
  34. while (true)
  35. {
  36. h1 = fabs(randGaussDouble(0.03)); sum += h1;
  37. h2 = randDouble(0.25); sum += h2;
  38. h3 = fabs(randGaussDouble(0.07)); sum += h3;
  39. h4 = fabs(randGaussDouble(0.05)); sum += h4;
  40. h5 = randDouble(0.25); sum += h5;
  41. h6 = randDouble(0.25); sum += h6;
  42. h7 = randDouble(0.25); sum += h7;
  43. if (sum <= 1.0) // if sum is smaller than 1.0, everything is ok
  44. break;
  45. sum = 0.0;
  46. }
  47. h8 = 1.0-sum;
  48. }
  49. else
  50. {
  51. while (true)
  52. {
  53. h1 = randDouble(0.25); sum += h1;
  54. h2 = fabs(randGaussDouble(0.07)); sum += h2;
  55. h3 = fabs(randGaussDouble(0.12)); sum += h3;
  56. h4 = fabs(randGaussDouble(0.05)); sum += h4;
  57. h5 = randDouble(0.25); sum += h5;
  58. h6 = randDouble(0.25); sum += h6;
  59. h7 = randDouble(0.25); sum += h7;
  60. if (sum <= 1.0) // if sum is smaller than 1.0, everything is ok
  61. break;
  62. sum = 0.0;
  63. }
  64. h8 = 1.0-sum;
  65. }
  66. trainData.push_back(h1);
  67. trainData.push_back(h2);
  68. trainData.push_back(h3);
  69. trainData.push_back(h4);
  70. trainData.push_back(h5);
  71. trainData.push_back(h6);
  72. trainData.push_back(h7);
  73. trainData.push_back(h8);
  74. }
  75. void sampleData(std::vector< std::vector<double> > & trainData, NICE::Vector & y, const int & nrExamplesPerClass)
  76. {
  77. // initRand();
  78. trainData.clear();
  79. y.resize(2*nrExamplesPerClass);
  80. for (int i = 0; i < nrExamplesPerClass; i++)
  81. {
  82. //sample positive example
  83. y[2*i] = 1;
  84. std::vector<double> trainDataOneExample;
  85. sampleDataOneExample(trainDataOneExample, 1);
  86. trainData.push_back(trainDataOneExample);
  87. //sample negative example
  88. trainDataOneExample.clear();
  89. y[2*i+1] = -1;
  90. sampleDataOneExample(trainDataOneExample, -1);
  91. trainData.push_back(trainDataOneExample);
  92. }
  93. }
  94. void evaluateRandomDistribution(const std::vector< std::vector<double> > & trainData, const NICE::Vector & y, std::vector<NICE::Histogram> & histograms)
  95. {
  96. histograms.resize(16); // 8 dimensions in this synthetic example for two classes
  97. //init
  98. for (int i = 0; i < 16; i++)
  99. {
  100. histograms[i] = NICE::Histogram ( 0.0, 0.25, 10 ); // min, max, numberBins
  101. }
  102. histograms[0] = NICE::Histogram ( 0.0, 0.25, 10 );
  103. histograms[3] = NICE::Histogram ( 0.0, 0.25, 10 );
  104. histograms[9] = NICE::Histogram ( 0.0, 0.25, 10 );
  105. histograms[11] = NICE::Histogram ( 0.0, 0.25, 10 );
  106. histograms[7] = NICE::Histogram ( 0.0, 1.0, 10 );
  107. histograms[15] = NICE::Histogram ( 0.0, 1.0, 10 );
  108. for (int i = 0; i < 16; i++)
  109. {
  110. histograms[i].set(0);
  111. }
  112. //start
  113. int clAdd(0);
  114. for (int i = 0; i < trainData.size(); i++)
  115. {
  116. // std::cerr << i << " / " << trainData.size() << std::endl;
  117. //evaluation for the first class
  118. if (y[i] == 1)
  119. {
  120. histograms[0].increaseBin((int)floor(trainData[i][0]*40));
  121. histograms[1].increaseBin((int)floor(trainData[i][1]*40));
  122. histograms[2].increaseBin((int)floor(trainData[i][2]*40));
  123. histograms[3].increaseBin((int)floor(trainData[i][3]*40));
  124. histograms[4].increaseBin((int)floor(trainData[i][4]*40));
  125. histograms[5].increaseBin((int)floor(trainData[i][5]*40));
  126. histograms[6].increaseBin((int)floor(trainData[i][6]*40));
  127. histograms[7].increaseBin((int)floor(trainData[i][7]*10));
  128. }
  129. else //evaluation for the second class
  130. {
  131. histograms[8].increaseBin((int)floor(trainData[i][0]*40));
  132. histograms[9].increaseBin((int)floor(trainData[i][1]*40));
  133. histograms[10].increaseBin((int)floor(trainData[i][2]*40));
  134. histograms[11].increaseBin((int)floor(trainData[i][3]*40));
  135. histograms[12].increaseBin((int)floor(trainData[i][4]*40));
  136. histograms[13].increaseBin((int)floor(trainData[i][5]*40));
  137. histograms[14].increaseBin((int)floor(trainData[i][6]*40));
  138. histograms[15].increaseBin((int)floor(trainData[i][7]*10));
  139. }
  140. }
  141. }
/**
 * ECCV 2012 experiment with synthetic data
 */
/**
 * @brief ECCV 2012 synthetic experiment: compares a GP-HIK classifier with a
 *        plain hyperparameter transformation (baseline, optimization disabled)
 *        against one with a per-dimension weighted transformation (feature
 *        relevance), reporting average recognition rate (ARR) and AUC over
 *        several runs and training-set sizes.
 */
int main (int argc, char **argv)
{
  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  initRand();
  // config from command line; the baseline copy disables hyperparameter optimization
  Config conf ( argc, argv );
  Config confBaseline ( conf );
  confBaseline.sS("HIKGP", "optimization_method", "none");
  string pf_baseline_s = conf.gS("main", "transformBaseline", "absexp");
  string pf_featRel_s = conf.gS("main", "transformFeatRel", "weightedDim");
  int nrRuns = conf.gI("main", "nrRuns", 1);
  int testSize = conf.gI("main", "testSize", 150); // per category
  bool printRandomDistribution = conf.gB("main", "printRandomDistribution", false);
  int dim(8); // number of dimensions of the synthetic histograms
  // NOTE(review): numeric_limits<double>::min() is the smallest POSITIVE double,
  // so the weight lower bound is effectively "just above zero", not negative —
  // presumably intended (see the commented-out alternative below); confirm.
  double weightsLowerBound(numeric_limits<double>::min( ));
  double weightsUpperBound(numeric_limits<double>::max( ));
  // double weightsLowerBound(-1.0 * weightsUpperBound);
  // set up the two parameterized hyperparameter transformations
  ParameterizedFunction *pfBaseline;
  ParameterizedFunction *pfFeatRel;
  if ( pf_baseline_s == "absexp" )
    pfBaseline = new PFAbsExp();
  else if ( pf_baseline_s == "exp" )
    pfBaseline = new PFExp();
  else
    fthrow(Exception, "Parameterized function type " << pf_baseline_s << " not yet implemented");
  if ( pf_featRel_s == "weightedDim" )
    pfFeatRel = new PFWeightedDim(dim,weightsLowerBound,weightsUpperBound);//(pfBaseline);
  else
    fthrow(Exception, "Parameterized function type " << pf_featRel_s << " not yet implemented");
  std::cerr << "Transformation type baseline: " << pf_baseline_s << std::endl;
  std::cerr << "Transformation type FeatRel: " << pf_featRel_s << std::endl;
  // training-set sizes (per category) to evaluate; smaller sizes are kept
  // commented out for quick re-activation
  std::vector<int> trainSizes; // per category
  // trainSizes.push_back(5);
  // trainSizes.push_back(10);
  // trainSizes.push_back(15);
  // trainSizes.push_back(20);
  // trainSizes.push_back(50);
  // trainSizes.push_back(75);
  // trainSizes.push_back(100);
  trainSizes.push_back(500);
  // per-training-size result collections (one inner vector per size, one entry per run)
  std::vector<std::vector<double> > ARRs_baseline;
  std::vector<std::vector<double> > ARRs_featRel;
  std::vector<std::vector<double> > AUCs_baseline;
  std::vector<std::vector<double> > AUCs_featRel;
  for (std::vector<int>::const_iterator trainSize = trainSizes.begin(); trainSize != trainSizes.end(); trainSize++)
  {
    std::cerr << "trainSize: " << *trainSize << std::endl;
    double AARRBaseline(0.0); // averaged average recognition rate :)
    double AARRFeatRel(0.0); // averaged average recognition rate :)
    double AAUCBaseline(0.0); // averaged area under curve :)
    double AAUCFeatRel(0.0); // averaged area under curve :)
    std::vector<double> ARRs_baseline_SingleSize;
    std::vector<double> ARRs_featRel_SingleSize;
    std::vector<double> AUCs_baseline_SingleSize;
    std::vector<double> AUCs_featRel_SingleSize;
    for (int run = 0; run < nrRuns; run++)
    {
      std::cerr << "run: " << run << std::endl;
      //----------------- TRAINING -------------------------
      //sample the training data
      std::vector< std::vector<double> > trainData;
      NICE::Vector yTrain;
      sampleData(trainData,yTrain, *trainSize);
      // optional debugging mode: dump the empirical feature distributions to
      // disk and exit (no training/testing is performed)
      if (printRandomDistribution)
      {
        // upper range border per histogram (7x 0.25 + 1x 1.0, for both classes)
        std::vector<double> borders;
        borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(1.0);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(0.25);borders.push_back(1.0);
        std::cerr << "print distribution of features " << std::endl;
        std::vector<NICE::Histogram> histograms;
        evaluateRandomDistribution(trainData, yTrain, histograms);
        for (int i = 0; i < histograms.size(); i++)
        {
          int sum (0);
          // NOTE(review): hard-coded, user-specific output path — files are
          // named hist<class>-<dimension>
          std::string fn = "/home/luetz/code/fast-hik/nice/fast-hik/hist";
          std::stringstream s1;
          s1 << i/8;  // class index (0 or 1)
          fn += s1.str();
          fn += "-";
          std::stringstream s2;
          s2 << i%8;  // dimension index (0..7)
          fn += s2.str();
          std::cerr << "filename: "<< fn.c_str() << std::endl;
          std::fstream outfile;
          outfile.open( fn.c_str(), ios::out );
          if (outfile.is_open())
          {
            // one line per bin: lower bin border and bin count
            for (int k = 0; k < histograms[i].bins(); k++)
            {
              outfile << borders[i]*k/ (double)histograms[i].bins() << " " << histograms[i][k] << std::endl;
              sum += histograms[i][k];
            }
            outfile.close();
          }
          else{
            std::cerr << "error while opening file " << fn << std::endl;
          }
        }
        std::cerr << "ending the function, we only printed the distributions" << std::endl;
        return 0;
      }
      std::vector<double> meanValues;
      calculateMeanPerDimension(trainData, meanValues);
      // FastMinKernel expects features dimension-wise, not example-wise
      transposeVectorOfVectors ( trainData );
      //baseline without feature relevance
      double noise = 0.1; // GP model noise (regularization)
      // NOTE(review): fmkBaseline/fmkFeatRel (and pfBaseline/pfFeatRel above)
      // are never deleted here — presumably FMKGPHyperparameterOptimization
      // takes ownership; verify against its documentation.
      FastMinKernel *fmkBaseline = new FastMinKernel ( trainData, noise, dim );
      FMKGPHyperparameterOptimization hyperBaseline ( &confBaseline, pfBaseline, fmkBaseline );
      hyperBaseline.optimize ( yTrain );
      //with optimization of feature relevance (= optimization of weights for each dimension)
      FastMinKernel *fmkFeatRel = new FastMinKernel ( trainData, noise, dim );
      // std::cerr << "print Parameter of pfWeightedDim" << std::endl;
      // std::cerr << pfFeatRel->parameters() << std::endl;
      // std::cerr << "print Matrix after transformation" << std::endl;
      // pfFeatRel->applyFunctionToFeatureMatrix(fmkFeatRel->featureMatrix());
      // fmkFeatRel->featureMatrix().print();
      FMKGPHyperparameterOptimization hyperFeatRel ( &conf, pfFeatRel, fmkFeatRel );
      hyperFeatRel.optimize ( yTrain );
      std::cerr << "meanValues: ";
      for (std::vector<double>::const_iterator meanIt = meanValues.begin(); meanIt != meanValues.end(); meanIt++)
      {
        std::cerr << *meanIt << " ";
      }
      std::cerr << std::endl << std::endl;
      //----------------- TESTING -------------------------
      //sample the test data
      std::vector< std::vector<double> > testData;
      NICE::Vector yTest;
      sampleData(testData,yTest, testSize);
      // std::cerr << "Printing testData: " << std::endl;
      // printMatrix<double>(testData);
      // std::cerr << yTest << std::endl;
      Timer t;
      Matrix confusionBaseline ( 2, 2, 0.0 );
      Matrix confusionFeatRel ( 2, 2, 0.0 );
      ClassificationResults resultsBaseline;
      ClassificationResults resultsFeatRel;
      for ( uint i = 0 ; i < testData.size(); i++ )
      {
        const Vector xstar(testData[i]);
        // the following is just to be sure that we
        // do not count the time necessary for conversion
        SparseVector xstar_sparse ( xstar ); //default tolerance is 10e-10
        // map the -1/+1 labels to the 0/1 confusion-matrix indices
        int classno_groundtruth = yTest[i];
        //dirty :(
        if ((classno_groundtruth) < 0)
          classno_groundtruth = 0;
        // --- baseline classifier ---
        SparseVector scoresBaseline;
        t.start();
        uint classno_estimated_baseline = hyperBaseline.classify ( xstar_sparse, scoresBaseline );
        t.stop();
        scoresBaseline.store(cerr);
        cerr << "baseline [" << i << " / " << testData.size() << "] " << classno_estimated_baseline << " " << classno_groundtruth << " time: " << t.getLast() << endl;
        confusionBaseline( classno_groundtruth, classno_estimated_baseline ) += 1;
        // building the result
        ClassificationResult rBaseline ( classno_estimated_baseline, scoresBaseline );
        // set ground truth label
        rBaseline.classno_groundtruth = classno_groundtruth;
        resultsBaseline.push_back ( rBaseline );
        // --- feature-relevance classifier ---
        SparseVector scoresFeatRel;
        t.start();
        uint classno_estimated_featRel = hyperFeatRel.classify ( xstar_sparse, scoresFeatRel );
        t.stop();
        scoresFeatRel.store(cerr);
        cerr << "FeatRel [" << i << " / " << testData.size() << "] " << classno_estimated_featRel << " " << classno_groundtruth << " time: " << t.getLast() << endl;
        confusionFeatRel( classno_groundtruth, classno_estimated_featRel ) += 1;
        // building the result
        ClassificationResult rFeatRel ( classno_estimated_featRel, scoresFeatRel );
        // set ground truth label
        rFeatRel.classno_groundtruth = classno_groundtruth;
        resultsFeatRel.push_back ( rFeatRel );
      }
      confusionBaseline.normalizeRowsL1();
      confusionFeatRel.normalizeRowsL1();
      // --------------- ARR evaluation --------------------
      cerr << confusionBaseline << endl;
      cerr << "average recognition rate baseline: " << confusionBaseline.trace()/confusionBaseline.rows() << endl;
      cerr << confusionFeatRel << endl;
      cerr << "average recognition rate featRel: " << confusionFeatRel.trace()/confusionFeatRel.rows() << endl;
      AARRBaseline += (confusionBaseline.trace()/confusionBaseline.rows()) / nrRuns;
      ARRs_baseline_SingleSize.push_back(confusionBaseline.trace()/confusionBaseline.rows());
      AARRFeatRel += (confusionFeatRel.trace()/confusionFeatRel.rows()) / nrRuns;
      ARRs_featRel_SingleSize.push_back(confusionFeatRel.trace()/confusionFeatRel.rows());
      // --------------- AUC evaluation --------------------
      double perfvalueBaseline = resultsBaseline.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
      cerr << "AUC Baseline: " << perfvalueBaseline << endl;
      double perfvalueFeatRel = resultsFeatRel.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
      cerr << "AUC FeatRel: " << perfvalueFeatRel << endl;
      AAUCBaseline += perfvalueBaseline / nrRuns;
      AUCs_baseline_SingleSize.push_back(perfvalueBaseline);
      AAUCFeatRel += perfvalueFeatRel / nrRuns;
      AUCs_featRel_SingleSize.push_back(perfvalueFeatRel);
    }
    ARRs_baseline.push_back(ARRs_baseline_SingleSize);
    ARRs_featRel.push_back(ARRs_featRel_SingleSize);
    AUCs_baseline.push_back(AUCs_baseline_SingleSize);
    AUCs_featRel.push_back(AUCs_featRel_SingleSize);
  }
  // final summary tables: mean and standard deviation over runs, per train size
  std::cerr << "================ EVALUATION ARR======================== " << std::endl;
  std::cerr << "trainsize << meanBaseline << stdDevBaseline << meanFeatRel << stdDevFeatRel " << std::endl;
  for (uint trainSizeIdx = 0; trainSizeIdx < trainSizes.size(); trainSizeIdx++)
  {
    double meanBaseline( calculating_mean(ARRs_baseline[trainSizeIdx]) );
    double meanFeatRel( calculating_mean(ARRs_featRel[trainSizeIdx]) );
    double stdDevBaseline(calculating_std_dev(ARRs_baseline[trainSizeIdx], meanBaseline));
    double stdDevFeatRel(calculating_std_dev(ARRs_featRel[trainSizeIdx], meanFeatRel));
    std::cerr << trainSizes[trainSizeIdx] << " " << meanBaseline << " " << stdDevBaseline << " " << meanFeatRel << " " << stdDevFeatRel << std::endl;
  }
  std::cerr << std::endl << std::endl << "================ EVALUATION AUC======================== " << std::endl;
  std::cerr << "trainsize << meanBaseline << stdDevBaseline << meanFeatRel << stdDevFeatRel " << std::endl;
  for (uint trainSizeIdx = 0; trainSizeIdx < trainSizes.size(); trainSizeIdx++)
  {
    double meanBaseline( calculating_mean(AUCs_baseline[trainSizeIdx]) );
    double meanFeatRel( calculating_mean(AUCs_featRel[trainSizeIdx]) );
    double stdDevBaseline(calculating_std_dev(AUCs_baseline[trainSizeIdx], meanBaseline));
    double stdDevFeatRel(calculating_std_dev(AUCs_featRel[trainSizeIdx], meanFeatRel));
    std::cerr << trainSizes[trainSizeIdx] << " " << meanBaseline << " " << stdDevBaseline << " " << meanFeatRel << " " << stdDevFeatRel << std::endl;
  }
  return 0;
}