/**
* @file IL_AL_Binary.cpp
* @brief Incrementally train the GP HIK classifier, using the predictive variance and its approximations to select new samples; perform binary tests
* @author Alexander Freytag
* @date 11-06-2012
*/
#include <cmath>
#include <limits>
#include <map>
#include <set>
#include <sstream>
#include <stdlib.h>
#include <time.h>
#include <vector>

#include <core/basics/Config.h>
#include <core/basics/StringTools.h>
#include <core/vector/SparseVectorT.h>
#include <core/vector/VectorT.h>
//----------
#include "vislearning/baselib/ProgressBar.h"
#include <vislearning/baselib/Globals.h>
#include <vislearning/classifier/kernelclassifier/KCGPRegOneVsAll.h>
#include <vislearning/classifier/fpclassifier/gphik/FPCGPHIK.h>
#include "vislearning/cbaselib/MultiDataset.h"
#include <vislearning/cbaselib/LabeledSet.h>
#include "vislearning/cbaselib/ClassificationResults.h"
#include <vislearning/math/kernels/KernelData.h>
//----------
#include "gp-hik-exp/progs/datatools.h"
//----------
using namespace std;
using namespace NICE;
using namespace OBJREC;

enum verbose_level {NONE = 0, LOW = 1, MEDIUM = 2, EVERYTHING = 3};

enum QueryStrategy{
  RANDOM = 0,
  GPMEAN,
  GPPREDVAR,
  GPHEURISTIC
};
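
// Overview of the four query strategies implemented below:
//   RANDOM      - pick examples from the unlabeled pool uniformly at random (baseline)
//   GPMEAN      - pick the example with the smallest absolute GP mean score (minimum margin)
//   GPPREDVAR   - pick the example with the largest GP predictive variance
//   GPHEURISTIC - pick the example minimizing |mean| / sqrt(noise^2 + variance), combining both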
std::string convertInt(int number)
{
  stringstream ss;  //create a stringstream
  ss << number;     //add number to the stream
  return ss.str();  //return a string with the contents of the stream
}
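
// A small helper that the evaluation section at the end of main could use: the
// mean/std-dev computations for classification times, IL training times,
// recognition rates, and AUC scores all follow this pattern. This is only a
// sketch of that shared pattern; it is not called by the code below.
template<typename T>
void computeMeanAndStdDev ( const std::vector<T> & values, double & mean, double & stdDev )
{
  //mean: sum all entries and divide by their number
  mean = 0.0;
  for ( typename std::vector<T>::const_iterator it = values.begin(); it != values.end(); it++ )
  {
    mean += *it;
  }
  mean /= values.size();

  //std-dev: average squared deviation from the mean, then take the square root
  stdDev = 0.0;
  for ( typename std::vector<T>::const_iterator it = values.begin(); it != values.end(); it++ )
  {
    stdDev += pow ( *it - mean, 2 );
  }
  stdDev = sqrt ( stdDev / values.size() );
}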
/**
  Computes kernel matrices from randomly or deterministically chosen training images and evaluates their performance, using ROI optimization
*/
int main ( int argc, char **argv )
{
  std::cout.precision ( 10 );
  std::cerr.precision ( 10 );

  NICE::Config conf ( argc, argv );
  int trainExPerClass = conf.gI ( "main", "trainExPerClass", 10 );
  int incrementalAddSize = conf.gI("main", "incrementalAddSize", 1);
  int nrOfIncrements = conf.gI("main", "nrOfIncrements", 9);
  int num_runs = conf.gI ( "main", "num_runs", 10 );
  bool do_classification = conf.gB ( "main", "do_classification", true );

  double squaredNoise = pow( conf.gD("GPHIKClassifier", "noise", 0.01) , 2);
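  // note: "noise" in the config is presumably the noise standard deviation sigma_n;
  // its square is used below as the sigma_n^2 term of the GPHEURISTIC denominator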
  int minClass = conf.gI( "main", "minClass", 0);
  int maxClass = conf.gI( "main", "maxClass", 15);

  string queryStrategyString = conf.gS( "main", "queryStrategy", "random");
  QueryStrategy queryStrategy;
  if (queryStrategyString.compare("gpMean") == 0)
  {
    queryStrategy = GPMEAN;
  }
  else if (queryStrategyString.compare("gpPredVar") == 0)
  {
    queryStrategy = GPPREDVAR;
  }
  else if (queryStrategyString.compare("gpHeuristic") == 0)
  {
    queryStrategy = GPHEURISTIC;
  }
  else
  {
    queryStrategy = RANDOM;
  }
  int verbose_int = conf.gI ( "GP_IL", "verbose", 0 );
  verbose_level verbose ( NONE );
  switch ( verbose_int )
  {
    case 0:
      verbose = NONE;
      break;
    case 1:
      verbose = LOW;
      break;
    case 2:
      verbose = MEDIUM;
      break;
    case 3:
      verbose = EVERYTHING;
      break;
  }

  std::string locationOfPermutations = conf.gS( "main", "locationOfPermutations", "/home/luetz/data/images/caltech-101/" );
  std::string classselection_train = conf.gS( "main", "classselection_train", "*" );
  std::string classselection_test = conf.gS( "main", "classselection_test", "*" );
  std::string examples_train = conf.gS( "main", "examples_train", "seq * 100" );
  std::string examples_test = conf.gS( "main", "examples_test", "seq * 50" );

  /* initialize random seed: */
  srand ( time ( NULL ) );
  // srand ( 0 ); //use a fixed seed for reproducible results
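
  // Experimental setup, as encoded below: for every class in [minClass, maxClass] we run
  // num_runs binary experiments on precomputed dataset permutations. Each run starts with
  // trainExPerClass examples per class, then performs nrOfIncrements active-learning steps,
  // each querying incrementalAddSize examples according to the chosen query strategy,
  // optionally evaluating recognition rate and AUC after every step.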
  for (int currentClass = minClass; currentClass <= maxClass; currentClass++)
  {
    std::cerr << "start binary experiments for class " << currentClass << std::endl;

    // =========================== INIT ===========================

    std::vector<std::vector<double> > recognitions_rates(nrOfIncrements+1);
    std::vector<std::vector<double> > AUC_scores(nrOfIncrements+1);
    std::vector<std::vector<float> > classification_times(nrOfIncrements+1);
    std::vector<std::vector<float> > IL_training_times(nrOfIncrements);

    for ( int run = 0; run < num_runs; run++ )
    {
      std::cerr << "run: " << run << std::endl;

      NICE::Config confCurrentRun ( conf );
      confCurrentRun.sS( "train"+convertInt(run), "dataset", locationOfPermutations+"run"+convertInt(run)+".train" );
      confCurrentRun.sS( "train"+convertInt(run), "classselection_train", classselection_train );
      confCurrentRun.sS( "train"+convertInt(run), "examples_train", examples_train );
      confCurrentRun.sS( "test"+convertInt(run), "dataset", locationOfPermutations+"run"+convertInt(run)+".test" );
      confCurrentRun.sS( "test"+convertInt(run), "classselection_test", classselection_test );
      confCurrentRun.sS( "test"+convertInt(run), "examples_test", examples_test );

      //15-scenes settings
      std::string ext = confCurrentRun.gS("main", "ext", ".txt");
      std::cerr << "Using cache extension: " << ext << std::endl;

      OBJREC::MultiDataset md ( &confCurrentRun );
      std::cerr << "now read the dataset" << std::endl;

      // read training set
      vector< NICE::Vector > trainDataOrig;
      Vector y;
      string trainRun ( "train" + convertInt( run ) );
      std::cerr << "look for " << trainRun << std::endl;
      const LabeledSet *train = md[ trainRun ]; //previously, we only selected "train"; now we select the permutation for this run

      //we store the filenames so that we can see which image was picked in every step
      std::vector<std::string> filenamesTraining;
      readData< std::vector< NICE::Vector >, NICE::Vector > ( confCurrentRun, *train, trainDataOrig, y, filenamesTraining, ext );

      std::cerr << "dimension: " << trainDataOrig[0].size() << std::endl;
      std::cerr << "length L1: " << trainDataOrig[0].normL1() << " length L2: " << trainDataOrig[0].normL2() << std::endl;
      std::cerr << "label vector after reading: " << y << std::endl;

      bool firstPositivePrinted( false );
      //ensure the binary setting
      for ( uint i = 0; i < y.size(); i++ )
      {
        if ( y[i] == currentClass)
        {
          if ( !firstPositivePrinted )
          {
            std::cerr << "first positive example: " << filenamesTraining[i] << std::endl;
            firstPositivePrinted = true;
          }
          y[i] = 1;
        }
        else
          y[i] = 0; //-1;
      }

      std::cerr << "resulting binary label vector:" << y << std::endl;

      std::set<int> classesAvailable;
      classesAvailable.insert( 0 ); //we have a single negative class
      classesAvailable.insert( 1 ); //and we have a single positive class

      std::map<int,int> nrExamplesPerClassInDataset; //simply count how many examples for every class are available
      std::map<int,std::vector<int> > examplesPerClassInDataset; //as well as their corresponding indices in the dataset

      //initialize this storage
      for (std::set<int>::const_iterator it = classesAvailable.begin(); it != classesAvailable.end(); it++)
      {
        nrExamplesPerClassInDataset.insert(std::pair<int,int>(*it,0));
        examplesPerClassInDataset.insert(std::pair<int,std::vector<int> >(*it,std::vector<int>(0)));
      }

      //store the indices of the examples
      for ( uint i = 0; i < y.size(); i++ )
      {
        (examplesPerClassInDataset.find( y[i] )->second).push_back(i);
      }

      //and count how many examples are in every class
      for (std::map<int,std::vector<int> >::const_iterator it = examplesPerClassInDataset.begin(); it != examplesPerClassInDataset.end(); it++)
      {
        nrExamplesPerClassInDataset.find(it->first)->second = it->second.size();
      }

      //simple output to tell how many examples we have for every class
      for ( std::map<int,int>::const_iterator it = nrExamplesPerClassInDataset.begin(); it != nrExamplesPerClassInDataset.end(); it++)
      {
        cerr << it->first << ": " << it->second << endl;
      }

      Examples examples;

      //count how many examples of every class we have while actively selecting new examples
      //NOTE: this works only if class numbers are consecutive
      NICE::Vector pickedExamplesPerClass( classesAvailable.size(), trainExPerClass);

      std::map<int,std::vector<int> > examplesPerClassInDatasetTmp (examplesPerClassInDataset);

      //choose examples of every class used for training
      //we always use the first examples of each class, since the dataset comes already randomly ordered
      for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++)
      {
        std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
        std::cerr << "pick training examples for class " << *clIt << std::endl;

        for (int i = 0; i < trainExPerClass; i++)
        {
          std::cerr << "i: " << i << std::endl;
          int exampleIndex ( 0 ); //old: rand() % ( exIt->second.size() )
          std::cerr << "pick example " << exIt->second[exampleIndex] << " - " << y[exIt->second[exampleIndex] ] << " -- " << filenamesTraining[exIt->second[exampleIndex]] << std::endl;

          Example example;
          NICE::Vector & xTrain = trainDataOrig[exIt->second[exampleIndex]];
          example.svec = new SparseVector(xTrain);
          //let's take this example and its corresponding label (which should be *clIt)
          examples.push_back ( pair<int, Example> ( y[exIt->second[exampleIndex] ], example ) );
          //remove the selected example so that it cannot be chosen again
          exIt->second.erase(exIt->second.begin()+exampleIndex);
        }
      }

      std::vector<std::string> filenamesUnlabeled;
      filenamesUnlabeled.clear();

      //which examples are left to be actively chosen later on?
      std::vector<int> unlabeledExamples( y.size() - trainExPerClass*classesAvailable.size() );
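      // note: unlabeledExamples stores indices into trainDataOrig / y, while
      // filenamesUnlabeled is filled as a parallel array with the corresponding filenames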
      int exCnt( 0 );
      for (std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ )
      {
        std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find(*clIt);
        //list all examples of this specific class
        for (std::vector<int>::const_iterator it = exIt->second.begin(); it != exIt->second.end(); it++)
        {
          unlabeledExamples[exCnt] = *it;
          exCnt++;
          filenamesUnlabeled.push_back( filenamesTraining[*it] );
        }
      }

      time_t prep_start_time = clock();

      //TODO balancing?
      //this would reduce the effect of the random sampling, so we will not do it ;)
      FPCGPHIK * classifier = new FPCGPHIK( &confCurrentRun );
      FeaturePool fp; // will be ignored
      classifier->train ( fp, examples );

      float time_preparation = ( float ) ( clock() - prep_start_time ) ;
      std::cerr << "Time for initial training: " << time_preparation / CLOCKS_PER_SEC << std::endl;

      int nrOfClassesUsed = classesAvailable.size();

      // ------------------ TESTING
      string testRun ( "test" + convertInt( run ) );
      const LabeledSet *test = md[ testRun ]; //previously, we only selected "test"; now we select the permutation for this run
      VVector testData;
      Vector yTest;
      readData< VVector, Vector > ( confCurrentRun, *test, testData, yTest, ext );

      NICE::Matrix confusionMatrix ( 2, 2 );
      confusionMatrix.set ( 0.0 );
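      // convention used below: rows index the ground-truth class, columns the predicted class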

      time_t start_time = clock();
      std::vector<int> chosen_examples_per_class ( nrOfClassesUsed );

      std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

      if ( do_classification )
      {
        ClassificationResults results;
        for ( uint i = 0 ; i < testData.size(); i++ )
        {
          Example example;
          const Vector & xstar = testData[i];
          SparseVector xstar_sparse ( xstar );
          OBJREC::ClassificationResult result;
          example.svec = &xstar_sparse;

          result = classifier->classify( example );
          result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;

          // std::cerr << "gt: " << result.classno_groundtruth << " -- " << result.classno << std::endl;
          // (result.scores).store( std::cerr );

          confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
          results.push_back( result );
        }

        float time_classification = ( float ) ( clock() - start_time ) ;
        if ( verbose >= LOW )
          cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << endl;
        ( classification_times[0] ).push_back ( time_classification / CLOCKS_PER_SEC );

        confusionMatrix.normalizeRowsL1();
        std::cerr << confusionMatrix;
        double avg_recognition_rate = 0.0;
        for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
        {
          avg_recognition_rate += confusionMatrix ( i, i );
        }
        avg_recognition_rate /= confusionMatrix.rows();
        std::cerr << "class: " << currentClass << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << examples.size() << " training examples used" << std::endl;
        recognitions_rates[0].push_back ( avg_recognition_rate*100 );

        std::cerr << "perform auc evaluation " << std::endl;
        double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
        std::cerr << "class: " << currentClass << " run: " << run << " AUC-score: " << score << " % -- " << examples.size() << " training examples used" << std::endl << std::endl;
        AUC_scores[0].push_back ( score*100 );
      }

      //Now start the Incremental-Learning-Part
      for (int incrementationStep = 0; incrementationStep < nrOfIncrements; incrementationStep++)
      {
        //choose examples used for the next training step
        Examples newExamples;

        //simply count how many possible examples we have
        int nrOfPossibleExamples( unlabeledExamples.size() );

        if (queryStrategy == RANDOM)
        {
          std::cerr << "print chosen examples: " << std::endl;
          for (int i = 0; i < incrementalAddSize; i++)
          {
            int exampleIndex ( rand() % ( unlabeledExamples.size() ) );

            Example newExample;
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exampleIndex] ];
            newExample.svec = new SparseVector( xTrain );
            int label( y[ unlabeledExamples[exampleIndex] ] );
            newExamples.push_back ( pair<int, Example> ( label, newExample ) );
            unlabeledExamples.erase( unlabeledExamples.begin()+exampleIndex );
            std::cerr << i+1 << " / " << incrementalAddSize << " : " << filenamesUnlabeled[ exampleIndex ] << std::endl;
            filenamesUnlabeled.erase( filenamesUnlabeled.begin()+exampleIndex );
            pickedExamplesPerClass[label]++;
          }
        } // end computation for RANDOM
        else if ( (queryStrategy == GPMEAN) || (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTIC) )
        {
          //compute uncertainty values for all examples according to the query strategy
          std::vector<std::pair<int,double> > scores;
          scores.clear();

          time_t unc_pred_start_time = clock();

          // std::cerr << "possible examples to query: " << unlabeledExamples.size() << std::endl;
          for (uint exIndex = 0; exIndex < unlabeledExamples.size(); exIndex++)
          {
            Example example;
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exIndex] ];
            SparseVector xTrainSparse ( xTrain );
            example.svec = &xTrainSparse;
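
            // GPMEAN is uncertainty sampling on the GP mean: the class-wise score with the
            // smallest absolute value measures the distance to the decision boundary, so the
            // example minimizing it is the most ambiguous one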
            if (queryStrategy == GPMEAN)
            {
              ClassificationResult r = classifier->classify( example );
              double bestScore( numeric_limits<double>::max() );
              for( int clCnt = 0; clCnt < nrOfClassesUsed; clCnt++)
              {
                if ( fabs(r.scores[clCnt]) < bestScore )
                  bestScore = fabs(r.scores[clCnt]);
              }
              scores.push_back( std::pair<int,double> ( exIndex, bestScore ) );
            }
            else if (queryStrategy == GPPREDVAR)
            {
              double uncertainty;
              //use the pred variance computation specified in the config file
              classifier->predictUncertainty( example, uncertainty );
              //take the maximum of the scores for the predictive variance
              scores.push_back( std::pair<int,double> ( exIndex, uncertainty) );
            }
            else if (queryStrategy == GPHEURISTIC)
            {
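              // the heuristic divides each absolute mean score by the predictive standard
              // deviation (including the noise term), i.e. |mu_c(x)| / sqrt(sigma_n^2 + sigma^2(x));
              // small values mean close to the boundary and still uncertain (this resembles
              // the GP active-learning criterion of Kapoor et al.)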
              double uncertainty;
              //use the pred variance computation specified in the config file
              classifier->predictUncertainty( example, uncertainty );

              //compute the mean values for every class
              ClassificationResult r = classifier->classify( example );
              NICE::Vector heuristicValues ( r.scores.size(), 0);
              for ( int tmp = 0; tmp < heuristicValues.size(); tmp++ )
              {
                heuristicValues[tmp] = fabs(r.scores[tmp]) / sqrt( squaredNoise + uncertainty );
              }
              //take the minimum of the scores for the heuristic measure
              scores.push_back( std::pair<int,double> ( exIndex, heuristicValues.Min()) );
            }
          }

          float time_score_computation = ( float ) ( clock() - unc_pred_start_time ) ;

          //pick the ones with the best score
          //we could speed this up using a more sophisticated search method
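          // (for instance, a std::partial_sort or a bounded heap over the incrementalAddSize
          // best entries would replace the repeated linear scans below)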
          if (queryStrategy == GPPREDVAR) //take the maximum of the scores for the predictive variance
          {
            std::set<int> chosenExamplesForThisRun;
            chosenExamplesForThisRun.clear();
            for (int i = 0; i < incrementalAddSize; i++)
            {
              std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
              std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

              for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt != scores.end(); jIt++)
              {
                if (jIt->second > bestExample->second)
                  bestExample = jIt;
                if (jIt->second < worstExample->second)
                  worstExample = jIt;
              }
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

              Example newExample;
              NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
              newExample.svec = new SparseVector( xTrain );
              //actually this is the ACTIVE LEARNING step (query a label)
              int label( y[ unlabeledExamples[bestExample->first] ] );
              newExamples.push_back ( pair<int, Example> ( label, newExample ) );
              //remember the index, to safely remove this example afterwards from unlabeledExamples
              chosenExamplesForThisRun.insert(bestExample->first);
              scores.erase(bestExample);
              pickedExamplesPerClass[label]++;
            }

            // std::cerr << "print chosen examples: " << std::endl;
            /* int tmpCnt(0);
            for (std::set<int>::const_iterator it = chosenExamplesForThisRun.begin(); it != chosenExamplesForThisRun.end(); it++, tmpCnt++)
            {
              std::cerr << tmpCnt+1 << " / " << incrementalAddSize << " : " << filenamesUnlabeled[ *it ] << std::endl;
            } */

            //delete the queried examples from the set of unlabeled ones
            //do this in decreasing order of the indices to ensure valid access
            for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
            {
              unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
            }
          }
          else //take the minimum of the scores for the heuristic and the gp mean (minimum margin)
          {
            std::set<int> chosenExamplesForThisRun;
            chosenExamplesForThisRun.clear();
            for (int i = 0; i < incrementalAddSize; i++)
            {
              std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
              std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

              for (std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt != scores.end(); jIt++)
              {
                if (jIt->second < bestExample->second)
                  bestExample = jIt;
                if (jIt->second > worstExample->second)
                  worstExample = jIt;
              }
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

              Example newExample;
              NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
              newExample.svec = new SparseVector( xTrain );
              //actually this is the ACTIVE LEARNING step (query a label)
              int label( y[ unlabeledExamples[bestExample->first] ] );
              newExamples.push_back ( pair<int, Example> ( label, newExample ) );
              //remember the index, to safely remove this example afterwards from unlabeledExamples
              chosenExamplesForThisRun.insert(bestExample->first);
              scores.erase(bestExample);
              pickedExamplesPerClass[label]++;
            }

            /* std::cerr << "print chosen examples: " << std::endl;
            int tmpCnt(0);
            for (std::set<int>::const_iterator it = chosenExamplesForThisRun.begin(); it != chosenExamplesForThisRun.end(); it++, tmpCnt++)
            {
              std::cerr << tmpCnt+1 << " / " << incrementalAddSize << " : " << filenamesUnlabeled[ *it ] << std::endl;
            } */

            //delete the queried examples from the set of unlabeled ones
            //do this in decreasing order of the indices to ensure valid access
            for (std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++)
            {
              unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
            }
          }

          std::cerr << "Time used to compute query-scores for " << nrOfPossibleExamples << " examples: " << time_score_computation / CLOCKS_PER_SEC << " [s]" << std::endl;
        } // end computation for GPMEAN, GPPREDVAR, or GPHEURISTIC

        std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

        time_t IL_add_start_time = clock();
        classifier->addMultipleExamples( newExamples );

        //release the memory used by newExamples
        for ( uint tmp = 0; tmp < newExamples.size(); tmp++ )
        {
          delete newExamples[tmp].second.svec;
          newExamples[tmp].second.svec = NULL;
        }

        float time_IL_add = ( float ) ( clock() - IL_add_start_time ) ;
        std::cerr << "Time for IL-adding of " << incrementalAddSize << " examples to already " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*incrementationStep << " training-examples: " << time_IL_add / CLOCKS_PER_SEC << " [s]" << std::endl;
        IL_training_times[incrementationStep].push_back(time_IL_add / CLOCKS_PER_SEC);

        //do the classification for evaluating the benefit of new examples
        if ( do_classification )
        {
          time_t start_time = clock();
          ClassificationResults results;
          confusionMatrix.set( 0.0 );
          for ( uint i = 0 ; i < testData.size(); i++ )
          {
            Example example;
            const Vector & xstar = testData[i];
            SparseVector xstar_sparse ( xstar );
            example.svec = &xstar_sparse;

            OBJREC::ClassificationResult result;
            result = classifier->classify( example );
            result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
            results.push_back( result );
            confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
          }

          float time_classification = ( float ) ( clock() - start_time ) ;
          if ( verbose >= LOW )
            std::cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << std::endl;
          ( classification_times[incrementationStep+1] ).push_back ( time_classification / CLOCKS_PER_SEC );

          confusionMatrix.normalizeRowsL1();
          std::cerr << confusionMatrix;
          double avg_recognition_rate ( 0.0 );
          for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
          {
            avg_recognition_rate += confusionMatrix ( i, i );
          }
          avg_recognition_rate /= confusionMatrix.rows();
          std::cerr << "class: " << currentClass << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl;
          recognitions_rates[incrementationStep+1].push_back ( avg_recognition_rate*100 );

          double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
          std::cerr << "class: " << currentClass << " run: " << run << " AUC-score: " << score << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl << std::endl;
          AUC_scores[incrementationStep+1].push_back ( score*100 );
        } //classification after IL adding
      } //end of incremental learning steps

      std::cerr << "Final statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

      //don't waste memory!
      delete classifier;
      for ( int tmp = 0; tmp < examples.size(); tmp++ )
      {
        delete examples[tmp].second.svec;
        examples[tmp].second.svec = NULL;
      }
    } //runs

    // ================= EVALUATION =========================
    int nrOfClassesUsed ( 2 ); //binary setting

    if ( do_classification )
    {
      std::cerr << "========================" << std::endl;
      std::cerr << " final evaluation for class: " << currentClass << std::endl;

      std::cerr << "content of classification_times: " << std::endl;
      for ( std::vector<std::vector<float> >::const_iterator it = classification_times.begin(); it != classification_times.end(); it++ )
      {
        for ( std::vector<float>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::vector<float> mean_classification_times;
      std::vector<float> std_dev_classification_times;
      for ( std::vector<std::vector<float> >::const_iterator it = classification_times.begin(); it != classification_times.end(); it++ )
      {
        float mean_classification_time ( 0.0 );
        for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_classification_time += *itRun;
        }
        mean_classification_time /= it->size();
        mean_classification_times.push_back ( mean_classification_time );

        double std_dev_classification_time ( 0.0 );
        for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_classification_time += pow ( *itRun - mean_classification_time, 2 );
        }
        std_dev_classification_time /= it->size();
        std_dev_classification_time = sqrt ( std_dev_classification_time );
        std_dev_classification_times.push_back ( std_dev_classification_time );
      }

      int datasize ( nrOfClassesUsed*trainExPerClass );
      for ( uint i = 0; i < mean_classification_times.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean classification time: " << mean_classification_times[i] << " std_dev classification time: " << std_dev_classification_times[i] << std::endl;
        datasize += incrementalAddSize;
      }
    }
    else
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "No classification done, therefore no classification times are available." << std::endl;
    }

    std::cerr << "========================" << std::endl;
    std::cerr << "content of IL_training_times for class: " << currentClass << std::endl;
    for ( std::vector<std::vector<float> >::const_iterator it = IL_training_times.begin(); it != IL_training_times.end(); it++ )
    {
      for ( std::vector<float>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
      {
        std::cerr << *jt << " ";
      }
      std::cerr << std::endl;
    }

    std::vector<float> mean_IL_training_times;
    std::vector<float> std_dev_IL_training_times;
    for ( std::vector<std::vector<float> >::const_iterator it = IL_training_times.begin(); it != IL_training_times.end(); it++ )
    {
      float mean_IL_training_time ( 0.0 );
      for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        mean_IL_training_time += *itRun;
      }
      mean_IL_training_time /= it->size();
      mean_IL_training_times.push_back ( mean_IL_training_time );

      double std_dev_IL_training_time ( 0.0 );
      for ( std::vector<float>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        std_dev_IL_training_time += pow ( *itRun - mean_IL_training_time, 2 );
      }
      std_dev_IL_training_time /= it->size();
      std_dev_IL_training_time = sqrt ( std_dev_IL_training_time );
      std_dev_IL_training_times.push_back ( std_dev_IL_training_time );
    }

    int datasize ( nrOfClassesUsed*trainExPerClass );
    for ( uint i = 0; i < mean_IL_training_times.size(); i++)
    {
      cerr << "size: " << datasize << " and adding " << incrementalAddSize << " mean IL_training time: " << mean_IL_training_times[i] << " std_dev IL_training time: " << std_dev_IL_training_times[i] << endl;
      datasize += incrementalAddSize;
    }

    if ( do_classification )
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "content of recognition_rates for class: " << currentClass << std::endl;
      for ( std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
      {
        for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::cerr << "calculating final recognition_rates for class: " << currentClass << std::endl;
      std::vector<double> mean_recs;
      std::vector<double> std_dev_recs;
      for (std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
      {
        double mean_rec ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_rec += *itRun;
        }
        mean_rec /= it->size();
        mean_recs.push_back ( mean_rec );

        double std_dev_rec ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_rec += pow ( *itRun - mean_rec, 2 );
        }
        std_dev_rec /= it->size();
        std_dev_rec = sqrt ( std_dev_rec );
        std_dev_recs.push_back ( std_dev_rec );
      }

      int datasize ( nrOfClassesUsed*trainExPerClass );
      for ( uint i = 0; i < recognitions_rates.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean_IL: " << mean_recs[i] << " std_dev_IL: " << std_dev_recs[i] << std::endl;
        datasize += incrementalAddSize;
      }

      std::cerr << "========================" << std::endl;
      std::cerr << "content of AUC_scores for class: " << currentClass << std::endl;
      for ( std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
      {
        for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
        {
          std::cerr << *jt << " ";
        }
        std::cerr << std::endl;
      }

      std::cerr << "calculating final AUC_scores for class: " << currentClass << std::endl;
      std::vector<double> mean_aucs;
      std::vector<double> std_dev_aucs;
      for (std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
      {
        double mean_auc ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          mean_auc += *itRun;
        }
        mean_auc /= it->size();
        mean_aucs.push_back ( mean_auc );

        double std_dev_auc ( 0.0 );
        for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
        {
          std_dev_auc += pow ( *itRun - mean_auc, 2 );
        }
        std_dev_auc /= it->size();
        std_dev_auc = sqrt ( std_dev_auc );
        std_dev_aucs.push_back ( std_dev_auc );
      }

      datasize = nrOfClassesUsed*trainExPerClass;
      for ( uint i = 0; i < recognitions_rates.size(); i++)
      {
        std::cerr << "size: " << datasize << " mean_IL: " << mean_aucs[i] << " std_dev_IL: " << std_dev_aucs[i] << std::endl;
        datasize += incrementalAddSize;
      }
    }
    else
    {
      std::cerr << "========================" << std::endl;
      std::cerr << "No classification done, therefore no classification times are available." << std::endl;
    }
  } //for int currentClass...

  return 0;
}