activeLearningCheckerBoard.cpp

/**
 * @file ActiveLearningCheckerBoard.cpp
 * @brief Incrementally train the GP-HIK classifier, using the predictive variance and its approximations to select new samples, and evaluate with binary classification tests. We do not use the fast-hik implementations, but perform the computations explicitly.
 * @author Alexander Freytag
 * @date 11-06-2012
 */
#include <vector>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <set>

#include <core/basics/Config.h>
#include <core/basics/StringTools.h>
#include <core/basics/Timer.h>

#include <core/image/ImageT.h>
#include <core/image/ColorImageT.h>
#include <core/image/CircleT.h>
#include <core/image/LineT.h>

// QT interface for image display
// We only use the simple function showImage in this example, but there is far more
// to explore in this area.
#include <core/imagedisplay/ImageDisplay.h>

#include "core/algebra/CholeskyRobust.h"
#include "core/vector/Algorithms.h"
#include <core/vector/SparseVectorT.h>
#include <core/vector/VectorT.h>

//----------

#include "vislearning/baselib/ProgressBar.h"
#include <vislearning/baselib/Globals.h>
#include <vislearning/classifier/kernelclassifier/KCGPRegOneVsAll.h>
#include <vislearning/classifier/fpclassifier/gphik/FPCGPHIK.h>
#include "vislearning/cbaselib/MultiDataset.h"
#include <vislearning/cbaselib/LabeledSet.h>
#include "vislearning/cbaselib/ClassificationResults.h"
#include <vislearning/math/kernels/KernelData.h>

//----------

#include "gp-hik-exp/progs/datatools.h"

//----------
using namespace std;
using namespace NICE;
using namespace OBJREC;
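
// Query strategies for selecting new samples to be labeled
// (the concrete scores are computed in main, see the per-strategy branches there):
//   RANDOM          -- pick unlabeled examples uniformly at random
//   GPMEAN          -- query the smallest absolute predictive mean |mu|
//   GPPREDVAR       -- query the largest predictive variance
//   GPHEURISTIC     -- query the smallest |mu| / sqrt( noise^2 + variance )
//   GPHEURISTICPLUS -- query the largest |mu| + sqrt( noise^2 + variance )
//   GPBALANCE       -- query the smallest min( |mu-1|, |mu+1| ) / ( noise^2 + variance )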
enum QueryStrategy
{
  RANDOM = 0,
  GPMEAN,
  GPPREDVAR,
  GPHEURISTIC,
  GPHEURISTICPLUS,
  GPBALANCE
};
std::string convertInt( int number )
{
  stringstream ss;   // create a stringstream
  ss << number;      // add the number to the stream
  return ss.str();   // return a string with the contents of the stream
}
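
/**
 * @brief Sample a 2D checkerboard dataset with nrOfSectorsProDim x nrOfSectorsProDim sectors,
 *        each sizeOfSector x sizeOfSector pixels large, and examplesPerSector points per sector.
 *        Sectors are labeled in a checkerboard pattern, i.e., label = (i+j) % 2.
 *        Every example is stored as a 3-dimensional feature (x, y, 1-x-y) with x and y
 *        normalized such that the entries sum to one, so each example forms a small histogram.
 */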
void sampleFromCheckerboard( const int & nrOfSectorsProDim, const int & sizeOfSector, const int & examplesPerSector, std::vector<NICE::Vector> & examples, NICE::Vector & labels )
{
  int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );

  // set the labels
  labels.resize( nrOfSectorsTotal * examplesPerSector );
  for ( int ex = 0; ex < examplesPerSector; ex++ )
  {
    for ( int i = 0; i < nrOfSectorsProDim; i++ )
    {
      for ( int j = 0; j < nrOfSectorsProDim; j++ )
      {
        labels[ (i*nrOfSectorsProDim+j)*examplesPerSector + ex ] = ( i + j ) % 2;
      }
    }
  }

  for ( int i = 0; i < nrOfSectorsProDim; i++ )
  {
    for ( int j = 0; j < nrOfSectorsProDim; j++ )
    {
      for ( int ex = 0; ex < examplesPerSector; ex++ )
      {
        NICE::Vector example( 3 );
        double xi ( rand() % sizeOfSector + i * sizeOfSector );
        double yi ( rand() % sizeOfSector + j * sizeOfSector );

        // compute normalized histograms
        example[0] = xi / (nrOfSectorsTotal*sizeOfSector);
        example[1] = yi / (nrOfSectorsTotal*sizeOfSector);
        example[2] = 1.0 - example[0] - example[1];
        examples.push_back( example );
      }
    }
  }
}
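
// NOTE on the paint helpers below: the first two feature dimensions are mapped back to pixel
// coordinates by multiplying with nrOfSectorsTotal*sizeOfSector, i.e., by inverting the
// normalization applied in sampleFromCheckerboard.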
void paintImageBorders( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector )
{
  std::cerr << "img.width(): " << img.width() << " img.height(): " << img.height() << std::endl;
  std::cerr << "nrOfSectorsProDim*sizeOfSector-1: " << nrOfSectorsProDim*sizeOfSector-1 << std::endl;

  NICE::Line l1 ( NICE::Coord( 0, 0 ) , NICE::Coord ( 0, nrOfSectorsProDim*sizeOfSector-1 ) );
  NICE::Line l2 ( NICE::Coord( 0, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1 ) );
  NICE::Line l3 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, nrOfSectorsProDim*sizeOfSector-1 ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector-1, 0 ) );
  NICE::Line l4 ( NICE::Coord( nrOfSectorsProDim*sizeOfSector-1, 0 ) , NICE::Coord ( 0, 0 ) );

  l1.draw( img, Color ( 0, 0, 0 ) );
  l2.draw( img, Color ( 0, 0, 0 ) );
  l3.draw( img, Color ( 0, 0, 0 ) );
  l4.draw( img, Color ( 0, 0, 0 ) );
}

void paintSectorsInImage( NICE::ColorImage & img, const int & nrOfSectorsProDim, const int & sizeOfSector )
{
  for ( int i = 1; i < nrOfSectorsProDim; i++ )
  {
    NICE::Line lHor ( NICE::Coord( 0, i*sizeOfSector ) , NICE::Coord ( nrOfSectorsProDim*sizeOfSector, i*sizeOfSector ) );
    NICE::Line lVer ( NICE::Coord( i*sizeOfSector, 0 ) , NICE::Coord ( i*sizeOfSector, nrOfSectorsProDim*sizeOfSector ) );
    lHor.draw( img, Color ( 0, 0, 0 ) );
    lVer.draw( img, Color ( 0, 0, 0 ) );
  }
}
void paintLabeledExamples( NICE::ColorImage & img, const NICE::Vector & y, const Examples & examples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter )
{
  int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
  for ( uint lE = 0; lE < examples.size(); lE++ )
  {
//     if ( y[lE] != 1 )
//     {
//       NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal * sizeOfSector ) ,
//                                        (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal * sizeOfSector ) ), diameter );
//       circ.draw ( img, Color ( 255, 0, 0 ) );
//     }
//     else
//     {
//       NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal * sizeOfSector ) ,
//                                        (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal * sizeOfSector ) ), diameter );
//       circ.draw ( img, Color ( 0, 0, 255 ) );
//     }

    int thickness ( 2 );
    for ( int i = 0; i < thickness; i++ )
    {
      NICE::Circle circ ( NICE::Coord( (int) ( (* examples[lE].second.svec) [0] * nrOfSectorsTotal * sizeOfSector ) ,
                                       (int) ( (* examples[lE].second.svec) [1] * nrOfSectorsTotal * sizeOfSector ) ), diameter-i );
      circ.draw ( img, Color ( 0, 0, 0 ) ); // old: ( 0, 255, 0 )
    }
  }
}
void paintUnlabeledExamples( NICE::ColorImage & img, const vector< NICE::Vector > & trainDataOrig, const NICE::Vector & y, const std::vector<int> & unlabeledExamples, const int & nrOfSectorsProDim, const int & sizeOfSector, const int & diameter )
{
  int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
  for ( uint uE = 0; uE < unlabeledExamples.size(); uE++ )
  {
    if ( y[ unlabeledExamples[uE] ] == 0 )
    {
      NICE::Circle circ ( NICE::Coord( (int) ( trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal * sizeOfSector ),
                                       (int) ( trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal * sizeOfSector ) ) , diameter );
      circ.draw ( img, Color ( 255, 0, 0 ) );
    }
    else
    {
      NICE::Circle circ ( NICE::Coord( (int) ( trainDataOrig[ unlabeledExamples[uE] ] [0] * nrOfSectorsTotal * sizeOfSector ) ,
                                       (int) ( trainDataOrig[ unlabeledExamples[uE] ] [1] * nrOfSectorsTotal * sizeOfSector ) ) , diameter );
      circ.draw ( img, Color ( 0, 0, 255 ) );
    }
  }
}

void paintClassificationResult( NICE::ColorImage & img, const NICE::Vector & xstar, const int & diameter, const ClassificationResult & result, const int & nrOfSectorsProDim, const int & sizeOfSector )
{
  int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
  NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal * sizeOfSector ) ,
                                   (int) ( xstar[1] * nrOfSectorsTotal * sizeOfSector ) ), diameter );
  if ( result.classno == 1 ) // classified as negative
  {
    circ.draw ( img, Color ( 0, 0, 255 ) );
  }
  else // classified as positive
  {
    circ.draw ( img, Color ( 255, 0, 0 ) );
  }
}

void paintQueriedExamples( NICE::ColorImage & img, const NICE::Vector & xstar, const int & diameter, const int & nrOfSectorsProDim, const int & sizeOfSector )
{
  int nrOfSectorsTotal ( nrOfSectorsProDim * nrOfSectorsProDim );
  int thickness ( 2 );
  for ( int i = 0; i < thickness; i++ )
  {
    NICE::Circle circ ( NICE::Coord( (int) ( xstar[0] * nrOfSectorsTotal * sizeOfSector ) ,
                                     (int) ( xstar[1] * nrOfSectorsTotal * sizeOfSector ) ), diameter-i );
    circ.draw ( img, Color ( 0, 0, 0 ) ); // old: ( 0, 255, 0 )
  }
}
/**
 * Samples a synthetic checkerboard dataset, incrementally trains the GP-HIK classifier,
 * and compares different active learning query strategies in a binary classification setting.
 */
int main ( int argc, char **argv )
{
  std::cout.precision ( 10 );
  std::cerr.precision ( 10 );

  NICE::Config conf ( argc, argv );
  int trainExPerClass = conf.gI ( "main", "trainExPerClass", 10 );
  int incrementalAddSize = conf.gI ( "main", "incrementalAddSize", 1 );
  int nrOfIncrements = conf.gI ( "main", "nrOfIncrements", 9 );
  int num_runs = conf.gI ( "main", "num_runs", 10 );
  bool do_classification = conf.gB ( "main", "do_classification", true );

  double noise = conf.gD ( "GPHIKClassifier", "noise", 0.01 );
  double squaredNoise = pow( noise, 2 );

  int sizeOfSector = conf.gI ( "main", "sizeOfSector", 250 );
  int nrOfSectorsProDim = conf.gI ( "main", "nrOfSectorsProDim", 2 );
  int examplesPerSector = conf.gI ( "main", "examplesPerSector", 5 );
  int examplesPerSectorTest = conf.gI ( "main", "examplesPerSectorTest", 50 );

  bool visualizationOfResults = conf.gB ( "main", "visualizationOfResults", true );
  bool paintSectorBorders = conf.gB ( "main", "paintSectorBorders" , true );
  bool saveImages = conf.gB ( "main", "saveImages", false );
  std::string destinationForImages = conf.gS ( "main", "destinationForImages", "" );

  string queryStrategyString = conf.gS ( "main", "queryStrategy", "random" );
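
  // A minimal config sketch covering the settings read above (illustrative values; this
  // assumes the usual INI-style [section] key=value format of NICE::Config -- every key
  // falls back to the default given in the corresponding conf.g* call):
  //
  //   [main]
  //   trainExPerClass = 10
  //   incrementalAddSize = 1
  //   nrOfIncrements = 9
  //   num_runs = 10
  //   do_classification = true
  //   sizeOfSector = 250
  //   nrOfSectorsProDim = 2
  //   examplesPerSector = 5
  //   examplesPerSectorTest = 50
  //   visualizationOfResults = true
  //   paintSectorBorders = true
  //   saveImages = false
  //   destinationForImages = /tmp/
  //   queryStrategy = gpHeuristic
  //
  //   [GPHIKClassifier]
  //   noise = 0.01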
  QueryStrategy queryStrategy;
  if ( queryStrategyString.compare("gpMean") == 0 )
  {
    queryStrategy = GPMEAN;
  }
  else if ( queryStrategyString.compare("gpPredVar") == 0 )
  {
    queryStrategy = GPPREDVAR;
  }
  else if ( queryStrategyString.compare("gpHeuristic") == 0 )
  {
    queryStrategy = GPHEURISTIC;
  }
  else if ( queryStrategyString.compare("gpHeuristicPlus") == 0 )
  {
    queryStrategy = GPHEURISTICPLUS;
  }
  else if ( queryStrategyString.compare("gpBalance") == 0 )
  {
    queryStrategy = GPBALANCE;
  }
  else
  {
    queryStrategy = RANDOM;
  }

  bool verbose = conf.gB ( "main", "verbose", false );
  /* initialize the random seed: */
  // srand ( time ( NULL ) );
  // srand ( 0 ); // seed with 0 for reproducible results
  // =========================== INIT ===========================
  std::vector<std::vector<double> > recognitions_rates ( nrOfIncrements+1 );
  std::vector<std::vector<double> > AUC_scores ( nrOfIncrements+1 );
  std::vector<std::vector<float> > classification_times ( nrOfIncrements+1 );
  std::vector<std::vector<float> > IL_training_times ( nrOfIncrements );

  for ( int run = 0; run < num_runs; run++ )
  {
    std::cerr << "run: " << run << std::endl;
    srand ( run * 100000 ); // run-dependent seed for reproducible results

    // sample the training set
    vector< NICE::Vector > trainDataOrig;
    Vector y;
    sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSector, trainDataOrig, y );

    // ------------------ TESTING
    std::vector<NICE::Vector> testData;
    Vector yTest;
    sampleFromCheckerboard( nrOfSectorsProDim, sizeOfSector, examplesPerSectorTest, testData, yTest );

    if ( verbose )
    {
      for ( uint i = 0; i < trainDataOrig.size(); i++ )
      {
        std::cerr << i << " : " << trainDataOrig[i] << std::endl;
      }
      std::cerr << "resulting binary label vector:" << y << std::endl;
    }

    std::set<int> classesAvailable;
    classesAvailable.insert( 0 ); // we have a single negative class
    classesAvailable.insert( 1 ); // and we have a single positive class

    std::map<int,int> nrExamplesPerClassInDataset; // simply count how many examples for every class are available
    std::map<int,std::vector<int> > examplesPerClassInDataset; // as well as their corresponding indices in the dataset

    // initialize this storage
    for ( std::set<int>::const_iterator it = classesAvailable.begin(); it != classesAvailable.end(); it++ )
    {
      nrExamplesPerClassInDataset.insert( std::pair<int,int>( *it, 0 ) );
      examplesPerClassInDataset.insert( std::pair<int,std::vector<int> >( *it, std::vector<int>(0) ) );
    }

    // store the indices of the examples
    for ( uint i = 0; i < y.size(); i++ )
    {
      ( examplesPerClassInDataset.find( y[i] )->second ).push_back( i );
    }

    // and count how many examples are in every class
    for ( std::map<int,std::vector<int> >::const_iterator it = examplesPerClassInDataset.begin(); it != examplesPerClassInDataset.end(); it++ )
    {
      nrExamplesPerClassInDataset.find( it->first )->second = it->second.size();
    }

    // simple output to tell how many examples we have for every class
    for ( std::map<int,int>::const_iterator it = nrExamplesPerClassInDataset.begin(); it != nrExamplesPerClassInDataset.end(); it++ )
    {
      cerr << it->first << ": " << it->second << endl;
    }
    Examples examples;

    // count how many examples of every class we have while actively selecting new examples
    // NOTE: this works only if the class numbers are consecutive
    NICE::Vector pickedExamplesPerClass( classesAvailable.size(), trainExPerClass );

    std::map<int,std::vector<int> > examplesPerClassInDatasetTmp ( examplesPerClassInDataset );

    // choose the initial training examples for every class
    // we pick them at random from the indices that are still available for each class
    for ( std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ )
    {
      std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find( *clIt );
      if ( verbose )
        std::cerr << "pick training examples for class " << *clIt << std::endl;

      for ( int i = 0; i < trainExPerClass; i++ )
      {
        if ( verbose )
          std::cerr << "i: " << i << std::endl;
        int exampleIndex ( rand() % ( exIt->second.size() ) );
        if ( verbose )
          std::cerr << "pick example " << exIt->second[exampleIndex] << " - " << y[ exIt->second[exampleIndex] ] << std::endl;

        Example example;
        NICE::Vector & xTrain = trainDataOrig[ exIt->second[exampleIndex] ];
        example.svec = new SparseVector( xTrain );
        // let's take this example and its corresponding label (which should be *clIt)
        examples.push_back ( pair<int, Example> ( y[ exIt->second[exampleIndex] ], example ) );
        // remove the chosen index so that it cannot be picked again
        exIt->second.erase( exIt->second.begin()+exampleIndex );
      }
    }

    for ( uint i = 0; i < examples.size(); i++ )
    {
      std::cerr << i << " : ";
      examples[i].second.svec->store( std::cerr );
    }
    // which examples are left to be actively chosen later on?
    std::vector<int> unlabeledExamples( y.size() - trainExPerClass*classesAvailable.size() );
    int exCnt ( 0 );
    for ( std::set<int>::const_iterator clIt = classesAvailable.begin(); clIt != classesAvailable.end(); clIt++ )
    {
      std::map<int,std::vector<int> >::iterator exIt = examplesPerClassInDatasetTmp.find( *clIt );
      // list all examples of this specific class
      for ( std::vector<int>::const_iterator it = exIt->second.begin(); it != exIt->second.end(); it++ )
      {
        unlabeledExamples[exCnt] = *it;
        exCnt++;
      }
    }
    // Fast-HIK
    FPCGPHIK * classifier = new FPCGPHIK( &conf );

    time_t prep_start_time = clock();
    FeaturePool fp; // will be ignored
    classifier->train ( fp, examples );
    float time_preparation = ( float ) ( clock() - prep_start_time );
    std::cerr << "Time for training: " << time_preparation / CLOCKS_PER_SEC << std::endl;

    // this is only needed for the visualization
    NICE::Vector yBinGP ( examples.size(), -1 );
    for ( uint i = 0; i < examples.size(); i++ )
    {
      if ( examples[i].first == 1 )
        yBinGP[i] = 1;
    }
    std::cerr << "yBinGP: " << yBinGP << std::endl;

    int nrOfClassesUsed = classesAvailable.size();
    if ( visualizationOfResults )
    {
      NICE::ColorImage img ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
      img.set( 255, 255, 255 );

      if ( paintSectorBorders )
        paintSectorsInImage( img, nrOfSectorsProDim, sizeOfSector );
      paintImageBorders( img, nrOfSectorsProDim, sizeOfSector );

      // paint the examples whose labels we already know
      paintLabeledExamples( img, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
      // and those that we can still query
      paintUnlabeledExamples( img, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 );

      if ( saveImages )
      {
        img.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_000_initialBoard.ppm" );
      }
      else
        showImage( img, "Initial Checkerboard" );
    }
    NICE::Matrix confusionMatrix ( 2, 2 );
    confusionMatrix.set ( 0.0 );

    time_t start_time = clock();
    std::vector<int> chosen_examples_per_class ( nrOfClassesUsed );
    std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

    if ( do_classification )
    {
      NICE::ColorImage imgTest;
      if ( visualizationOfResults )
      {
        imgTest.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
        imgTest.set( 255, 255, 255 );

        if ( paintSectorBorders )
          paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector );
        paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector );

        // again paint our labeled training images used so far
        paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
      }

      ClassificationResults results;
      ClassificationResult result;
      for ( uint i = 0 ; i < testData.size(); i++ )
      {
        const Vector & xstar = testData[i];
        SparseVector xstar_sparse ( xstar );

        Example example;
        example.svec = &xstar_sparse;
        result = classifier->classify( example );

        if ( visualizationOfResults )
          paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector );

        result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
        confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
        results.push_back( result );
      }

      if ( visualizationOfResults )
      {
        if ( saveImages )
        {
          imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(0)+"ClassifResult.ppm" );
        }
        else
          showImage( imgTest, "Classification Result" );
      }

      float time_classification = ( float ) ( clock() - start_time );
      if ( verbose )
        cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << endl;
      ( classification_times[0] ).push_back ( time_classification / CLOCKS_PER_SEC );

      confusionMatrix.normalizeRowsL1();
      std::cerr << confusionMatrix;

      double avg_recognition_rate = 0.0;
      for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
      {
        avg_recognition_rate += confusionMatrix ( i, i );
      }
      avg_recognition_rate /= confusionMatrix.rows();

      std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << examples.size() << " training examples used" << std::endl;
      recognitions_rates[0].push_back ( avg_recognition_rate*100 );

      std::cerr << "number of classified examples: " << results.size() << std::endl;
      std::cerr << "perform auc evaluation "<< std::endl;
      double aucScore = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );

      std::cerr << " run: " << run << " AUC-score: " << aucScore << " % -- " << examples.size() << " training examples used" << std::endl << std::endl;
      AUC_scores[0].push_back ( aucScore*100 );
    }
    // Now start the incremental learning part
    for ( int incrementationStep = 0; incrementationStep < nrOfIncrements; incrementationStep++ )
    {
      // simply count how many possible examples we have
      int nrOfPossibleExamples ( unlabeledExamples.size() );

      // collect the examples newly queried in this increment
      Examples newExamples;

      NICE::ColorImage imgAL;
      if ( visualizationOfResults )
      {
        imgAL.resize ( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
        imgAL.set( 255, 255, 255 );

        if ( paintSectorBorders )
          paintSectorsInImage( imgAL, nrOfSectorsProDim, sizeOfSector );
        paintImageBorders( imgAL, nrOfSectorsProDim, sizeOfSector );

        // again paint our labeled training examples used so far
        paintLabeledExamples( imgAL, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
        // and paint the unlabeled examples that are available to query
        paintUnlabeledExamples( imgAL, trainDataOrig, y, unlabeledExamples, nrOfSectorsProDim, sizeOfSector, 2 );
      }
      if ( queryStrategy == RANDOM )
      {
        if ( verbose )
          std::cerr << "print chosen examples: " << std::endl;

        for ( int i = 0; i < incrementalAddSize; i++ )
        {
          int exampleIndex ( rand() % ( unlabeledExamples.size() ) );

          Example newExample;
          NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exampleIndex] ];
          newExample.svec = new SparseVector( xTrain );
          int label ( y[ unlabeledExamples[exampleIndex] ] );
          // store this example for the visualization
          examples.push_back ( pair<int, Example> ( label, newExample ) );
          // and store it to add it to the classifier
          newExamples.push_back ( pair<int, Example> ( label, newExample ) );
          unlabeledExamples.erase( unlabeledExamples.begin()+exampleIndex );

          if ( verbose )
            std::cerr << i+1 << " / " << incrementalAddSize << std::endl;

          pickedExamplesPerClass[label]++;
          yBinGP.append( label );

          if ( visualizationOfResults )
            paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
        }
      } // end computation for RANDOM
      else if ( (queryStrategy == GPMEAN) || (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTIC) || (queryStrategy == GPHEURISTICPLUS) || (queryStrategy == GPBALANCE) )
      {
        // compute uncertainty values for all examples according to the query strategy
        std::vector<std::pair<int,double> > scores;
        scores.clear();

        time_t unc_pred_start_time = clock();

        for ( uint exIndex = 0; exIndex < unlabeledExamples.size(); exIndex++ )
        {
          NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[exIndex] ];
          SparseVector xTrainSparse ( xTrain );

          Example example;
          example.svec = &xTrainSparse;

          if ( queryStrategy == GPMEAN )
          {
            // compute the resulting score
            ClassificationResult r = classifier->classify( example );
            // we only have two classes with "inverse" outputs
            scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) ) );
          }
          else if ( queryStrategy == GPPREDVAR )
          {
            double uncertainty;
            // use the predictive variance computation specified in the config file
            classifier->predictUncertainty( example, uncertainty );
            // we will later take the maximum of these scores (predictive variance)
            scores.push_back( std::pair<int,double> ( exIndex, uncertainty ) );
          }
          else if ( queryStrategy == GPHEURISTIC )
          {
            double uncertainty;
            // use the predictive variance computation specified in the config file
            classifier->predictUncertainty( example, uncertainty );
            // compute the mean values for every class
            ClassificationResult r = classifier->classify( example );
            // we will later take the minimum of these scores (uncertainty-normalized mean)
            scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) / sqrt( squaredNoise + uncertainty ) ) );
          }
          else if ( queryStrategy == GPHEURISTICPLUS )
          {
            double uncertainty;
            // use the predictive variance computation specified in the config file
            classifier->predictUncertainty( example, uncertainty );
            // compute the mean values for every class
            ClassificationResult r = classifier->classify( example );
            // we will later take the maximum of these scores (mean plus uncertainty)
            scores.push_back( std::pair<int,double> ( exIndex, fabs(r.scores[0]) + sqrt( squaredNoise + uncertainty ) ) );
          }
          else if ( queryStrategy == GPBALANCE )
          {
            double uncertainty;
            // use the predictive variance computation specified in the config file
            classifier->predictUncertainty( example, uncertainty );
            // compute the mean values for every class
            ClassificationResult r = classifier->classify( example );
            double scorePositive ( fabs( r.scores[0] - 1.0 ) );
            double scoreNegative ( fabs( r.scores[0] + 1.0 ) );
            double score = scorePositive < scoreNegative ? scorePositive : scoreNegative;
            // we will later take the minimum of these scores (balanced measure)
            scores.push_back( std::pair<int,double> ( exIndex, score / ( squaredNoise + uncertainty ) ) );
          }
        }
        float time_score_computation = ( float ) ( clock() - unc_pred_start_time );

        // pick the examples with the best scores
        // we could speed this up using a more sophisticated search method

        if ( (queryStrategy == GPPREDVAR) || (queryStrategy == GPHEURISTICPLUS) ) // take the maximum of the scores for the predictive variance or the new weight
        {
          std::set<int> chosenExamplesForThisRun;
          chosenExamplesForThisRun.clear();

          for ( int i = 0; i < incrementalAddSize; i++ )
          {
            std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
            std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

            for ( std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt != scores.end(); jIt++ )
            {
              if ( jIt->second > bestExample->second )
                bestExample = jIt;
              if ( jIt->second < worstExample->second )
                worstExample = jIt;
            }
            if ( verbose )
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

            Example newExample;
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
            newExample.svec = new SparseVector( xTrain );
            // actually this is the ACTIVE LEARNING step (query a label)
            int label ( y[ unlabeledExamples[bestExample->first] ] );
            // store this example for the visualization
            examples.push_back ( pair<int, Example> ( label, newExample ) );
            // and store it to add it to the classifier
            newExamples.push_back ( pair<int, Example> ( label, newExample ) );
            // remember the index, to safely remove this example afterwards from unlabeledExamples
            chosenExamplesForThisRun.insert( bestExample->first );
            scores.erase( bestExample );
            pickedExamplesPerClass[label]++;
            yBinGP.append( label );

            if ( visualizationOfResults )
              paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
          }

          // delete the queried examples from the set of unlabeled ones
          // do this in decreasing order of indices to ensure valid access
          for ( std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++ )
          {
            unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
          }
        }
        else // take the minimum of the scores for the heuristic, the balanced measure, and the GP mean (minimum margin)
        {
          std::set<int> chosenExamplesForThisRun;
          chosenExamplesForThisRun.clear();

          for ( int i = 0; i < incrementalAddSize; i++ )
          {
            std::vector<std::pair<int,double> >::iterator bestExample = scores.begin();
            std::vector<std::pair<int,double> >::iterator worstExample = scores.begin();

            for ( std::vector<std::pair<int,double> >::iterator jIt = scores.begin(); jIt != scores.end(); jIt++ )
            {
              if ( jIt->second < bestExample->second )
                bestExample = jIt;
              if ( jIt->second > worstExample->second )
                worstExample = jIt;
            }
            if ( verbose )
              std::cerr << "i: " << i << " bestExample: " << bestExample->second << " worstExample: " << worstExample->second << std::endl;

            Example newExample;
            NICE::Vector & xTrain = trainDataOrig[ unlabeledExamples[bestExample->first] ];
            newExample.svec = new SparseVector( xTrain );
            // actually this is the ACTIVE LEARNING step (query a label)
            int label ( y[ unlabeledExamples[bestExample->first] ] );
            // store this example for the visualization
            examples.push_back ( pair<int, Example> ( label, newExample ) );
            // and store it to add it to the classifier
            newExamples.push_back ( pair<int, Example> ( label, newExample ) );
            // remember the index, to safely remove this example afterwards from unlabeledExamples
            chosenExamplesForThisRun.insert( bestExample->first );
            scores.erase( bestExample );
            pickedExamplesPerClass[label]++;
            yBinGP.append( label );

            if ( visualizationOfResults )
              paintQueriedExamples( imgAL, xTrain, 10, nrOfSectorsProDim, sizeOfSector );
          }

          // delete the queried examples from the set of unlabeled ones
          // do this in decreasing order of indices to ensure valid access
          for ( std::set<int>::const_reverse_iterator it = chosenExamplesForThisRun.rbegin(); it != chosenExamplesForThisRun.rend(); it++ )
          {
            unlabeledExamples.erase( unlabeledExamples.begin()+(*it) );
          }
        }
        std::cerr << "Time used to compute query-scores for " << nrOfPossibleExamples << " examples: " << time_score_computation / CLOCKS_PER_SEC << " [s]" << std::endl;
      } // end computation for GPMEAN, GPPREDVAR, GPHEURISTIC, GPHEURISTICPLUS, GPBALANCE
      if ( visualizationOfResults )
      {
        if ( saveImages )
        {
          imgAL.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"_queries.ppm" );
        }
        else
          showImage( imgAL, "Old and new queried example" );
      }

      std::cerr << "Current statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

      // incremental learning
      classifier->addMultipleExamples( newExamples );

      // do the classification for evaluating the benefit of new examples
      if ( do_classification )
      {
        NICE::ColorImage imgTest;
        if ( visualizationOfResults )
        {
          imgTest.resize( nrOfSectorsProDim*sizeOfSector, nrOfSectorsProDim*sizeOfSector );
          imgTest.set( 255, 255, 255 );

          if ( paintSectorBorders )
            paintSectorsInImage( imgTest, nrOfSectorsProDim, sizeOfSector );
          paintImageBorders( imgTest, nrOfSectorsProDim, sizeOfSector );

          // again paint our labeled training examples used so far
          paintLabeledExamples( imgTest, yBinGP, examples, nrOfSectorsProDim, sizeOfSector, 10 );
        }

        time_t start_time = clock();

        ClassificationResults results;
        confusionMatrix.set( 0.0 );
        ClassificationResult result;
        for ( uint i = 0 ; i < testData.size(); i++ )
        {
          const Vector & xstar = testData[i];
          SparseVector xstar_sparse ( xstar );

          Example example;
          example.svec = &xstar_sparse;
          result = classifier->classify( example );

          if ( visualizationOfResults )
            paintClassificationResult( imgTest, xstar, 2, result, nrOfSectorsProDim, sizeOfSector );

          result.classno_groundtruth = ( yTest[i] == 1 ) ? 1 : 0;
          results.push_back( result );
          confusionMatrix ( result.classno_groundtruth , result.classno ) ++;
        }

        float time_classification = ( float ) ( clock() - start_time );
        if ( verbose )
          std::cerr << "Time for Classification with " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training-examples: " << time_classification / CLOCKS_PER_SEC << " [s]" << std::endl;
        ( classification_times[incrementationStep+1] ).push_back ( time_classification / CLOCKS_PER_SEC );

        confusionMatrix.normalizeRowsL1();
        std::cerr << confusionMatrix;

        double avg_recognition_rate ( 0.0 );
        for ( int i = 0 ; i < ( int ) confusionMatrix.rows(); i++ )
        {
          avg_recognition_rate += confusionMatrix ( i, i );
        }
        avg_recognition_rate /= confusionMatrix.rows();

        std::cerr << " run: " << run << " avg recognition rate: " << avg_recognition_rate*100 << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl;
        recognitions_rates[incrementationStep+1].push_back ( avg_recognition_rate*100 );

        double score = results.getBinaryClassPerformance( ClassificationResults::PERF_AUC );
        std::cerr << " run: " << run << " AUC-score: " << score << " % -- " << nrOfClassesUsed*trainExPerClass+incrementalAddSize*(incrementationStep+1) << " training examples used" << std::endl << std::endl;
        AUC_scores[incrementationStep+1].push_back ( score*100 );

        if ( visualizationOfResults )
        {
          if ( saveImages )
          {
            imgTest.writePPM ( destinationForImages + "imgAL_run"+convertInt(run)+"_incStep_"+convertInt(incrementationStep+1)+"ClassifResult.ppm" );
          }
          else
            showImage( imgTest, "Classification result after inc step " + convertInt(incrementationStep+1) );
        }
      } // classification after IL adding
    } // IL adding over all increments
    std::cerr << "Final statistic about picked examples per class: " << pickedExamplesPerClass << std::endl;

    // don't waste memory!
    for ( uint tmp = 0; tmp < examples.size(); tmp++ )
    {
      delete examples[tmp].second.svec;
      examples[tmp].second.svec = NULL;
    }
  } // runs

  // ================= EVALUATION =========================
  int nrOfClassesUsed ( 2 ); // binary setting

  if ( do_classification )
  {
    std::cerr << "========================" << std::endl;
    std::cerr << "recognition_rates" << std::endl;
    for ( std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
    {
      for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
      {
        std::cerr << *jt << " ";
      }
      std::cerr << std::endl;
    }

    std::vector<double> mean_recs;
    std::vector<double> std_dev_recs;
    for ( std::vector<std::vector<double> >::const_iterator it = recognitions_rates.begin(); it != recognitions_rates.end(); it++ )
    {
      double mean_rec ( 0.0 );
      for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        mean_rec += *itRun;
      }
      mean_rec /= it->size();
      mean_recs.push_back ( mean_rec );

      double std_dev_rec ( 0.0 );
      for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        std_dev_rec += pow ( *itRun - mean_rec, 2 );
      }
      std_dev_rec /= it->size();
      std_dev_rec = sqrt ( std_dev_rec );
      std_dev_recs.push_back ( std_dev_rec );
    }

    int datasize ( nrOfClassesUsed*trainExPerClass );
    for ( uint i = 0; i < recognitions_rates.size(); i++ )
    {
      std::cerr << "size: " << datasize << " meanRR: " << mean_recs[i] << " stdDevRR: " << std_dev_recs[i] << std::endl;
      datasize += incrementalAddSize;
    }

    std::cerr << "========================" << std::endl;
    std::cerr << "AUC_scores" << std::endl;
    for ( std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
    {
      for ( std::vector<double>::const_iterator jt = ( *it ).begin(); jt != ( *it ).end(); jt++ )
      {
        std::cerr << *jt << " ";
      }
      std::cerr << std::endl;
    }

    std::vector<double> mean_aucs;
    std::vector<double> std_dev_aucs;
    for ( std::vector<std::vector<double> >::const_iterator it = AUC_scores.begin(); it != AUC_scores.end(); it++ )
    {
      double mean_auc ( 0.0 );
      for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        mean_auc += *itRun;
      }
      mean_auc /= it->size();
      mean_aucs.push_back ( mean_auc );

      double std_dev_auc ( 0.0 );
      for ( std::vector<double>::const_iterator itRun = it->begin(); itRun != it->end(); itRun++ )
      {
        std_dev_auc += pow ( *itRun - mean_auc, 2 );
      }
      std_dev_auc /= it->size();
      std_dev_auc = sqrt ( std_dev_auc );
      std_dev_aucs.push_back ( std_dev_auc );
    }

    datasize = nrOfClassesUsed*trainExPerClass;
    for ( uint i = 0; i < recognitions_rates.size(); i++ )
    {
      std::cerr << "size: " << datasize << " meanAUC: " << mean_aucs[i] << " stdDevAUC: " << std_dev_aucs[i] << std::endl;
      datasize += incrementalAddSize;
    }
  }
  else
  {
    std::cerr << "========================" << std::endl;
    std::cerr << "No classification done, therefore no classification times available." << std::endl;
  }

  return 0;
}