SemSegNovelty.cpp

#include <sstream>
#include <iostream>

#include "SemSegNovelty.h"

#include "core/image/FilterT.h"
#include "gp-hik-exp/GPHIKClassifierNICE.h"
#include "vislearning/baselib/ICETools.h"
#include "vislearning/baselib/Globals.h"
#include "vislearning/features/fpfeatures/SparseVectorFeature.h"
#include "core/basics/StringTools.h"
#include "core/basics/Timer.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

SemSegNovelty::SemSegNovelty ( const Config *conf,
                               const MultiDataset *md )
    : SemanticSegmentation ( conf, & ( md->getClassNames ( "train" ) ) )
{
  this->conf = conf;

  string section = "SemSegNovelty";

  featExtract = new LFColorWeijer ( conf );

  save_cache = conf->gB ( "FPCPixel", "save_cache", true );
  read_cache = conf->gB ( "FPCPixel", "read_cache", false );
  uncertdir = conf->gS ( "debug", "uncertainty", "uncertainty" );
  cache = conf->gS ( "cache", "root", "" );

  classifier = new GPHIKClassifierNICE ( conf, "ClassiferGPHIK" );

  whs = conf->gI ( section, "window_size", 10 );
  featdist = conf->gI ( section, "grid", 10 );
  testWSize = conf->gI ( section, "test_window_size", 10 );

  cn = md->getClassNames ( "train" );

  if ( read_cache )
  {
    string classifierdst = "/classifier.data";
    fprintf ( stderr, "SemSegNovelty:: Reading classifier data from %s\n", ( cache + classifierdst ).c_str() );

    try
    {
      if ( classifier != NULL )
      {
        classifier->read ( cache + classifierdst );
      }
      fprintf ( stderr, "SemSegNovelty:: successfully read\n" );
    }
    catch ( char *str )
    {
      cerr << "error reading data: " << str << endl;
    }
  }
  else
  {
    train ( md );
  }
}

SemSegNovelty::~SemSegNovelty()
{
  // clean-up
  if ( classifier != NULL )
    delete classifier;

  if ( featExtract != NULL )
    delete featExtract;
}

void SemSegNovelty::train ( const MultiDataset *md )
{
  const LabeledSet train = * ( *md ) ["train"];
  const LabeledSet *trainp = &train;

  ////////////////////////
  // feature extraction //
  ////////////////////////

  std::string forbidden_classes_s = conf->gS ( "analysis", "donttrain", "" );
  if ( forbidden_classes_s == "" )
  {
    forbidden_classes_s = conf->gS ( "analysis", "forbidden_classes", "" );
  }
  cn.getSelection ( forbidden_classes_s, forbidden_classes );

  // check the same thing for the training classes - this is very specific to our setup
  std::string forbidden_classesTrain_s = conf->gS ( "analysis", "donttrainTrain", "" );
  if ( forbidden_classesTrain_s == "" )
  {
    forbidden_classesTrain_s = conf->gS ( "analysis", "forbidden_classesTrain", "" );
  }
  cn.getSelection ( forbidden_classesTrain_s, forbidden_classesTrain );

  ProgressBar pb ( "Local Feature Extraction" );
  pb.show();

  int imgnb = 0;

  Examples examples;
  examples.filename = "training";

  int featdim = -1;
  classesInUse.clear();

  LOOP_ALL_S ( *trainp )
  {
    //EACH_S(classno, currentFile);
    EACH_INFO ( classno, info );
    std::string currentFile = info.img();

    CachedExample *ce = new CachedExample ( currentFile );

    const LocalizationResult *locResult = info.localization();
    if ( locResult->size() <= 0 )
    {
      fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n",
                currentFile.c_str() );
      continue;
    }

    int xsize, ysize;
    ce->getImageSize ( xsize, ysize );

    Image labels ( xsize, ysize );
    labels.set ( 0 );
    locResult->calcLabeledImage ( labels, ( *classNames ).getBackgroundClass() );

    NICE::ColorImage img;
    try {
      img = ColorImage ( currentFile );
    } catch ( Exception ) {
      cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
      continue;
    }

    Globals::setCurrentImgFN ( currentFile );

    MultiChannelImageT<double> feats;

    // extract features
    featExtract->getFeats ( img, feats );
    featdim = feats.channels();
    feats.addChannel ( featdim );

    for ( int c = 0; c < featdim; c++ )
    {
      ImageT<double> tmp = feats[c];
      ImageT<double> tmp2 = feats[c + featdim];
      NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
    }
    featdim += featdim;
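    // The first featdim channels now hold the local features computed by featExtract
    // (LFColorWeijer), the second half their gradient strengths. The integral images
    // computed next let the sampling loop below read box sums over the (x +/- whs, y +/- whs)
    // windows in constant time via getIntegralValue.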
    // compute integral images
    for ( int c = 0; c < featdim; c++ )
    {
      feats.calcIntegral ( c );
    }

    for ( int y = 0; y < ysize; y += featdist )
    {
      for ( int x = 0; x < xsize; x += featdist )
      {
        int classnoTmp = labels.getPixel ( x, y );
        if ( forbidden_classesTrain.find ( classnoTmp ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        if ( classesInUse.find ( classnoTmp ) == classesInUse.end() )
        {
          classesInUse.insert ( classnoTmp );
        }

        Example example;
        example.vec = NULL;
        example.svec = new SparseVector ( featdim );

        for ( int f = 0; f < featdim; f++ )
        {
          double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
          if ( val > 1e-10 )
            ( *example.svec ) [f] = val;
        }
        example.svec->normalize();

        example.position = imgnb;
        examples.push_back ( pair<int, Example> ( classnoTmp, example ) );
      }
    }

    delete ce;
    imgnb++;
    pb.update ( trainp->count() );
  }

  numberOfClasses = classesInUse.size();
  std::cerr << "numberOfClasses: " << numberOfClasses << std::endl;
  std::cerr << "classes in use: " << std::endl;
  for ( std::set<int>::const_iterator it = classesInUse.begin(); it != classesInUse.end(); it++ )
  {
    std::cerr << *it << " ";
  }
  std::cerr << std::endl;

  pb.hide();

  //////////////////////
  // train classifier //
  //////////////////////
  FeaturePool fp;

  Feature *f = new SparseVectorFeature ( featdim );
  f->explode ( fp );
  delete f;

  if ( classifier != NULL )
    classifier->train ( fp, examples );
  else
  {
    cerr << "no classifier selected?!" << endl;
    exit ( -1 );
  }

  fp.destroy();

  if ( save_cache )
  {
    if ( classifier != NULL )
      classifier->save ( cache + "/classifier.data" );
  }

  ////////////
  //clean up//
  ////////////
  for ( int i = 0; i < ( int ) examples.size(); i++ )
  {
    examples[i].second.clean();
  }
  examples.clear();

  cerr << "SemSeg training finished" << endl;
}

void SemSegNovelty::semanticseg ( CachedExample *ce, NICE::Image & segresult, NICE::MultiChannelImageT<double> & probabilities )
{
  Timer timer;
  timer.start();

  Examples examples;
  examples.filename = "testing";

  segresult.set ( 0 );

  int featdim = -1;

  std::string currentFile = Globals::getCurrentImgFN();

  int xsize, ysize;
  ce->getImageSize ( xsize, ysize );

  probabilities.reInit ( xsize, ysize, cn.getMaxClassno() + 1 );
  probabilities.set ( 0.0 );

  NICE::ColorImage img;
  try {
    img = ColorImage ( currentFile );
  } catch ( Exception ) {
    cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
    return;
  }

  MultiChannelImageT<double> feats;

  // extract features
  featExtract->getFeats ( img, feats );
  featdim = feats.channels();
  feats.addChannel ( featdim );

  for ( int c = 0; c < featdim; c++ )
  {
    ImageT<double> tmp = feats[c];
    ImageT<double> tmp2 = feats[c + featdim];
    NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
  }
  featdim += featdim;
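  // Same feature construction as in train(): color features plus their gradient strengths,
  // so the test-time window descriptors match the representation used for training.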
  // compute integral images
  for ( int c = 0; c < featdim; c++ )
  {
    feats.calcIntegral ( c );
  }

  FloatImage uncert ( xsize, ysize );
  uncert.set ( 0.0 );

  FloatImage gpUncertainty ( xsize, ysize );
  FloatImage gpMean ( xsize, ysize );
  FloatImage gpMeanRatio ( xsize, ysize );
  FloatImage gpWeightAll ( xsize, ysize );
  FloatImage gpWeightRatio ( xsize, ysize );

  gpUncertainty.set ( 0.0 );
  gpMean.set ( 0.0 );
  gpMeanRatio.set ( 0.0 );
  gpWeightAll.set ( 0.0 );
  gpWeightRatio.set ( 0.0 );

  double maxunc = -numeric_limits<double>::max();
  double maxGPUncertainty = -numeric_limits<double>::max();
  double maxGPMean = -numeric_limits<double>::max();
  double maxGPMeanRatio = -numeric_limits<double>::max();
  double maxGPWeightAll = -numeric_limits<double>::max();
  double maxGPWeightRatio = -numeric_limits<double>::max();

  timer.stop();
  cout << "first: " << timer.getLastAbsolute() << endl;

  // we need this later on for the active learning measures
  double gpNoise = conf->gD ( "GPHIK", "noise", 0.01 );

  timer.start();
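  // Note: the parallel loop below updates the shared max* accumulators and the output images
  // from several threads without explicit synchronization; if this ever becomes a problem,
  // consider an OpenMP reduction or a critical section for these updates.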
#pragma omp parallel for
  for ( int y = 0; y < ysize; y += testWSize )
  {
    Example example;
    example.vec = NULL;
    example.svec = new SparseVector ( featdim );

    for ( int x = 0; x < xsize; x += testWSize )
    {
      for ( int f = 0; f < featdim; f++ )
      {
        double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
        if ( val > 1e-10 )
          ( *example.svec ) [f] = val;
      }
      example.svec->normalize();

      ClassificationResult cr = classifier->classify ( example );

      // we need these values if we want to compute the GP-AL measures later on
      double minMeanAbs ( numeric_limits<double>::max() );
      double maxMeanAbs ( 0.0 );
      double sndMaxMeanAbs ( 0.0 );
      double maxMean ( -numeric_limits<double>::max() );
      double sndMaxMean ( -numeric_limits<double>::max() );

      for ( int j = 0 ; j < cr.scores.size(); j++ )
      {
        if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        // check whether we found a class with a smaller abs mean than the current minimum
        if ( abs ( cr.scores[j] ) < minMeanAbs )
          minMeanAbs = abs ( cr.scores[j] );

        // check for a larger abs mean as well
        if ( abs ( cr.scores[j] ) > maxMeanAbs )
        {
          sndMaxMeanAbs = maxMeanAbs;
          maxMeanAbs = abs ( cr.scores[j] );
        }
        // and also for the second largest abs mean of all classes
        else if ( abs ( cr.scores[j] ) > sndMaxMeanAbs )
        {
          sndMaxMeanAbs = abs ( cr.scores[j] );
        }

        // check for a larger mean without abs as well
        if ( cr.scores[j] > maxMean )
        {
          sndMaxMean = maxMean;
          maxMean = cr.scores[j];
        }
        // and also for the second largest mean of all classes
        else if ( cr.scores[j] > sndMaxMean )
        {
          sndMaxMean = cr.scores[j];
        }
      }

      double firstTerm ( 1.0 / sqrt ( cr.uncertainty + gpNoise ) );

      // compute the heuristic GP-UNCERTAINTY, as proposed by Kapoor et al. in IJCV 2010:
      // GP-UNCERTAINTY = |mean| / sqrt(variance + gpNoise)
      double gpUncertaintyVal = maxMeanAbs * firstTerm; // firstTerm = 1.0 / sqrt(cr.uncertainty + gpNoise)
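      // In symbols: gpUncertaintyVal = max_j |mu_j| / sqrt(sigma^2 + gpNoise), where the mu_j are
      // the class scores (predictive means) and sigma^2 = cr.uncertainty is the predictive
      // variance, so confident low-variance predictions get large values here
      // (the map is inverted further below).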
      // compute results when we take the lowest mean value of all classes
      double gpMeanVal = minMeanAbs;

      // look at the difference in the mean values for the most plausible class
      // and the second most plausible class
      double gpMeanRatioVal = maxMean - sndMaxMean;

      double gpWeightAllVal ( 0.0 );
      double gpWeightRatioVal ( 0.0 );

      if ( numberOfClasses > 2 )
      {
        // compute the weight in the alpha-vector for every sample after assuming it to be
        // added to the training set.
        // Thereby, we measure its "importance" for the current model.
        //
        // double firstTerm is already computed
        //
        // the second term is only needed when computing impacts
        // double secondTerm; //this is the nasty guy :/

        // --- compute the third term
        // this is the difference between the predicted label and the GT label
        std::vector<double> diffToPositive; diffToPositive.clear();
        std::vector<double> diffToNegative; diffToNegative.clear();
        double diffToNegativeSum ( 0.0 );

        for ( int j = 0 ; j < cr.scores.size(); j++ )
        {
          if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
          {
            continue;
          }
          // look at the difference to +1
          diffToPositive.push_back ( abs ( cr.scores[j] - 1 ) );
          // look at the difference to -1
          diffToNegative.push_back ( abs ( cr.scores[j] + 1 ) );
          // sum up the differences to -1
          diffToNegativeSum += abs ( cr.scores[j] + 1 );
        }

        // let's subtract for every class its diffToNegative from the sum, add its diffToPositive,
        // and use this as the third term for this specific class.
        // The final value is obtained by minimizing over all classes.
        //
        // originally, we minimize over all classes after building the final score;
        // however, the first and the second term do not depend on the choice of
        // y*, therefore we minimize here already
        double thirdTerm ( numeric_limits<double>::max() );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          double tmpVal ( diffToPositive[tmpCnt] + ( diffToNegativeSum - diffToNegative[tmpCnt] ) );
          if ( tmpVal < thirdTerm )
            thirdTerm = tmpVal;
        }
        gpWeightAllVal = thirdTerm * firstTerm;
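        // In closed form: gpWeightAllVal = min_{y*} ( |mu_{y*} - 1| + sum_{j != y*} |mu_j + 1| )
        //                                  / sqrt(cr.uncertainty + gpNoise),
        // i.e. the smallest total distance of the scores to the ideal one-vs-all targets
        // (+1 for the assumed label y*, -1 for all other classes), scaled by firstTerm.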
        // now look at the ratio of the resulting weights for the most plausible
        // class against the second most plausible class
        double thirdTermMostPlausible ( 0.0 );
        double thirdTermSecondMostPlausible ( 0.0 );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          if ( diffToPositive[tmpCnt] > thirdTermMostPlausible )
          {
            thirdTermSecondMostPlausible = thirdTermMostPlausible;
            thirdTermMostPlausible = diffToPositive[tmpCnt];
          }
          else if ( diffToPositive[tmpCnt] > thirdTermSecondMostPlausible )
          {
            thirdTermSecondMostPlausible = diffToPositive[tmpCnt];
          }
        }
        // compute the resulting score
        gpWeightRatioVal = ( thirdTermMostPlausible - thirdTermSecondMostPlausible ) * firstTerm;

        // finally, we could also look at how this feature would affect the whole model
        // (summarized by the weight vector alpha) if we used it as an additional training example.
        // TODO this would be REALLY computationally demanding. Do we really want to do this?
        // gpImpactAll[s] ( pce[i].second.x, pce[i].second.y ) = thirdTerm*firstTerm*secondTerm;
        // gpImpactRatio[s] ( pce[i].second.x, pce[i].second.y ) = (thirdTermMostPlausible - thirdTermSecondMostPlausible)*firstTerm*secondTerm;
      }
      else // binary scenario
      {
        gpWeightAllVal = std::min ( abs ( cr.scores[*classesInUse.begin()] + 1 ), abs ( cr.scores[*classesInUse.begin()] - 1 ) );
        gpWeightAllVal *= firstTerm;
        gpWeightRatioVal = gpWeightAllVal;
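        // Binary case: only the score of the first class in use is needed (its distance to the
        // closer of the two targets +1 / -1), and the ratio-based weight simply reuses this value.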
      }

      int xs = std::max ( 0, x - testWSize / 2 );
      int xe = std::min ( xsize - 1, x + testWSize / 2 );
      int ys = std::max ( 0, y - testWSize / 2 );
      int ye = std::min ( ysize - 1, y + testWSize / 2 );

      for ( int yl = ys; yl <= ye; yl++ )
      {
        for ( int xl = xs; xl <= xe; xl++ )
        {
          for ( int j = 0 ; j < cr.scores.size(); j++ )
          {
            probabilities ( xl, yl, j ) = cr.scores[j];
          }
          segresult ( xl, yl ) = cr.classno;
          uncert ( xl, yl ) = cr.uncertainty;

          gpUncertainty ( xl, yl ) = gpUncertaintyVal;
          gpMean ( xl, yl ) = gpMeanVal;
          gpMeanRatio ( xl, yl ) = gpMeanRatioVal;
          gpWeightAll ( xl, yl ) = gpWeightAllVal;
          gpWeightRatio ( xl, yl ) = gpWeightRatioVal;
        }
      }

      if ( maxunc < cr.uncertainty )
        maxunc = cr.uncertainty;

      if ( maxGPUncertainty < gpUncertaintyVal )
        maxGPUncertainty = gpUncertaintyVal;
      if ( maxGPMean < gpMeanVal )
        maxGPMean = gpMeanVal;
      if ( maxGPMeanRatio < gpMeanRatioVal )
        maxGPMeanRatio = gpMeanRatioVal;
      if ( maxGPWeightAll < gpWeightAllVal )
        maxGPWeightAll = gpWeightAllVal;
      if ( maxGPWeightRatio < gpWeightRatioVal )
        maxGPWeightRatio = gpWeightRatioVal;

      // std::cerr << "uncertainty: " << gpUncertaintyVal << " minMean: " << gpMeanVal << " gpMeanRatio: " << gpMeanRatioVal << " weightAll: " << gpWeightAllVal << " weightRatio: "<< gpWeightRatioVal << std::endl;

      example.svec->clear();
    }
    delete example.svec;
    example.svec = NULL;
  }

  cout << "max uncertainty: " << maxunc << endl;

  timer.stop();
  cout << "second: " << timer.getLastAbsolute() << endl;
  timer.start();

  ColorImage imgrgb ( xsize, ysize );

  std::stringstream out;
  std::vector< std::string > list2;
  StringTools::split ( Globals::getCurrentImgFN (), '/', list2 );
  out << uncertdir << "/" << list2.back();

  uncert.writeRaw ( out.str() + ".rawfloat" );
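  // The two assignments below overwrite two corner pixels, presumably to pin the value range
  // used by convertToRGB so that the color coding stays comparable across images.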
  uncert(0, 0) = 0.0;
  uncert(0, 1) = 1.0 + gpNoise;
  ICETools::convertToRGB ( uncert, imgrgb );
  imgrgb.write ( out.str() + "rough.png" );

  // invert images such that large numbers correspond to high impact, high variance, high importance, high novelty, ...
  for ( int y = 0; y < ysize; y++ )
  {
    for ( int x = 0; x < xsize; x++ )
    {
      gpUncertainty(x,y) = maxGPUncertainty - gpUncertainty(x,y);
      gpMean(x,y) = maxGPMean - gpMean(x,y);
      gpMeanRatio(x,y) = maxGPMeanRatio - gpMeanRatio(x,y);
      gpWeightRatio(x,y) = maxGPWeightRatio - gpWeightRatio(x,y);
    }
  }

  //
  gpUncertainty(0, 0) = 0.0;
  gpUncertainty(0, 1) = maxGPUncertainty;
  ICETools::convertToRGB ( gpUncertainty, imgrgb );
  imgrgb.write ( out.str() + "gpUncertainty.png" );

  //
  gpMean(0, 0) = 0.0;
  gpMean(0, 1) = maxGPMean;
  ICETools::convertToRGB ( gpMean, imgrgb );
  imgrgb.write ( out.str() + "gpMean.png" );

  //
  gpMeanRatio(0, 0) = 0.0;
  gpMeanRatio(0, 1) = maxGPMeanRatio;
  ICETools::convertToRGB ( gpMeanRatio, imgrgb );
  imgrgb.write ( out.str() + "gpMeanRatio.png" );

  //
  gpWeightAll(0, 0) = 0.0;
  gpWeightAll(0, 1) = maxGPWeightAll;
  ICETools::convertToRGB ( gpWeightAll, imgrgb );
  imgrgb.write ( out.str() + "gpWeightAll.png" );

  //
  gpWeightRatio(0, 0) = 0.0;
  gpWeightRatio(0, 1) = maxGPWeightRatio;
  ICETools::convertToRGB ( gpWeightRatio, imgrgb );
  imgrgb.write ( out.str() + "gpWeightRatio.png" );

  timer.stop();
  cout << "last: " << timer.getLastAbsolute() << endl;
}