// SemSegNovelty.cpp

#include <sstream>
#include <iostream>

#include "SemSegNovelty.h"

#include "core/image/FilterT.h"
#include "gp-hik-exp/GPHIKClassifierNICE.h"
#include "vislearning/baselib/ICETools.h"
#include "vislearning/baselib/Globals.h"
#include "vislearning/features/fpfeatures/SparseVectorFeature.h"
#include "core/basics/StringTools.h"
#include "core/basics/Timer.h"
#include "segmentation/GenericRegionSegmentationMethodSelection.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
SemSegNovelty::SemSegNovelty ( const Config *conf,
                               const MultiDataset *md )
    : SemanticSegmentation ( conf, & ( md->getClassNames ( "train" ) ) )
{
  this->conf = conf;
  string section = "SemSegNovelty";

  featExtract = new LFColorWeijer ( conf );

  save_cache = conf->gB ( "FPCPixel", "save_cache", true );
  read_cache = conf->gB ( "FPCPixel", "read_cache", false );
  uncertdir  = conf->gS ( "debug", "uncertainty", "uncertainty" );
  cache      = conf->gS ( "cache", "root", "" );

  classifier = new GPHIKClassifierNICE ( conf, "ClassiferGPHIK" );

  whs       = conf->gI ( section, "window_size", 10 );
  featdist  = conf->gI ( section, "grid", 10 );
  testWSize = conf->gI ( section, "test_window_size", 10 );

  string rsMethode = conf->gS ( section, "segmentation", "none" );

  if ( rsMethode == "none" )
  {
    regionSeg = NULL;
  }
  else
  {
    RegionSegmentationMethod *tmpRegionSeg = GenericRegionSegmentationMethodSelection::selectRegionSegmentationMethod ( conf, rsMethode );
    if ( save_cache )
      regionSeg = new RSCache ( conf, tmpRegionSeg );
    else
      regionSeg = tmpRegionSeg;
  }
  cn = md->getClassNames ( "train" );

  if ( read_cache )
  {
    string classifierdst = "/classifier.data";
    fprintf ( stderr, "SemSegNovelty:: Reading classifier data from %s\n", ( cache + classifierdst ).c_str() );

    try
    {
      if ( classifier != NULL )
      {
        classifier->read ( cache + classifierdst );
      }
      fprintf ( stderr, "SemSegNovelty:: successfully read\n" );
    }
    catch ( char *str )
    {
      cerr << "error reading data: " << str << endl;
    }
  }
  else
  {
    train ( md );
  }
}
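
/* Illustrative config snippet for the options read in the constructor above. Section and key
 * names are taken from the gS/gI/gB calls, the values shown are the code defaults; the cache
 * path is a hypothetical example. The GP-HIK classifier additionally reads its own options
 * from the "ClassiferGPHIK" section passed to GPHIKClassifierNICE.
 *
 *   [SemSegNovelty]
 *   window_size      = 10
 *   grid             = 10
 *   test_window_size = 10
 *   segmentation     = none
 *
 *   [FPCPixel]
 *   save_cache = true
 *   read_cache = false
 *
 *   [debug]
 *   uncertainty = uncertainty
 *
 *   [cache]
 *   root = /tmp/semseg-cache
 *
 *   [GPHIK]
 *   noise = 0.01
 */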

SemSegNovelty::~SemSegNovelty()
{
  // clean-up
  if ( classifier != NULL )
    delete classifier;
  if ( featExtract != NULL )
    delete featExtract;
}

void SemSegNovelty::train ( const MultiDataset *md )
{
  const LabeledSet train = * ( *md ) ["train"];
  const LabeledSet *trainp = &train;

  ////////////////////////
  // feature extraction //
  ////////////////////////

  std::string forbidden_classes_s = conf->gS ( "analysis", "donttrain", "" );
  if ( forbidden_classes_s == "" )
  {
    forbidden_classes_s = conf->gS ( "analysis", "forbidden_classes", "" );
  }
  cn.getSelection ( forbidden_classes_s, forbidden_classes );

  // check the same thing for the training classes - this is very specific to our setup
  std::string forbidden_classesTrain_s = conf->gS ( "analysis", "donttrainTrain", "" );
  if ( forbidden_classesTrain_s == "" )
  {
    forbidden_classesTrain_s = conf->gS ( "analysis", "forbidden_classesTrain", "" );
  }
  cn.getSelection ( forbidden_classesTrain_s, forbidden_classesTrain );

  ProgressBar pb ( "Local Feature Extraction" );
  pb.show();

  int imgnb = 0;

  Examples examples;
  examples.filename = "training";

  int featdim = -1;
  classesInUse.clear();

  LOOP_ALL_S ( *trainp )
  {
    //EACH_S(classno, currentFile);
    EACH_INFO ( classno, info );
    std::string currentFile = info.img();

    CachedExample *ce = new CachedExample ( currentFile );

    const LocalizationResult *locResult = info.localization();
    if ( locResult->size() <= 0 )
    {
      fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n",
                currentFile.c_str() );
      continue;
    }

    int xsize, ysize;
    ce->getImageSize ( xsize, ysize );

    Image labels ( xsize, ysize );
    labels.set ( 0 );
    locResult->calcLabeledImage ( labels, ( *classNames ).getBackgroundClass() );

    NICE::ColorImage img;
    try {
      img = ColorImage ( currentFile );
    } catch ( Exception ) {
      cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
      continue;
    }

    Globals::setCurrentImgFN ( currentFile );

    MultiChannelImageT<double> feats;

    // extract features
    featExtract->getFeats ( img, feats );
    featdim = feats.channels();
    feats.addChannel ( featdim );

    // append the gradient strength of every feature channel as an additional channel
    for ( int c = 0; c < featdim; c++ )
    {
      ImageT<double> tmp = feats[c];
      ImageT<double> tmp2 = feats[c+featdim];
      NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
    }
    featdim += featdim;

    // compute integral images
    for ( int c = 0; c < featdim; c++ )
    {
      feats.calcIntegral ( c );
    }
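
    // getIntegralValue(x-whs, y-whs, x+whs, y+whs, f) below uses these integral images to obtain
    // the sum of channel f over the (2*whs+1)x(2*whs+1) window around (x,y) in constant time,
    // via the usual identity I(x2,y2) - I(x1-1,y2) - I(x2,y1-1) + I(x1-1,y1-1);
    // clipping of window coordinates at the image border is presumably handled inside
    // MultiChannelImageT.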

    for ( int y = 0; y < ysize; y += featdist )
    {
      for ( int x = 0; x < xsize; x += featdist )
      {
        int classnoTmp = labels.getPixel ( x, y );

        if ( forbidden_classesTrain.find ( classnoTmp ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        if ( classesInUse.find ( classnoTmp ) == classesInUse.end() )
        {
          classesInUse.insert ( classnoTmp );
        }

        Example example;
        example.vec = NULL;
        example.svec = new SparseVector ( featdim );
        for ( int f = 0; f < featdim; f++ )
        {
          double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
          if ( val > 1e-10 )
            ( *example.svec ) [f] = val;
        }
        example.svec->normalize();

        example.position = imgnb;
        examples.push_back ( pair<int, Example> ( classnoTmp, example ) );
      }
    }

    delete ce;
    imgnb++;
    pb.update ( trainp->count() );
  }

  numberOfClasses = classesInUse.size();
  std::cerr << "numberOfClasses: " << numberOfClasses << std::endl;
  std::cerr << "classes in use: " << std::endl;
  for ( std::set<int>::const_iterator it = classesInUse.begin(); it != classesInUse.end(); it++ )
  {
    std::cerr << *it << " ";
  }
  std::cerr << std::endl;

  pb.hide();

  //////////////////////
  // train classifier //
  //////////////////////
  FeaturePool fp;

  Feature *f = new SparseVectorFeature ( featdim );
  f->explode ( fp );
  delete f;

  if ( classifier != NULL )
    classifier->train ( fp, examples );
  else
  {
    cerr << "no classifier selected?!" << endl;
    exit ( -1 );
  }

  fp.destroy();

  if ( save_cache )
  {
    if ( classifier != NULL )
      classifier->save ( cache + "/classifier.data" );
  }

  ////////////
  //clean up//
  ////////////
  for ( int i = 0; i < ( int ) examples.size(); i++ )
  {
    examples[i].second.clean();
  }
  examples.clear();

  cerr << "SemSeg training finished" << endl;
}

void SemSegNovelty::semanticseg ( CachedExample *ce, NICE::Image & segresult, NICE::MultiChannelImageT<double> & probabilities )
{
  Timer timer;
  timer.start();

  Examples examples;
  examples.filename = "testing";

  segresult.set ( 0 );

  int featdim = -1;

  std::string currentFile = Globals::getCurrentImgFN();

  int xsize, ysize;
  ce->getImageSize ( xsize, ysize );

  probabilities.reInit ( xsize, ysize, cn.getMaxClassno() + 1 );
  probabilities.set ( 0.0 );

  NICE::ColorImage img;
  try {
    img = ColorImage ( currentFile );
  } catch ( Exception ) {
    cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
    return;
  }

  MultiChannelImageT<double> feats;

  // extract features
  featExtract->getFeats ( img, feats );
  featdim = feats.channels();
  feats.addChannel ( featdim );

  // append the gradient strength of every feature channel as an additional channel
  for ( int c = 0; c < featdim; c++ )
  {
    ImageT<double> tmp = feats[c];
    ImageT<double> tmp2 = feats[c+featdim];
    NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
  }
  featdim += featdim;

  // compute integral images
  for ( int c = 0; c < featdim; c++ )
  {
    feats.calcIntegral ( c );
  }

  FloatImage uncert ( xsize, ysize );
  uncert.set ( 0.0 );

  FloatImage gpUncertainty ( xsize, ysize );
  FloatImage gpMean ( xsize, ysize );
  FloatImage gpMeanRatio ( xsize, ysize );
  FloatImage gpWeightAll ( xsize, ysize );
  FloatImage gpWeightRatio ( xsize, ysize );

  gpUncertainty.set ( 0.0 );
  gpMean.set ( 0.0 );
  gpMeanRatio.set ( 0.0 );
  gpWeightAll.set ( 0.0 );
  gpWeightRatio.set ( 0.0 );

  double maxunc           = -numeric_limits<double>::max();
  double maxGPUncertainty = -numeric_limits<double>::max();
  double maxGPMean        = -numeric_limits<double>::max();
  double maxGPMeanRatio   = -numeric_limits<double>::max();
  double maxGPWeightAll   = -numeric_limits<double>::max();
  double maxGPWeightRatio = -numeric_limits<double>::max();

  timer.stop();
  cout << "first: " << timer.getLastAbsolute() << endl;

  // we need this later on for the active learning measures
  double gpNoise = conf->gD ( "GPHIK", "noise", 0.01 );

  timer.start();
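
  // note: when OpenMP is enabled, the shared max* accumulators declared above and the result
  // images are written from several threads inside the loop below without synchronization;
  // a reduction or critical section would be needed for strictly deterministic parallel results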
#pragma omp parallel for
  for ( int y = 0; y < ysize; y += testWSize )
  {
    Example example;
    example.vec = NULL;
    example.svec = new SparseVector ( featdim );

    for ( int x = 0; x < xsize; x += testWSize )
    {
      for ( int f = 0; f < featdim; f++ )
      {
        double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
        if ( val > 1e-10 )
          ( *example.svec ) [f] = val;
      }
      example.svec->normalize();

      ClassificationResult cr = classifier->classify ( example );

      // we need this if we want to compute the GP-AL measures later on
      double minMeanAbs ( numeric_limits<double>::max() );
      double maxMeanAbs ( 0.0 );
      double sndMaxMeanAbs ( 0.0 );
      double maxMean ( -numeric_limits<double>::max() );
      double sndMaxMean ( -numeric_limits<double>::max() );

      for ( int j = 0 ; j < cr.scores.size(); j++ )
      {
        if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        // check whether we found a class with a smaller abs mean than the current minimum
        if ( abs ( cr.scores[j] ) < minMeanAbs )
          minMeanAbs = abs ( cr.scores[j] );

        // check for a larger abs mean as well
        if ( abs ( cr.scores[j] ) > maxMeanAbs )
        {
          sndMaxMeanAbs = maxMeanAbs;
          maxMeanAbs = abs ( cr.scores[j] );
        }
        // and also for the second largest abs mean of all classes
        else if ( abs ( cr.scores[j] ) > sndMaxMeanAbs )
        {
          sndMaxMeanAbs = abs ( cr.scores[j] );
        }

        // check for a larger mean without abs as well
        if ( cr.scores[j] > maxMean )
        {
          sndMaxMean = maxMean;
          maxMean = cr.scores[j];
        }
        // and also for the second largest mean of all classes
        else if ( cr.scores[j] > sndMaxMean )
        {
          sndMaxMean = cr.scores[j];
        }
      }

      double firstTerm ( 1.0 / sqrt ( cr.uncertainty + gpNoise ) );

      // compute the heuristic GP-UNCERTAINTY, as proposed by Kapoor et al. in IJCV 2010
      // GP-UNCERTAINTY : |mean| / sqrt(predictive variance + GP noise)
      double gpUncertaintyVal = maxMeanAbs * firstTerm; // firstTerm = 1.0 / sqrt(cr.uncertainty + gpNoise)

      // compute results when we take the lowest absolute mean value of all classes
      double gpMeanVal = minMeanAbs;

      // look at the margin between the largest and the second largest mean value,
      // i.e., between the most plausible and the second most plausible class
      double gpMeanRatioVal = maxMean - sndMaxMean;

      double gpWeightAllVal ( 0.0 );
      double gpWeightRatioVal ( 0.0 );

      if ( numberOfClasses > 2 )
      {
        // compute the weight in the alpha-vector for every sample after assuming it to be
        // added to the training set.
        // Thereby, we measure its "importance" for the current model
        //
        // double firstTerm is already computed
        //
        // the second term is only needed when computing impacts
        // double secondTerm; //this is the nasty guy :/

        // --- compute the third term
        // this is the difference between predicted label and GT label
        std::vector<double> diffToPositive; diffToPositive.clear();
        std::vector<double> diffToNegative; diffToNegative.clear();
        double diffToNegativeSum ( 0.0 );

        for ( int j = 0 ; j < cr.scores.size(); j++ )
        {
          if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
          {
            continue;
          }

          // look at the difference to +1
          diffToPositive.push_back ( abs ( cr.scores[j] - 1 ) );
          // look at the difference to -1
          diffToNegative.push_back ( abs ( cr.scores[j] + 1 ) );
          // sum up the differences to -1
          diffToNegativeSum += abs ( cr.scores[j] + 1 );
        }

        // let's subtract for every class its diffToNegative from the sum, add its diffToPositive,
        // and use this as the third term for this specific class.
        // the final value is obtained by minimizing over all classes
        //
        // originally, we minimize over all classes after building the final score
        // however, the first and the second term do not depend on the choice of
        // y*, therefore we minimize here already
        double thirdTerm ( numeric_limits<double>::max() );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          double tmpVal ( diffToPositive[tmpCnt] + ( diffToNegativeSum - diffToNegative[tmpCnt] ) );
          if ( tmpVal < thirdTerm )
            thirdTerm = tmpVal;
        }
        gpWeightAllVal = thirdTerm * firstTerm;
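
        // worked example with illustrative numbers: for scores (0.9, -0.7, -0.8) we get
        // diffToPositive = (0.1, 1.7, 1.8), diffToNegative = (1.9, 0.3, 0.2), diffToNegativeSum = 2.4;
        // the per-class candidates are 0.1+(2.4-1.9)=0.6, 1.7+(2.4-0.3)=3.8, 1.8+(2.4-0.2)=4.0,
        // so thirdTerm = 0.6 -- a sample whose most plausible class is already close to +1 and whose
        // remaining classes are close to -1 receives a small weight, i.e., low novelty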

        // now look at the ratio of the resulting weights for the most plausible
        // against the second most plausible class
        double thirdTermMostPlausible ( 0.0 );
        double thirdTermSecondMostPlausible ( 0.0 );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          if ( diffToPositive[tmpCnt] > thirdTermMostPlausible )
          {
            thirdTermSecondMostPlausible = thirdTermMostPlausible;
            thirdTermMostPlausible = diffToPositive[tmpCnt];
          }
          else if ( diffToPositive[tmpCnt] > thirdTermSecondMostPlausible )
          {
            thirdTermSecondMostPlausible = diffToPositive[tmpCnt];
          }
        }
        // compute the resulting score
        gpWeightRatioVal = ( thirdTermMostPlausible - thirdTermSecondMostPlausible ) * firstTerm;

        // finally, look for this feature how it would affect the whole model (summarized by the
        // weight vector alpha) if we used it as an additional training example
        // TODO this would be REALLY computationally demanding. Do we really want to do this?
        // gpImpactAll[s] ( pce[i].second.x, pce[i].second.y ) = thirdTerm*firstTerm*secondTerm;
        // gpImpactRatio[s] ( pce[i].second.x, pce[i].second.y ) = (thirdTermMostPlausible - thirdTermSecondMostPlausible)*firstTerm*secondTerm;
      }
      else // binary scenario
      {
        gpWeightAllVal = std::min ( abs ( cr.scores[*classesInUse.begin()] + 1 ), abs ( cr.scores[*classesInUse.begin()] - 1 ) );
        gpWeightAllVal *= firstTerm;
        gpWeightRatioVal = gpWeightAllVal;
      }

      int xs = std::max ( 0, x - testWSize / 2 );
      int xe = std::min ( xsize - 1, x + testWSize / 2 );
      int ys = std::max ( 0, y - testWSize / 2 );
      int ye = std::min ( ysize - 1, y + testWSize / 2 );

      // write the classification result and the novelty measures into the window around (x,y)
      for ( int yl = ys; yl <= ye; yl++ )
      {
        for ( int xl = xs; xl <= xe; xl++ )
        {
          for ( int j = 0 ; j < cr.scores.size(); j++ )
          {
            probabilities ( xl, yl, j ) = cr.scores[j];
          }
          segresult ( xl, yl ) = cr.classno;
          uncert ( xl, yl ) = cr.uncertainty;

          gpUncertainty ( xl, yl ) = gpUncertaintyVal;
          gpMean ( xl, yl )        = gpMeanVal;
          gpMeanRatio ( xl, yl )   = gpMeanRatioVal;
          gpWeightAll ( xl, yl )   = gpWeightAllVal;
          gpWeightRatio ( xl, yl ) = gpWeightRatioVal;
        }
      }

      if ( maxunc < cr.uncertainty )
        maxunc = cr.uncertainty;

      if ( maxGPUncertainty < gpUncertaintyVal )
        maxGPUncertainty = gpUncertaintyVal;
      if ( maxGPMean < gpMeanVal )
        maxGPMean = gpMeanVal;
      if ( maxGPMeanRatio < gpMeanRatioVal )
        maxGPMeanRatio = gpMeanRatioVal;
      if ( maxGPWeightAll < gpWeightAllVal )
        maxGPWeightAll = gpWeightAllVal;
      if ( maxGPWeightRatio < gpWeightRatioVal )
        maxGPWeightRatio = gpWeightRatioVal;

      example.svec->clear();
    }
    delete example.svec;
    example.svec = NULL;
  }

  // std::cerr << "uncertainty: " << gpUncertaintyVal << " minMean: " << gpMeanVal << " gpMeanRatio: " << gpMeanRatioVal << " weightAll: " << gpWeightAllVal << " weightRatio: "<< gpWeightRatioVal << std::endl;

  // determine regions
  if ( regionSeg != NULL )
  {
    NICE::Matrix mask;
    int regionsize = regionSeg->segRegions ( img, mask );
    // TODO: aggregate the pixel-wise scores and novelty values over the segmented regions
  }

  timer.stop();
  cout << "second: " << timer.getLastAbsolute() << endl;

  timer.start();

  ColorImage imgrgb ( xsize, ysize );

  std::stringstream out;
  std::vector< std::string > list2;
  StringTools::split ( Globals::getCurrentImgFN (), '/', list2 );
  out << uncertdir << "/" << list2.back();

  uncert.writeRaw ( out.str() + ".rawfloat" );

  // anchor the value range for the color coding (pixel (0,0) acts as minimum, (0,1) as maximum)
  uncert ( 0, 0 ) = 0.0;
  uncert ( 0, 1 ) = 1.0 + gpNoise;
  ICETools::convertToRGB ( uncert, imgrgb );
  imgrgb.write ( out.str() + "rough.png" );

  // invert the images such that large numbers correspond to high impact, high variance,
  // high importance, high novelty, ...
  for ( int y = 0; y < ysize; y++ )
  {
    for ( int x = 0; x < xsize; x++ )
    {
      gpUncertainty ( x, y ) = maxGPUncertainty - gpUncertainty ( x, y );
      gpMean ( x, y )        = maxGPMean - gpMean ( x, y );
      gpMeanRatio ( x, y )   = maxGPMeanRatio - gpMeanRatio ( x, y );
      gpWeightRatio ( x, y ) = maxGPWeightRatio - gpWeightRatio ( x, y );
    }
  }

  //
  gpUncertainty ( 0, 0 ) = 0.0;
  gpUncertainty ( 0, 1 ) = maxGPUncertainty;
  ICETools::convertToRGB ( gpUncertainty, imgrgb );
  imgrgb.write ( out.str() + "gpUncertainty.png" );

  //
  gpMean ( 0, 0 ) = 0.0;
  gpMean ( 0, 1 ) = maxGPMean;
  ICETools::convertToRGB ( gpMean, imgrgb );
  imgrgb.write ( out.str() + "gpMean.png" );

  //
  gpMeanRatio ( 0, 0 ) = 0.0;
  gpMeanRatio ( 0, 1 ) = maxGPMeanRatio;
  ICETools::convertToRGB ( gpMeanRatio, imgrgb );
  imgrgb.write ( out.str() + "gpMeanRatio.png" );

  //
  gpWeightAll ( 0, 0 ) = 0.0;
  gpWeightAll ( 0, 1 ) = maxGPWeightAll;
  ICETools::convertToRGB ( gpWeightAll, imgrgb );
  imgrgb.write ( out.str() + "gpWeightAll.png" );

  //
  gpWeightRatio ( 0, 0 ) = 0.0;
  gpWeightRatio ( 0, 1 ) = maxGPWeightRatio;
  ICETools::convertToRGB ( gpWeightRatio, imgrgb );
  imgrgb.write ( out.str() + "gpWeightRatio.png" );

  timer.stop();
  cout << "last: " << timer.getLastAbsolute() << endl;
}
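
/* Usage sketch (illustrative only; the config file name and the surrounding test loop are
 * hypothetical, the class and method signatures are the ones defined in this file):
 *
 *   Config conf ( "semsegnovelty.conf" );
 *   MultiDataset md ( &conf );
 *
 *   // trains the GP-HIK classifier on the "train" set, or reads it from the cache
 *   SemSegNovelty semseg ( &conf, &md );
 *
 *   // for each test image, wrapped in a CachedExample *ce with the current file name
 *   // registered via Globals::setCurrentImgFN():
 *   //   NICE::Image segresult ( xsize, ysize );
 *   //   NICE::MultiChannelImageT<double> probabilities;
 *   //   semseg.semanticseg ( ce, segresult, probabilities );
 *   // afterwards segresult holds the predicted class per pixel, probabilities the per-class
 *   // scores, and the novelty maps (*.rawfloat / *.png) have been written to uncertdir.
 */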