#include <sstream>
#include <iostream>

#include "SemSegNovelty.h"

#include "core/image/FilterT.h"
#include "gp-hik-exp/GPHIKClassifierNICE.h"
#include "vislearning/baselib/ICETools.h"
#include "vislearning/baselib/Globals.h"
#include "vislearning/features/fpfeatures/SparseVectorFeature.h"

#include "core/basics/StringTools.h"
#include "core/basics/Timer.h"

#include "segmentation/GenericRegionSegmentationMethodSelection.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

SemSegNovelty::SemSegNovelty ( const Config *conf,
                               const MultiDataset *md )
    : SemanticSegmentation ( conf, & ( md->getClassNames ( "train" ) ) )
{
  this->conf = conf;

  globalMaxUncert = -numeric_limits<double>::max();

  string section = "SemSegNovelty";

  featExtract = new LFColorWeijer ( conf );

  save_cache = conf->gB ( "FPCPixel", "save_cache", true );
  read_cache = conf->gB ( "FPCPixel", "read_cache", false );
  uncertdir = conf->gS ( "debug", "uncertainty", "uncertainty" );
  cache = conf->gS ( "cache", "root", "" );

  classifier = new GPHIKClassifierNICE ( conf, "ClassiferGPHIK" );

  findMaximumUncert = conf->gB ( section, "findMaximumUncert", true );
  whs = conf->gI ( section, "window_size", 10 );
  featdist = conf->gI ( section, "grid", 10 );
  testWSize = conf->gI ( section, "test_window_size", 10 );

  string rsMethode = conf->gS ( section, "segmentation", "none" );

  if ( rsMethode == "none" )
  {
    regionSeg = NULL;
  }
  else
  {
    RegionSegmentationMethod *tmpRegionSeg =
      GenericRegionSegmentationMethodSelection::selectRegionSegmentationMethod ( conf, rsMethode );
    if ( save_cache )
      regionSeg = new RSCache ( conf, tmpRegionSeg );
    else
      regionSeg = tmpRegionSeg;
  }

  cn = md->getClassNames ( "train" );

  if ( read_cache )
  {
    string classifierdst = "/classifier.data";
    fprintf ( stderr, "SemSegNovelty:: Reading classifier data from %s\n", ( cache + classifierdst ).c_str() );

    try
    {
      if ( classifier != NULL )
      {
        classifier->read ( cache + classifierdst );
      }

      fprintf ( stderr, "SemSegNovelty:: successfully read\n" );
    }
    catch ( char *str )
    {
      cerr << "error reading data: " << str << endl;
    }
  }
  else
  {
    train ( md );
  }
}

SemSegNovelty::~SemSegNovelty()
{
  if ( newTrainExamples.size() > 0 )
  {
    // display the most uncertain region found during testing
    showImage ( maskedImg );
    //classifier->add(newTrainExamples)
    classifier->save ( cache + "/classifier.data" );
  }

  // clean-up
  if ( classifier != NULL )
    delete classifier;
  if ( featExtract != NULL )
    delete featExtract;
}

void SemSegNovelty::visualizeRegion ( const NICE::ColorImage &img,
                                      const NICE::Matrix &regions,
                                      int region,
                                      NICE::ColorImage &outimage )
{
  // paint the given region in red, copy all other pixels unchanged
  vector<int> color;
  color.push_back ( 255 );
  color.push_back ( 0 );
  color.push_back ( 0 );

  int width = img.width();
  int height = img.height();

  outimage.resize ( width, height );

  for ( int y = 0; y < height; y++ )
  {
    for ( int x = 0; x < width; x++ )
    {
      if ( regions ( x, y ) == region )
      {
        for ( int c = 0; c < 3; c++ )
        {
          outimage ( x, y, c ) = color[c];
        }
      }
      else
      {
        for ( int c = 0; c < 3; c++ )
        {
          outimage ( x, y, c ) = img ( x, y, c );
        }
      }
    }
  }
}
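// Illustration (not part of the original file): a minimal, hypothetical usage
// sketch of visualizeRegion. The variable names and file names below are
// assumptions, not part of this class:
//
//   NICE::ColorImage img ( "scene.ppm" );
//   NICE::Matrix regions;                          // filled by a region segmentation
//   int amountRegions = regionSeg->segRegions ( img, regions );
//   NICE::ColorImage overlay;
//   visualizeRegion ( img, regions, 0, overlay );  // highlight region 0 in red
//   overlay.write ( "region0.ppm" );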
void SemSegNovelty::train ( const MultiDataset *md )
{
  const LabeledSet train = * ( *md ) ["train"];
  const LabeledSet *trainp = &train;

  ////////////////////////
  // feature extraction //
  ////////////////////////

  std::string forbidden_classes_s = conf->gS ( "analysis", "donttrain", "" );
  if ( forbidden_classes_s == "" )
  {
    forbidden_classes_s = conf->gS ( "analysis", "forbidden_classes", "" );
  }
  cn.getSelection ( forbidden_classes_s, forbidden_classes );

  //check the same thing for the training classes - this is very specific to our setup
  std::string forbidden_classesTrain_s = conf->gS ( "analysis", "donttrainTrain", "" );
  if ( forbidden_classesTrain_s == "" )
  {
    forbidden_classesTrain_s = conf->gS ( "analysis", "forbidden_classesTrain", "" );
  }
  cn.getSelection ( forbidden_classesTrain_s, forbidden_classesTrain );

  ProgressBar pb ( "Local Feature Extraction" );
  pb.show();

  int imgnb = 0;

  Examples examples;
  examples.filename = "training";

  int featdim = -1;

  classesInUse.clear();

  LOOP_ALL_S ( *trainp )
  {
    //EACH_S(classno, currentFile);
    EACH_INFO ( classno, info );

    std::string currentFile = info.img();

    CachedExample *ce = new CachedExample ( currentFile );

    const LocalizationResult *locResult = info.localization();
    if ( locResult->size() <= 0 )
    {
      fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n",
                currentFile.c_str() );
      continue;
    }

    int xsize, ysize;
    ce->getImageSize ( xsize, ysize );

    Image labels ( xsize, ysize );
    labels.set ( 0 );
    locResult->calcLabeledImage ( labels, ( *classNames ).getBackgroundClass() );

    NICE::ColorImage img;
    try
    {
      img = ColorImage ( currentFile );
    }
    catch ( Exception )
    {
      cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
      continue;
    }

    Globals::setCurrentImgFN ( currentFile );

    MultiChannelImageT<double> feats;

    // extract features
    featExtract->getFeats ( img, feats );
    featdim = feats.channels();
    feats.addChannel ( featdim );

    // append the gradient strength of every feature channel as additional channels
    for ( int c = 0; c < featdim; c++ )
    {
      ImageT<double> tmp = feats[c];
      ImageT<double> tmp2 = feats[c+featdim];
      NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
    }
    featdim += featdim;

    // compute integral images
    for ( int c = 0; c < featdim; c++ )
    {
      feats.calcIntegral ( c );
    }

    for ( int y = 0; y < ysize; y += featdist )
    {
      for ( int x = 0; x < xsize; x += featdist )
      {
        int classnoTmp = labels.getPixel ( x, y );

        if ( forbidden_classesTrain.find ( classnoTmp ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        if ( classesInUse.find ( classnoTmp ) == classesInUse.end() )
        {
          classesInUse.insert ( classnoTmp );
        }

        Example example;
        example.vec = NULL;
        example.svec = new SparseVector ( featdim );

        // windowed box sums over all channels, evaluated via the integral images
        for ( int f = 0; f < featdim; f++ )
        {
          double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
          if ( val > 1e-10 )
            ( *example.svec ) [f] = val;
        }
        example.svec->normalize();

        example.position = imgnb;
        examples.push_back ( pair<int, Example> ( classnoTmp, example ) );
      }
    }

    delete ce;
    imgnb++;
    pb.update ( trainp->count() );
  }

  numberOfClasses = classesInUse.size();
  std::cerr << "numberOfClasses: " << numberOfClasses << std::endl;
  std::cerr << "classes in use: " << std::endl;
  for ( std::set<int>::const_iterator it = classesInUse.begin(); it != classesInUse.end(); it++ )
  {
    std::cerr << *it << " ";
  }
  std::cerr << std::endl;

  pb.hide();

  //////////////////////
  // train classifier //
  //////////////////////
  FeaturePool fp;

  Feature *f = new SparseVectorFeature ( featdim );

  f->explode ( fp );
  delete f;

  if ( classifier != NULL )
    classifier->train ( fp, examples );
  else
  {
    cerr << "no classifier selected?!" << endl;
    exit ( -1 );
  }

  fp.destroy();

  if ( save_cache )
  {
    if ( classifier != NULL )
      classifier->save ( cache + "/classifier.data" );
  }

  ////////////
  //clean up//
  ////////////
  for ( int i = 0; i < ( int ) examples.size(); i++ )
  {
    examples[i].second.clean();
  }
  examples.clear();

  cerr << "SemSeg training finished" << endl;
}
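// Note (added for clarity, not in the original file): both train() and
// semanticseg() describe each grid position by the sum of every feature
// channel over a (2*whs+1) x (2*whs+1) window. Since the channels were
// converted with calcIntegral(), getIntegralValue() can presumably evaluate
// such a box sum in O(1) with the standard four-corner lookup, i.e. for an
// integral image I and an inclusive box (x1,y1)-(x2,y2):
//
//   sum = I(x2,y2) - I(x1-1,y2) - I(x2,y1-1) + I(x1-1,y1-1)
//
// so the cost per window is independent of whs.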
void SemSegNovelty::semanticseg ( CachedExample *ce,
                                  NICE::Image & segresult,
                                  NICE::MultiChannelImageT<double> & probabilities )
{
  Timer timer;
  timer.start();

  // keep the input labels: in our setup segresult carries the ground-truth
  // labeling, which is needed for the example harvesting further below
  Image labels = segresult;
  segresult.set ( 0 );

  int featdim = -1;

  std::string currentFile = Globals::getCurrentImgFN();

  int xsize, ysize;
  ce->getImageSize ( xsize, ysize );

  probabilities.reInit ( xsize, ysize, cn.getMaxClassno() + 1 );
  probabilities.setAll ( 0.0 );

  NICE::ColorImage img;
  try
  {
    img = ColorImage ( currentFile );
  }
  catch ( Exception )
  {
    cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
    return;
  }

  MultiChannelImageT<double> feats;

  // extract features
  featExtract->getFeats ( img, feats );
  featdim = feats.channels();
  feats.addChannel ( featdim );

  // append the gradient strength of every feature channel as additional channels
  for ( int c = 0; c < featdim; c++ )
  {
    ImageT<double> tmp = feats[c];
    ImageT<double> tmp2 = feats[c+featdim];
    NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
  }
  featdim += featdim;

  // compute integral images
  for ( int c = 0; c < featdim; c++ )
  {
    feats.calcIntegral ( c );
  }

  FloatImage uncert ( xsize, ysize );
  uncert.set ( 0.0 );

  FloatImage gpUncertainty ( xsize, ysize );
  FloatImage gpMean ( xsize, ysize );
  FloatImage gpMeanRatio ( xsize, ysize );
  FloatImage gpWeightAll ( xsize, ysize );
  FloatImage gpWeightRatio ( xsize, ysize );

  gpUncertainty.set ( 0.0 );
  gpMean.set ( 0.0 );
  gpMeanRatio.set ( 0.0 );
  gpWeightAll.set ( 0.0 );
  gpWeightRatio.set ( 0.0 );

  double maxunc = -numeric_limits<double>::max();

  double maxGPUncertainty = -numeric_limits<double>::max();
  double maxGPMean = -numeric_limits<double>::max();
  double maxGPMeanRatio = -numeric_limits<double>::max();
  double maxGPWeightAll = -numeric_limits<double>::max();
  double maxGPWeightRatio = -numeric_limits<double>::max();

  timer.stop();
  cout << "first: " << timer.getLastAbsolute() << endl;

  // we need this later on for the active-learning measures
  double gpNoise = conf->gD ( "GPHIK", "noise", 0.01 );

  timer.start();
  #pragma omp parallel for
  for ( int y = 0; y < ysize; y += testWSize )
  {
    Example example;
    example.vec = NULL;
    example.svec = new SparseVector ( featdim );
    for ( int x = 0; x < xsize; x += testWSize )
    {
      for ( int f = 0; f < featdim; f++ )
      {
        double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
        if ( val > 1e-10 )
          ( *example.svec ) [f] = val;
      }
      example.svec->normalize();

      ClassificationResult cr = classifier->classify ( example );

      // we need these statistics if we want to compute the GP-AL measures later on
      double minMeanAbs ( numeric_limits<double>::max() );
      double maxMeanAbs ( 0.0 );
      double sndMaxMeanAbs ( 0.0 );
      double maxMean ( -numeric_limits<double>::max() );
      double sndMaxMean ( -numeric_limits<double>::max() );

      for ( int j = 0 ; j < cr.scores.size(); j++ )
      {
        if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        // check whether we found a class with a smaller abs mean than the current minimum
        if ( abs ( cr.scores[j] ) < minMeanAbs )
          minMeanAbs = abs ( cr.scores[j] );

        // check for a larger abs mean as well
        if ( abs ( cr.scores[j] ) > maxMeanAbs )
        {
          sndMaxMeanAbs = maxMeanAbs;
          maxMeanAbs = abs ( cr.scores[j] );
        }
        // and also for the second highest abs mean of all classes
        else if ( abs ( cr.scores[j] ) > sndMaxMeanAbs )
        {
          sndMaxMeanAbs = abs ( cr.scores[j] );
        }

        // check for a larger mean without abs as well
        if ( cr.scores[j] > maxMean )
        {
          sndMaxMean = maxMean;
          maxMean = cr.scores[j];
        }
        // and also for the second highest mean of all classes
        else if ( cr.scores[j] > sndMaxMean )
        {
          sndMaxMean = cr.scores[j];
        }
      }
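      // Overview (added for clarity, not in the original file) of the novelty
      // measures computed below from the per-class GP means mu_j and the
      // predictive variance cr.uncertainty:
      //
      //   GP-UNCERTAINTY (Kapoor et al., IJCV 2010):
      //       max_j |mu_j| / sqrt(cr.uncertainty + gpNoise)
      //   GP-MEAN:        min_j |mu_j|           -- distance to the decision boundary
      //   GP-MEAN-RATIO:  maxMean - sndMaxMean   -- margin between the two best classes
      //   GP-WEIGHT-*:    estimated alpha-weight the window would receive if it
      //                   were added to the training set
      //
      // Low raw values indicate novelty; the maps are inverted before they are
      // written out at the end of this method.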
      double firstTerm ( 1.0 / sqrt ( cr.uncertainty + gpNoise ) );

      // compute the heuristic GP-UNCERTAINTY, as proposed by Kapoor et al. in IJCV 2010
      // GP-UNCERTAINTY : |mean| / sqrt(var^2 + gpnoise^2)
      double gpUncertaintyVal = maxMeanAbs * firstTerm; // firstTerm = 1.0 / sqrt(cr.uncertainty+gpNoise)

      // compute results when we take the lowest mean value of all classes
      double gpMeanVal = minMeanAbs;

      // look at the difference between the mean values of the most plausible class
      // and the second most plausible class
      double gpMeanRatioVal = maxMean - sndMaxMean;

      double gpWeightAllVal ( 0.0 );
      double gpWeightRatioVal ( 0.0 );

      if ( numberOfClasses > 2 )
      {
        // compute the weight in the alpha-vector for every sample after assuming it to be
        // added to the training set.
        // Thereby, we measure its "importance" for the current model
        //
        // double firstTerm is already computed
        //
        // the second term is only needed when computing impacts
        // double secondTerm; //this is the nasty guy :/

        // --- compute the third term
        // this is the difference between predicted label and GT label
        std::vector<double> diffToPositive;
        diffToPositive.clear();
        std::vector<double> diffToNegative;
        diffToNegative.clear();
        double diffToNegativeSum ( 0.0 );

        for ( int j = 0 ; j < cr.scores.size(); j++ )
        {
          if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
          {
            continue;
          }

          // look at the difference to plus 1
          diffToPositive.push_back ( abs ( cr.scores[j] - 1 ) );
          // look at the difference to -1
          diffToNegative.push_back ( abs ( cr.scores[j] + 1 ) );
          // sum up the differences to -1
          diffToNegativeSum += abs ( cr.scores[j] + 1 );
        }

        // let's subtract for every class its diffToNegative from the sum, add its diffToPositive,
        // and use this as the third term for this specific class.
        // the final value is obtained by minimizing over all classes
        //
        // originally, we minimize over all classes after building the final score
        // however, the first and the second term do not depend on the choice of
        // y*, therefore we minimize here already
        double thirdTerm ( numeric_limits<double>::max() ) ;
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          double tmpVal ( diffToPositive[tmpCnt] + ( diffToNegativeSum - diffToNegative[tmpCnt] ) );
          if ( tmpVal < thirdTerm )
            thirdTerm = tmpVal;
        }
        gpWeightAllVal = thirdTerm * firstTerm;

        // now look at the ratio of the resulting weights for the most plausible
        // against the second most plausible class
        double thirdTermMostPlausible ( 0.0 ) ;
        double thirdTermSecondMostPlausible ( 0.0 ) ;
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          if ( diffToPositive[tmpCnt] > thirdTermMostPlausible )
          {
            thirdTermSecondMostPlausible = thirdTermMostPlausible;
            thirdTermMostPlausible = diffToPositive[tmpCnt];
          }
          else if ( diffToPositive[tmpCnt] > thirdTermSecondMostPlausible )
          {
            thirdTermSecondMostPlausible = diffToPositive[tmpCnt];
          }
        }
        // compute the resulting score
        gpWeightRatioVal = ( thirdTermMostPlausible - thirdTermSecondMostPlausible ) * firstTerm;

        // finally, look for this feature how it would affect the whole model (summarized by the
        // weight-vector alpha) if we used it as an additional training example
        // TODO this would be REALLY computationally demanding. Do we really want to do this?
        // gpImpactAll[s] ( pce[i].second.x, pce[i].second.y ) = thirdTerm*firstTerm*secondTerm;
        // gpImpactRatio[s] ( pce[i].second.x, pce[i].second.y ) = (thirdTermMostPlausible - thirdTermSecondMostPlausible)*firstTerm*secondTerm;
      }
      else // binary scenario
      {
        gpWeightAllVal = std::min ( abs ( cr.scores[*classesInUse.begin()] + 1 ),
                                    abs ( cr.scores[*classesInUse.begin()] - 1 ) );
        gpWeightAllVal *= firstTerm;
        gpWeightRatioVal = gpWeightAllVal;
      }
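      // Worked example (added for clarity, not in the original file): assume
      // three classes with GP means mu = (0.8, -0.2, -0.9). Then
      //   diffToPositive = (0.2, 1.2, 1.9)
      //   diffToNegative = (1.8, 0.8, 0.1),  diffToNegativeSum = 2.7
      // and the candidate third terms are
      //   0.2 + (2.7 - 1.8) = 1.1,   1.2 + (2.7 - 0.8) = 3.1,   1.9 + (2.7 - 0.1) = 4.5,
      // so thirdTerm = 1.1: labeling the window with the already dominant
      // class would change the model the least.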
      // write the current window's results to all pixels it covers
      int xs = std::max ( 0, x - testWSize / 2 );
      int xe = std::min ( xsize - 1, x + testWSize / 2 );
      int ys = std::max ( 0, y - testWSize / 2 );
      int ye = std::min ( ysize - 1, y + testWSize / 2 );
      for ( int yl = ys; yl <= ye; yl++ )
      {
        for ( int xl = xs; xl <= xe; xl++ )
        {
          for ( int j = 0 ; j < cr.scores.size(); j++ )
          {
            probabilities ( xl, yl, j ) = cr.scores[j];
          }
          segresult ( xl, yl ) = cr.classno;
          uncert ( xl, yl ) = cr.uncertainty;

          gpUncertainty ( xl, yl ) = gpUncertaintyVal;
          gpMean ( xl, yl ) = gpMeanVal;
          gpMeanRatio ( xl, yl ) = gpMeanRatioVal;
          gpWeightAll ( xl, yl ) = gpWeightAllVal;
          gpWeightRatio ( xl, yl ) = gpWeightRatioVal;
        }
      }

      if ( maxunc < cr.uncertainty )
        maxunc = cr.uncertainty;

      if ( maxGPUncertainty < gpUncertaintyVal )
        maxGPUncertainty = gpUncertaintyVal;
      if ( maxGPMean < gpMeanVal )
        maxGPMean = gpMeanVal;
      if ( maxGPMeanRatio < gpMeanRatioVal )
        maxGPMeanRatio = gpMeanRatioVal;
      if ( maxGPWeightAll < gpWeightAllVal )
        maxGPWeightAll = gpWeightAllVal;
      if ( maxGPWeightRatio < gpWeightRatioVal )
        maxGPWeightRatio = gpWeightRatioVal;

      example.svec->clear();
    }
    delete example.svec;
    example.svec = NULL;
  }

  // std::cerr << "uncertainty: " << gpUncertaintyVal << " minMean: " << gpMeanVal
  //           << " gpMeanRatio: " << gpMeanRatioVal << " weightAll: " << gpWeightAllVal
  //           << " weightRatio: " << gpWeightRatioVal << std::endl;

  // determine regions
  if ( regionSeg != NULL )
  {
    NICE::Matrix mask;
    int amountRegions = regionSeg->segRegions ( img, mask );

    // compute probs per region
    vector<vector<double> > regionProb ( amountRegions, vector<double> ( probabilities.channels(), 0.0 ) );
    vector<double> regionNoveltyMeasure ( amountRegions, 0.0 );
    vector<int> regionCounter ( amountRegions, 0 );
    for ( int y = 0; y < ysize; y++ )
    {
      for ( int x = 0; x < xsize; x++ )
      {
        int r = mask ( x, y );
        regionCounter[r]++;
        for ( int j = 0; j < probabilities.channels(); j++ )
        {
          regionProb[r][j] += probabilities ( x, y, j );
        }
        regionNoveltyMeasure[r] += uncert ( x, y );
      }
    }

    // find the best class per region
    vector<int> bestClassPerRegion ( amountRegions, 0 );

    double maxuncert = -numeric_limits<double>::max();
    int maxUncertRegion = -1;

    for ( int r = 0; r < amountRegions; r++ )
    {
      double maxval = -numeric_limits<double>::max();
      for ( int c = 0; c < probabilities.channels(); c++ )
      {
        // normalize the summed scores to region means
        regionProb[r][c] /= regionCounter[r];
        if ( maxval < regionProb[r][c] && regionProb[r][c] != 0.0 )
        {
          maxval = regionProb[r][c];
          bestClassPerRegion[r] = c;
        }
      }
      // novelty of a region = average pixel uncertainty within the region
      regionNoveltyMeasure[r] /= regionCounter[r];
      if ( maxuncert < regionNoveltyMeasure[r] )
      {
        maxuncert = regionNoveltyMeasure[r];
        maxUncertRegion = r;
      }
    }

    if ( findMaximumUncert )
    {
      if ( maxuncert > globalMaxUncert )
      {
        // save new important features
        Examples examples;
        for ( int y = 0; y < ysize; y += testWSize )
        {
          for ( int x = 0; x < xsize; x += testWSize )
          {
            if ( mask ( x, y ) == maxUncertRegion )
            {
              Example example;
              example.vec = NULL;
              example.svec = new SparseVector ( featdim );
              int classnoTmp = labels ( x, y );
              for ( int f = 0; f < featdim; f++ )
              {
                double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
                if ( val > 1e-10 )
                  ( *example.svec ) [f] = val;
              }
              example.svec->normalize();

              examples.push_back ( pair<int, Example> ( classnoTmp, example ) );
            }
          }
        }

        if ( examples.size() > 0 )
        {
          newTrainExamples.clear();
          newTrainExamples = examples;
          globalMaxUncert = maxuncert;
          visualizeRegion ( img, mask, maxUncertRegion, maskedImg );
        }
      }
    }
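    // Note (added for clarity, not in the original file): the block above is
    // the active-learning harvesting step. Whenever the current image contains
    // a region whose average novelty exceeds the best value seen so far
    // (globalMaxUncert), the windows of that region are collected together
    // with their ground-truth labels as candidate training examples; the
    // destructor later visualizes this globally most novel region and saves
    // the classifier.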
    // write back the best results per region
    for ( int y = 0; y < ysize; y++ )
    {
      for ( int x = 0; x < xsize; x++ )
      {
        int r = mask ( x, y );
        for ( int j = 0; j < probabilities.channels(); j++ )
        {
          probabilities ( x, y, j ) = regionProb[r][j];
        }
        segresult ( x, y ) = bestClassPerRegion[r];
      }
    }
  }

  timer.stop();
  cout << "second: " << timer.getLastAbsolute() << endl;
  timer.start();

  ColorImage imgrgb ( xsize, ysize );

  std::stringstream out;
  std::vector< std::string > list2;
  StringTools::split ( Globals::getCurrentImgFN (), '/', list2 );
  out << uncertdir << "/" << list2.back();

  uncert.writeRaw ( out.str() + ".rawfloat" );

  // fix the value range of two pixels, presumably so that the color conversion
  // uses a comparable scale across images
  uncert ( 0, 0 ) = 0.0;
  uncert ( 0, 1 ) = 1.0 + gpNoise;
  ICETools::convertToRGB ( uncert, imgrgb );
  imgrgb.write ( out.str() + "rough.png" );

  // invert the images such that large numbers correspond to high impact,
  // high variance, high importance, high novelty, ...
  // (gpWeightAll is written out without inversion)
  for ( int y = 0; y < ysize; y++ )
  {
    for ( int x = 0; x < xsize; x++ )
    {
      gpUncertainty ( x, y ) = maxGPUncertainty - gpUncertainty ( x, y );
      gpMean ( x, y ) = maxGPMean - gpMean ( x, y );
      gpMeanRatio ( x, y ) = maxGPMeanRatio - gpMeanRatio ( x, y );
      gpWeightRatio ( x, y ) = maxGPWeightRatio - gpWeightRatio ( x, y );
    }
  }

  gpUncertainty ( 0, 0 ) = 0.0;
  gpUncertainty ( 0, 1 ) = maxGPUncertainty;
  ICETools::convertToRGB ( gpUncertainty, imgrgb );
  imgrgb.write ( out.str() + "gpUncertainty.png" );

  gpMean ( 0, 0 ) = 0.0;
  gpMean ( 0, 1 ) = maxGPMean;
  ICETools::convertToRGB ( gpMean, imgrgb );
  imgrgb.write ( out.str() + "gpMean.png" );

  gpMeanRatio ( 0, 0 ) = 0.0;
  gpMeanRatio ( 0, 1 ) = maxGPMeanRatio;
  ICETools::convertToRGB ( gpMeanRatio, imgrgb );
  imgrgb.write ( out.str() + "gpMeanRatio.png" );

  gpWeightAll ( 0, 0 ) = 0.0;
  gpWeightAll ( 0, 1 ) = maxGPWeightAll;
  ICETools::convertToRGB ( gpWeightAll, imgrgb );
  imgrgb.write ( out.str() + "gpWeightAll.png" );

  gpWeightRatio ( 0, 0 ) = 0.0;
  gpWeightRatio ( 0, 1 ) = maxGPWeightRatio;
  ICETools::convertToRGB ( gpWeightRatio, imgrgb );
  imgrgb.write ( out.str() + "gpWeightRatio.png" );

  timer.stop();
  cout << "last: " << timer.getLastAbsolute() << endl;
}
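// Illustration (not part of the original file): a minimal, hypothetical driver
// for this class. The config section names match the ones queried above; the
// file names and the way segresult is pre-initialized are assumptions about
// the surrounding test framework:
//
//   NICE::Config conf ( "semsegnovelty.conf" );
//   OBJREC::MultiDataset md ( &conf );
//   OBJREC::SemSegNovelty semseg ( &conf, &md );  // trains or reads the cached classifier
//
//   OBJREC::CachedExample ce ( "test.ppm" );
//   NICE::Image segresult;                        // expected to carry GT labels on input
//   NICE::MultiChannelImageT<double> probabilities;
//   semseg.semanticseg ( &ce, segresult, probabilities );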