#include "SemSegContextTree.h" #include "vislearning/baselib/Globals.h" #include "vislearning/baselib/ProgressBar.h" #include "core/basics/StringTools.h" #include "vislearning/cbaselib/CachedExample.h" #include "vislearning/cbaselib/PascalResults.h" #include "vislearning/baselib/ColorSpace.h" #include "objrec/segmentation/RSMeanShift.h" #include "objrec/segmentation/RSGraphBased.h" #include "core/basics/numerictools.h" #include "core/basics/StringTools.h" #include "core/basics/FileName.h" #include "vislearning/baselib/ICETools.h" #include "core/basics/Timer.h" #include "core/basics/vectorio.h" #include "core/image/FilterT.h" #include #include #undef LOCALFEATS #undef WRITEGLOB #undef TEXTONMAP //#define LOCALFEATS using namespace OBJREC; using namespace std; using namespace NICE; SemSegContextTree::SemSegContextTree ( const Config *conf, const MultiDataset *md ) : SemanticSegmentation ( conf, & ( md->getClassNames ( "train" ) ) ) { this->conf = conf; string section = "SSContextTree"; lfcw = new LFColorWeijer ( conf ); grid = conf->gI ( section, "grid", 10 ); maxSamples = conf->gI ( section, "max_samples", 2000 ); minFeats = conf->gI ( section, "min_feats", 50 ); maxDepth = conf->gI ( section, "max_depth", 10 ); windowSize = conf->gI ( section, "window_size", 16 ); featsPerSplit = conf->gI ( section, "feats_per_split", 200 ); useShannonEntropy = conf->gB ( section, "use_shannon_entropy", true ); nbTrees = conf->gI ( section, "amount_trees", 1 ); string segmentationtype = conf->gS ( section, "segmentation_type", "meanshift" ); randomTests = conf->gI ( section, "random_tests", 10 ); bool saveLoadData = conf->gB ( "debug", "save_load_data", false ); string fileLocation = conf->gS ( "debug", "datafile", "tmp.txt" ); pixelWiseLabeling = false; if ( segmentationtype == "meanshift" ) segmentation = new RSMeanShift ( conf ); else if ( segmentationtype == "none" ) { segmentation = NULL; pixelWiseLabeling = true; } else if ( segmentationtype == "felzenszwalb" ) segmentation = new RSGraphBased ( conf ); else throw ( "no valid segmenation_type\n please choose between none, meanshift and felzenszwalb\n" ); ftypes = conf->gI ( section, "features", 100 );; string featsec = "Features"; if ( conf->gB ( featsec, "minus", true ) ) ops.push_back ( new Minus() ); if ( conf->gB ( featsec, "minus_abs", true ) ) ops.push_back ( new MinusAbs() ); if ( conf->gB ( featsec, "addition", true ) ) ops.push_back ( new Addition() ); if ( conf->gB ( featsec, "only1", true ) ) ops.push_back ( new Only1() ); if ( conf->gB ( featsec, "rel_x", true ) ) ops.push_back ( new RelativeXPosition() ); if ( conf->gB ( featsec, "rel_y", true ) ) ops.push_back ( new RelativeYPosition() ); if ( conf->gB ( featsec, "bi_int_cent", true ) ) cops.push_back ( new BiIntegralCenteredOps() ); if ( conf->gB ( featsec, "int_cent", true ) ) cops.push_back ( new IntegralCenteredOps() ); if ( conf->gB ( featsec, "int", true ) ) cops.push_back ( new IntegralOps() ); if ( conf->gB ( featsec, "haar_horz", true ) ) cops.push_back ( new HaarHorizontal() ); if ( conf->gB ( featsec, "haar_vert", true ) ) cops.push_back ( new HaarVertical() ); if ( conf->gB ( featsec, "haar_diag", true ) ) cops.push_back ( new HaarDiag() ); if ( conf->gB ( featsec, "haar3_horz", true ) ) cops.push_back ( new Haar3Horiz() ); if ( conf->gB ( featsec, "haar3_vert", true ) ) cops.push_back ( new Haar3Vert() ); if ( conf->gB ( featsec, "glob", true ) ) cops.push_back ( new GlobalFeats() ); useGradient = conf->gB ( featsec, "use_gradient", true ); useRegionFeature = conf->gB ( featsec, "use_region", true ); // geometric features of hoiem useHoiemFeatures = conf->gB( featsec, "use_hoiem_features", false ); if ( useHoiemFeatures ) { hoiemDirectory = conf->gS( featsec, "hoiem_directory" ); } opOverview = vector ( NBOPERATIONS, 0 ); contextOverview = vector > ( maxDepth, vector ( 2, 0.0 ) ); calcVal.push_back ( new MCImageAccess() ); calcVal.push_back ( new ClassificationResultAccess() ); classnames = md->getClassNames ( "train" ); /////////////////////////////////// // Train Segmentation Context Trees /////////////////////////////////// if ( saveLoadData ) { if ( FileMgt::fileExists ( fileLocation ) ) read ( fileLocation ); else { train ( md ); write ( fileLocation ); } } else { train ( md ); } } SemSegContextTree::~SemSegContextTree() { } double SemSegContextTree::getBestSplit ( std::vector > &feats, std::vector > ¤tfeats, std::vector > &integralImgs, const std::vector > &labels, int node, Operation *&splitop, double &splitval, const int &tree ) { Timer t; t.start(); int imgCount = 0, featdim = 0; try { imgCount = (int)feats.size(); } catch ( Exception ) { cerr << "no features computed?" << endl; } double bestig = -numeric_limits< double >::max(); splitop = NULL; splitval = -1.0; set >selFeats; map e; int featcounter = forest[tree][node].featcounter; if ( featcounter < minFeats ) { //cout << "only " << featcounter << " feats in current node -> it's a leaf" << endl; return 0.0; } vector fraction ( a.size(), 0.0 ); for ( uint i = 0; i < fraction.size(); i++ ) { if ( forbidden_classes.find ( labelmapback[i] ) != forbidden_classes.end() ) fraction[i] = 0; else fraction[i] = ( (double)maxSamples ) / ( (double)featcounter * a[i] * a.size() ); } featcounter = 0; for ( int iCounter = 0; iCounter < imgCount; iCounter++ ) { int xsize = (int)currentfeats[iCounter].width(); int ysize = (int)currentfeats[iCounter].height(); for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { if ( currentfeats[iCounter].get ( x, y, tree ) == node ) { int cn = labels[iCounter] ( x, y ); double randD = (double)rand() / (double)RAND_MAX; if ( labelmap.find ( cn ) == labelmap.end() ) continue; if ( randD < fraction[labelmap[cn]] ) { vector tmp ( 3, 0 ); tmp[0] = iCounter; tmp[1] = x; tmp[2] = y; featcounter++; selFeats.insert ( tmp ); e[cn]++; } } } } } map::iterator mapit; double globent = 0.0; for ( mapit = e.begin() ; mapit != e.end(); mapit++ ) { double p = (double)( *mapit ).second / (double)featcounter; globent += p * log2 ( p ); } globent = -globent; if ( globent < 0.5 ) { return 0.0; } featsel.clear(); for ( int i = 0; i < featsPerSplit; i++ ) { int x1, x2, y1, y2; int ft = (int)( (double)rand() / (double)RAND_MAX * (double)ftypes ); int tmpws = windowSize; if ( integralImgs[0].width() == 0 ) ft = 0; if ( ft > 0 ) { tmpws *= 4; } x1 = (int)( (double)rand() / (double)RAND_MAX * (double)tmpws ) - tmpws / 2; x2 = (int)( (double)rand() / (double)RAND_MAX * (double)tmpws ) - tmpws / 2; y1 = (int)( (double)rand() / (double)RAND_MAX * (double)tmpws ) - tmpws / 2; y2 = (int)( (double)rand() / (double)RAND_MAX * (double)tmpws ) - tmpws / 2; if ( ft == 0 ) { int f1 = (int)( (double)rand() / (double)RAND_MAX * (double)featdim ); int f2 = (int)( (double)rand() / (double)RAND_MAX * (double)featdim ); int o = (int)( (double)rand() / (double)RAND_MAX * (double)ops.size() ); Operation *op = ops[o]->clone(); op->set ( x1, y1, x2, y2, f1, f2, calcVal[ft] ); op->setContext ( false ); featsel.push_back ( op ); } else if ( ft == 1 ) { int opssize = (int)ops.size(); //opssize = 0; int o = (int)( (double)rand() / (double)RAND_MAX * ( ( (double)cops.size() ) + (double)opssize ) ); Operation *op; if ( o < opssize ) { int chans = (int)forest[0][0].dist.size(); int f1 = (int)( (double)rand() / (double)RAND_MAX * (double)chans ); int f2 = (int)( (double)rand() / (double)RAND_MAX * (double)chans ); op = ops[o]->clone(); op->set ( x1, y1, x2, y2, f1, f2, calcVal[ft] ); op->setContext ( true ); } else { int chans = integralImgs[0].channels(); int f1 = (int)( (double)rand() / (double)RAND_MAX * (double)chans ); int f2 = (int)( (double)rand() / (double)RAND_MAX * (double)chans ); o -= opssize; op = cops[o]->clone(); op->set ( x1, y1, x2, y2, f1, f2, calcVal[ft] ); if ( f1 < (int)forest[0][0].dist.size() ) op->setContext ( true ); else op->setContext ( false ); } featsel.push_back ( op ); } } #pragma omp parallel for private(mapit) for ( int f = 0; f < featsPerSplit; f++ ) { double l_bestig = -numeric_limits< double >::max(); double l_splitval = -1.0; set >::iterator it; vector vals; double maxval = -numeric_limits::max(); double minval = numeric_limits::max(); for ( it = selFeats.begin() ; it != selFeats.end(); it++ ) { Features feat; feat.feats = &feats[ ( *it ) [0]]; feat.cfeats = ¤tfeats[ ( *it ) [0]]; feat.cTree = tree; feat.tree = &forest[tree]; feat.integralImg = &integralImgs[ ( *it ) [0]]; double val = featsel[f]->getVal ( feat, ( *it ) [1], ( *it ) [2] ); vals.push_back ( val ); maxval = std::max ( val, maxval ); minval = std::min ( val, minval ); } if ( minval == maxval ) continue; double scale = maxval - minval; vector splits; for ( int r = 0; r < randomTests; r++ ) { splits.push_back ( ( (double)rand() / (double)RAND_MAX*scale ) + minval ); } for ( int run = 0 ; run < randomTests; run++ ) { set >::iterator it2; double val = splits[run]; map eL, eR; int counterL = 0, counterR = 0; int counter2 = 0; for ( it2 = selFeats.begin() ; it2 != selFeats.end(); it2++, counter2++ ) { int cn = labels[ ( *it2 ) [0]] ( ( *it2 ) [1], ( *it2 ) [2] ); //cout << "vals[counter2] " << vals[counter2] << " val: " << val << endl; if ( vals[counter2] < val ) { //left entropie: eL[cn] = eL[cn] + 1; counterL++; } else { //right entropie: eR[cn] = eR[cn] + 1; counterR++; } } double leftent = 0.0; for ( mapit = eL.begin() ; mapit != eL.end(); mapit++ ) { double p = (double)( *mapit ).second / (double)counterL; leftent -= p * log2 ( p ); } double rightent = 0.0; for ( mapit = eR.begin() ; mapit != eR.end(); mapit++ ) { double p = (double)( *mapit ).second / (double)counterR; rightent -= p * log2 ( p ); } //cout << "rightent: " << rightent << " leftent: " << leftent << endl; double pl = (double)counterL / (double)( counterL + counterR ); double ig = globent - ( 1.0 - pl ) * rightent - pl * leftent; //double ig = globent - rightent - leftent; if ( useShannonEntropy ) { double esplit = - ( pl * log ( pl ) + ( 1 - pl ) * log ( 1 - pl ) ); ig = 2 * ig / ( globent + esplit ); } if ( ig > l_bestig ) { l_bestig = ig; l_splitval = val; } } #pragma omp critical { //cout << "globent: " << globent << " bestig " << bestig << " splitfeat: " << splitfeat << " splitval: " << splitval << endl; //cout << "globent: " << globent << " l_bestig " << l_bestig << " f: " << p << " l_splitval: " << l_splitval << endl; //cout << "p: " << featsubset[f] << endl; if ( l_bestig > bestig ) { bestig = l_bestig; splitop = featsel[f]; splitval = l_splitval; } } } //getchar(); //splitop->writeInfos(); //cout<< "ig: " << bestig << endl; //FIXME: delete all features! /*for(int i = 0; i < featsPerSplit; i++) { if(featsel[i] != splitop) delete featsel[i]; }*/ #ifdef debug cout << "globent: " << globent << " bestig " << bestig << " splitval: " << splitval << endl; #endif return bestig; } inline double SemSegContextTree::getMeanProb ( const int &x, const int &y, const int &channel, const MultiChannelImageT ¤tfeats ) { double val = 0.0; for ( int tree = 0; tree < nbTrees; tree++ ) { val += forest[tree][currentfeats.get ( x,y,tree ) ].dist[channel]; } return val / (double)nbTrees; } void SemSegContextTree::computeIntegralImage ( const NICE::MultiChannelImageT &infeats, NICE::MultiChannelImageT &integralImage ) { int xsize = infeats.width(); int ysize = infeats.height(); integralImage ( 0, 0 ).add ( infeats.get ( 0, 0 ) ); //first column for ( int y = 1; y < ysize; y++ ) { integralImage ( 0, y ).add ( infeats.get ( 0, y ) ); integralImage ( 0, y ).add ( integralImage ( 0, y - 1 ) ); } //first row for ( int x = 1; x < xsize; x++ ) { integralImage ( x, 0 ).add ( infeats.get ( x, 0 ) ); integralImage ( x, 0 ).add ( integralImage ( x - 1, 0 ) ); } //rest for ( int y = 1; y < ysize; y++ ) { for ( int x = 1; x < xsize; x++ ) { integralImage ( x, y ).add ( infeats.get ( x, y ) ); integralImage ( x, y ).add ( integralImage ( x, y - 1 ) ); integralImage ( x, y ).add ( integralImage ( x - 1, y ) ); integralImage ( x, y ).sub ( integralImage ( x - 1, y - 1 ) ); } } } void SemSegContextTree::computeIntegralImage ( const NICE::MultiChannelImageT ¤tfeats, const NICE::MultiChannelImageT &lfeats, NICE::MultiChannelImageT &integralImage ) { int xsize = currentfeats.width(); int ysize = currentfeats.height(); int channels = (int)forest[0][0].dist.size(); #pragma omp parallel for for ( int c = 0; c < channels; c++ ) { integralImage.set ( 0, 0, getMeanProb ( 0, 0, c, currentfeats ), c ); //first column for ( int y = 1; y < ysize; y++ ) { integralImage.set ( 0, y, getMeanProb ( 0, y, c, currentfeats ) + integralImage.get ( 0, y - 1, c ), c ); } //first row for ( int x = 1; x < xsize; x++ ) { integralImage.set ( x, 0, getMeanProb ( x, 0, c, currentfeats ) + integralImage.get ( x - 1, 0, c ), c ); } //rest for ( int y = 1; y < ysize; y++ ) { for ( int x = 1; x < xsize; x++ ) { double val = getMeanProb ( x, y, c, currentfeats ) + integralImage.get ( x, y - 1, c ) + integralImage.get ( x - 1, y, c ) - integralImage.get ( x - 1, y - 1, c ); integralImage.set ( x, y, val, c ); } } } int channels2 = (int)lfeats.channels(); xsize = lfeats.width(); ysize = lfeats.height(); if ( integralImage.get ( xsize - 1, ysize - 1, channels ) == 0.0 ) { #pragma omp parallel for for ( int c1 = 0; c1 < channels2; c1++ ) { int c = channels + c1; integralImage.set ( 0, 0, lfeats.get ( 0, 0, c1 ), c ); //first column for ( int y = 1; y < ysize; y++ ) { integralImage.set ( 0, y, lfeats.get ( 0, y, c1 ) + integralImage.get ( 0, y, c ), c ); } //first row for ( int x = 1; x < xsize; x++ ) { integralImage.set ( x, 0, lfeats.get ( x, 0, c1 ) + integralImage.get ( x, 0, c ), c ); } //rest for ( int y = 1; y < ysize; y++ ) { for ( int x = 1; x < xsize; x++ ) { double val = lfeats.get ( x, y, c1 ) + integralImage.get ( x, y - 1, c ) + integralImage.get ( x - 1, y, c ) - integralImage.get ( x - 1, y - 1, c ); integralImage.set ( x, y, val, c ); } } } } } inline double computeWeight ( const double &d, const double &dim ) { return 1.0 / ( pow ( 2, (double)( dim - d + 1 ) ) ); } void SemSegContextTree::train ( const MultiDataset *md ) { const LabeledSet train = * ( *md ) ["train"]; const LabeledSet *trainp = &train; ProgressBar pb ( "compute feats" ); pb.show(); //TODO: Speichefresser!, lohnt sich sparse? vector > allfeats; vector > currentfeats; vector > labels; #ifdef TEXTONMAP vector > textonMap; #endif vector > integralTexton; std::string forbidden_classes_s = conf->gS ( "analysis", "donttrain", "" ); if ( forbidden_classes_s == "" ) { forbidden_classes_s = conf->gS ( "analysis", "forbidden_classes", "" ); } classnames.getSelection ( forbidden_classes_s, forbidden_classes ); int imgcounter = 0; int amountPixels = 0; LOOP_ALL_S ( *trainp ) { EACH_INFO ( classno, info ); NICE::ColorImage img; std::string currentFile = info.img(); CachedExample *ce = new CachedExample ( currentFile ); const LocalizationResult *locResult = info.localization(); if ( locResult->size() <= 0 ) { fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n", currentFile.c_str() ); continue; } fprintf ( stderr, "SemSegCsurka: Collecting pixel examples from localization info: %s\n", currentFile.c_str() ); int xsize, ysize; ce->getImageSize ( xsize, ysize ); amountPixels += xsize * ysize; MatrixT tmpMat ( xsize, ysize ); currentfeats.push_back ( MultiChannelImageT ( xsize, ysize, nbTrees ) ); currentfeats[imgcounter].setAll ( 0 ); #ifdef TEXTONMAP textonMap.push_back ( MultiChannelImageT ( xsize / grid + 1, ysize / grid + 1, 1 ) ); integralTexton.push_back ( MultiChannelImageT ( xsize / grid + 1, ysize / grid + 1, 1 ) ); #endif labels.push_back ( tmpMat ); try { img = ColorImage ( currentFile ); } catch ( Exception ) { cerr << "SemSeg: error opening image file <" << currentFile << ">" << endl; continue; } Globals::setCurrentImgFN ( currentFile ); //TODO: resize image?! MultiChannelImageT feats; allfeats.push_back ( feats ); // read image and do some simple transformations extractBasicFeatures (allfeats[imgcounter], img, currentFile); // getting groundtruth NICE::Image pixelLabels ( xsize, ysize ); pixelLabels.set ( 0 ); locResult->calcLabeledImage ( pixelLabels, ( *classNames ).getBackgroundClass() ); for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { classno = pixelLabels.getPixel ( x, y ); labels[imgcounter] ( x, y ) = classno; if ( forbidden_classes.find ( classno ) != forbidden_classes.end() ) continue; labelcounter[classno]++; } } imgcounter++; pb.update ( trainp->count() ); delete ce; } pb.hide(); map::iterator mapit; int classes = 0; for ( mapit = labelcounter.begin(); mapit != labelcounter.end(); mapit++ ) { labelmap[mapit->first] = classes; labelmapback[classes] = mapit->first; classes++; } //////////////////////////////////////////////////// //define which featurextraction methods should be used for each channel #ifdef LOCALFEATS int colorchannels = 9; #else int colorchannels = 3; #endif if(useGradient) colorchannels *= 2; // gray value images for(int i = 0; i < colorchannels; i++) { channelType.push_back(0); } // regions if(useRegionFeature) channelType.push_back(2); // integral images for(int i = 0; i < colorchannels+classes; i++) { channelType.push_back(1); } int amountTypes = 3; channelsPerType = vector >(amountTypes, vector()); for(int i = 0; i < channelType.size(); i++) { channelsPerType[channelType[i]].push_back(i); } ftypes = std::min(amountTypes,ftypes); //////////////////////////////////////////////////// //balancing int featcounter = 0; a = vector ( classes, 0.0 ); for ( int iCounter = 0; iCounter < imgcounter; iCounter++ ) { int xsize = (int)currentfeats[iCounter].width(); int ysize = (int)currentfeats[iCounter].height(); for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { featcounter++; int cn = labels[iCounter] ( x, y ); if ( labelmap.find ( cn ) == labelmap.end() ) continue; a[labelmap[cn]] ++; } } } for ( int i = 0; i < (int)a.size(); i++ ) { a[i] /= (double)featcounter; } #ifdef DEBUG for ( int i = 0; i < (int)a.size(); i++ ) { cout << "a[" << i << "]: " << a[i] << endl; } cout << "a.size: " << a.size() << endl; #endif depth = 0; int uniquenumber = 0; for ( int t = 0; t < nbTrees; t++ ) { vector tree; tree.push_back ( TreeNode() ); tree[0].dist = vector ( classes, 0.0 ); tree[0].depth = depth; tree[0].featcounter = amountPixels; tree[0].nodeNumber = uniquenumber; uniquenumber++; forest.push_back ( tree ); } vector startnode ( nbTrees, 0 ); bool allleaf = false; //int baseFeatSize = allfeats[0].size(); vector > integralImgs ( imgcounter, MultiChannelImageT() ); while ( !allleaf && depth < maxDepth ) { depth++; #ifdef DEBUG cout << "depth: " << depth << endl; #endif allleaf = true; vector > lastfeats = currentfeats; #if 1 Timer timer; timer.start(); #endif double weight = computeWeight ( depth, maxDepth ) - computeWeight ( depth - 1, maxDepth ); if ( depth == 1 ) { weight = computeWeight ( 1, maxDepth ); } for ( int tree = 0; tree < nbTrees; tree++ ) { int t = (int)forest[tree].size(); int s = startnode[tree]; startnode[tree] = t; //TODO vielleicht parallel wenn nächste schleife trotzdem noch parallelsiert würde, die hat mehr gewicht //#pragma omp parallel for for ( int i = s; i < t; i++ ) { if ( !forest[tree][i].isleaf && forest[tree][i].left < 0 ) { #if 0 timer.stop(); cout << "time 1: " << timer.getLast() << endl; timer.start(); #endif Operation *splitfeat = NULL; double splitval; double bestig = getBestSplit ( allfeats, lastfeats, integralImgs, labels, i, splitfeat, splitval, tree ); for ( int ii = 0; ii < lastfeats.size(); ii++ ) { for ( int c = 0; c < lastfeats[ii].channels(); c++ ) { short unsigned int minv, maxv; lastfeats[ii].statistics ( minv, maxv, c ); //cout << "min: " << minv << " max: " << maxv << endl; } } #if 0 timer.stop(); double tl = timer.getLast(); if ( tl > 10.0 ) { cout << "time 2: " << tl << endl; cout << "slow split: " << splitfeat->writeInfos() << endl; getchar(); } timer.start(); #endif forest[tree][i].feat = splitfeat; forest[tree][i].decision = splitval; if ( splitfeat != NULL ) { allleaf = false; int left = forest[tree].size(); forest[tree].push_back ( TreeNode() ); forest[tree].push_back ( TreeNode() ); int right = left + 1; forest[tree][i].left = left; forest[tree][i].right = right; forest[tree][left].dist = vector ( classes, 0.0 ); forest[tree][right].dist = vector ( classes, 0.0 ); forest[tree][left].depth = depth; forest[tree][right].depth = depth; forest[tree][left].featcounter = 0; forest[tree][right].featcounter = 0; forest[tree][left].nodeNumber = uniquenumber; int leftu = uniquenumber; uniquenumber++; forest[tree][right].nodeNumber = uniquenumber; int rightu = uniquenumber; uniquenumber++; forest[tree][right].featcounter = 0; #if 0 timer.stop(); cout << "time 3: " << timer.getLast() << endl; timer.start(); #endif #pragma omp parallel for for ( int iCounter = 0; iCounter < imgcounter; iCounter++ ) { int xsize = currentfeats[iCounter].width(); int ysize = currentfeats[iCounter].height(); for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { if ( currentfeats[iCounter].get ( x, y, tree ) == i ) { Features feat; feat.feats = &allfeats[iCounter]; feat.cfeats = &lastfeats[iCounter]; feat.cTree = tree; feat.tree = &forest[tree]; feat.integralImg = &integralImgs[iCounter]; double val = splitfeat->getVal ( feat, x, y ); int subx = x / grid; int suby = y / grid; #pragma omp critical if ( val < splitval ) { currentfeats[iCounter].set ( x, y, left, tree ); if ( labelmap.find ( labels[iCounter] ( x, y ) ) != labelmap.end() ) forest[tree][left].dist[labelmap[labels[iCounter] ( x, y ) ]]++; forest[tree][left].featcounter++; SparseVectorInt v; v.insert ( pair ( leftu, weight ) ); #ifdef TEXTONMAP textonMap[iCounter] ( subx, suby ).add ( v ); #endif } else { currentfeats[iCounter].set ( x, y, right, tree ); if ( labelmap.find ( labels[iCounter] ( x, y ) ) != labelmap.end() ) forest[tree][right].dist[labelmap[labels[iCounter] ( x, y ) ]]++; forest[tree][right].featcounter++; //feld im subsampled finden und in diesem rechts hochzählen SparseVectorInt v; v.insert ( pair ( rightu, weight ) ); #ifdef TEXTONMAP textonMap[iCounter] ( subx, suby ).add ( v ); #endif } } } } } #if 0 timer.stop(); cout << "time 4: " << timer.getLast() << endl; timer.start(); #endif // forest[tree][right].featcounter = forest[tree][i].featcounter - forest[tree][left].featcounter; double lcounter = 0.0, rcounter = 0.0; for ( uint d = 0; d < forest[tree][left].dist.size(); d++ ) { if ( forbidden_classes.find ( labelmapback[d] ) != forbidden_classes.end() ) { forest[tree][left].dist[d] = 0; forest[tree][right].dist[d] = 0; } else { forest[tree][left].dist[d] /= a[d]; lcounter += forest[tree][left].dist[d]; forest[tree][right].dist[d] /= a[d]; rcounter += forest[tree][right].dist[d]; } } #if 0 timer.stop(); cout << "time 5: " << timer.getLast() << endl; timer.start(); #endif if ( lcounter <= 0 || rcounter <= 0 ) { cout << "lcounter : " << lcounter << " rcounter: " << rcounter << endl; cout << "splitval: " << splitval << " splittype: " << splitfeat->writeInfos() << endl; cout << "bestig: " << bestig << endl; for ( int iCounter = 0; iCounter < imgcounter; iCounter++ ) { int xsize = currentfeats[iCounter].width(); int ysize = currentfeats[iCounter].height(); int counter = 0; for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { if ( lastfeats[iCounter].get ( x, y, tree ) == i ) { if ( ++counter > 30 ) break; Features feat; feat.feats = &allfeats[iCounter]; feat.cfeats = &lastfeats[iCounter]; feat.cTree = tree; feat.tree = &forest[tree]; feat.integralImg = &integralImgs[iCounter]; double val = splitfeat->getVal ( feat, x, y ); cout << "splitval: " << splitval << " val: " << val << endl; } } } } assert ( lcounter > 0 && rcounter > 0 ); } for ( uint d = 0; d < forest[tree][left].dist.size(); d++ ) { forest[tree][left].dist[d] /= lcounter; forest[tree][right].dist[d] /= rcounter; } } else { forest[tree][i].isleaf = true; } } } #if 0 timer.stop(); cout << "time after tree: " << timer.getLast() << endl; timer.start(); #endif } //compute integral images int channels = classes + allfeats[0].channels(); if ( integralImgs[0].width() == 0 ) { for ( int i = 0; i < imgcounter; i++ ) { int xsize = allfeats[i].width(); int ysize = allfeats[i].height(); integralImgs[i].reInit ( xsize, ysize, channels ); integralImgs[i].setAll ( 0.0 ); } } #if 0 timer.stop(); cout << "time for part1: " << timer.getLast() << endl; timer.start(); #endif #pragma omp parallel for for ( int i = 0; i < imgcounter; i++ ) { computeIntegralImage ( currentfeats[i], allfeats[i], integralImgs[i] ); #ifdef TEXTONMAP computeIntegralImage ( textonMap[i], integralTexton[i] ); #endif } #if 1 timer.stop(); cout << "time for depth " << depth << ": " << timer.getLast() << endl; #endif } #ifdef WRITEGLOB ofstream outstream ( "globtrain.feat" ); for ( int i = 0; i < textonMap.size(); i++ ) { set usedclasses; for ( uint x = 0; x < labels[i].rows(); x++ ) { for ( uint y = 0; y < labels[i].cols(); y++ ) { int classno = labels[i] ( x, y ); if ( forbidden_classes.find ( classno ) != forbidden_classes.end() ) continue; usedclasses.insert ( classno ); } } cout << "labels.cols: " << labels[i].cols() << " labels.rows " << labels[i].rows() << endl; cout << "currentfeats : " << allfeats[i].width() << " allfeats[i].height(); " << allfeats[i].height() << endl; set::iterator it; for ( it = usedclasses.begin() ; it != usedclasses.end(); it++ ) outstream << *it << " "; outstream << endl; integralTexton[i] ( integralTexton[i].width() - 1, integralTexton[i].height() - 1 ).store ( outstream ); } outstream.close(); #endif cout << "uniquenumber " << uniquenumber << endl; //getchar(); #ifdef DEBUG for ( int tree = 0; tree < nbTrees; tree++ ) { int t = (int)forest[tree].size(); for ( int i = 0; i < t; i++ ) { printf ( "tree[%i]: left: %i, right: %i", i, forest[tree][i].left, forest[tree][i].right ); if ( !forest[tree][i].isleaf && forest[tree][i].left != -1 ) { cout << ", feat: " << forest[tree][i].feat->writeInfos() << " "; opOverview[forest[tree][i].feat->getOps() ]++; contextOverview[forest[tree][i].depth][ (int)forest[tree][i].feat->getContext() ]++; } for ( int d = 0; d < (int)forest[tree][i].dist.size(); d++ ) { cout << " " << forest[tree][i].dist[d]; } cout << endl; } } for ( uint c = 0; c < ops.size(); c++ ) { cout << ops[c]->writeInfos() << ": " << opOverview[ops[c]->getOps() ] << endl; } for ( uint c = 0; c < cops.size(); c++ ) { cout << cops[c]->writeInfos() << ": " << opOverview[cops[c]->getOps() ] << endl; } for ( int d = 0; d < maxDepth; d++ ) { double sum = contextOverview[d][0] + contextOverview[d][1]; contextOverview[d][0] /= sum; contextOverview[d][1] /= sum; cout << "depth: " << d << " woContext: " << contextOverview[d][0] << " wContext: " << contextOverview[d][1] << endl; } #endif } void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImageT &feats, const ColorImage &img, const string ¤tFile) { int xsize = img.width(); int ysize = img.height(); //TODO: resize image?! #ifdef LOCALFEATS lfcw->getFeats ( img, feats ); #else feats.reInit ( xsize, ysize, 3 ); for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { for ( int r = 0; r < 3; r++ ) { feats.set ( x, y, img.getPixel ( x, y, r ), r ); } } } feats = ColorSpace::rgbtolab ( feats ); #endif if ( useGradient ) { int currentsize = feats.channels(); feats.addChannel ( currentsize ); for ( int c = 0; c < currentsize; c++ ) { ImageT tmp = feats[c]; ImageT tmp2 = feats[c+currentsize]; NICE::FilterT::sobel ( tmp, tmp2 ); } } // read the geometric cues produced by Hoiem et al. if ( useHoiemFeatures ) { // we could also give the following set as a config option string hoiemClasses_s = "sky 000 090-045 090-090 090-135 090 090-por 090-sol"; vector hoiemClasses; StringTools::split ( hoiemClasses_s, ' ', hoiemClasses ); // Now we have to do some fancy regular expressions :) // Original image filename: basel_000083.jpg // hoiem result: basel_000083_c_sky.png // Fancy class of Ferid which supports string handling especially for filenames FileName fn ( currentFile ); fn.removeExtension(); FileName fnBase = fn.extractFileName(); // counter for the channel index, starts with the current size of the destination multi-channel image int currentChannel = feats.channels(); // add a channel for each feature in advance feats.addChannel ( hoiemClasses.size() ); // loop through all geometric categories and add the images for ( vector::const_iterator i = hoiemClasses.begin(); i != hoiemClasses.end(); i++, currentChannel++ ) { string hoiemClass = *i; FileName fnConfidenceImage ( hoiemDirectory + fnBase.str() + "_c_" + hoiemClass + ".png" ); if ( ! fnConfidenceImage.fileExists() ) { fthrow(Exception, "Unable to read the Hoiem geometric confidence image: " << fnConfidenceImage.str() << " (original image is " << currentFile << ")" ); } else { Image confidenceImage ( fnConfidenceImage.str() ); // check whether the image size is consistent if ( confidenceImage.width() != feats.width() || confidenceImage.height() != feats.height() ) { fthrow(Exception, "The size of the geometric confidence image does not match with the original image size: " << fnConfidenceImage.str()); } ImageT dst = feats[currentChannel]; // copy standard image to double image for ( uint y = 0 ; y < confidenceImage.height(); y++ ) for ( uint x = 0 ; x < confidenceImage.width(); x++ ) feats(x, y, currentChannel) = (double)confidenceImage(x,y); } } } } void SemSegContextTree::semanticseg ( CachedExample *ce, NICE::Image & segresult, NICE::MultiChannelImageT & probabilities ) { int xsize; int ysize; ce->getImageSize ( xsize, ysize ); int numClasses = classNames->numClasses(); fprintf ( stderr, "ContextTree classification !\n" ); probabilities.reInit ( xsize, ysize, numClasses ); probabilities.setAll ( 0 ); #ifdef TEXTONMAP MultiChannelImageT textonMap ( xsize / grid + 1, ysize / grid + 1, 1 ); MultiChannelImageT integralTexton ( xsize / grid + 1, ysize / grid + 1, 1 ); #endif std::string currentFile = Globals::getCurrentImgFN(); MultiChannelImageT feats; NICE::ColorImage img; try { img = ColorImage ( currentFile ); } catch ( Exception ) { cerr << "SemSeg: error opening image file <" << currentFile << ">" << endl; return; } extractBasicFeatures(feats, img, currentFile); //read image and do some simple transformations bool allleaf = false; MultiChannelImageT integralImg; MultiChannelImageT currentfeats ( xsize, ysize, nbTrees ); currentfeats.setAll ( 0 ); depth = 0; for ( int d = 0; d < maxDepth && !allleaf; d++ ) { depth++; #ifdef TEXTONMAP double weight = computeWeight ( depth, maxDepth ) - computeWeight ( depth - 1, maxDepth ); if ( depth == 1 ) { weight = computeWeight ( 1, maxDepth ); } #endif allleaf = true; MultiChannelImageT lastfeats = currentfeats; int tree; #pragma omp parallel for private(tree) for ( tree = 0; tree < nbTrees; tree++ ) { for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { int t = currentfeats.get ( x, y, tree ); if ( forest[tree][t].left > 0 ) { allleaf = false; Features feat; feat.feats = &feats; feat.cfeats = &lastfeats; feat.cTree = tree; feat.tree = &forest[tree]; feat.integralImg = &integralImg; double val = forest[tree][t].feat->getVal ( feat, x, y ); int subx = x / grid; int suby = y / grid; if ( val < forest[tree][t].decision ) { currentfeats.set ( x, y, forest[tree][t].left, tree ); #ifdef TEXTONMAP #pragma omp critical { SparseVectorInt v; v.insert ( pair ( forest[tree][forest[tree][t].left].nodeNumber, weight ) ); textonMap ( subx, suby ).add ( v ); } #endif } else { currentfeats.set ( x, y, forest[tree][t].right, tree ); #ifdef TEXTONMAP #pragma omp critical { SparseVectorInt v; v.insert ( pair ( forest[tree][forest[tree][t].right].nodeNumber, weight ) ); textonMap ( subx, suby ).add ( v ); } #endif } /*if ( x == xpos && y == ypos ) { cout << "val: " << val << " decision: " << forest[tree][t].decision << " details: " << forest[tree][t].feat->writeInfos() << endl; }*/ } } } if ( depth < maxDepth ) { //compute integral image int channels = (int)labelmap.size() + feats.channels(); if ( integralImg.width() == 0 ) { int xsize = feats.width(); int ysize = feats.height(); integralImg.reInit ( xsize, ysize, channels ); integralImg.setAll ( 0.0 ); } } } if ( depth < maxDepth ) { computeIntegralImage ( currentfeats, feats, integralImg ); #ifdef TEXTONMAP computeIntegralImage ( textonMap, integralTexton ); #endif } } // cout << forest[0][currentfeats.get ( xpos, ypos, 0 ) ].dist << endl; #ifdef WRITEGLOB ofstream outstream ( "globtest.feat", ofstream::app ); outstream << 0 << endl; integralTexton ( integralTexton.width() - 1, integralTexton.height() - 1 ).store ( outstream ); outstream.close(); #endif string cndir = conf->gS ( "SSContextTree", "cndir", "" ); int classes = (int)probabilities.channels(); vector useclass ( classes, 1 ); #ifdef WRITEGLOB std::vector< std::string > list; StringTools::split ( currentFile, '/', list ); string orgname = list.back(); ofstream ostream ( "filelist.txt", ofstream::app ); ostream << orgname << ".dat" << endl; ostream.close(); if ( cndir != "" ) { useclass = vector ( classes, 0 ); ifstream infile ( ( cndir + "/" + orgname + ".dat" ).c_str() ); #undef OLD #ifdef OLD while ( !infile.eof() && infile.good() ) { int tmp; infile >> tmp; assert ( tmp >= 0 && tmp < classes ); useclass[tmp] = 1; } #else int c = 0; vector probs ( classes, 0.0 ); while ( !infile.eof() && infile.good() ) { infile >> probs[c]; c++; } vector sorted = probs; sort ( sorted.begin(), sorted.end() ); double thr = sorted[10]; if ( thr < 0.0 ) thr = 0.0; for ( int c = 0; c < classes; c++ ) { if ( probs[c] < thr ) { useclass[c] = 1; } } #endif for ( int c = 0; c < classes; c++ ) { if ( useclass[c] == 0 ) { probabilities.set ( -numeric_limits< double >::max(), c ); } } } #endif if ( pixelWiseLabeling ) { //finales labeln: //long int offset = 0; for ( int x = 0; x < xsize; x++ ) { for ( int y = 0; y < ysize; y++ ) { double maxvalue = - numeric_limits::max(); //TODO: das kann auch nur pro knoten gemacht werden, nicht pro pixel int maxindex = 0; uint s = forest[0][0].dist.size(); for ( uint i = 0; i < s; i++ ) { int currentclass = labelmapback[i]; if ( useclass[currentclass] ) { probabilities ( x, y, currentclass ) = getMeanProb ( x, y, i, currentfeats ); if ( probabilities ( x, y, currentclass ) > maxvalue ) { maxvalue = probabilities ( x, y, currentclass ); maxindex = currentclass; } } } segresult.setPixel ( x, y, maxindex ); if ( maxvalue > 1 ) cout << "maxvalue: " << maxvalue << endl; } } #undef VISUALIZE #ifdef VISUALIZE for ( int j = 0 ; j < (int)probabilities.numChannels; j++ ) { //cout << "class: " << j << endl;//" " << cn.text ( j ) << endl; NICE::Matrix tmp ( probabilities.height(), probabilities.width() ); double maxval = -numeric_limits::max(); double minval = numeric_limits::max(); for ( int y = 0; y < probabilities.height(); y++ ) for ( int x = 0; x < probabilities.width(); x++ ) { double val = probabilities ( x, y, j ); tmp ( y, x ) = val; maxval = std::max ( val, maxval ); minval = std::min ( val, minval ); } tmp ( 0, 0 ) = 1.0; tmp ( 0, 1 ) = 0.0; NICE::ColorImage imgrgb ( probabilities.width(), probabilities.height() ); ICETools::convertToRGB ( tmp, imgrgb ); cout << "maxval = " << maxval << " minval: " << minval << " for class " << j << endl; //cn.text ( j ) << endl; std::string s; std::stringstream out; out << "tmpprebmap" << j << ".ppm"; s = out.str(); imgrgb.write ( s ); //showImage(imgrgb, "Ergebnis"); //getchar(); } cout << "fertsch" << endl; getchar(); cout << "weiter gehtsch" << endl; #endif } else { //final labeling using segmentation Matrix regions; //showImage(img); int regionNumber = segmentation->segRegions ( img, regions ); cout << "regions: " << regionNumber << endl; int dSize = forest[0][0].dist.size(); vector > regionProbs ( regionNumber, vector ( dSize, 0.0 ) ); vector bestlabels ( regionNumber, 0 ); for ( int y = 0; y < img.height(); y++ ) { for ( int x = 0; x < img.width(); x++ ) { int cregion = regions ( x, y ); for ( int d = 0; d < dSize; d++ ) { regionProbs[cregion][d] += getMeanProb ( x, y, d, currentfeats ); } } } for ( int r = 0; r < regionNumber; r++ ) { double maxval = regionProbs[r][0]; bestlabels[r] = 0; for ( int d = 1; d < dSize; d++ ) { if ( maxval < regionProbs[r][d] ) { maxval = regionProbs[r][d]; bestlabels[r] = d; } } bestlabels[r] = labelmapback[bestlabels[r]]; } for ( int y = 0; y < img.height(); y++ ) { for ( int x = 0; x < img.width(); x++ ) { segresult.setPixel ( x, y, bestlabels[regions ( x,y ) ] ); } } #define WRITEREGIONS #ifdef WRITEREGIONS RegionGraph rg; segmentation->getGraphRepresentation ( img, regions, rg ); for ( uint pos = 0; pos < regionProbs.size(); pos++ ) { rg[pos]->setProbs ( regionProbs[pos] ); } std::string s; std::stringstream out; std::vector< std::string > list; StringTools::split ( Globals::getCurrentImgFN (), '/', list ); out << "rgout/" << list.back() << ".graph"; string writefile = out.str(); rg.write ( writefile ); #endif } cout << "segmentation finished" << endl; } void SemSegContextTree::store ( std::ostream & os, int format ) const { os << nbTrees << endl; classnames.store ( os ); map::const_iterator it; os << labelmap.size() << endl; for ( it = labelmap.begin() ; it != labelmap.end(); it++ ) os << ( *it ).first << " " << ( *it ).second << endl; os << labelmapback.size() << endl; for ( it = labelmapback.begin() ; it != labelmapback.end(); it++ ) os << ( *it ).first << " " << ( *it ).second << endl; int trees = forest.size(); os << trees << endl; for ( int t = 0; t < trees; t++ ) { int nodes = forest[t].size(); os << nodes << endl; for ( int n = 0; n < nodes; n++ ) { os << forest[t][n].left << " " << forest[t][n].right << " " << forest[t][n].decision << " " << forest[t][n].isleaf << " " << forest[t][n].depth << " " << forest[t][n].featcounter << " " << forest[t][n].nodeNumber << endl; os << forest[t][n].dist << endl; if ( forest[t][n].feat == NULL ) os << -1 << endl; else { os << forest[t][n].feat->getOps() << endl; forest[t][n].feat->store ( os ); } } } os << channelType.size() << endl; for(int i = 0; i < channelType.size(); i++) { os << channelType[i] << " "; } os << endl; } void SemSegContextTree::restore ( std::istream & is, int format ) { is >> nbTrees; classnames.restore ( is ); int lsize; is >> lsize; labelmap.clear(); for ( int l = 0; l < lsize; l++ ) { int first, second; is >> first; is >> second; labelmap[first] = second; } is >> lsize; labelmapback.clear(); for ( int l = 0; l < lsize; l++ ) { int first, second; is >> first; is >> second; labelmapback[first] = second; } int trees; is >> trees; forest.clear(); for ( int t = 0; t < trees; t++ ) { vector tmptree; forest.push_back ( tmptree ); int nodes; is >> nodes; //cout << "nodes: " << nodes << endl; for ( int n = 0; n < nodes; n++ ) { TreeNode tmpnode; forest[t].push_back ( tmpnode ); is >> forest[t][n].left; is >> forest[t][n].right; is >> forest[t][n].decision; is >> forest[t][n].isleaf; is >> forest[t][n].depth; is >> forest[t][n].featcounter; is >> forest[t][n].nodeNumber; is >> forest[t][n].dist; int feattype; is >> feattype; assert ( feattype < NBOPERATIONS ); forest[t][n].feat = NULL; if ( feattype >= 0 ) { for ( uint o = 0; o < ops.size(); o++ ) { if ( ops[o]->getOps() == feattype ) { forest[t][n].feat = ops[o]->clone(); break; } } if ( forest[t][n].feat == NULL ) { for ( uint o = 0; o < cops.size(); o++ ) { if ( cops[o]->getOps() == feattype ) { forest[t][n].feat = cops[o]->clone(); break; } } } assert ( forest[t][n].feat != NULL ); forest[t][n].feat->restore ( is ); } } } channelType.clear(); int ctsize; is >> ctsize; for(int i = 0; i < ctsize; i++) { int tmp; is >> tmp; channelType.push_back(tmp); } }