#include #include #include "SemSegContextTree.h" #include "objrec/baselib/Globals.h" #include "objrec/baselib/ProgressBar.h" #include "objrec/baselib/StringTools.h" #include "objrec/baselib/Globals.h" #include "objrec/cbaselib/CachedExample.h" #include "objrec/cbaselib/PascalResults.h" #include #define BOUND(x,min,max) (((x)<(min))?(min):((x)>(max)?(max):(x))) using namespace OBJREC; using namespace std; using namespace NICE; class Minus:public Operation { public: virtual double getVal(const vector > > &feats, const int &x, const int &y) { int xsize = feats.size(); int ysize = feats[0].size(); double v1 = feats[BOUND(x+x1,0,xsize-1)][BOUND(y+y1,0,ysize-1)][channel1]; double v2 = feats[BOUND(x+x2,0,xsize-1)][BOUND(y+y2,0,ysize-1)][channel2]; return v1-v2; } virtual Operation* clone() { return new Minus(); }; }; class MinusAbs:public Operation { public: virtual double getVal(const vector > > &feats, const int &x, const int &y) { int xsize = feats.size(); int ysize = feats[0].size(); double v1 = feats[BOUND(x+x1,0,xsize-1)][BOUND(y+y1,0,ysize-1)][channel1]; double v2 = feats[BOUND(x+x2,0,xsize-1)][BOUND(y+y2,0,ysize-1)][channel2]; return abs(v1-v2); } virtual Operation* clone() { return new MinusAbs(); }; }; class Addition:public Operation { public: virtual double getVal(const vector > > &feats, const int &x, const int &y) { int xsize = feats.size(); int ysize = feats[0].size(); double v1 = feats[BOUND(x+x1,0,xsize-1)][BOUND(y+y1,0,ysize-1)][channel1]; double v2 = feats[BOUND(x+x2,0,xsize-1)][BOUND(y+y2,0,ysize-1)][channel2]; return v1+v2; } virtual Operation* clone() { return new Addition(); }; }; class Only1:public Operation { public: virtual double getVal(const vector > > &feats, const int &x, const int &y) { int xsize = feats.size(); int ysize = feats[0].size(); double v1 = feats[BOUND(x+x1,0,xsize-1)][BOUND(y+y1,0,ysize-1)][channel1]; return v1; } virtual Operation* clone() { return new Only1(); }; }; SemSegContextTree::SemSegContextTree( const Config *conf, const MultiDataset *md ) : SemanticSegmentation ( conf, &(md->getClassNames("train")) ) { string section = "SSContextTree"; lfcw = new LFColorWeijer(conf); grid = conf->gI(section, "grid", 10 ); maxSamples = conf->gI(section, "max_samples", 2000 ); minFeats = conf->gI(section, "min_feats", 50 ); maxDepth = conf->gI(section, "max_depth", 10 ); windowSize = conf->gI(section, "window_size", 16); featsPerSplit = conf->gI(section, "feats_per_split", 200); useShannonEntropy = conf->gB(section, "use_shannon_entropy", false); ops.push_back(new Minus()); ops.push_back(new MinusAbs()); ops.push_back(new Addition()); ops.push_back(new Only1()); /////////////////////////////////// // Train Segmentation Context Trees ////////////////////////////////// train ( md ); } SemSegContextTree::~SemSegContextTree() { } void SemSegContextTree::getBestSplit(const vector > > > &feats, vector > > ¤tfeats,const vector > > &labels, int node, Operation *&splitop, double &splitval) { int imgCount, featdim; try { imgCount = (int)feats.size(); featdim = feats[0][0][0].size(); } catch(Exception) { cerr << "no features computed?" << endl; } double bestig = -numeric_limits< double >::max(); splitop = NULL; splitval = -1.0; set >selFeats; map e; int featcounter = 0; for(int iCounter = 0; iCounter < imgCount; iCounter++) { int xsize = (int)currentfeats[iCounter].size(); int ysize = (int)currentfeats[iCounter][0].size(); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { if(currentfeats[iCounter][x][y] == node) { featcounter++; } } } } if(featcounter < minFeats) { cout << "only " << featcounter << " feats in current node -> it's a leaf" << endl; return; } vector fraction(a.size(),0.0); for(uint i = 0; i < fraction.size(); i++) { fraction[i] = ((double)maxSamples)/((double)featcounter*a[i]*a.size()); //cout << "fraction["< tmp(3,0); tmp[0] = iCounter; tmp[1] = x; tmp[2] = y; featcounter++; selFeats.insert(tmp); e[cn]++; } } } } } //cout << "size: " << selFeats.size() << endl; //getchar(); map::iterator mapit; double globent = 0.0; for ( mapit=e.begin() ; mapit != e.end(); mapit++ ) { //cout << "class: " << mapit->first << ": " << mapit->second << endl; double p = (double)(*mapit).second/(double)featcounter; globent += p*log2(p); } globent = -globent; if(globent < 0.5) { cout << "globent to small: " << globent << endl; return; } featsel.clear(); for(int i = 0; i < featsPerSplit; i++) { int x1 = (int)((double)rand()/(double)RAND_MAX*(double)windowSize)-windowSize/2; int x2 = (int)((double)rand()/(double)RAND_MAX*(double)windowSize)-windowSize/2; int y1 = (int)((double)rand()/(double)RAND_MAX*(double)windowSize)-windowSize/2; int y2 = (int)((double)rand()/(double)RAND_MAX*(double)windowSize)-windowSize/2; int f1 = (int)((double)rand()/(double)RAND_MAX*(double)featdim); int f2 = (int)((double)rand()/(double)RAND_MAX*(double)featdim); int o = (int)((double)rand()/(double)RAND_MAX*(double)ops.size()); Operation *op = ops[o]->clone(); op->set(x1,y1,x2,y2,f1,f2); featsel.push_back(op); } #pragma omp parallel for private(mapit) for(int f = 0; f < featsPerSplit; f++) { double l_bestig = -numeric_limits< double >::max(); double l_splitval = -1.0; set >::iterator it; vector vals; for ( it=selFeats.begin() ; it != selFeats.end(); it++ ) { vals.push_back(featsel[f]->getVal(feats[(*it)[0]],(*it)[1], (*it)[2])); } int counter = 0; for ( it=selFeats.begin() ; it != selFeats.end(); it++ , counter++) { set >::iterator it2; double val = vals[counter]; map eL, eR; int counterL = 0, counterR = 0; int counter2 = 0; for ( it2=selFeats.begin() ; it2 != selFeats.end(); it2++, counter2++ ) { int cn = labels[(*it2)[0]][(*it2)[1]][(*it2)[2]]; //cout << "vals[counter2] " << vals[counter2] << " val: " << val << endl; if(vals[counter2] < val) { //left entropie: eL[cn] = eL[cn]+1; counterL++; } else { //right entropie: eR[cn] = eR[cn]+1; counterR++; } } double leftent = 0.0; for ( mapit=eL.begin() ; mapit != eL.end(); mapit++ ) { double p = (double)(*mapit).second/(double)counterL; leftent -= p*log2(p); } double rightent = 0.0; for ( mapit=eR.begin() ; mapit != eR.end(); mapit++ ) { double p = (double)(*mapit).second/(double)counterR; rightent -= p*log2(p); } //cout << "rightent: " << rightent << " leftent: " << leftent << endl; double pl = (double)counterL/(double)(counterL+counterR); double ig = globent - (1.0-pl) * rightent - pl*leftent; //double ig = globent - rightent - leftent; if(useShannonEntropy) { double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) ); ig = 2*ig / ( globent + esplit ); } if(ig > l_bestig) { l_bestig = ig; l_splitval = val; } } #pragma omp critical { //cout << "globent: " << globent << " bestig " << bestig << " splitfeat: " << splitfeat << " splitval: " << splitval << endl; //cout << "globent: " << globent << " l_bestig " << l_bestig << " f: " << p << " l_splitval: " << l_splitval << endl; //cout << "p: " << featsubset[f] << endl; if(l_bestig > bestig) { bestig = l_bestig; splitop = featsel[f]; splitval = l_splitval; } } } /*for(int i = 0; i < featsPerSplit; i++) { if(featsel[i] != splitop) delete featsel[i]; }*/ #ifdef debug cout << "globent: " << globent << " bestig " << bestig << " splitval: " << splitval << endl; #endif } void SemSegContextTree::train ( const MultiDataset *md ) { const LabeledSet train = * ( *md ) ["train"]; const LabeledSet *trainp = &train; ProgressBar pb ( "compute feats" ); pb.show(); //TODO: Speichefresser!, lohnt sich sparse? vector > > > allfeats; vector > > currentfeats; vector > > labels; int imgcounter = 0; LOOP_ALL_S ( *trainp ) { EACH_INFO ( classno,info ); NICE::ColorImage img; std::string currentFile = info.img(); CachedExample *ce = new CachedExample ( currentFile ); const LocalizationResult *locResult = info.localization(); if ( locResult->size() <= 0 ) { fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n", currentFile.c_str() ); continue; } fprintf ( stderr, "SemSegCsurka: Collecting pixel examples from localization info: %s\n", currentFile.c_str() ); int xsize, ysize; ce->getImageSize ( xsize, ysize ); vector > tmp = vector >(xsize, vector(ysize,0)); currentfeats.push_back(tmp); labels.push_back(tmp); try { img = ColorImage(currentFile); } catch (Exception) { cerr << "SemSeg: error opening image file <" << currentFile << ">" << endl; continue; } Globals::setCurrentImgFN ( currentFile ); //TODO: resize image?! vector > > feats; #if 0 lfcw->getFeats(img, feats); #else feats = vector > >(xsize,vector >(ysize,vector(3,0.0))); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { for(int r = 0; r < 3; r++) { feats[x][y][r] = img.getPixel(x,y,r); } } } #endif allfeats.push_back(feats); // getting groundtruth NICE::Image pixelLabels (xsize, ysize); pixelLabels.set(0); locResult->calcLabeledImage ( pixelLabels, ( *classNames ).getBackgroundClass() ); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { classno = pixelLabels.getPixel(x, y); labels[imgcounter][x][y] = classno; labelcounter[classno]++; //if ( forbidden_classes.find ( classno ) != forbidden_classes.end() ) //continue; } } imgcounter++; pb.update ( trainp->count()); delete ce; } pb.hide(); /*int opsize = (int)ops.size(); int featdim = (int)allfeats[0][0][0].size(); for(int x1 = -windowSize/2; x1 < windowSize/2+1; x1++) { for(int y1 = -windowSize/2; y1 < windowSize/2+1; y1++) { for(int x2 = -windowSize/2; x2 < windowSize/2+1; x2++) { for(int y2 = -windowSize/2; y2 < windowSize/2+1; y2++) { for(int f = 0; f < featdim; f++) { for(int o = 0; o < opsize; o++) { vector tmp(6,0); tmp[0] = x1; tmp[1] = y1; tmp[2] = x2; tmp[3] = y2; tmp[4] = f; tmp[5] = o; featsel.push_back(tmp); } } } } } }*/ map::iterator mapit; int classes = 0; for(mapit = labelcounter.begin(); mapit != labelcounter.end(); mapit++) { labelmap[mapit->first] = classes; labelmapback[classes] = mapit->first; classes++; } //balancing int featcounter = 0; a = vector(classes,0.0); for(int iCounter = 0; iCounter < imgcounter; iCounter++) { int xsize = (int)currentfeats[iCounter].size(); int ysize = (int)currentfeats[iCounter][0].size(); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { featcounter++; int cn = labels[iCounter][x][y]; a[labelmap[cn]] ++; } } } for(int i = 0; i < (int)a.size(); i++) { a[i] /= (double)featcounter; } #ifdef DEBUG for(int i = 0; i < (int)a.size(); i++) { cout << "a["<(classes,0.0); int depth = 0; tree[0].depth = depth; int startnode = 0; bool allleaf = false; while(!allleaf && depth < maxDepth) { allleaf = true; //TODO vielleicht parallel wenn nächste schleife trotzdem noch parallelsiert würde, die hat mehr gewicht int t = (int) tree.size(); int s = startnode; startnode = t; //#pragma omp parallel for for(int i = s; i < t; i++) { if(!tree[i].isleaf && tree[i].left < 0) { Operation *splitfeat = NULL; double splitval; getBestSplit(allfeats, currentfeats,labels, i, splitfeat, splitval); tree[i].feat = splitfeat; tree[i].decision = splitval; if(splitfeat != NULL) { allleaf = false; int left = tree.size(); tree.push_back(Node()); tree.push_back(Node()); int right = left+1; tree[i].left = left; tree[i].right = right; tree[left].dist = vector(classes, 0.0); tree[right].dist = vector(classes, 0.0); tree[left].depth = depth+1; tree[right].depth = depth+1; #pragma omp parallel for for(int iCounter = 0; iCounter < imgcounter; iCounter++) { int xsize = currentfeats[iCounter].size(); int ysize = currentfeats[iCounter][0].size(); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { if(currentfeats[iCounter][x][y] == i) { double val = splitfeat->getVal(allfeats[iCounter],x,y); if(val < splitval) { currentfeats[iCounter][x][y] = left; tree[left].dist[labelmap[labels[iCounter][x][y]]]++; } else { currentfeats[iCounter][x][y] = right; tree[right].dist[labelmap[labels[iCounter][x][y]]]++; } } } } } double lcounter = 0.0, rcounter = 0.0; for(uint d = 0; d < tree[left].dist.size(); d++) { //tree[left].dist[d]/=a[d]; lcounter +=tree[left].dist[d]; //tree[right].dist[d]/=a[d]; rcounter +=tree[right].dist[d]; } if(lcounter <= 0 || rcounter <= 0) { cout << "lcounter : " << lcounter << " rcounter: " << rcounter << endl; cout << "splitval: " << splitval << endl; assert(lcounter > 0 && rcounter > 0); } for(uint d = 0; d < tree[left].dist.size(); d++) { tree[left].dist[d]/=lcounter; tree[right].dist[d]/=rcounter; } } else { tree[i].isleaf = true; } } } //TODO: features neu berechnen! depth++; #ifdef DEBUG cout << "depth: " << depth << endl; #endif } #ifdef DEBUG int t = (int) tree.size(); for(int i = 0; i < t; i++) { printf("tree[%i]: left: %i, right: %i ", i, tree[i].left, tree[i].right); for(int d = 0; d < (int)tree[i].dist.size(); d++) { cout << " " << tree[i].dist[d]; } cout << endl; } #endif } void SemSegContextTree::semanticseg ( CachedExample *ce, NICE::Image & segresult,GenericImage & probabilities ) { int xsize; int ysize; ce->getImageSize ( xsize, ysize ); int numClasses = classNames->numClasses(); fprintf (stderr, "ContextTree classification !\n"); probabilities.reInit ( xsize, ysize, numClasses, true ); probabilities.setAll ( 0 ); NICE::ColorImage img; std::string currentFile = Globals::getCurrentImgFN(); try { img = ColorImage(currentFile); } catch (Exception) { cerr << "SemSeg: error opening image file <" << currentFile << ">" << endl; return; } //TODO: resize image?! vector > > feats; #if 0 lfcw->getFeats(img, feats); #else feats = vector > >(xsize,vector >(ysize,vector(3,0.0))); for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { for(int r = 0; r < 3; r++) { feats[x][y][r] = img.getPixel(x,y,r); } } } #endif bool allleaf = false; vector > currentfeats = vector >(xsize, vector(ysize,0)); int depth = 0; while(!allleaf) { allleaf = true; //TODO vielleicht parallel wenn nächste schleife auch noch parallelsiert würde, die hat mehr gewicht //#pragma omp parallel for int t = (int) tree.size(); for(int i = 0; i < t; i++) { for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++) { int t = currentfeats[x][y]; if(tree[t].left > 0) { allleaf = false; double val = tree[t].feat->getVal(feats,x,y); if(val < tree[t].decision) { currentfeats[x][y] = tree[t].left; } else { currentfeats[x][y] = tree[t].right; } } } } } //TODO: features neu berechnen! analog zum training depth++; } //finales labeln: long int offset = 0; for(int x = 0; x < xsize; x++) { for(int y = 0; y < ysize; y++,offset++) { int t = currentfeats[x][y]; double maxvalue = - numeric_limits::max(); //TODO: das muss nur pro knoten gemacht werden, nicht pro pixel int maxindex = 0; for(uint i = 0; i < tree[i].dist.size(); i++) { probabilities.data[labelmapback[i]][offset] = tree[t].dist[i]; if(tree[t].dist[i] > maxvalue) { maxvalue = tree[t].dist[i]; maxindex = labelmapback[i]; } segresult.setPixel(x,y,maxindex); } } } }