
global classification included

Bjoern Froehlich 13 years ago
parent
commit
aa8422836c
2 changed files with 133 additions and 103 deletions
  1. semseg/SemSegContextTree.cpp (+122, -93)
  2. semseg/SemSegContextTree.h (+11, -10)

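Commit overview: this change adds an optional global image categorization stage to the context-tree segmentation. With use_categorization enabled, every training image accumulates a sparse global feature indexed by the unique node numbers of the forest (weighted by tree depth), a GPHIKClassifier (fasthik) is trained on these features with one-vs-all labels per class, and at test time only classes with a positive categorization score are kept in classesInImg and considered during pixel-wise and region-wise labeling. The following is a minimal, self-contained sketch of the label construction and score filtering using plain STL containers; GlobalFeat, buildOneVsAllLabels and filterClasses are hypothetical stand-ins for the SparseVector/GPHIKClassifier plumbing in the actual code.

// Hypothetical, simplified illustration of the categorization logic added by this commit.
// GlobalFeat stands in for NICE::SparseVector: node number -> accumulated weight.
#include <cstdio>
#include <cmath>
#include <map>
#include <vector>

typedef std::map<int, double> GlobalFeat;

// L2-normalize a sparse feature (the diff calls SparseVector::normalize()).
void normalize(GlobalFeat &f)
{
  double norm = 0.0;
  for (GlobalFeat::const_iterator it = f.begin(); it != f.end(); ++it)
    norm += it->second * it->second;
  norm = std::sqrt(norm);
  if (norm > 0.0)
    for (GlobalFeat::iterator it = f.begin(); it != f.end(); ++it)
      it->second /= norm;
}

// One-vs-all training labels: ys[c][i] is +1 if class c occurs in image i, else -1.
// Mirrors the loop over labelmap/classesPerImage added to SemSegContextTree::train().
std::map<int, std::vector<double> > buildOneVsAllLabels(
    const std::vector<std::map<int, int> > &classesPerImage, int classes)
{
  std::map<int, std::vector<double> > ys;
  for (int c = 0; c < classes; c++)
  {
    ys[c] = std::vector<double>(classesPerImage.size(), -1.0);
    for (size_t i = 0; i < classesPerImage.size(); i++)
      if (classesPerImage[i].count(c) > 0)
        ys[c][i] = 1.0;
  }
  return ys;
}

// Test time: keep only classes whose categorization score is positive,
// as in the classesInImg filtering added to semanticseg().
std::vector<int> filterClasses(const std::vector<double> &scores)
{
  std::vector<int> classesInImg;
  for (size_t i = 0; i < scores.size(); i++)
    if (scores[i] > 0.0)
      classesInImg.push_back((int)i);
  return classesInImg;
}

int main()
{
  // Two toy images: image 0 contains classes 0 and 2, image 1 only class 1.
  std::vector<std::map<int, int> > classesPerImage(2);
  classesPerImage[0][0] = 1; classesPerImage[0][2] = 1;
  classesPerImage[1][1] = 1;

  std::map<int, std::vector<double> > ys = buildOneVsAllLabels(classesPerImage, 3);
  printf("label of class 2 in image 0: %+.0f\n", ys[2][0]);   // +1
  printf("label of class 2 in image 1: %+.0f\n", ys[2][1]);   // -1

  // Toy global feature for one image: node number -> accumulated weight.
  GlobalFeat feat;
  feat[4] = 0.3; feat[9] = 0.4;
  normalize(feat);
  printf("normalized weight of node 4: %.2f\n", feat[4]);     // 0.60

  // Pretend scores from the categorization classifier for one test image.
  std::vector<double> scores;
  scores.push_back(0.7); scores.push_back(-0.4); scores.push_back(0.1);
  std::vector<int> used = filterClasses(scores);
  printf("used classes: %zu of %zu\n", used.size(), scores.size());
  return 0;
}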
+ 122 - 93
semseg/SemSegContextTree.cpp

@@ -21,7 +21,6 @@
 #include <iostream>
 
 #undef WRITEGLOB
-#undef TEXTONMAP
 
 #define DEBUG
 
@@ -54,6 +53,17 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
   nbTrees = conf->gI (section, "amount_trees", 1);
 
   string segmentationtype = conf->gS (section, "segmentation_type", "meanshift");
+  
+  useCategorization = conf->gB (section, "use_categorization", false);
+  
+  if(useCategorization)
+  {
+    fasthik = new GPHIKClassifier(conf);
+  }
+  else
+  {
+    fasthik = NULL;
+  }
 
   randomTests = conf->gI (section, "random_tests", 10);
 
@@ -118,8 +128,6 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
     tops.push_back (new Haar3Horiz());
   if (conf->gB (featsec, "haar3_vert", true))
     tops.push_back (new Haar3Vert());
-  if (conf->gB (featsec, "glob", true))
-    tops.push_back (new GlobalFeats());
 
   ops.push_back (tops);
   ops.push_back (tops);
@@ -358,6 +366,10 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
       feat.cfeats = &currentfeats[ (*it) [0]];
       feat.cTree = tree;
       feat.tree = &forest[tree];
+      
+      assert(forest.size() > tree);
+      assert(forest[tree][0].dist.size() > 0);
+      
       feat.rProbs = &regionProbs[(*it) [0]];
       
       double val = featsel[f]->getVal (feat, (*it) [1], (*it) [2]);
@@ -476,39 +488,6 @@ inline double SemSegContextTree::getMeanProb (const int &x, const int &y, const
   return val / (double)nbTrees;
 }
 
-void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<SparseVectorInt> &infeats, NICE::MultiChannelImageT<SparseVectorInt> &integralImage)
-{
-  int xsize = infeats.width();
-  int ysize = infeats.height();
-  integralImage (0, 0).add (infeats.get (0, 0));
-
-  //first column
-  for (int y = 1; y < ysize; y++)
-  {
-    integralImage (0, y).add (infeats.get (0, y));
-    integralImage (0, y).add (integralImage (0, y - 1));
-  }
-
-  //first row
-  for (int x = 1; x < xsize; x++)
-  {
-    integralImage (x, 0).add (infeats.get (x, 0));
-    integralImage (x, 0).add (integralImage (x - 1, 0));
-  }
-
-  //rest
-  for (int y = 1; y < ysize; y++)
-  {
-    for (int x = 1; x < xsize; x++)
-    {
-      integralImage (x, y).add (infeats.get (x, y));
-      integralImage (x, y).add (integralImage (x, y - 1));
-      integralImage (x, y).add (integralImage (x - 1, y));
-      integralImage (x, y).sub (integralImage (x - 1, y - 1));
-    }
-  }
-}
-
 void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<unsigned short int> &currentfeats, NICE::MultiChannelImageT<double> &feats, int firstChannel)
 {
   int xsize = currentfeats.width();
@@ -591,10 +570,9 @@ void SemSegContextTree::train (const MultiDataset *md)
   vector<MultiChannelImageT<double> > allfeats;
   vector<MultiChannelImageT<unsigned short int> > currentfeats;
   vector<MatrixT<int> > labels;
-#ifdef TEXTONMAP
-  vector<MultiChannelImageT<SparseVectorInt> > textonMap;
-  vector<MultiChannelImageT<SparseVectorInt> > integralTexton;
-#endif
+
+  vector<SparseVector*> globalCategorFeats;
+  vector<map<int,int> > classesPerImage;
 
   std::string forbidden_classes_s = conf->gS ("analysis", "donttrain", "");
 
@@ -663,7 +641,7 @@ void SemSegContextTree::train (const MultiDataset *md)
       continue;
     }
 
-    fprintf (stderr, "SemSegCsurka: Collecting pixel examples from localization info: %s\n", currentFile.c_str());
+    fprintf (stderr, "SSContext: Collecting pixel examples from localization info: %s\n", currentFile.c_str());
 
     int xsize, ysize;
     ce->getImageSize (xsize, ysize);
@@ -673,10 +651,6 @@ void SemSegContextTree::train (const MultiDataset *md)
 
     currentfeats.push_back (MultiChannelImageT<unsigned short int> (xsize, ysize, nbTrees));
     currentfeats[imgcounter].setAll (0);
-#ifdef TEXTONMAP
-    textonMap.push_back (MultiChannelImageT<SparseVectorInt> (xsize / grid + 1, ysize / grid + 1, 1));
-    integralTexton.push_back (MultiChannelImageT<SparseVectorInt> (xsize / grid + 1, ysize / grid + 1, 1));
-#endif
 
     labels.push_back (tmpMat);
 
@@ -731,6 +705,25 @@ void SemSegContextTree::train (const MultiDataset *md)
 
       }
     }
+    
+    if(useCategorization)
+    {
+      globalCategorFeats.push_back(new SparseVector());
+      classesPerImage.push_back(map<int,int>());
+      
+      for (int x = 0; x < xsize; x++)
+      {
+        for (int y = 0; y < ysize; y++)
+        {
+          classno = pixelLabels.getPixel (x, y);
+
+          if (forbidden_classes.find (classno) != forbidden_classes.end())
+            continue;
+
+          classesPerImage[imgcounter][classno] = 1;
+        }
+      }
+    }
 
     imgcounter++;
 
@@ -835,18 +828,18 @@ void SemSegContextTree::train (const MultiDataset *md)
 
   depth = 0;
 
-  int uniquenumber = 0;
+  uniquenumber = 0;
 
   for (int t = 0; t < nbTrees; t++)
   {
-    vector<TreeNode> tree;
-    tree.push_back (TreeNode());
-    tree[0].dist = vector<double> (classes, 0.0);
-    tree[0].depth = depth;
-    tree[0].featcounter = amountPixels;
-    tree[0].nodeNumber = uniquenumber;
+    vector<TreeNode> singletree;
+    singletree.push_back (TreeNode());
+    singletree[0].dist = vector<double> (classes, 0.0);
+    singletree[0].depth = depth;
+    singletree[0].featcounter = amountPixels;
+    singletree[0].nodeNumber = uniquenumber;
     uniquenumber++;
-    forest.push_back (tree);
+    forest.push_back (singletree);
   }
 
   vector<int> startnode (nbTrees, 0);
@@ -981,11 +974,8 @@ void SemSegContextTree::train (const MultiDataset *md)
                       if (labelmap.find (labels[iCounter] (x, y)) != labelmap.end())
                         forest[tree][left].dist[labelmap[labels[iCounter] (x, y) ]]++;
                       forest[tree][left].featcounter++;
-                      SparseVectorInt v;
-                      v.insert (pair<int, double> (leftu, weight));
-#ifdef TEXTONMAP
-                      textonMap[iCounter] (subx, suby).add (v);
-#endif
+                      if(useCategorization)
+                        (*globalCategorFeats[iCounter])[leftu]+=weight;
                     }
                     else
                     {
@@ -993,12 +983,9 @@ void SemSegContextTree::train (const MultiDataset *md)
                       if (labelmap.find (labels[iCounter] (x, y)) != labelmap.end())
                         forest[tree][right].dist[labelmap[labels[iCounter] (x, y) ]]++;
                       forest[tree][right].featcounter++;
-                      //find the cell in the subsampled map and increment the right counter there
-                      SparseVectorInt v;
-                      v.insert (pair<int, double> (rightu, weight));
-#ifdef TEXTONMAP
-                      textonMap[iCounter] (subx, suby).add (v);
-#endif
+                      
+                      if(useCategorization)
+                        (*globalCategorFeats[iCounter])[rightu]+=weight;
                     }
                   }
                 }
@@ -1130,9 +1117,6 @@ void SemSegContextTree::train (const MultiDataset *md)
     for (int i = 0; i < imgcounter; i++)
     {
       computeIntegralImage (currentfeats[i], allfeats[i], channelType.size() - classes);
-#ifdef TEXTONMAP
-      computeIntegralImage (textonMap[i], integralTexton[i]);
-#endif
     }
 
     if (firstiteration)
@@ -1153,6 +1137,36 @@ void SemSegContextTree::train (const MultiDataset *md)
   timer.stop();
   cerr << "learning finished in: " << timer.getLastAbsolute() << " seconds" << endl;
   timer.start();
+  
+  if(useCategorization)
+  {
+    for(uint i = 0; i < globalCategorFeats.size(); i++)
+    {
+      globalCategorFeats[i]->setDim(uniquenumber);
+      globalCategorFeats[i]->normalize();
+    }
+    map<int,Vector> ys;
+    
+    int cCounter = 0;
+    for(map<int,int>::iterator it = labelmap.begin(); it != labelmap.end(); it++, cCounter++)
+    {
+      ys[cCounter] = Vector(globalCategorFeats.size());
+      for(int i = 0; i < imgcounter; i++)
+      {
+        if(classesPerImage[i].find(it->first) != classesPerImage[i].end())
+        {
+          ys[cCounter][i] = 1;
+        }
+        else
+        {
+          ys[cCounter][i] = -1;
+        }
+      }
+    }
+
+    fasthik->train(globalCategorFeats, ys);
+    
+  }
 
 #ifdef WRITEGLOB
   ofstream outstream ("globtrain.feat");
@@ -1386,10 +1400,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   probabilities.reInit (xsize, ysize, numClasses);
   probabilities.setAll (0);
 
-#ifdef TEXTONMAP
-  MultiChannelImageT<SparseVectorInt> textonMap (xsize / grid + 1, ysize / grid + 1, 1);
-  MultiChannelImageT<SparseVectorInt> integralTexton (xsize / grid + 1, ysize / grid + 1, 1);
-#endif
+  SparseVector *globalCategorFeat = new SparseVector();
 
   std::string currentFile = Globals::getCurrentImgFN();
   MultiChannelImageT<double> feats;
@@ -1451,14 +1462,12 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
       }
     }
 
-#ifdef TEXTONMAP
     double weight = computeWeight (depth, maxDepth) - computeWeight (depth - 1, maxDepth);
 
     if (depth == 1)
     {
       weight = computeWeight (1, maxDepth);
     }
-#endif
 
     allleaf = true;
 
@@ -1491,27 +1500,20 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
             if (val < forest[tree][t].decision)
             {
               currentfeats.set (x, y, forest[tree][t].left, tree);
-#ifdef TEXTONMAP
 #pragma omp critical
               {
-                SparseVectorInt v;
-                v.insert (pair<int, double> (forest[tree][forest[tree][t].left].nodeNumber, weight));
-                textonMap (subx, suby).add (v);
+                if(useCategorization)
+                  (*globalCategorFeat)[forest[tree][forest[tree][t].left].nodeNumber] += weight;
               }
-#endif
             }
             else
             {
               currentfeats.set (x, y, forest[tree][t].right, tree);
-#ifdef TEXTONMAP
 #pragma omp critical
               {
-                SparseVectorInt v;
-                v.insert (pair<int, double> (forest[tree][forest[tree][t].right].nodeNumber, weight));
-
-                textonMap (subx, suby).add (v);
+                if(useCategorization)
+                  (*globalCategorFeat)[forest[tree][forest[tree][t].right].nodeNumber] += weight;
               }
-#endif
             }
           }
         }
@@ -1522,7 +1524,6 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
     {
       int xsize = currentfeats.width();
       int ysize = currentfeats.height();
-      int counter = 0;
 
 #pragma omp parallel for
       for (int x = 0; x < xsize; x++)
@@ -1560,9 +1561,6 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
         feats.addChannel (classes + rawChannels);
       }
       computeIntegralImage (currentfeats, feats, channelType.size() - classes);
-#ifdef TEXTONMAP
-      computeIntegralImage (textonMap, integralTexton);
-#endif
       if (firstiteration)
       {
         firstiteration = false;
@@ -1645,6 +1643,31 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   }
 #endif
 
+  vector<int> classesInImg;
+  
+  if(useCategorization)
+  {
+    globalCategorFeat->setDim(uniquenumber);
+    globalCategorFeat->normalize();
+    ClassificationResult cr = fasthik->classify(globalCategorFeat);
+    for (uint i = 0; i < classes; i++)
+    {
+      cerr << cr.scores[i] << " ";
+      if(cr.scores[i] > 0.0/*-0.3*/)
+      {
+        classesInImg.push_back(i);
+      }
+    }
+    cerr << "amount of classes: " << classes << " used classes: " << classesInImg.size() << endl;
+  }
+  else
+  {
+    for (uint i = 0; i < classes; i++)
+    {
+      classesInImg.push_back(i);
+    }
+  }
+
   if (pixelWiseLabeling)
   {
     //final labeling:
@@ -1657,8 +1680,9 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
        double maxvalue = - numeric_limits<double>::max(); //TODO: this could also be done once per node, not per pixel
         int maxindex = 0;
 
-        for (uint i = 0; i < classes; i++)
+        for (uint c = 0; c < classesInImg.size(); c++)
         {
+          int i = classesInImg[c];
           int currentclass = labelmapback[i];
           if (useclass[currentclass])
           {
@@ -1754,7 +1778,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
     regionProbs.clear();
     regionProbs = vector<vector<double> >(amountRegions, vector<double> (classes, 0.0));
 
-    vector<int> bestlabels (amountRegions, 0);
+    vector<int> bestlabels (amountRegions, labelmapback[classesInImg[0]]);
 
     for (int y = 0; y < img.height(); y++)
     {
@@ -1762,8 +1786,9 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
       {
         int cregion = regions (x, y);
 
-        for (int d = 0; d < classes; d++)
+        for (uint c = 0; c < classesInImg.size(); c++)
         {
+          int d = classesInImg[c];
           regionProbs[cregion][d] += getMeanProb (x, y, d, currentfeats);
         }
       }
@@ -1771,8 +1796,8 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
 
     for (int r = 0; r < amountRegions; r++)
     {
-      double maxval = regionProbs[r][0];
-      bestlabels[r] = 0;
+      double maxval = regionProbs[r][classesInImg[0]];
+      bestlabels[r] = classesInImg[0];
 
       for (int d = 1; d < classes; d++)
       {
@@ -1870,6 +1895,8 @@ void SemSegContextTree::store (std::ostream & os, int format) const
   }
 
   os << rawChannels << endl;
+  
+  os << uniquenumber << endl;
 }
 
 void SemSegContextTree::restore (std::istream & is, int format)
@@ -1978,6 +2005,8 @@ void SemSegContextTree::restore (std::istream & is, int format)
   }
 
   is >> rawChannels;
+  
+  is >> uniquenumber;
 }
 
 

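Before the header changes below, a note on the test-time side added above: semanticseg() accumulates one global sparse feature per image in the same node-number space, using the incremental depth weight computeWeight(depth, maxDepth) - computeWeight(depth - 1, maxDepth) (with depth == 1 using computeWeight(1, maxDepth) directly); the feature is then normalized, classified by fasthik, and only classes with a positive score are used for labeling. A small sketch of that accumulation, with nodeWeight() as an assumed stand-in since computeWeight() is not part of this diff:

// Hypothetical sketch of the global-feature accumulation done in semanticseg().
// nodeWeight() is an assumed monotone weighting; the real computeWeight() is not shown here.
#include <cstdio>
#include <map>
#include <vector>

double nodeWeight(int depth, int maxDepth)
{
  return (double)depth / (double)maxDepth;   // assumed stand-in for computeWeight()
}

int main()
{
  const int maxDepth = 3;
  std::map<int, double> globalCategorFeat;   // node number -> accumulated weight

  // Toy path of one pixel through one tree: the unique node numbers it reaches per depth.
  std::vector<int> visitedNodes;
  visitedNodes.push_back(4);   // depth 1
  visitedNodes.push_back(9);   // depth 2
  visitedNodes.push_back(17);  // depth 3

  for (int depth = 1; depth <= (int)visitedNodes.size(); depth++)
  {
    // Incremental weight of this depth, as in the diff:
    // computeWeight(depth, maxDepth) - computeWeight(depth - 1, maxDepth),
    // with depth == 1 using computeWeight(1, maxDepth) directly.
    double weight = (depth == 1)
        ? nodeWeight(1, maxDepth)
        : nodeWeight(depth, maxDepth) - nodeWeight(depth - 1, maxDepth);
    globalCategorFeat[visitedNodes[depth - 1]] += weight;
  }

  for (std::map<int, double>::const_iterator it = globalCategorFeat.begin();
       it != globalCategorFeat.end(); ++it)
    printf("node %d -> %.3f\n", it->first, it->second);
  return 0;
}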
+ 11 - 10
semseg/SemSegContextTree.h

@@ -15,6 +15,8 @@
 
 #include "objrec-froehlichexp/semseg/operations/Operations.h"
 
+#include "fast-hik/GPHIKClassifier.h"
+
 namespace OBJREC {
 
 /** Localization system */
@@ -106,6 +108,9 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
     
     /** use Regions as extra feature channel or not */
     bool useRegionFeature;
+    
+    /** use external image categorization to avoid some classes */
+    bool useCategorization;
 
     /** how to handle each channel
      * 0: simple grayvalue features
@@ -133,6 +138,12 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
     
     /** amount of grayvalue Channels */
     int rawChannels;
+    
+    /** classifier for categorization */
+    OBJREC::GPHIKClassifier *fasthik;
+    
+    /** unique numbers for nodes */
+    int uniquenumber;
 
   public:
     /** simple constructor */
@@ -173,15 +184,6 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
      * @return void
      **/
     void extractBasicFeatures ( NICE::MultiChannelImageT<double> &feats, const NICE::ColorImage &img, const std::string &currentFile, int &amountRegions);
-    
-    /**
-     * @brief computes integral image for Sparse Multichannel Image
-     *
-     * @param currentfeats input features
-     * @param integralImage output image (must be initilized)
-     * @return void
-     **/
-    void computeIntegralImage ( const NICE::MultiChannelImageT<NICE::SparseVectorInt> &infeats, NICE::MultiChannelImageT<NICE::SparseVectorInt> &integralImage );
 
     /**
      * compute best split for current settings
@@ -229,7 +231,6 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
      * @return void
      **/
     virtual void clear () {}
-
 };
 
 } // namespace