
global classification included

Bjoern Froehlich 13 years ago
parent
commit
aa8422836c
2 changed files with 133 additions and 103 deletions
  1. semseg/SemSegContextTree.cpp (+122, -93)
  2. semseg/SemSegContextTree.h (+11, -10)

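Commit overview: this change adds an optional global image categorization stage to the context-tree segmentation. With use_categorization enabled, every training image accumulates a sparse global feature indexed by the unique node numbers of the forest (weighted by tree depth), a GPHIKClassifier (fasthik) is trained on these features with one-vs-all labels per class, and at test time only classes with a positive categorization score are kept in classesInImg and considered during pixel-wise and region-wise labeling. The following is a minimal, self-contained sketch of the label construction and score filtering using plain STL containers; GlobalFeat, buildOneVsAllLabels and filterClasses are hypothetical stand-ins for the SparseVector/GPHIKClassifier plumbing in the actual code.

// Hypothetical, simplified illustration of the categorization logic added by this commit.
// GlobalFeat stands in for NICE::SparseVector: node number -> accumulated weight.
#include <cstdio>
#include <cmath>
#include <map>
#include <vector>

typedef std::map<int, double> GlobalFeat;

// L2-normalize a sparse feature (the diff calls SparseVector::normalize()).
void normalize(GlobalFeat &f)
{
  double norm = 0.0;
  for (GlobalFeat::const_iterator it = f.begin(); it != f.end(); ++it)
    norm += it->second * it->second;
  norm = std::sqrt(norm);
  if (norm > 0.0)
    for (GlobalFeat::iterator it = f.begin(); it != f.end(); ++it)
      it->second /= norm;
}

// One-vs-all training labels: ys[c][i] is +1 if class c occurs in image i, else -1.
// Mirrors the loop over labelmap/classesPerImage added to SemSegContextTree::train().
std::map<int, std::vector<double> > buildOneVsAllLabels(
    const std::vector<std::map<int, int> > &classesPerImage, int classes)
{
  std::map<int, std::vector<double> > ys;
  for (int c = 0; c < classes; c++)
  {
    ys[c] = std::vector<double>(classesPerImage.size(), -1.0);
    for (size_t i = 0; i < classesPerImage.size(); i++)
      if (classesPerImage[i].count(c) > 0)
        ys[c][i] = 1.0;
  }
  return ys;
}

// Test time: keep only classes whose categorization score is positive,
// as in the classesInImg filtering added to semanticseg().
std::vector<int> filterClasses(const std::vector<double> &scores)
{
  std::vector<int> classesInImg;
  for (size_t i = 0; i < scores.size(); i++)
    if (scores[i] > 0.0)
      classesInImg.push_back((int)i);
  return classesInImg;
}

int main()
{
  // Two toy images: image 0 contains classes 0 and 2, image 1 only class 1.
  std::vector<std::map<int, int> > classesPerImage(2);
  classesPerImage[0][0] = 1; classesPerImage[0][2] = 1;
  classesPerImage[1][1] = 1;

  std::map<int, std::vector<double> > ys = buildOneVsAllLabels(classesPerImage, 3);
  printf("label of class 2 in image 0: %+.0f\n", ys[2][0]);   // +1
  printf("label of class 2 in image 1: %+.0f\n", ys[2][1]);   // -1

  // Toy global feature for one image: node number -> accumulated weight.
  GlobalFeat feat;
  feat[4] = 0.3; feat[9] = 0.4;
  normalize(feat);
  printf("normalized weight of node 4: %.2f\n", feat[4]);     // 0.60

  // Pretend scores from the categorization classifier for one test image.
  std::vector<double> scores;
  scores.push_back(0.7); scores.push_back(-0.4); scores.push_back(0.1);
  std::vector<int> used = filterClasses(scores);
  printf("used classes: %zu of %zu\n", used.size(), scores.size());
  return 0;
}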
+ 122 - 93
semseg/SemSegContextTree.cpp

@@ -21,7 +21,6 @@
 #include <iostream>
 
 #undef WRITEGLOB
-#undef TEXTONMAP
 
 #define DEBUG
 
@@ -54,6 +53,17 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
   nbTrees = conf->gI (section, "amount_trees", 1);
 
   string segmentationtype = conf->gS (section, "segmentation_type", "meanshift");
+  
+  useCategorization = conf->gB (section, "use_categorization", false);
+  
+  if(useCategorization)
+  {
+    fasthik = new GPHIKClassifier(conf);
+  }
+  else
+  {
+    fasthik = NULL;
+  }
 
   randomTests = conf->gI (section, "random_tests", 10);
 
@@ -118,8 +128,6 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
     tops.push_back (new Haar3Horiz());
   if (conf->gB (featsec, "haar3_vert", true))
     tops.push_back (new Haar3Vert());
-  if (conf->gB (featsec, "glob", true))
-    tops.push_back (new GlobalFeats());
 
   ops.push_back (tops);
   ops.push_back (tops);
@@ -358,6 +366,10 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
       feat.cfeats = &currentfeats[ (*it) [0]];
       feat.cTree = tree;
       feat.tree = &forest[tree];
+      
+      assert(forest.size() > tree);
+      assert(forest[tree][0].dist.size() > 0);
+      
       feat.rProbs = &regionProbs[(*it) [0]];
       
       double val = featsel[f]->getVal (feat, (*it) [1], (*it) [2]);
@@ -476,39 +488,6 @@ inline double SemSegContextTree::getMeanProb (const int &x, const int &y, const
   return val / (double)nbTrees;
 }
 
-void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<SparseVectorInt> &infeats, NICE::MultiChannelImageT<SparseVectorInt> &integralImage)
-{
-  int xsize = infeats.width();
-  int ysize = infeats.height();
-  integralImage (0, 0).add (infeats.get (0, 0));
-
-  //first column
-  for (int y = 1; y < ysize; y++)
-  {
-    integralImage (0, y).add (infeats.get (0, y));
-    integralImage (0, y).add (integralImage (0, y - 1));
-  }
-
-  //first row
-  for (int x = 1; x < xsize; x++)
-  {
-    integralImage (x, 0).add (infeats.get (x, 0));
-    integralImage (x, 0).add (integralImage (x - 1, 0));
-  }
-
-  //rest
-  for (int y = 1; y < ysize; y++)
-  {
-    for (int x = 1; x < xsize; x++)
-    {
-      integralImage (x, y).add (infeats.get (x, y));
-      integralImage (x, y).add (integralImage (x, y - 1));
-      integralImage (x, y).add (integralImage (x - 1, y));
-      integralImage (x, y).sub (integralImage (x - 1, y - 1));
-    }
-  }
-}
-
 void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<unsigned short int> &currentfeats, NICE::MultiChannelImageT<double> &feats, int firstChannel)
 {
   int xsize = currentfeats.width();
@@ -591,10 +570,9 @@ void SemSegContextTree::train (const MultiDataset *md)
   vector<MultiChannelImageT<double> > allfeats;
   vector<MultiChannelImageT<unsigned short int> > currentfeats;
   vector<MatrixT<int> > labels;
-#ifdef TEXTONMAP
-  vector<MultiChannelImageT<SparseVectorInt> > textonMap;
-  vector<MultiChannelImageT<SparseVectorInt> > integralTexton;
-#endif
+
+  vector<SparseVector*> globalCategorFeats;
+  vector<map<int,int> > classesPerImage;
 
   std::string forbidden_classes_s = conf->gS ("analysis", "donttrain", "");
 
@@ -663,7 +641,7 @@ void SemSegContextTree::train (const MultiDataset *md)
       continue;
     }
 
-    fprintf (stderr, "SemSegCsurka: Collecting pixel examples from localization info: %s\n", currentFile.c_str());
+    fprintf (stderr, "SSContext: Collecting pixel examples from localization info: %s\n", currentFile.c_str());
 
     int xsize, ysize;
     ce->getImageSize (xsize, ysize);
@@ -673,10 +651,6 @@ void SemSegContextTree::train (const MultiDataset *md)
 
     currentfeats.push_back (MultiChannelImageT<unsigned short int> (xsize, ysize, nbTrees));
     currentfeats[imgcounter].setAll (0);
-#ifdef TEXTONMAP
-    textonMap.push_back (MultiChannelImageT<SparseVectorInt> (xsize / grid + 1, ysize / grid + 1, 1));
-    integralTexton.push_back (MultiChannelImageT<SparseVectorInt> (xsize / grid + 1, ysize / grid + 1, 1));
-#endif
 
     labels.push_back (tmpMat);
 
@@ -731,6 +705,25 @@ void SemSegContextTree::train (const MultiDataset *md)
 
       }
     }
+    
+    if(useCategorization)
+    {
+      globalCategorFeats.push_back(new SparseVector());
+      classesPerImage.push_back(map<int,int>());
+      
+      for (int x = 0; x < xsize; x++)
+      {
+        for (int y = 0; y < ysize; y++)
+        {
+          classno = pixelLabels.getPixel (x, y);
+
+          if (forbidden_classes.find (classno) != forbidden_classes.end())
+            continue;
+
+          classesPerImage[imgcounter][classno] = 1;
+        }
+      }
+    }
 
     imgcounter++;
 
@@ -835,18 +828,18 @@ void SemSegContextTree::train (const MultiDataset *md)
 
   depth = 0;
 
-  int uniquenumber = 0;
+  uniquenumber = 0;
 
   for (int t = 0; t < nbTrees; t++)
   {
-    vector<TreeNode> tree;
-    tree.push_back (TreeNode());
-    tree[0].dist = vector<double> (classes, 0.0);
-    tree[0].depth = depth;
-    tree[0].featcounter = amountPixels;
-    tree[0].nodeNumber = uniquenumber;
+    vector<TreeNode> singletree;
+    singletree.push_back (TreeNode());
+    singletree[0].dist = vector<double> (classes, 0.0);
+    singletree[0].depth = depth;
+    singletree[0].featcounter = amountPixels;
+    singletree[0].nodeNumber = uniquenumber;
     uniquenumber++;
-    forest.push_back (tree);
+    forest.push_back (singletree);
   }
 
   vector<int> startnode (nbTrees, 0);
@@ -981,11 +974,8 @@ void SemSegContextTree::train (const MultiDataset *md)
                       if (labelmap.find (labels[iCounter] (x, y)) != labelmap.end())
                         forest[tree][left].dist[labelmap[labels[iCounter] (x, y) ]]++;
                       forest[tree][left].featcounter++;
-                      SparseVectorInt v;
-                      v.insert (pair<int, double> (leftu, weight));
-#ifdef TEXTONMAP
-                      textonMap[iCounter] (subx, suby).add (v);
-#endif
+                      if(useCategorization)
+                        (*globalCategorFeats[iCounter])[leftu]+=weight;
                     }
                     else
                     {
@@ -993,12 +983,9 @@ void SemSegContextTree::train (const MultiDataset *md)
                       if (labelmap.find (labels[iCounter] (x, y)) != labelmap.end())
                         forest[tree][right].dist[labelmap[labels[iCounter] (x, y) ]]++;
                       forest[tree][right].featcounter++;
-                      //find the cell in the subsampled map and increment the right counter there
-                      SparseVectorInt v;
-                      v.insert (pair<int, double> (rightu, weight));
-#ifdef TEXTONMAP
-                      textonMap[iCounter] (subx, suby).add (v);
-#endif
+                      
+                      if(useCategorization)
+                        (*globalCategorFeats[iCounter])[rightu]+=weight;
                     }
                   }
                 }
@@ -1130,9 +1117,6 @@ void SemSegContextTree::train (const MultiDataset *md)
     for (int i = 0; i < imgcounter; i++)
     {
       computeIntegralImage (currentfeats[i], allfeats[i], channelType.size() - classes);
-#ifdef TEXTONMAP
-      computeIntegralImage (textonMap[i], integralTexton[i]);
-#endif
     }
 
     if (firstiteration)
@@ -1153,6 +1137,36 @@ void SemSegContextTree::train (const MultiDataset *md)
   timer.stop();
   cerr << "learning finished in: " << timer.getLastAbsolute() << " seconds" << endl;
   timer.start();
+  
+  if(useCategorization)
+  {
+    for(uint i = 0; i < globalCategorFeats.size(); i++)
+    {
+      globalCategorFeats[i]->setDim(uniquenumber);
+      globalCategorFeats[i]->normalize();
+    }
+    map<int,Vector> ys;
+    
+    int cCounter = 0;
+    for(map<int,int>::iterator it = labelmap.begin(); it != labelmap.end(); it++, cCounter++)
+    {
+      ys[cCounter] = Vector(globalCategorFeats.size());
+      for(int i = 0; i < imgcounter; i++)
+      {
+        if(classesPerImage[i].find(it->first) != classesPerImage[i].end())
+        {
+          ys[cCounter][i] = 1;
+        }
+        else
+        {
+          ys[cCounter][i] = -1;
+        }
+      }
+    }
+
+    fasthik->train(globalCategorFeats, ys);
+    
+  }
 
 #ifdef WRITEGLOB
   ofstream outstream ("globtrain.feat");
@@ -1386,10 +1400,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   probabilities.reInit (xsize, ysize, numClasses);
   probabilities.setAll (0);
 
-#ifdef TEXTONMAP
-  MultiChannelImageT<SparseVectorInt> textonMap (xsize / grid + 1, ysize / grid + 1, 1);
-  MultiChannelImageT<SparseVectorInt> integralTexton (xsize / grid + 1, ysize / grid + 1, 1);
-#endif
+  SparseVector *globalCategorFeat = new SparseVector();
 
   std::string currentFile = Globals::getCurrentImgFN();
   MultiChannelImageT<double> feats;
@@ -1451,14 +1462,12 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
       }
     }
 
-#ifdef TEXTONMAP
     double weight = computeWeight (depth, maxDepth) - computeWeight (depth - 1, maxDepth);
 
     if (depth == 1)
     {
       weight = computeWeight (1, maxDepth);
     }
-#endif
 
     allleaf = true;
 
@@ -1491,27 +1500,20 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
             if (val < forest[tree][t].decision)
             {
               currentfeats.set (x, y, forest[tree][t].left, tree);
-#ifdef TEXTONMAP
 #pragma omp critical
               {
-                SparseVectorInt v;
-                v.insert (pair<int, double> (forest[tree][forest[tree][t].left].nodeNumber, weight));
-                textonMap (subx, suby).add (v);
+                if(useCategorization)
+                  (*globalCategorFeat)[forest[tree][forest[tree][t].left].nodeNumber] += weight;
               }
-#endif
             }
             else
             {
               currentfeats.set (x, y, forest[tree][t].right, tree);
-#ifdef TEXTONMAP
 #pragma omp critical
               {
-                SparseVectorInt v;
-                v.insert (pair<int, double> (forest[tree][forest[tree][t].right].nodeNumber, weight));
-
-                textonMap (subx, suby).add (v);
+                if(useCategorization)
+                  (*globalCategorFeat)[forest[tree][forest[tree][t].right].nodeNumber] += weight;
               }
-#endif
             }
           }
         }
@@ -1522,7 +1524,6 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
     {
       int xsize = currentfeats.width();
       int ysize = currentfeats.height();
-      int counter = 0;
 
 #pragma omp parallel for
       for (int x = 0; x < xsize; x++)
@@ -1560,9 +1561,6 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
         feats.addChannel (classes + rawChannels);
       }
       computeIntegralImage (currentfeats, feats, channelType.size() - classes);
-#ifdef TEXTONMAP
-      computeIntegralImage (textonMap, integralTexton);
-#endif
       if (firstiteration)
       {
         firstiteration = false;
@@ -1645,6 +1643,31 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   }
 #endif
 
+  vector<int> classesInImg;
+  
+  if(useCategorization)
+  {
+    globalCategorFeat->setDim(uniquenumber);
+    globalCategorFeat->normalize();
+    ClassificationResult cr = fasthik->classify(globalCategorFeat);
+    for (uint i = 0; i < classes; i++)
+    {
+      cerr << cr.scores[i] << " ";
+      if(cr.scores[i] > 0.0/*-0.3*/)
+      {
+        classesInImg.push_back(i);
+      }
+    }
+    cerr << "amount of classes: " << classes << " used classes: " << classesInImg.size() << endl;
+  }
+  else
+  {
+    for (uint i = 0; i < classes; i++)
+    {
+      classesInImg.push_back(i);
+    }
+  }
+
   if (pixelWiseLabeling)
   {
     //final labeling:
@@ -1657,8 +1680,9 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
        double maxvalue = - numeric_limits<double>::max(); //TODO: this could also be done once per node, not per pixel
         int maxindex = 0;
 
-        for (uint i = 0; i < classes; i++)
+        for (uint c = 0; c < classesInImg.size(); c++)
         {
+          int i = classesInImg[c];
           int currentclass = labelmapback[i];
           if (useclass[currentclass])
           {
@@ -1754,7 +1778,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
     regionProbs.clear();
     regionProbs = vector<vector<double> >(amountRegions, vector<double> (classes, 0.0));
 
-    vector<int> bestlabels (amountRegions, 0);
+    vector<int> bestlabels (amountRegions, labelmapback[classesInImg[0]]);
 
     for (int y = 0; y < img.height(); y++)
     {
@@ -1762,8 +1786,9 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
       {
         int cregion = regions (x, y);
 
-        for (int d = 0; d < classes; d++)
+        for (uint c = 0; c < classesInImg.size(); c++)
         {
+          int d = classesInImg[c];
           regionProbs[cregion][d] += getMeanProb (x, y, d, currentfeats);
         }
       }
@@ -1771,8 +1796,8 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
 
     for (int r = 0; r < amountRegions; r++)
     {
-      double maxval = regionProbs[r][0];
-      bestlabels[r] = 0;
+      double maxval = regionProbs[r][classesInImg[0]];
+      bestlabels[r] = classesInImg[0];
 
       for (int d = 1; d < classes; d++)
       {
@@ -1870,6 +1895,8 @@ void SemSegContextTree::store (std::ostream & os, int format) const
   }
 
   os << rawChannels << endl;
+  
+  os << uniquenumber << endl;
 }
 
 void SemSegContextTree::restore (std::istream & is, int format)
@@ -1978,6 +2005,8 @@ void SemSegContextTree::restore (std::istream & is, int format)
   }
 
   is >> rawChannels;
+  
+  is >> uniquenumber;
 }
 
 

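Before the header changes below, a note on the test-time side added above: semanticseg() accumulates one global sparse feature per image in the same node-number space, using the incremental depth weight computeWeight(depth, maxDepth) - computeWeight(depth - 1, maxDepth) (with depth == 1 using computeWeight(1, maxDepth) directly); the feature is then normalized, classified by fasthik, and only classes with a positive score are used for labeling. A small sketch of that accumulation, with nodeWeight() as an assumed stand-in since computeWeight() is not part of this diff:

// Hypothetical sketch of the global-feature accumulation done in semanticseg().
// nodeWeight() is an assumed monotone weighting; the real computeWeight() is not shown here.
#include <cstdio>
#include <map>
#include <vector>

double nodeWeight(int depth, int maxDepth)
{
  return (double)depth / (double)maxDepth;   // assumed stand-in for computeWeight()
}

int main()
{
  const int maxDepth = 3;
  std::map<int, double> globalCategorFeat;   // node number -> accumulated weight

  // Toy path of one pixel through one tree: the unique node numbers it reaches per depth.
  std::vector<int> visitedNodes;
  visitedNodes.push_back(4);   // depth 1
  visitedNodes.push_back(9);   // depth 2
  visitedNodes.push_back(17);  // depth 3

  for (int depth = 1; depth <= (int)visitedNodes.size(); depth++)
  {
    // Incremental weight of this depth, as in the diff:
    // computeWeight(depth, maxDepth) - computeWeight(depth - 1, maxDepth),
    // with depth == 1 using computeWeight(1, maxDepth) directly.
    double weight = (depth == 1)
        ? nodeWeight(1, maxDepth)
        : nodeWeight(depth, maxDepth) - nodeWeight(depth - 1, maxDepth);
    globalCategorFeat[visitedNodes[depth - 1]] += weight;
  }

  for (std::map<int, double>::const_iterator it = globalCategorFeat.begin();
       it != globalCategorFeat.end(); ++it)
    printf("node %d -> %.3f\n", it->first, it->second);
  return 0;
}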
+ 11 - 10
semseg/SemSegContextTree.h

@@ -15,6 +15,8 @@
 
 #include "objrec-froehlichexp/semseg/operations/Operations.h"
 
+#include "fast-hik/GPHIKClassifier.h"
+
 namespace OBJREC {
 
 /** Localization system */
@@ -106,6 +108,9 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
     
     /** use Regions as extra feature channel or not */
     bool useRegionFeature;
+    
+    /** use external image categorization to avoid some classes */
+    bool useCategorization;
 
     /** how to handle each channel
      * 0: simple grayvalue features
@@ -133,6 +138,12 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
     
     /** amount of grayvalue Channels */
     int rawChannels;
+    
+    /** classifier for categorization */
+    OBJREC::GPHIKClassifier *fasthik;
+    
+    /** unique numbers for nodes */
+    int uniquenumber;
 
   public:
     /** simple constructor */
@@ -173,15 +184,6 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
      * @return void
      **/
     void extractBasicFeatures ( NICE::MultiChannelImageT<double> &feats, const NICE::ColorImage &img, const std::string &currentFile, int &amountRegions);
-    
-    /**
-     * @brief computes integral image for Sparse Multichannel Image
-     *
-     * @param currentfeats input features
-     * @param integralImage output image (must be initilized)
-     * @return void
-     **/
-    void computeIntegralImage ( const NICE::MultiChannelImageT<NICE::SparseVectorInt> &infeats, NICE::MultiChannelImageT<NICE::SparseVectorInt> &integralImage );
 
     /**
      * compute best split for current settings
@@ -229,7 +231,6 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
      * @return void
      **/
     virtual void clear () {}
-
 };
 
 } // namespace