Bjoern Froehlich преди 13 години
родител
ревизия
f45ccccdeb
променени са 4 файла, в които са добавени 255 реда и са изтрити 102 реда
  1. 246 92
      semseg/SemSegContextTree.cpp
  2. 1 1
      semseg/SemSegContextTree.h
  3. 2 6
      semseg/operations/Operations.cpp
  4. 6 3
      semseg/operations/Operations.h

+ 246 - 92
semseg/SemSegContextTree.cpp

@@ -98,7 +98,7 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
   ops.push_back (tops);
 
   tops.clear();
-  tops.push_back (new Equality());
+  tops.push_back (new RegionFeat());
   ops.push_back (tops);
 
   tops.clear();
@@ -187,7 +187,7 @@ SemSegContextTree::~SemSegContextTree()
 {
 }
 
-double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<double> > &feats, std::vector<NICE::MultiChannelImageT<unsigned short int> > &currentfeats, const std::vector<NICE::MatrixT<int> > &labels, int node, Operation *&splitop, double &splitval, const int &tree)
+double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<double> > &feats, std::vector<NICE::MultiChannelImageT<unsigned short int> > &currentfeats, const std::vector<NICE::MatrixT<int> > &labels, int node, Operation *&splitop, double &splitval, const int &tree, vector<vector<vector<double> > > &regionProbs)
 {
   Timer t;
   t.start();
@@ -279,7 +279,7 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
 
   /** vector of all possible features */
   std::vector<Operation*> featsel;
-  
+
   for (int i = 0; i < featsPerSplit; i++)
   {
     int x1, x2, y1, y2;
@@ -287,9 +287,9 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
 
     int tmpws = windowSize;
 
-    if (ft > 1 && firstiteration)
+    if (firstiteration)
       ft = 0;
-    
+
     if (channelsPerType[ft].size() == 0)
     {
       ft = 0;
@@ -300,21 +300,29 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
       //use larger window size for context features
       tmpws *= 4;
     }
-    
+
     x1 = (int)((double)rand() / (double)RAND_MAX * (double)tmpws) - tmpws / 2;
     x2 = (int)((double)rand() / (double)RAND_MAX * (double)tmpws) - tmpws / 2;
     y1 = (int)((double)rand() / (double)RAND_MAX * (double)tmpws) - tmpws / 2;
     y2 = (int)((double)rand() / (double)RAND_MAX * (double)tmpws) - tmpws / 2;
-      
+
     int f1 = (int)((double)rand() / (double)RAND_MAX * (double)channelsPerType[ft].size());
     int f2 = f1;
-    if((double)rand() / (double)RAND_MAX > 0.5)
+    if ((double)rand() / (double)RAND_MAX > 0.5)
       f2 = (int)((double)rand() / (double)RAND_MAX * (double)channelsPerType[ft].size());
     int o = (int)((double)rand() / (double)RAND_MAX * (double)ops[ft].size());
+
+    f1 = channelsPerType[ft][f1];
+    f2 = channelsPerType[ft][f2];
+    if(ft == 1)
+    {
+      int classes = (int)regionProbs[0][0].size();
+      f2 = (int)((double)rand() / (double)RAND_MAX * (double)classes);
+    }
     
     Operation *op = ops[ft][o]->clone();
 
-    op->set(x1, y1, x2, y2, channelsPerType[ft][f1], channelsPerType[ft][f2], calcVal[ft]);
+    op->set(x1, y1, x2, y2, f1, f2, calcVal[ft]);
     op->setFeatType(ft);
 
     if (ft == 3 || ft == 4)
@@ -341,12 +349,14 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
       feat.cfeats = &currentfeats[ (*it) [0]];
       feat.cTree = tree;
       feat.tree = &forest[tree];
+      feat.rProbs = &regionProbs[(*it) [0]];
+      
       double val = featsel[f]->getVal (feat, (*it) [1], (*it) [2]);
       vals.push_back (val);
       maxval = std::max (val, maxval);
       minval = std::min (val, minval);
     }
-    
+
     if (minval == maxval)
       continue;
 
@@ -422,7 +432,7 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImageT<dou
         l_splitval = val;
       }
     }
-    
+
     if (l_bestig > bestig)
     {
       bestig = l_bestig;
@@ -505,12 +515,12 @@ void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<uns
     {
       int corg = integralMap[it].first;
       int cint = integralMap[it].second;
-      
+
       for (int y = 0; y < ysize; y++)
       {
         for (int x = 0; x < xsize; x++)
         {
-          feats(x,y,cint) = feats(x,y,corg);
+          feats(x, y, cint) = feats(x, y, corg);
         }
       }
       feats.calcIntegral(cint);
@@ -522,21 +532,21 @@ void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<uns
 #pragma omp parallel for
   for (int c = 0; c < channels; c++)
   {
-    
+
     feats (0, 0, firstChannel + c) = getMeanProb (0, 0, c, currentfeats);
 
     //first column
     for (int y = 1; y < ysize; y++)
     {
       feats (0, y, firstChannel + c) = getMeanProb (0, y, c, currentfeats)
-                                         + feats (0, y - 1, firstChannel + c);
+                                       + feats (0, y - 1, firstChannel + c);
     }
 
     //first row
     for (int x = 1; x < xsize; x++)
     {
       feats (x, 0, firstChannel + c) = getMeanProb (x, 0, c, currentfeats)
-                                         + feats (x - 1, 0, firstChannel + c);
+                                       + feats (x - 1, 0, firstChannel + c);
     }
 
     //rest
@@ -545,12 +555,12 @@ void SemSegContextTree::computeIntegralImage (const NICE::MultiChannelImageT<uns
       for (int x = 1; x < xsize; x++)
       {
         feats (x, y, firstChannel + c) = getMeanProb (x, y, c, currentfeats)
-                                           + feats (x, y - 1, firstChannel + c)
-                                           + feats (x - 1, y, firstChannel + c)
-                                           - feats (x - 1, y - 1, firstChannel + c);
+                                         + feats (x, y - 1, firstChannel + c)
+                                         + feats (x - 1, y, firstChannel + c)
+                                         - feats (x - 1, y - 1, firstChannel + c);
       }
     }
-  } 
+  }
 }
 
 inline double computeWeight (const double &d, const double &dim)
@@ -578,8 +588,9 @@ void SemSegContextTree::train (const MultiDataset *md)
 #endif
 
   std::string forbidden_classes_s = conf->gS ("analysis", "donttrain", "");
-  
+
   vector<vector<vector<double> > > regionProbs;
+  vector<vector<int> > rSize;
   vector<int> amountRegionpI;
 
   if (forbidden_classes_s == "")
@@ -592,7 +603,37 @@ void SemSegContextTree::train (const MultiDataset *md)
   int imgcounter = 0;
 
   int amountPixels = 0;
-  
+
+  ////////////////////////////////////////////////////
+  //define which feature extraction methods should be used for each channel
+  rawChannels = 3;
+
+  // how many channels without integral image
+  int shift = 0;
+
+  if (useGradient)
+    rawChannels *= 2;
+
+  if (useWeijer)
+    rawChannels += 11;
+
+  if (useHoiemFeatures)
+    rawChannels += 8;
+
+  // gray value images
+  for (int i = 0; i < rawChannels; i++)
+  {
+    channelType.push_back (0);
+  }
+
+  // regions
+  if (useRegionFeature)
+  {
+    channelType.push_back (1);
+    shift++;
+  }
+
+///////////////////////////////////////////////////////////////////
 
   LOOP_ALL_S (*trainp)
   {
@@ -609,7 +650,7 @@ void SemSegContextTree::train (const MultiDataset *md)
     if (locResult->size() <= 0)
     {
       fprintf (stderr, "WARNING: NO ground truth polygons found for %s !\n",
-                currentFile.c_str());
+               currentFile.c_str());
       continue;
     }
 
@@ -646,10 +687,18 @@ void SemSegContextTree::train (const MultiDataset *md)
     int amountRegions;
     // read image and do some simple transformations
     extractBasicFeatures (allfeats[imgcounter], img, currentFile, amountRegions);
-    
-    if(useRegionFeature)
+
+    if (useRegionFeature)
     {
       amountRegionpI.push_back(amountRegions);
+      rSize.push_back(vector<int>(amountRegions, 0));
+      for (int y = 0; y < ysize; y++)
+      {
+        for (int x = 0; x < xsize; x++)
+        {
+          rSize[imgcounter][allfeats[imgcounter](x, y, rawChannels)]++;
+        }
+      }
     }
 
     // getting groundtruth
@@ -692,43 +741,7 @@ void SemSegContextTree::train (const MultiDataset *md)
     classes++;
   }
 
-  if(useRegionFeature)
-  {
-    for(int a = 0; a < (int)amountRegionpI.size(); a++)
-    {
-      regionProbs.push_back(vector<vector<double> > (amountRegionpI[a], vector<double> (classes, 0.0)));
-    }
-  }
-
-////////////////////////////////////////////////////
-  //define which featurextraction methods should be used for each channel
-  rawChannels = 3;
-
-  // how many channels without integral image
-  int shift = 0;
-
-  if (useGradient)
-    rawChannels *= 2;
-
-  if (useWeijer)
-    rawChannels += 11;
-
-  if (useHoiemFeatures)
-    rawChannels += 8;
-
-  // gray value images
-  for (int i = 0; i < rawChannels; i++)
-  {
-    channelType.push_back (0);
-  }
-
-  // regions
-  if (useRegionFeature)
-  {
-    channelType.push_back (1);
-    shift++;
-  }
-
+///////////////////////////////////////////////////////////////////
   for (int i = 0; i < rawChannels; i++)
   {
     channelType.push_back (2);
@@ -764,6 +777,15 @@ void SemSegContextTree::train (const MultiDataset *md)
   ftypes = std::min (amountTypes, ftypes);
 
 ////////////////////////////////////////////////////
+
+  if (useRegionFeature)
+  {
+    for (int a = 0; a < (int)amountRegionpI.size(); a++)
+    {
+      regionProbs.push_back(vector<vector<double> > (amountRegionpI[a], vector<double> (classes, 0.0)));
+    }
+  }
+
   //balancing
   int featcounter = 0;
 
@@ -835,6 +857,24 @@ void SemSegContextTree::train (const MultiDataset *md)
 #endif
     allleaf = true;
     vector<MultiChannelImageT<unsigned short int> > lastfeats = currentfeats;
+    vector<vector<vector<double> > > lastRegionProbs = regionProbs;
+
+    if (useRegionFeature)
+    {
+      int rSize = (int)regionProbs.size();
+      for (int a = 0; a < rSize; a++)
+      {
+        int rSize2 = (int)regionProbs[a].size();
+        for (int b = 0; b < rSize2; b++)
+        {
+          int rSize3 = (int)regionProbs[a][b].size();
+          for (int c = 0; c < rSize3; c++)
+          {
+            regionProbs[a][b][c] = 0.0;
+          }
+        }
+      }
+    }
 
 #if 1
     Timer timerDepth;
@@ -862,7 +902,7 @@ void SemSegContextTree::train (const MultiDataset *md)
         {
           Operation *splitfeat = NULL;
           double splitval;
-          double bestig = getBestSplit (allfeats, lastfeats, labels, i, splitfeat, splitval, tree);
+          double bestig = getBestSplit (allfeats, lastfeats, labels, i, splitfeat, splitval, tree, lastRegionProbs);
 
           for (int ii = 0; ii < (int)lastfeats.size(); ii++)
           {
@@ -920,6 +960,7 @@ void SemSegContextTree::train (const MultiDataset *md)
                     feat.cfeats = &lastfeats[iCounter];
                     feat.cTree = tree;
                     feat.tree = &forest[tree];
+                    feat.rProbs = &lastRegionProbs[iCounter];
                     double val = splitfeat->getVal (feat, x, y);
 
                     //int subx = x / grid;
@@ -1000,6 +1041,7 @@ void SemSegContextTree::train (const MultiDataset *md)
                       feat.cfeats = &lastfeats[iCounter];
                       feat.cTree = tree;
                       feat.tree = &forest[tree];
+                      feat.rProbs = &lastRegionProbs[iCounter];
 
                       double val = splitfeat->getVal (feat, x, y);
 
@@ -1025,6 +1067,48 @@ void SemSegContextTree::train (const MultiDataset *md)
         }
       }
     }
+
+
+    if (useRegionFeature)
+    {
+      for (int iCounter = 0; iCounter < imgcounter; iCounter++)
+      {
+        int xsize = currentfeats[iCounter].width();
+        int ysize = currentfeats[iCounter].height();
+        int counter = 0;
+
+#pragma omp parallel for
+        for (int x = 0; x < xsize; x++)
+        {
+          for (int y = 0; y < ysize; y++)
+          {
+            for (int tree = 0; tree < nbTrees; tree++)
+            {
+              int node = currentfeats[iCounter].get(x, y, tree);
+              for (uint d = 0; d < forest[tree][node].dist.size(); d++)
+              {
+                regionProbs[iCounter][(int)(allfeats[iCounter](x, y, rawChannels))][d] += forest[tree][node].dist[d];
+              }
+            }
+          }
+        }
+      }
+
+      int rSize1 = (int)regionProbs.size();
+      for (int a = 0; a < rSize1; a++)
+      {
+        int rSize2 = (int)regionProbs[a].size();
+        for (int b = 0; b < rSize2; b++)
+        {
+          int rSize3 = (int)regionProbs[a][b].size();
+          for (int c = 0; c < rSize3; c++)
+          {
+            regionProbs[a][b][c] /= (double)(rSize[a][b]);
+          }
+        }
+      }
+    }
+
     //compute integral images
     if (firstiteration)
     {
@@ -1041,8 +1125,8 @@ void SemSegContextTree::train (const MultiDataset *md)
       computeIntegralImage (textonMap[i], integralTexton[i]);
 #endif
     }
-    
-    if(firstiteration)
+
+    if (firstiteration)
     {
       firstiteration = false;
     }
@@ -1052,6 +1136,9 @@ void SemSegContextTree::train (const MultiDataset *md)
 
     cout << "time for depth " << depth << ": " << timerDepth.getLastAbsolute() << endl;
 #endif
+    
+    lastfeats.clear();
+    lastRegionProbs.clear();
   }
 
   timer.stop();
@@ -1115,9 +1202,9 @@ void SemSegContextTree::train (const MultiDataset *md)
       cout << endl;
     }
   }
-  
-  std::map<int,int> featTypeCounter;
-  
+
+  std::map<int, int> featTypeCounter;
+
   for (int tree = 0; tree < nbTrees; tree++)
   {
     int t = (int)forest[tree].size();
@@ -1126,13 +1213,13 @@ void SemSegContextTree::train (const MultiDataset *md)
     {
       if (!forest[tree][i].isleaf && forest[tree][i].left != -1)
       {
-        featTypeCounter[forest[tree][i].feat->getFeatType()]+=1;
+        featTypeCounter[forest[tree][i].feat->getFeatType()] += 1;
       }
     }
   }
-  
+
   cout << "evaluation of featuretypes" << endl;
-  for(map<int,int>::const_iterator it = featTypeCounter.begin(); it != featTypeCounter.end(); it++)
+  for (map<int, int>::const_iterator it = featTypeCounter.begin(); it != featTypeCounter.end(); it++)
   {
     cerr << it->first << ": " << it->second << endl;
   }
@@ -1250,22 +1337,21 @@ void SemSegContextTree::extractBasicFeatures (NICE::MultiChannelImageT<double> &
       }
     }
   }
-  
-  if(useRegionFeature)
+
+  if (useRegionFeature)
   {
     //using segmentation
     Matrix regions;
     amountRegions = segmentation->segRegions (img, regions);
-    
+
     int cchannel = feats.channels();
     feats.addChannel(1);
-    
-    assert(feats.width() == regions.cols());
-    for(int y = 0; y < regions.rows(); y++)
+
+    for (int y = 0; y < regions.cols(); y++)
     {
-      for(int x = 0; x < regions.cols(); x++)
+      for (int x = 0; x < regions.rows(); x++)
       {
-        feats(x,y,cchannel) = regions(x,y);
+        feats(x, y, cchannel) = regions(x, y);
       }
     }
   }
@@ -1312,6 +1398,19 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   int amountRegions;
   extractBasicFeatures (feats, img, currentFile, amountRegions); //read image and do some simple transformations
 
+  vector<int> rSize;
+  if (useRegionFeature)
+  {
+    rSize = vector<int>(amountRegions, 0);
+    for (int y = 0; y < ysize; y++)
+    {
+      for (int x = 0; x < xsize; x++)
+      {
+        rSize[feats(x, y, rawChannels)]++;
+      }
+    }
+  }
+
   bool allleaf = false;
 
   MultiChannelImageT<unsigned short int> currentfeats (xsize, ysize, nbTrees);
@@ -1320,9 +1419,28 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
 
   depth = 0;
 
+  vector<vector<double> > regionProbs;
+  if (useRegionFeature)
+  {
+    regionProbs = vector<vector<double> > (amountRegions, vector<double> (classes, 0.0));
+  }
+
   for (int d = 0; d < maxDepth && !allleaf; d++)
   {
     depth++;
+    vector<vector<double> > lastRegionProbs = regionProbs;
+    if (useRegionFeature)
+    {
+      int rSize2 = (int)regionProbs.size();
+      for (int b = 0; b < rSize2; b++)
+      {
+        int rSize3 = (int)regionProbs[b].size();
+        for (int c = 0; c < rSize3; c++)
+        {
+          regionProbs[b][c] = 0.0;
+        }
+      }
+    }
 
 #ifdef TEXTONMAP
     double weight = computeWeight (depth, maxDepth) - computeWeight (depth - 1, maxDepth);
@@ -1355,6 +1473,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
             feat.cfeats = &lastfeats;
             feat.cTree = tree;
             feat.tree = &forest[tree];
+            feat.rProbs = &lastRegionProbs;
 
             double val = forest[tree][t].feat->getVal (feat, x, y);
 
@@ -1390,6 +1509,40 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
       }
     }
 
+    if (useRegionFeature)
+    {
+      int xsize = currentfeats.width();
+      int ysize = currentfeats.height();
+      int counter = 0;
+
+#pragma omp parallel for
+      for (int x = 0; x < xsize; x++)
+      {
+        for (int y = 0; y < ysize; y++)
+        {
+          for (int tree = 0; tree < nbTrees; tree++)
+          {
+            int node = currentfeats.get(x, y, tree);
+            for (uint d = 0; d < forest[tree][node].dist.size(); d++)
+            {
+              regionProbs[(int)(feats(x, y, rawChannels))][d] += forest[tree][node].dist[d];
+            }
+          }
+        }
+      }
+
+
+      int rSize2 = (int)regionProbs.size();
+      for (int b = 0; b < rSize2; b++)
+      {
+        int rSize3 = (int)regionProbs[b].size();
+        for (int c = 0; c < rSize3; c++)
+        {
+          regionProbs[b][c] /= (double)(rSize[b]);
+        }
+      }
+    }
+
     if (depth < maxDepth)
     {
       //compute integral images
@@ -1401,7 +1554,7 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
 #ifdef TEXTONMAP
       computeIntegralImage (textonMap, integralTexton);
 #endif
-      if(firstiteration)
+      if (firstiteration)
       {
         firstiteration = false;
       }
@@ -1558,29 +1711,29 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
   {
     //using segmentation
     Matrix regions;
-   
-    if(useRegionFeature)
+
+    if (useRegionFeature)
     {
       int rchannel = -1;
-      for(uint i = 0; i < channelType.size(); i++) 
+      for (uint i = 0; i < channelType.size(); i++)
       {
-        if(channelType[i] == 1)
+        if (channelType[i] == 1)
         {
           rchannel = i;
           break;
         }
       }
-      
+
       assert(rchannel > -1);
-      
+
       int xsize = feats.width();
       int ysize = feats.height();
       regions.resize(xsize, ysize);
-      for(int y = 0; y < ysize; y++)
+      for (int y = 0; y < ysize; y++)
       {
-        for(int x = 0; x < xsize; x++)
+        for (int x = 0; x < xsize; x++)
         {
-          regions(x,y) = feats(x,y,rchannel);
+          regions(x, y) = feats(x, y, rchannel);
         }
       }
     }
@@ -1588,9 +1741,10 @@ void SemSegContextTree::semanticseg (CachedExample *ce, NICE::Image & segresult,
     {
       amountRegions = segmentation->segRegions (img, regions);
     }
-    
-    vector<vector<double> > regionProbs(amountRegions, vector<double> (classes, 0.0));
-    
+
+    regionProbs.clear();
+    regionProbs = vector<vector<double> >(amountRegions, vector<double> (classes, 0.0));
+
     vector<int> bestlabels (amountRegions, 0);
 
     for (int y = 0; y < img.height(); y++)

+ 1 - 1
semseg/SemSegContextTree.h

@@ -193,7 +193,7 @@ class SemSegContextTree : public SemanticSegmentation, public NICE::Persistent
      * @param splitval
      * @return best information gain
      */
-    double getBestSplit ( std::vector<NICE::MultiChannelImageT<double> > &feats, std::vector<NICE::MultiChannelImageT<unsigned short int> > &currentfeats, const std::vector<NICE::MatrixT<int> > &labels, int node, Operation *&splitop, double &splitval, const int &tree );
+    double getBestSplit ( std::vector<NICE::MultiChannelImageT<double> > &feats, std::vector<NICE::MultiChannelImageT<unsigned short int> > &currentfeats, const std::vector<NICE::MatrixT<int> > &labels, int node, Operation *&splitop, double &splitval, const int &tree, std::vector<std::vector<std::vector<double> > > &regionProbs );
 
     /**
      * @brief computes the mean probability for a given class over all trees

+ 2 - 6
semseg/operations/Operations.cpp

@@ -98,13 +98,9 @@ std::string Operation::writeInfos()
   return ss.str();
 }
 
-double Equality::getVal ( const Features &feats, const int &x, const int &y )
+double RegionFeat::getVal ( const Features &feats, const int &x, const int &y )
 {
-  int xsize, ysize;
-  getXY ( feats, xsize, ysize );
-  double v1 = values->getVal ( feats, BOUND ( x + x1, 0, xsize - 1 ), BOUND ( y + y1, 0, ysize - 1 ), channel1 );
-  double v2 = values->getVal ( feats, BOUND ( x + x2, 0, xsize - 1 ), BOUND ( y + y2, 0, ysize - 1 ), channel2 );
-  return (double)(v1 == v2);
+  return (*feats.rProbs)[(*feats.feats)(x,y,channel1)][channel2];
 }
 
 double Minus::getVal ( const Features &feats, const int &x, const int &y )

+ 6 - 3
semseg/operations/Operations.h

@@ -104,6 +104,9 @@ struct Features {
 
   /** tree nodes */
   std::vector<TreeNode> *tree;
+  
+  /** probabilities for each region */
+  std::vector<std::vector<double> > *rProbs;
 };
 
 /**
@@ -383,7 +386,7 @@ class Operation
 /**
  * @brief region feature: returns rProbs[region id read at (x,y) from channel1][channel2]
  **/
-class Equality: public Operation
+class RegionFeat: public Operation
 {
   public:
     /**
@@ -402,7 +405,7 @@ class Equality: public Operation
      **/
     virtual Operation* clone()
     {
-      return new Equality();
+      return new RegionFeat();
     }
 
     /**
@@ -411,7 +414,7 @@ class Equality: public Operation
      **/
     virtual std::string writeInfos()
     {
-      std::string out = "Equality";
+      std::string out = "RegionFeat";
 
       if ( values != NULL )
         out += values->writeInfos();