11 жил өмнө · 8de76fb768
--- a/semseg/SemSegContextTree.cpp
+++ b/semseg/SemSegContextTree.cpp
@@ -7,7 +7,8 @@
 
				 
			
 
				 #include "vislearning/cbaselib/CachedExample.h"
			
 
				 #include "vislearning/cbaselib/PascalResults.h"
			
 
				-#include "vislearning/baselib/ColorSpace.h"
			
 
				+//#include "vislearning/baselib/ColorSpace.h"
			
 
				+#include "vislearning/baselib/cc.h"
			
 
				 #include "segmentation/RSMeanShift.h"
			
 
				 #include "segmentation/RSGraphBased.h"
			
 
				 #include "segmentation/RSSlic.h"
			
@@ -485,7 +486,7 @@ void SemSegContextTree::computeIntegralImage ( const NICE::MultiChannelImage3DT<
 
				   int zsize = feats.depth();
			
 
				 
			
 
				   // integral images for raw channels
			
 
				-  if ( firstiteration )
			
 
				+  if ( firstiteration && ftypes > 2 )
			
 
				   {
			
 
				 #pragma omp parallel for
			
 
				     for ( int it = 0; it < ( int ) integralMap.size(); it++ )
			
@@ -673,12 +674,13 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
 
				       nodeIndices.push_back ( MultiChannelImage3DT<unsigned short int> ( xsize, ysize, zsize, nbTrees ) );
			
 
				       nodeIndices[imgCounter].setAll ( 0 );
			
 
				 
			
 
				-      MultiChannelImage3DT<double> feats;
			
 
				-      allfeats.push_back ( feats );
			
 
				+//      MultiChannelImage3DT<double> feats;
			
 
				+//      allfeats.push_back ( feats );
			
 
				 
			
 
				       int amountRegions;
			
 
				       // read image and do some simple transformations
			
 
				-      extractBasicFeatures ( allfeats[imgCounter], imgData, filelist, amountRegions );
			
 
				+      addFeatureMaps ( imgData, filelist, amountRegions );
			
 
				+      allfeats.push_back(imgData);
			
 
				 
			
 
				       if ( useRegionFeature )
			
 
				       {
			
@@ -1124,25 +1126,19 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
 
				     }
			
 
				 
			
 
				     //compute integral images
			
 
				-    if ( firstiteration )
			
 
				+    int multi = std::max(0, ftypes-3);
			
 
				+    if ( firstiteration && ftypes > 2 )
			
 
				     {
			
 
				       // only add context layers if necessary
			
 
				-      int multi = std::max(0, ftypes-3);
			
 
				       for ( int i = 0; i < imgCounter; i++ )
			
 
				-      {
			
 
				         allfeats[i].addChannel ( (multi*classes) + rawChannels );
			
 
				-      }
			
 
				     }
			
 
				 
			
 
				-    for ( int i = 0; i < imgCounter; i++ )
			
 
				-    {
			
 
				-      computeIntegralImage ( nodeIndices[i], allfeats[i], channelType.size() - (2*classes) );
			
 
				-    }
			
 
				+    if (ftypes > 2)
			
 
				+      for ( int i = 0; i < imgCounter; i++ )
			
 
				+        computeIntegralImage ( nodeIndices[i], allfeats[i], channelType.size() - (multi*classes) );
			
 
				 
			
 
				-    if ( firstiteration )
			
 
				-    {
			
 
				-      firstiteration = false;
			
 
				-    }
			
 
				+    if ( firstiteration ) firstiteration = false;
			
 
				 
			
 
				 #if DEBUG
			
 
				     timerDepth.stop();
			
@@ -1267,139 +1263,111 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
 
				   timer.start();
			
 
				 }
			
 
				 
			
 
				-void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double> &feats, const NICE::MultiChannelImage3DT<double> &imgData, const vector<string> &filelist, int &amountRegions )
			
 
				+void SemSegContextTree::addFeatureMaps ( NICE::MultiChannelImage3DT<double> &imgData, const vector<string> &filelist, int &amountRegions )
			
 
				 {
			
 
				   int xsize = imgData.width();
			
 
				   int ysize = imgData.height();
			
 
				   int zsize = imgData.depth();
			
 
				 
			
 
				   amountRegions = 0;
			
 
				-  feats.reInit ( xsize, ysize, zsize, imgData.channels() );
			
 
				-  feats.setAll ( 0 );
			
 
				-
			
 
				-  //TODO: expand imgData instead of building feats!!! --> SAVING MEMORY
			
 
				 
			
 
				-  for ( int z = 0; z < zsize; z++ )
			
 
				+  // RGB to Lab
			
 
				+  if ( imagetype == IMAGETYPE_RGB )
			
 
				   {
			
 
				-    NICE::MultiChannelImageT<double> feats_tmp;
			
 
				-    feats_tmp.reInit ( xsize, ysize, 3 );
			
 
				-    if ( imagetype == IMAGETYPE_RGB )
			
 
				-    {
			
 
				-
			
 
				-      NICE::ColorImage img = imgData.getColor ( z );
			
 
				-      for ( int x = 0; x < xsize; x++ )
			
 
				-      {
			
 
				-        for ( int y = 0; y < ysize; y++ )
			
 
				-        {
			
 
				-          for ( int r = 0; r < 3; r++ )
			
 
				-          {
			
 
				-            feats_tmp.set ( x, y, img.getPixel ( x, y, r ), ( uint ) r );
			
 
				-          }
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-    }
			
 
				-    else
			
 
				-    {
			
 
				-
			
 
				-      NICE::ImageT<double> img = imgData.getChannelT ( z,0 );
			
 
				-      for ( int x = 0; x < xsize; x++ )
			
 
				-      {
			
 
				-        for ( int y = 0; y < ysize; y++ )
			
 
				+    for ( int z = 0; z < zsize; z++ )
			
 
				+      for ( int y = 0; y < ysize; y++ )
			
 
				+        for ( int x = 0; x < xsize; x++ )
			
 
				         {
			
 
				-          feats_tmp.set ( x, y, img.getPixel ( x, y ), 0 );
			
 
				-        }
			
 
				-      }
			
 
				+          double R, G, B, X, Y, Z, L, a, b;
			
 
				+          R = ( double )imgData.get( x, y, z, 0 ) / 255.0;
			
 
				+          G = ( double )imgData.get( x, y, z, 1 ) / 255.0;
			
 
				+          B = ( double )imgData.get( x, y, z, 2 ) / 255.0;
			
 
				 
			
 
				-    }
			
 
				+          ColorConversion::ccRGBtoXYZ( R, G, B, &X, &Y, &Z, 0 );
			
 
				+          ColorConversion::ccXYZtoCIE_Lab( X, Y, Z, &L, &a, &b, 0 );
			
 
				 
			
 
				-    if ( imagetype == IMAGETYPE_RGB )
			
 
				-      feats_tmp = ColorSpace::rgbtolab ( feats_tmp );
			
 
				-
			
 
				-    for ( int x = 0; x < xsize; x++ )
			
 
				-    {
			
 
				-      for ( int y = 0; y < ysize; y++ )
			
 
				-      {
			
 
				-        if ( imagetype == IMAGETYPE_RGB )
			
 
				-        {
			
 
				-          for ( uint r = 0; r < 3; r++ )
			
 
				-          {
			
 
				-            feats.set ( x, y, z, feats_tmp.get ( x, y, r ), r );
			
 
				-          }
			
 
				+          imgData.set( x, y, z, L, 0 );
			
 
				+          imgData.set( x, y, z, a, 1 );
			
 
				+          imgData.set( x, y, z, b, 2 );
			
 
				         }
			
 
				-        else
			
 
				-        {
			
 
				-          feats.set ( x, y, z, feats_tmp.get ( x, y, 0 ), 0 );
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				+  }
			
 
				 
			
 
				-    // Gradient layers
			
 
				-    if ( useGradient )
			
 
				-    {
			
 
				-      int currentsize = feats_tmp.channels();
			
 
				-      feats_tmp.addChannel ( currentsize );
			
 
				+  // Gradient layers
			
 
				+  if ( useGradient )
			
 
				+  {
			
 
				+    int currentsize = imgData.channels();
			
 
				+    imgData.addChannel ( currentsize );
			
 
				 
			
 
				+    for ( int z = 0; z < zsize; z++ )
			
 
				       for ( int c = 0; c < currentsize; c++ )
			
 
				       {
			
 
				-        ImageT<double> tmp = feats_tmp[c];
			
 
				-        ImageT<double> tmp2 = feats_tmp[c+currentsize];
			
 
				-
			
 
				+        ImageT<double> tmp = imgData.getChannelT(z, c);
			
 
				+        ImageT<double> tmp2( xsize, ysize );
			
 
				         NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
			
 
				+        for ( int y = 0; y < ysize; y++ )
			
 
				+          for ( int x = 0; x < xsize; x++ )
			
 
				+            imgData.set(x, y, z, tmp2.getPixelQuick(x,y), c+currentsize);
			
 
				       }
			
 
				-    }
			
 
				+  }
			
 
				 
			
 
				-    // Weijer color names
			
 
				-    if ( useWeijer )
			
 
				+  // Weijer color names
			
 
				+  if ( useWeijer )
			
 
				+  {
			
 
				+    if ( imagetype == IMAGETYPE_RGB )
			
 
				     {
			
 
				-      if ( imagetype == IMAGETYPE_RGB )
			
 
				+      int currentsize = imgData.channels();
			
 
				+      imgData.addChannel ( 11 );
			
 
				+      for ( int z = 0; z < zsize; z++ )
			
 
				       {
			
 
				         NICE::ColorImage img = imgData.getColor ( z );
			
 
				         NICE::MultiChannelImageT<double> cfeats;
			
 
				         lfcw->getFeats ( img, cfeats );
			
 
				-        feats_tmp.addChannel ( cfeats );
			
 
				-      }
			
 
				-      else
			
 
				-      {
			
 
				-        cerr << "Can't compute weijer features of a grayscale image." << endl;
			
 
				+        for ( int c = 0; c < cfeats.channels(); c++)
			
 
				+          for ( int y = 0; y < ysize; y++ )
			
 
				+            for ( int x = 0; x < xsize; x++ )
			
 
				+              imgData.set(x, y, z, cfeats.get(x,y,(uint)c), c+currentsize);
			
 
				       }
			
 
				     }
			
 
				+    else
			
 
				+    {
			
 
				+      cerr << "Can't compute weijer features of a grayscale image." << endl;
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				-    // arbitrary additional layer as image
			
 
				-    if ( useAdditionalLayer )
			
 
				+  // arbitrary additional layer as image
			
 
				+  if ( useAdditionalLayer )
			
 
				+  {
			
 
				+    int currentsize = imgData.channels();
			
 
				+    imgData.addChannel ( 1 );
			
 
				+    for ( int z = 0; z < zsize; z++ )
			
 
				     {
			
 
				       vector<string> list;
			
 
				       StringTools::split ( filelist[z], '/', list );
			
 
				       string layerPath = StringTools::trim ( filelist[z], list.back() ) + "addlayer/" + list.back();
			
 
				       NICE::Image layer ( layerPath );
			
 
				-      feats_tmp.addChannel ( layer );
			
 
				+      for ( int y = 0; y < ysize; y++ )
			
 
				+        for ( int x = 0; x < xsize; x++ )
			
 
				+          imgData.set(x, y, z, layer.getPixelQuick(x,y), currentsize);
			
 
				     }
			
 
				+  }
			
 
				     
			
 
				-    // read the geometric cues produced by Hoiem et al.
			
 
				-    if ( useHoiemFeatures )
			
 
				-    {
			
 
				-      // we could also give the following set as a config option
			
 
				-      string hoiemClasses_s = "sky 000 090-045 090-090 090-135 090 090-por 090-sol";
			
 
				-      vector<string> hoiemClasses;
			
 
				-      StringTools::split ( hoiemClasses_s, ' ', hoiemClasses );
			
 
				-
			
 
				-      // Now we have to do some fancy regular expressions :)
			
 
				-      // Original image filename: basel_000083.jpg
			
 
				-      // hoiem result: basel_000083_c_sky.png
			
 
				+  // read the geometric cues produced by Hoiem et al.
			
 
				+  if ( useHoiemFeatures )
			
 
				+  {
			
 
				+    // we could also give the following set as a config option
			
 
				+    string hoiemClasses_s = "sky 000 090-045 090-090 090-135 090 090-por 090-sol";
			
 
				+    vector<string> hoiemClasses;
			
 
				+    StringTools::split ( hoiemClasses_s, ' ', hoiemClasses );
			
 
				 
			
 
				-      // Fancy class of Ferid which supports string handling especially for filenames
			
 
				+    int currentsize = imgData.channels();
			
 
				+    imgData.addChannel ( hoiemClasses.size() );
			
 
				+    for ( int z = 0; z < zsize; z++ )
			
 
				+    {
			
 
				       FileName fn ( filelist[z] );
			
 
				       fn.removeExtension();
			
 
				       FileName fnBase = fn.extractFileName();
			
 
				 
			
 
				-      // counter for the channel index, starts with the current size of the destination multi-channel image
			
 
				-      int currentChannel = feats_tmp.channels();
			
 
				-
			
 
				-      // add a channel for each feature in advance
			
 
				-      feats_tmp.addChannel ( hoiemClasses.size() );
			
 
				-
			
 
				-      // loop through all geometric categories and add the images
			
 
				-      for ( vector<string>::const_iterator i = hoiemClasses.begin(); i != hoiemClasses.end(); i++, currentChannel++ )
			
 
				+      for ( vector<string>::const_iterator i = hoiemClasses.begin(); i != hoiemClasses.end(); i++, currentsize++ )
			
 
				       {
			
 
				         string hoiemClass = *i;
			
 
				         FileName fnConfidenceImage ( hoiemDirectory + fnBase.str() + "_c_" + hoiemClass + ".png" );
			
@@ -1410,32 +1378,17 @@ void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double
 
				         else
			
 
				         {
			
 
				           Image confidenceImage ( fnConfidenceImage.str() );
			
 
				-          // check whether the image size is consistent
			
 
				-          if ( confidenceImage.width() != feats_tmp.width() || confidenceImage.height() != feats_tmp.height() )
			
 
				+          if ( confidenceImage.width() != xsize || confidenceImage.height() != ysize )
			
 
				           {
			
 
				             fthrow ( Exception, "The size of the geometric confidence image does not match with the original image size: " << fnConfidenceImage.str() );
			
 
				           }
			
 
				-          ImageT<double> dst = feats_tmp[currentChannel];
			
 
				 
			
 
				           // copy standard image to double image
			
 
				-          for ( uint y = 0 ; y < ( uint ) confidenceImage.height(); y++ )
			
 
				-            for ( uint x = 0 ; x < ( uint ) confidenceImage.width(); x++ )
			
 
				-              feats_tmp ( x, y, currentChannel ) = ( double ) confidenceImage ( x, y );
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				-
			
 
				-    uint oldChannels = feats.channels();
			
 
				-    if ( feats.channels() < feats_tmp.channels() )
			
 
				-      feats.addChannel ( feats_tmp.channels()-feats.channels() );
			
 
				+          for ( int y = 0 ; y < confidenceImage.height(); y++ )
			
 
				+            for ( int x = 0 ; x < confidenceImage.width(); x++ )
			
 
				+              imgData ( x, y, z, currentsize ) = ( double ) confidenceImage ( x, y );
			
 
				 
			
 
				-    for ( int x = 0; x < xsize; x++ )
			
 
				-    {
			
 
				-      for ( int y = 0; y < ysize; y++ )
			
 
				-      {
			
 
				-        for ( uint r = oldChannels; r < ( uint ) feats_tmp.channels(); r++ )
			
 
				-        {
			
 
				-          feats.set ( x, y, z, feats_tmp.get ( x, y, r ), r );
			
 
				+          currentsize++;
			
 
				         }
			
 
				       }
			
 
				     }
			
@@ -1448,25 +1401,20 @@ void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double
 
				     regions.reInit( xsize, ysize, zsize );
			
 
				     amountRegions = segmentation->segRegions ( imgData, regions, imagetype );
			
 
				 
			
 
				-    int cchannel = feats.channels();
			
 
				-    feats.addChannel ( 1 );
			
 
				+    int currentsize = imgData.channels();
			
 
				+    imgData.addChannel ( 1 );
			
 
				 
			
 
				     for ( int z = 0; z < ( int ) regions.channels(); z++ )
			
 
				-    {
			
 
				       for ( int y = 0; y < regions.height(); y++ )
			
 
				-      {
			
 
				         for ( int x = 0; x < regions.width(); x++ )
			
 
				-        {
			
 
				-          feats.set ( x, y, z, regions ( x, y, ( uint ) z ), cchannel );
			
 
				-        }
			
 
				-      }
			
 
				-    }
			
 
				+          imgData.set ( x, y, z, regions ( x, y, ( uint ) z ), currentsize );
			
 
				+
			
 
				   }
			
 
				 
			
 
				 }
			
 
				 
			
 
				 void SemSegContextTree::classify (
			
 
				-    const NICE::MultiChannelImage3DT<double> & imgData,
			
 
				+    NICE::MultiChannelImage3DT<double> & imgData,
			
 
				     NICE::MultiChannelImageT<double> & segresult,
			
 
				     NICE::MultiChannelImage3DT<double> & probabilities,
			
 
				     const std::vector<std::string> & filelist )
			
@@ -1511,7 +1459,7 @@ void SemSegContextTree::classify (
 
				 
			
 
				   // Basic Features
			
 
				   int amountRegions;
			
 
				-  extractBasicFeatures ( feats, imgData, filelist, amountRegions );
			
 
				+  addFeatureMaps ( imgData, filelist, amountRegions );
			
 
				 
			
 
				   vector<int> rSize;
			
 
				   if ( useRegionFeature )
			
@@ -1524,7 +1472,7 @@ void SemSegContextTree::classify (
 
				       {
			
 
				         for ( int x = 0; x < xsize; x++ )
			
 
				         {
			
 
				-          rSize[feats ( x, y, z, rawChannels ) ]++;
			
 
				+          rSize[imgData ( x, y, z, rawChannels ) ]++;
			
 
				         }
			
 
				       }
			
 
				     }
			
@@ -1573,7 +1521,7 @@ void SemSegContextTree::classify (
 
				             {
			
 
				               noNewSplit = false;
			
 
				               Features feat;
			
 
				-              feat.feats = &feats;
			
 
				+              feat.feats = &imgData;
			
 
				               feat.nIndices = &lastNodeIndices;
			
 
				               feat.cTree = tree;
			
 
				               feat.tree = &forest[tree];
			
@@ -1632,7 +1580,7 @@ void SemSegContextTree::classify (
 
				               int node = nodeIndices.get ( x, y, z, tree );
			
 
				               for ( uint c = 0; c < forest[tree][node].dist.size(); c++ )
			
 
				               {
			
 
				-                int r = (int) feats ( x, y, z, rawChannels );
			
 
				+                int r = (int) imgData ( x, y, z, rawChannels );
			
 
				                 regionProbs[r][c] += forest[tree][node].dist[c];
			
 
				               }
			
 
				             }
			
@@ -1650,18 +1598,20 @@ void SemSegContextTree::classify (
 
				       }
			
 
				     }
			
 
				 
			
 
				+    int multi = std::max(0, ftypes-3);
			
 
				     if ( depth < maxDepth )
			
 
				     {
			
 
				       //compute integral images
			
 
				-      if ( firstiteration )
			
 
				-      {
			
 
				-        feats.addChannel ( (2*classes) + rawChannels );
			
 
				-      }
			
 
				-      computeIntegralImage ( nodeIndices, feats, channelType.size() - (2*classes) );
			
 
				-      if ( firstiteration )
			
 
				+      if ( firstiteration && ftypes > 2 )
			
 
				       {
			
 
				-        firstiteration = false;
			
 
				+        // only add context layers if necessary
			
 
				+        imgData.addChannel ( (multi*classes) + rawChannels );
			
 
				       }
			
 
				+
			
 
				+      if ( ftypes> 2 )
			
 
				+        computeIntegralImage ( nodeIndices, imgData, channelType.size() - (multi*classes) );
			
 
				+
			
 
				+      if ( firstiteration ) firstiteration = false;
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -1794,9 +1744,9 @@ void SemSegContextTree::classify (
 
				   {
			
 
				     // labeling by region
			
 
				     NICE::MultiChannelImageT<int> regions;
			
 
				-    int xsize = feats.width();
			
 
				-    int ysize = feats.height();
			
 
				-    int zsize = feats.depth();
			
 
				+    int xsize = imgData.width();
			
 
				+    int ysize = imgData.height();
			
 
				+    int zsize = imgData.depth();
			
 
				     regions.reInit ( xsize, ysize, zsize );
			
 
				 
			
 
				     if ( useRegionFeature )
			
@@ -1819,7 +1769,7 @@ void SemSegContextTree::classify (
 
				         {
			
 
				           for ( int x = 0; x < xsize; x++ )
			
 
				           {
			
 
				-            regions.set ( x, y, feats ( x, y, z, rchannel ), ( uint ) z );
			
 
				+            regions.set ( x, y, imgData ( x, y, z, rchannel ), ( uint ) z );
			
 
				           }
			
 
				         }
			
 
				       }
			
--- a/semseg/SemSegContextTree.h
+++ b/semseg/SemSegContextTree.h
@@ -174,7 +174,7 @@ public:
 
				    * @param segresult segmentation results
			
 
				    * @param probabilities probabilities for each pixel
			
 
				    */
			
 
				-  void classify ( const NICE::MultiChannelImage3DT<double> &imgData,
			
 
				+  void classify ( NICE::MultiChannelImage3DT<double> &imgData,
			
 
				                   NICE::MultiChannelImageT<double> & segresult,
			
 
				                   NICE::MultiChannelImage3DT<double> & probabilities,
			
 
				                   const std::vector<std::string> & filelist );
			
@@ -202,7 +202,7 @@ public:
 
				    * @param currentFile image filename
			
 
				    * @return void
			
 
				    **/
			
 
				-  void extractBasicFeatures ( NICE::MultiChannelImage3DT<double> &feats, const NICE::MultiChannelImage3DT<double> &imgData, const std::vector<std::string> &filelist, int &amountRegions );
			
 
				+  void addFeatureMaps ( NICE::MultiChannelImage3DT<double> &imgData, const std::vector<std::string> &filelist, int &amountRegions );
			
 
				 
			
 
				   /**
			
 
				    * compute best split for current settings
			
--- a/semseg/SemanticSegmentation.h
+++ b/semseg/SemanticSegmentation.h
@@ -57,7 +57,7 @@ public:
 
				    *        corresponding probabilities for each pixel
			
 
				    * @param filelist filename list of images that represent slices of a stack
			
 
				    */
			
 
				-  virtual void classify ( const NICE::MultiChannelImage3DT<double> & imgData,
			
 
				+  virtual void classify ( NICE::MultiChannelImage3DT<double> & imgData,
			
 
				                           NICE::MultiChannelImageT<double> & segresult,
			
 
				                           NICE::MultiChannelImage3DT<double> & probabilities,
			
 
				                           const std::vector<std::string> & filelist ) = 0;