Răsfoiți Sursa

reduced memory usage

Sven Sickert 11 ani în urmă
părinte
commit
8de76fb768
3 fișiere modificate, cu 109 adăugiri și 159 ștergeri
  1. 106 156
      semseg/SemSegContextTree.cpp
  2. 2 2
      semseg/SemSegContextTree.h
  3. 1 1
      semseg/SemanticSegmentation.h

+ 106 - 156
semseg/SemSegContextTree.cpp

@@ -7,7 +7,8 @@
 
 #include "vislearning/cbaselib/CachedExample.h"
 #include "vislearning/cbaselib/PascalResults.h"
-#include "vislearning/baselib/ColorSpace.h"
+//#include "vislearning/baselib/ColorSpace.h"
+#include "vislearning/baselib/cc.h"
 #include "segmentation/RSMeanShift.h"
 #include "segmentation/RSGraphBased.h"
 #include "segmentation/RSSlic.h"
@@ -485,7 +486,7 @@ void SemSegContextTree::computeIntegralImage ( const NICE::MultiChannelImage3DT<
   int zsize = feats.depth();
 
   // integral images for raw channels
-  if ( firstiteration )
+  if ( firstiteration && ftypes > 2 )
   {
 #pragma omp parallel for
     for ( int it = 0; it < ( int ) integralMap.size(); it++ )
@@ -673,12 +674,13 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
       nodeIndices.push_back ( MultiChannelImage3DT<unsigned short int> ( xsize, ysize, zsize, nbTrees ) );
       nodeIndices[imgCounter].setAll ( 0 );
 
-      MultiChannelImage3DT<double> feats;
-      allfeats.push_back ( feats );
+//      MultiChannelImage3DT<double> feats;
+//      allfeats.push_back ( feats );
 
       int amountRegions;
       // read image and do some simple transformations
-      extractBasicFeatures ( allfeats[imgCounter], imgData, filelist, amountRegions );
+      addFeatureMaps ( imgData, filelist, amountRegions );
+      allfeats.push_back(imgData);
 
       if ( useRegionFeature )
       {
@@ -1124,25 +1126,19 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
     }
 
     //compute integral images
-    if ( firstiteration )
+    int multi = std::max(0, ftypes-3);
+    if ( firstiteration && ftypes > 2 )
     {
       // only add context layers if necessary
-      int multi = std::max(0, ftypes-3);
       for ( int i = 0; i < imgCounter; i++ )
-      {
         allfeats[i].addChannel ( (multi*classes) + rawChannels );
-      }
     }
 
-    for ( int i = 0; i < imgCounter; i++ )
-    {
-      computeIntegralImage ( nodeIndices[i], allfeats[i], channelType.size() - (2*classes) );
-    }
+    if (ftypes > 2)
+      for ( int i = 0; i < imgCounter; i++ )
+        computeIntegralImage ( nodeIndices[i], allfeats[i], channelType.size() - (multi*classes) );
 
-    if ( firstiteration )
-    {
-      firstiteration = false;
-    }
+    if ( firstiteration ) firstiteration = false;
 
 #if DEBUG
     timerDepth.stop();
@@ -1267,139 +1263,111 @@ void SemSegContextTree::train ( const LabeledSet * trainp )
   timer.start();
 }
 
-void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double> &feats, const NICE::MultiChannelImage3DT<double> &imgData, const vector<string> &filelist, int &amountRegions )
+void SemSegContextTree::addFeatureMaps ( NICE::MultiChannelImage3DT<double> &imgData, const vector<string> &filelist, int &amountRegions )
 {
   int xsize = imgData.width();
   int ysize = imgData.height();
   int zsize = imgData.depth();
 
   amountRegions = 0;
-  feats.reInit ( xsize, ysize, zsize, imgData.channels() );
-  feats.setAll ( 0 );
-
-  //TODO: expand imgData instead of building feats!!! --> SAVING MEMORY
 
-  for ( int z = 0; z < zsize; z++ )
+  // RGB to Lab
+  if ( imagetype == IMAGETYPE_RGB )
   {
-    NICE::MultiChannelImageT<double> feats_tmp;
-    feats_tmp.reInit ( xsize, ysize, 3 );
-    if ( imagetype == IMAGETYPE_RGB )
-    {
-
-      NICE::ColorImage img = imgData.getColor ( z );
-      for ( int x = 0; x < xsize; x++ )
-      {
-        for ( int y = 0; y < ysize; y++ )
-        {
-          for ( int r = 0; r < 3; r++ )
-          {
-            feats_tmp.set ( x, y, img.getPixel ( x, y, r ), ( uint ) r );
-          }
-        }
-      }
-
-    }
-    else
-    {
-
-      NICE::ImageT<double> img = imgData.getChannelT ( z,0 );
-      for ( int x = 0; x < xsize; x++ )
-      {
-        for ( int y = 0; y < ysize; y++ )
+    for ( int z = 0; z < zsize; z++ )
+      for ( int y = 0; y < ysize; y++ )
+        for ( int x = 0; x < xsize; x++ )
         {
-          feats_tmp.set ( x, y, img.getPixel ( x, y ), 0 );
-        }
-      }
+          double R, G, B, X, Y, Z, L, a, b;
+          R = ( double )imgData.get( x, y, z, 0 ) / 255.0;
+          G = ( double )imgData.get( x, y, z, 1 ) / 255.0;
+          B = ( double )imgData.get( x, y, z, 2 ) / 255.0;
 
-    }
+          ColorConversion::ccRGBtoXYZ( R, G, B, &X, &Y, &Z, 0 );
+          ColorConversion::ccXYZtoCIE_Lab( X, Y, Z, &L, &a, &b, 0 );
 
-    if ( imagetype == IMAGETYPE_RGB )
-      feats_tmp = ColorSpace::rgbtolab ( feats_tmp );
-
-    for ( int x = 0; x < xsize; x++ )
-    {
-      for ( int y = 0; y < ysize; y++ )
-      {
-        if ( imagetype == IMAGETYPE_RGB )
-        {
-          for ( uint r = 0; r < 3; r++ )
-          {
-            feats.set ( x, y, z, feats_tmp.get ( x, y, r ), r );
-          }
+          imgData.set( x, y, z, L, 0 );
+          imgData.set( x, y, z, a, 1 );
+          imgData.set( x, y, z, b, 2 );
         }
-        else
-        {
-          feats.set ( x, y, z, feats_tmp.get ( x, y, 0 ), 0 );
-        }
-      }
-    }
+  }
 
-    // Gradient layers
-    if ( useGradient )
-    {
-      int currentsize = feats_tmp.channels();
-      feats_tmp.addChannel ( currentsize );
+  // Gradient layers
+  if ( useGradient )
+  {
+    int currentsize = imgData.channels();
+    imgData.addChannel ( currentsize );
 
+    for ( int z = 0; z < zsize; z++ )
       for ( int c = 0; c < currentsize; c++ )
       {
-        ImageT<double> tmp = feats_tmp[c];
-        ImageT<double> tmp2 = feats_tmp[c+currentsize];
-
+        ImageT<double> tmp = imgData.getChannelT(z, c);
+        ImageT<double> tmp2( xsize, ysize );
         NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
+        for ( int y = 0; y < ysize; y++ )
+          for ( int x = 0; x < xsize; x++ )
+            imgData.set(x, y, z, tmp2.getPixelQuick(x,y), c+currentsize);
       }
-    }
+  }
 
-    // Weijer color names
-    if ( useWeijer )
+  // Weijer color names
+  if ( useWeijer )
+  {
+    if ( imagetype == IMAGETYPE_RGB )
     {
-      if ( imagetype == IMAGETYPE_RGB )
+      int currentsize = imgData.channels();
+      imgData.addChannel ( 11 );
+      for ( int z = 0; z < zsize; z++ )
       {
         NICE::ColorImage img = imgData.getColor ( z );
         NICE::MultiChannelImageT<double> cfeats;
         lfcw->getFeats ( img, cfeats );
-        feats_tmp.addChannel ( cfeats );
-      }
-      else
-      {
-        cerr << "Can't compute weijer features of a grayscale image." << endl;
+        for ( int c = 0; c < cfeats.channels(); c++)
+          for ( int y = 0; y < ysize; y++ )
+            for ( int x = 0; x < xsize; x++ )
+              imgData.set(x, y, z, cfeats.get(x,y,(uint)c), c+currentsize);
       }
     }
+    else
+    {
+      cerr << "Can't compute weijer features of a grayscale image." << endl;
+    }
+  }
 
-    // arbitrary additional layer as image
-    if ( useAdditionalLayer )
+  // arbitrary additional layer as image
+  if ( useAdditionalLayer )
+  {
+    int currentsize = imgData.channels();
+    imgData.addChannel ( 1 );
+    for ( int z = 0; z < zsize; z++ )
     {
       vector<string> list;
       StringTools::split ( filelist[z], '/', list );
       string layerPath = StringTools::trim ( filelist[z], list.back() ) + "addlayer/" + list.back();
       NICE::Image layer ( layerPath );
-      feats_tmp.addChannel ( layer );
+      for ( int y = 0; y < ysize; y++ )
+        for ( int x = 0; x < xsize; x++ )
+          imgData.set(x, y, z, layer.getPixelQuick(x,y), currentsize);
     }
+  }
     
-    // read the geometric cues produced by Hoiem et al.
-    if ( useHoiemFeatures )
-    {
-      // we could also give the following set as a config option
-      string hoiemClasses_s = "sky 000 090-045 090-090 090-135 090 090-por 090-sol";
-      vector<string> hoiemClasses;
-      StringTools::split ( hoiemClasses_s, ' ', hoiemClasses );
-
-      // Now we have to do some fancy regular expressions :)
-      // Original image filename: basel_000083.jpg
-      // hoiem result: basel_000083_c_sky.png
+  // read the geometric cues produced by Hoiem et al.
+  if ( useHoiemFeatures )
+  {
+    // we could also give the following set as a config option
+    string hoiemClasses_s = "sky 000 090-045 090-090 090-135 090 090-por 090-sol";
+    vector<string> hoiemClasses;
+    StringTools::split ( hoiemClasses_s, ' ', hoiemClasses );
 
-      // Fancy class of Ferid which supports string handling especially for filenames
+    int currentsize = imgData.channels();
+    imgData.addChannel ( hoiemClasses.size() );
+    for ( int z = 0; z < zsize; z++ )
+    {
       FileName fn ( filelist[z] );
       fn.removeExtension();
       FileName fnBase = fn.extractFileName();
 
-      // counter for the channel index, starts with the current size of the destination multi-channel image
-      int currentChannel = feats_tmp.channels();
-
-      // add a channel for each feature in advance
-      feats_tmp.addChannel ( hoiemClasses.size() );
-
-      // loop through all geometric categories and add the images
-      for ( vector<string>::const_iterator i = hoiemClasses.begin(); i != hoiemClasses.end(); i++, currentChannel++ )
+      for ( vector<string>::const_iterator i = hoiemClasses.begin(); i != hoiemClasses.end(); i++, currentsize++ )
       {
         string hoiemClass = *i;
         FileName fnConfidenceImage ( hoiemDirectory + fnBase.str() + "_c_" + hoiemClass + ".png" );
@@ -1410,32 +1378,17 @@ void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double
         else
         {
           Image confidenceImage ( fnConfidenceImage.str() );
-          // check whether the image size is consistent
-          if ( confidenceImage.width() != feats_tmp.width() || confidenceImage.height() != feats_tmp.height() )
+          if ( confidenceImage.width() != xsize || confidenceImage.height() != ysize )
           {
             fthrow ( Exception, "The size of the geometric confidence image does not match with the original image size: " << fnConfidenceImage.str() );
           }
-          ImageT<double> dst = feats_tmp[currentChannel];
 
           // copy standard image to double image
-          for ( uint y = 0 ; y < ( uint ) confidenceImage.height(); y++ )
-            for ( uint x = 0 ; x < ( uint ) confidenceImage.width(); x++ )
-              feats_tmp ( x, y, currentChannel ) = ( double ) confidenceImage ( x, y );
-        }
-      }
-    }
-
-    uint oldChannels = feats.channels();
-    if ( feats.channels() < feats_tmp.channels() )
-      feats.addChannel ( feats_tmp.channels()-feats.channels() );
+          for ( int y = 0 ; y < confidenceImage.height(); y++ )
+            for ( int x = 0 ; x < confidenceImage.width(); x++ )
+              imgData ( x, y, z, currentsize ) = ( double ) confidenceImage ( x, y );
 
-    for ( int x = 0; x < xsize; x++ )
-    {
-      for ( int y = 0; y < ysize; y++ )
-      {
-        for ( uint r = oldChannels; r < ( uint ) feats_tmp.channels(); r++ )
-        {
-          feats.set ( x, y, z, feats_tmp.get ( x, y, r ), r );
+          currentsize++;
         }
       }
     }
@@ -1448,25 +1401,20 @@ void SemSegContextTree::extractBasicFeatures ( NICE::MultiChannelImage3DT<double
     regions.reInit( xsize, ysize, zsize );
     amountRegions = segmentation->segRegions ( imgData, regions, imagetype );
 
-    int cchannel = feats.channels();
-    feats.addChannel ( 1 );
+    int currentsize = imgData.channels();
+    imgData.addChannel ( 1 );
 
     for ( int z = 0; z < ( int ) regions.channels(); z++ )
-    {
       for ( int y = 0; y < regions.height(); y++ )
-      {
         for ( int x = 0; x < regions.width(); x++ )
-        {
-          feats.set ( x, y, z, regions ( x, y, ( uint ) z ), cchannel );
-        }
-      }
-    }
+          imgData.set ( x, y, z, regions ( x, y, ( uint ) z ), currentsize );
+
   }
 
 }
 
 void SemSegContextTree::classify (
-    const NICE::MultiChannelImage3DT<double> & imgData,
+    NICE::MultiChannelImage3DT<double> & imgData,
     NICE::MultiChannelImageT<double> & segresult,
     NICE::MultiChannelImage3DT<double> & probabilities,
     const std::vector<std::string> & filelist )
@@ -1511,7 +1459,7 @@ void SemSegContextTree::classify (
 
   // Basic Features
   int amountRegions;
-  extractBasicFeatures ( feats, imgData, filelist, amountRegions );
+  addFeatureMaps ( imgData, filelist, amountRegions );
 
   vector<int> rSize;
   if ( useRegionFeature )
@@ -1524,7 +1472,7 @@ void SemSegContextTree::classify (
       {
         for ( int x = 0; x < xsize; x++ )
         {
-          rSize[feats ( x, y, z, rawChannels ) ]++;
+          rSize[imgData ( x, y, z, rawChannels ) ]++;
         }
       }
     }
@@ -1573,7 +1521,7 @@ void SemSegContextTree::classify (
             {
               noNewSplit = false;
               Features feat;
-              feat.feats = &feats;
+              feat.feats = &imgData;
               feat.nIndices = &lastNodeIndices;
               feat.cTree = tree;
               feat.tree = &forest[tree];
@@ -1632,7 +1580,7 @@ void SemSegContextTree::classify (
               int node = nodeIndices.get ( x, y, z, tree );
               for ( uint c = 0; c < forest[tree][node].dist.size(); c++ )
               {
-                int r = (int) feats ( x, y, z, rawChannels );
+                int r = (int) imgData ( x, y, z, rawChannels );
                 regionProbs[r][c] += forest[tree][node].dist[c];
               }
             }
@@ -1650,18 +1598,20 @@ void SemSegContextTree::classify (
       }
     }
 
+    int multi = std::max(0, ftypes-3);
     if ( depth < maxDepth )
     {
       //compute integral images
-      if ( firstiteration )
-      {
-        feats.addChannel ( (2*classes) + rawChannels );
-      }
-      computeIntegralImage ( nodeIndices, feats, channelType.size() - (2*classes) );
-      if ( firstiteration )
+      if ( firstiteration && ftypes > 2 )
       {
-        firstiteration = false;
+        // only add context layers if necessary
+        imgData.addChannel ( (multi*classes) + rawChannels );
       }
+
+      if ( ftypes> 2 )
+        computeIntegralImage ( nodeIndices, imgData, channelType.size() - (multi*classes) );
+
+      if ( firstiteration ) firstiteration = false;
     }
   }
 
@@ -1794,9 +1744,9 @@ void SemSegContextTree::classify (
   {
     // labeling by region
     NICE::MultiChannelImageT<int> regions;
-    int xsize = feats.width();
-    int ysize = feats.height();
-    int zsize = feats.depth();
+    int xsize = imgData.width();
+    int ysize = imgData.height();
+    int zsize = imgData.depth();
     regions.reInit ( xsize, ysize, zsize );
 
     if ( useRegionFeature )
@@ -1819,7 +1769,7 @@ void SemSegContextTree::classify (
         {
           for ( int x = 0; x < xsize; x++ )
           {
-            regions.set ( x, y, feats ( x, y, z, rchannel ), ( uint ) z );
+            regions.set ( x, y, imgData ( x, y, z, rchannel ), ( uint ) z );
           }
         }
       }

+ 2 - 2
semseg/SemSegContextTree.h

@@ -174,7 +174,7 @@ public:
    * @param segresult segmentation results
    * @param probabilities probabilities for each pixel
    */
-  void classify ( const NICE::MultiChannelImage3DT<double> &imgData,
+  void classify ( NICE::MultiChannelImage3DT<double> &imgData,
                   NICE::MultiChannelImageT<double> & segresult,
                   NICE::MultiChannelImage3DT<double> & probabilities,
                   const std::vector<std::string> & filelist );
@@ -202,7 +202,7 @@ public:
    * @param currentFile image filename
    * @return void
    **/
-  void extractBasicFeatures ( NICE::MultiChannelImage3DT<double> &feats, const NICE::MultiChannelImage3DT<double> &imgData, const std::vector<std::string> &filelist, int &amountRegions );
+  void addFeatureMaps ( NICE::MultiChannelImage3DT<double> &imgData, const std::vector<std::string> &filelist, int &amountRegions );
 
   /**
    * compute best split for current settings

+ 1 - 1
semseg/SemanticSegmentation.h

@@ -57,7 +57,7 @@ public:
    *        corresponding probabilities for each pixel
    * @param filelist filename list of images that represent slices of a stack
    */
-  virtual void classify ( const NICE::MultiChannelImage3DT<double> & imgData,
+  virtual void classify ( NICE::MultiChannelImage3DT<double> & imgData,
                           NICE::MultiChannelImageT<double> & segresult,
                           NICE::MultiChannelImage3DT<double> & probabilities,
                           const std::vector<std::string> & filelist ) = 0;