Bladeren bron

bugfixes, 2d mode, 16bit grayscale images

Sven Sickert 12 jaren geleden
bovenliggende
commit
14f5a6b95b

+ 6 - 0
image/MultiChannelImage3DT.h

@@ -123,12 +123,18 @@ public:
   /** convert to ice image */
   void convertToGrey( NICE::Image & img, int z, uint channel = 0, bool normalize = true ) const;
 
+	/** convert slice z of a channel to an ImageT<P> (not normalized by default) */
+  void convertToGrey( NICE::ImageT<P> & img, int z, uint channel = 0, bool normalize = false ) const;
+	
   /** convert to ice colorimage */
   void convertToColor( NICE::ColorImage & img, int z, const int chan1 = 0, const int chan2 = 1, const int chan3 = 2 ) const;
 
   /** return image for visualization */
   Image getChannel( int z, uint channel = 0 ) const;
 
+	/** return slice z of a channel as an ImageT<P> for visualization */
+  ImageT<P> getChannelT( int z, uint channel = 0 ) const;
+
   /** return rgb image for visualization */
   ColorImage getColor(int z) const;
 

+ 62 - 4
image/MultiChannelImage3DT.tcc

@@ -353,10 +353,21 @@ Image MultiChannelImage3DT<P>::getChannel( int z, uint channel ) const
 
   NICE::Image img;
   convertToGrey( img, z, channel, true );
-  /*
-      P min, max;
-      statistics ( min, max, channel );
-      fprintf (stderr, "MultiChannelImage3DT<>::showChannel: max %f min %f\n", (double)max, (double)min );*/
+
+  return img;
+}
+
+/**
+ * Return slice z of one channel as an ImageT<P> for visualization.
+ *
+ * The image is filled through convertToGrey() with normalization disabled.
+ * In addition the min/max reported by statistics() for the channel are
+ * printed to stderr.
+ *
+ * @param z       index of the slice to extract
+ * @param channel index of the channel; must be smaller than numChannels
+ * @return the image filled by convertToGrey()
+ */
+template<class P>
+ImageT<P> MultiChannelImage3DT<P>::getChannelT( int z, uint channel ) const
+{
+  assert( channel < numChannels );
+
+  NICE::ImageT<P> img;
+  convertToGrey( img, z, channel, false );
+
+  // Diagnostic only: the statistics do not influence the returned image.
+  // NOTE(review): statistics() runs on every call - presumably over the whole
+  // channel; confirm the cost when this is invoked once per slice.
+  P min, max;
+  statistics ( min, max, channel );
+  fprintf (stderr, "MultiChannelImage3DT<>::getChannelT: max %f min %f\n", (double)max, (double)min );
 
   return img;
 }
@@ -408,6 +419,53 @@ void MultiChannelImage3DT<P>::convertToGrey( NICE::Image & img, int z, uint chan
   }
 }
 
+/**
+ * Convert slice z of one channel into an ImageT<P>.
+ *
+ * @param img       output image; resized to xsize x ysize
+ * @param z         index of the slice to convert
+ * @param channel   index of the channel; must be smaller than numChannels
+ * @param normalize if true, values are rescaled to integer grey levels in
+ *                  [0,255] using the min/max reported by statistics() for
+ *                  the channel; if false, values are copied unchanged
+ */
+template<class P>
+void MultiChannelImage3DT<P>::convertToGrey( NICE::ImageT<P> & img, int z, uint channel,  bool normalize ) const
+{
+  assert( channel < numChannels );
+
+  // min/max are only filled in (and only read) when normalize is set
+  P min = P();
+  P max = P();
+
+  if ( normalize )
+  {
+    statistics( min, max, channel );
+    fprintf( stderr, "MultiChannelImage3DT<>::convertToGrey: max %f min %f\n", ( double )max, ( double )min );
+  }
+
+  img.resize( xsize, ysize );
+
+  if ( normalize && ( max - min < std::numeric_limits<double>::min() ) )
+  {
+    // no value range to stretch: fill with the single value instead of
+    // dividing by ( max - min )
+    img.set( max );
+    fprintf( stderr, "MultiChannelImage3DT::convertToGrey: image is uniform! (%f)\n", ( double )max );
+    return;
+  }
+
+  // widen before multiplying so the slice offset cannot overflow int on
+  // large volumes; k walks the slice in row-major order
+  long k = ( long )z * xsize * ysize;
+
+  for ( int y = 0 ; y < ysize; y++ )
+  {
+    for ( int x = 0 ; x < xsize ; x++, k++ )
+    {
+      if ( normalize )
+      {
+        // quantized to an integer grey level, as before
+        img.setPixel( x, y, ( int )(( data[channel][k] - min ) * 255 / ( max - min ) ) );
+      }
+      else
+      {
+        // copy the raw value; a detour through int would truncate P
+        img.setPixel( x, y, data[channel][k] );
+      }
+    }
+  }
+}
+
 template<class P>
 void MultiChannelImage3DT<P>::convertToColor( NICE::ColorImage & img, int z, const int chan1, const int chan2, const int chan3) const
 {

+ 18 - 9
progs/testSemanticSegmentation.cpp

@@ -78,6 +78,8 @@ int main( int argc, char **argv )
 
   bool write_results_pascal = conf.gB( "debug", "write_results_pascal", false );
 
+	bool run_3dseg = conf.gB( "debug", "run_3dseg", true);
+
   std::string resultdir = conf.gS( "debug", "resultdir", "." );
 
   if ( write_results )
@@ -140,7 +142,15 @@ int main( int argc, char **argv )
     segresult.addChannel( lm );
     gt.addChannel( lm_gt );
 
-		if ( depthCount < zsizeVec[idx] ) continue;
+		int depthBoundary = 0;
+		int zsize = 1;
+		if (run_3dseg)
+		{
+			depthBoundary = zsizeVec[idx];
+			zsize = zsizeVec[idx];
+		}
+
+		if ( depthCount < depthBoundary ) continue;
 
     NICE::MultiChannelImage3DT<double> probabilities;
 		NICE::MultiChannelImage3DT<double> imgData;
@@ -150,7 +160,7 @@ int main( int argc, char **argv )
     fprintf( stderr, "testSemanticSegmentation: Segmentation finished !\n" );
 
     // save to file
-		for (int z = 0; z < zsizeVec[idx]; z++)
+		for (int z = 0; z < zsize; z++)
 		{
 			std::string fname = StringTools::baseName( filelist[z], false );
 
@@ -185,16 +195,16 @@ int main( int argc, char **argv )
 				NICE::ColorImage rgb;
 				NICE::ColorImage rgb_gt;
 
-				NICE::Image lm( segresult.width(), segresult.height() );
-				NICE::Image lm_gt( segresult.width(), segresult.height() );
 				for ( int y = 0 ; y < segresult.height(); y++ )
 				{
 					for ( int x = 0 ; x < segresult.width(); x++ )
 					{
 						lm.setPixel( x, y, segresult.get( x, y, (uint)z ) );
-						lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
+						if (run_3dseg)
+							lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
 					}
 				}
+
 				classNames.labelToRGB( lm, rgb );
 				classNames.labelToRGB( lm_gt, rgb_gt );
 
@@ -223,16 +233,15 @@ int main( int argc, char **argv )
 		}
 
 //#pragma omp critical
-		for (int z = 0; z < zsizeVec[idx]; z++)
+		for (int z = 0; z < zsize; z++)
 		{
-			NICE::Image lm( segresult.width(), segresult.height() );
-			NICE::Image lm_gt( segresult.width(), segresult.height() );
 			for ( int y = 0 ; y < segresult.height(); y++ )
 			{
 				for ( int x = 0 ; x < segresult.width(); x++ )
 				{
 					lm.setPixel( x, y, segresult.get( x, y, (uint)z ) );
-					lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
+					if (run_3dseg)
+						lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
 				}
 			}
 			NICE::Matrix M( classNames.getMaxClassno() + 1, classNames.getMaxClassno() + 1 );

+ 75 - 29
semseg/SemSegContextTree.cpp

@@ -3,6 +3,8 @@
 #include "vislearning/baselib/ProgressBar.h"
 #include "core/basics/StringTools.h"
 
+#include "core/imagedisplay/ImageDisplay.h"
+
 #include "vislearning/cbaselib/CachedExample.h"
 #include "vislearning/cbaselib/PascalResults.h"
 #include "vislearning/baselib/ColorSpace.h"
@@ -21,7 +23,7 @@
 #include <omp.h>
 #include <iostream>
 
-#define DEBUG
+//#define DEBUG
 
 using namespace OBJREC;
 using namespace std;
@@ -86,7 +88,7 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
 	else if (segmentationtype == "slic")
 		segmentation = new RSSlic (conf);
   else
-    throw ("no valid segmenation_type\n please choose between none, meanshift and felzenszwalb\n");
+    throw ("no valid segmenation_type\n please choose between none, meanshift, slic and felzenszwalb\n");
 
   ftypes = conf->gI (section, "features", 100);;
 
@@ -328,7 +330,7 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImage3DT<d
     if (ft > 1)
     {
       //use larger window size for context features
-      tmpws *= 1;
+      tmpws *= 3;
     }
     
     if(ft == 1)
@@ -651,6 +653,7 @@ void SemSegContextTree::train (const MultiDataset *md)
 
 	vector<int> zsizeVec;
 	getDepthVector( &train, zsizeVec );
+	bool run_3dseg = conf->gB( "debug", "run_3dseg", true);
 
   ProgressBar pb ("compute feats");
   pb.show();
@@ -748,10 +751,17 @@ void SemSegContextTree::train (const MultiDataset *md)
 
     fprintf (stderr, "SSContext: Collecting pixel examples from localization info: %s\n", file.c_str());
 
-		if ( depthCount < zsizeVec[imgcounter] ) continue;
+		int depthBoundary = 0;
+		if (run_3dseg)
+		{
+			depthBoundary = zsizeVec[imgcounter];
+		}
+
+		if ( depthCount < depthBoundary ) continue;
 
 		// all image slices collected -> make a 3d image
 		NICE::MultiChannelImage3DT<double> imgData;
+		
 		make3DImage( filelist, imgData );
 
     int xsize = imgData.width();
@@ -795,7 +805,10 @@ void SemSegContextTree::train (const MultiDataset *md)
       {
 				for (int z = 0; z < zsize; z++)
 				{
-					classno = pixelLabels(x, y, (uint)z);
+					if (run_3dseg)
+						classno = pixelLabels(x, y, (uint)z);
+					else
+						classno = pL.getPixelQuick(x,y);
 					labels[imgcounter].set(x, y, classno, (uint)z);
 
 					if (forbidden_classes.find (classno) != forbidden_classes.end())
@@ -817,7 +830,10 @@ void SemSegContextTree::train (const MultiDataset *md)
         {
 					for (int z = 0; z < zsize; z++)
 					{
-						classno = pixelLabels(x, y, (uint)z);
+						if (run_3dseg)
+							classno = pixelLabels(x, y, (uint)z);
+						else
+							classno = pL.getPixelQuick(x,y);
 
 						if (forbidden_classes.find (classno) != forbidden_classes.end())
 							continue;
@@ -1007,7 +1023,7 @@ void SemSegContextTree::train (const MultiDataset *md)
       const int t = (int)forest[tree].size();
       const int s = startnode[tree];
       startnode[tree] = t;
-#pragma omp parallel for
+//#pragma omp parallel for
       for (int i = s; i < t; i++)
       {
         if (!forest[tree][i].isleaf && forest[tree][i].left < 0)
@@ -1404,7 +1420,7 @@ void SemSegContextTree::extractBasicFeatures (NICE::MultiChannelImage3DT<double>
 
 		} else {
 
-			NICE::Image img = imgData.getChannel(z,0);
+			NICE::ImageT<double> img = imgData.getChannelT(z,0);
 			for (int x = 0; x < xsize; x++)
 			{
 				for (int y = 0; y < ysize; y++)
@@ -1753,7 +1769,7 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
   vector<int> useclass (allClasses, 1);
 
   vector<int> classesInImg;
-  
+
   if(useCategorization)
   {
     if(cndir != "")
@@ -1880,7 +1896,11 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
   else
   {
     //using segmentation
-    NICE::MultiChannelImageT<double> regions;
+    NICE::MultiChannelImageT<int> regions;
+		int xsize = feats.width();
+		int ysize = feats.height();
+		int zsize = feats.depth();
+		regions.reInit(xsize, ysize, zsize);
 
     if (useRegionFeature)
     {
@@ -1896,10 +1916,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
       assert(rchannel > -1);
 
-			int xsize = feats.width();
-			int ysize = feats.height();
-			int zsize = feats.depth();
-			regions.reInit(xsize, ysize, zsize);
 			for (int z = 0; z < zsize; z++)
 			{
 				for (int y = 0; y < ysize; y++)
@@ -1914,29 +1930,45 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 		else
 		{
 			amountRegions = 0;
-			for ( int z = 0; z < zsize; z++ )
+			vector<int> chanSelect;
+			for (int i=0; i<3; i++)
+				chanSelect.push_back(i);
+			amountRegions = segmentation->segRegions( imgData, regions, chanSelect);
+
+#ifdef DEBUG
+			for ( unsigned int z = 0; z < (uint)zsize; z++)
 			{
-				NICE::ColorImage img = imgData.getColor(z);
-				Matrix regions_tmp;
-				int aR_tmp = segmentation->segRegions (img, regions_tmp);
-				if ( aR_tmp > amountRegions ) amountRegions = aR_tmp;
-				int numChans = regions.channels();
-				regions.addChannel( 1 );
-				for ( int y = 0; y < ysize; y++ )
+				NICE::Matrix regmask;
+				NICE::ColorImage colorimg(xsize, ysize);
+				NICE::ColorImage marked(xsize, ysize);
+				regmask.resize(xsize, ysize);
+				for ( int y = 0; y < ysize; y++)
 				{
-					for ( int x = 0; x < xsize; x++ )
+					for ( int x = 0; x < xsize; x++)
 					{
-						regions.set( x, y, regions_tmp(x,y), numChans );
+						regmask(x,y) = regions(x,y,z);
+						colorimg.setPixelQuick( x, y, 0, imgData.get(x,y,z,0) );
+						colorimg.setPixelQuick( x, y, 1, imgData.get(x,y,z,0) );
+						colorimg.setPixelQuick( x, y, 2, imgData.get(x,y,z,0) );
 					}
 				}
+				vector<int> colorvals;
+				colorvals.push_back(255);
+				colorvals.push_back(0);
+				colorvals.push_back(0);
+				segmentation->markContours( colorimg, regmask, colorvals, marked );
+				std::vector<string> list;
+				StringTools::split( filelist[z], '/', list );
+				string savePath = StringTools::trim( filelist[z], list.back() ) + "marked/" + list.back();
+				marked.write( savePath );
 			}
+#endif
 		}
 
     regionProbs.clear();
     regionProbs = vector<vector<double> >(amountRegions, vector<double> (classes, 0.0));
 
     vector<int> bestlabels (amountRegions, labelmapback[classesInImg[0]]);
-
 		for (int z = 0; z < zsize; z++)
 		{
 			for (int y = 0; y < ysize; y++)
@@ -1944,7 +1976,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 				for (int x = 0; x < xsize; x++)
 				{
 					int cregion = regions (x, y, (uint)z);
-
 					for (uint c = 0; c < classesInImg.size(); c++)
 					{
 						int d = classesInImg[c];
@@ -1953,6 +1984,7 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 				}
 			}
 		}
+
     for (int r = 0; r < amountRegions; r++)
     {
       double maxval = regionProbs[r][classesInImg[0]];
@@ -1969,7 +2001,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
       bestlabels[r] = labelmapback[bestlabels[r]];
     }
-
 		for (int z = 0; z < zsize; z++)
 		{
 			for (int y = 0; y < ysize; y++)
@@ -1981,12 +2012,27 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 			}
 		}
 
-#define WRITEREGIONS
+//#define WRITEREGIONS
 #ifdef WRITEREGIONS
 		for (int z = 0; z < zsize; z++)
 		{
 			RegionGraph rg;
-			NICE::ColorImage img = imgData.getColor(z);
+			NICE::ColorImage img(xsize,ysize);
+			if (imagetype == IMAGETYPE_RGB)
+			{
+				img = imgData.getColor(z);
+			} else {
+				NICE::Image gray = imgData.getChannel(z);
+				for (int y = 0; y < ysize; y++)
+				{
+					for (int x = 0; x < xsize; x++)
+					{
+						int val = gray.getPixelQuick(x,y);
+						img.setPixelQuick(x, y, val, val, val);
+					}
+				}
+			}
+
 			Matrix regions_tmp(xsize,ysize);
 			for (int y = 0; y < ysize; y++)
 			{

+ 9 - 1
semseg/SemanticSegmentation.cpp

@@ -217,7 +217,15 @@ void SemanticSegmentation::make3DImage( const std::vector<std::string> & filelis
 				}
 			}
 		} else {
-			NICE::Image img = Preprocess::ReadImgAdv( filelist[it] );
+			NICE::ImageT<int> img;
+			try {
+				img.read ( filelist[it] );
+			}
+			catch(ImageException &)
+			{
+				fprintf (stderr, "Failed to open image file: %s\n", filelist[it].c_str() );
+				exit(-1);
+			}
 			if (!isInit)
 			{
 				imgData.reInit(img.width(),img.height(),filelist.size(),1);