12 jaren geleden · 14f5a6b95b
--- a/image/MultiChannelImage3DT.h
+++ b/image/MultiChannelImage3DT.h
@@ -123,12 +123,18 @@ public:
 
				   /** convert to ice image */
			
 
				   void convertToGrey( NICE::Image & img, int z, uint channel = 0, bool normalize = true ) const;
			
 
				 
			
 
				+	/** convert slice z of one channel to an ice image template (ImageT<P>); unlike the NICE::Image overload, normalize defaults to false here */
			
 
				+  void convertToGrey( NICE::ImageT<P> & img, int z, uint channel = 0, bool normalize = false ) const;
			
 
				+	
			
 
				   /** convert to ice colorimage */
			
 
				   void convertToColor( NICE::ColorImage & img, int z, const int chan1 = 0, const int chan2 = 1, const int chan3 = 2 ) const;
			
 
				 
			
 
				   /** return image for visualization */
			
 
				   Image getChannel( int z, uint channel = 0 ) const;
			
 
				 
			
 
				+	/** return slice z of one channel as ImageT<P>; values are copied without min/max normalization */
			
 
				+  ImageT<P> getChannelT( int z, uint channel = 0 ) const;
			
 
				+
			
 
				   /** return rgb image for visualization */
			
 
				   ColorImage getColor(int z) const;
			
 
				 
			
--- a/image/MultiChannelImage3DT.tcc
+++ b/image/MultiChannelImage3DT.tcc
@@ -353,10 +353,21 @@ Image MultiChannelImage3DT<P>::getChannel( int z, uint channel ) const
 
				 
			
 
				   NICE::Image img;
			
 
				   convertToGrey( img, z, channel, true );
			
 
				-  /*
			
 
				-      P min, max;
			
 
				-      statistics ( min, max, channel );
			
 
				-      fprintf (stderr, "MultiChannelImage3DT<>::showChannel: max %f min %f\n", (double)max, (double)min );*/
			
 
				+
			
 
				+  return img;
			
 
				+}
			
 
				+/** return slice z of the given channel as ImageT<P>, filled by convertToGrey with normalize = false; also logs the channel's min/max to stderr */
			
 
				+template<class P>
			
 
				+ImageT<P> MultiChannelImage3DT<P>::getChannelT( int z, uint channel ) const
			
 
				+{
			
 
				+  assert( channel < numChannels );
			
 
				+  // only the channel index is asserted; z is passed on to convertToGrey unchecked
			
 
				+  NICE::ImageT<P> img;
			
 
				+  convertToGrey( img, z, channel, false );
			
 
				+  // min/max below are used only for the stderr log; they do not change img
			
 
				+  P min, max;
			
 
				+  statistics ( min, max, channel );
			
 
				+  fprintf (stderr, "MultiChannelImage3DT<>::showChannel: max %f min %f\n", (double)max, (double)min ); // NOTE(review): message still names showChannel, not getChannelT
			
 
				 
			
 
				   return img;
			
 
				 }
			
@@ -408,6 +419,53 @@ void MultiChannelImage3DT<P>::convertToGrey( NICE::Image & img, int z, uint chan
 
				   }
			
 
				 }
			
 
				 
			
 
				+/** convert slice z of one channel to an ice image template (ImageT<P>): img is resized to xsize x ysize and filled either with the stored values or, if normalize is set, with values rescaled from [min,max] to [0,255]; z is not range-checked */
			
 
				+template<class P>
			
 
				+void MultiChannelImage3DT<P>::convertToGrey( NICE::ImageT<P> & img, int z, uint channel,  bool normalize ) const
			
 
				+{
			
 
				+  assert( channel < numChannels );
			
 
				+  // min/max are only filled in (by statistics) when normalize is set
			
 
				+  P min, max;
			
 
				+
			
 
				+  if ( normalize ) {
			
 
				+    statistics( min, max, channel );
			
 
				+    fprintf( stderr, "MultiChannelImage3DT<>::showChannel: max %f min %f\n", ( double )max, ( double )min );
			
 
				+  }
			
 
				+  // becomes true when the uniform-image shortcut below has already filled img
			
 
				+  bool skip_assignment = false;
			
 
				+
			
 
				+  img.resize( xsize, ysize );
			
 
				+  // uniform channel (max - min below the smallest positive normalized double): fill img with max instead of dividing by max - min; NOTE(review): the threshold is a double constant regardless of P
			
 
				+  if ( normalize )
			
 
				+    if ( max - min < std::numeric_limits<double>::min() )
			
 
				+    {
			
 
				+      img.set( max );
			
 
				+      skip_assignment = true;
			
 
				+      fprintf( stderr, "MultiChannelImage3DT::showChannel: image is uniform! (%f)\n", ( double )max );
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+  if ( ! skip_assignment )
			
 
				+  {
			
 
				+    long k = 0;
			
 
				+    // k walks the pixels of one slice in row-major order (x fastest); z*xsize*ysize is the offset of slice z within the channel
			
 
				+    for ( int y = 0 ; y < ysize; y++ )
			
 
				+    {
			
 
				+      for ( int x = 0 ; x < xsize ; x++, k++ )
			
 
				+      {
			
 
				+        if ( normalize )
			
 
				+        {
			
 
				+          img.setPixel( x, y, ( int )(( data[channel][z*xsize*ysize + k] - min ) * 255 / ( max - min ) ) ); // rescale [min,max] to [0,255], then cast to int
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+          img.setPixel( x, y, ( int )( data[channel][z*xsize*ysize + k] ) ); // NOTE(review): the ( int ) cast drops the fractional part when P is a floating point type - confirm intended
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 template<class P>
			
 
				 void MultiChannelImage3DT<P>::convertToColor( NICE::ColorImage & img, int z, const int chan1, const int chan2, const int chan3) const
			
 
				 {
			
--- a/progs/testSemanticSegmentation.cpp
+++ b/progs/testSemanticSegmentation.cpp
@@ -78,6 +78,8 @@ int main( int argc, char **argv )
 
				 
			
 
				   bool write_results_pascal = conf.gB( "debug", "write_results_pascal", false );
			
 
				 
			
 
				+	bool run_3dseg = conf.gB( "debug", "run_3dseg", true);
			
 
				+
			
 
				   std::string resultdir = conf.gS( "debug", "resultdir", "." );
			
 
				 
			
 
				   if ( write_results )
			
@@ -140,7 +142,15 @@ int main( int argc, char **argv )
 
				     segresult.addChannel( lm );
			
 
				     gt.addChannel( lm_gt );
			
 
				 
			
 
				-		if ( depthCount < zsizeVec[idx] ) continue;
			
 
				+		int depthBoundary = 0;
			
 
				+		int zsize = 1;
			
 
				+		if (run_3dseg)
			
 
				+		{
			
 
				+			depthBoundary = zsizeVec[idx];
			
 
				+			zsize = zsizeVec[idx];
			
 
				+		}
			
 
				+
			
 
				+		if ( depthCount < depthBoundary ) continue;
			
 
				 
			
 
				     NICE::MultiChannelImage3DT<double> probabilities;
			
 
				 		NICE::MultiChannelImage3DT<double> imgData;
			
@@ -150,7 +160,7 @@ int main( int argc, char **argv )
 
				     fprintf( stderr, "testSemanticSegmentation: Segmentation finished !\n" );
			
 
				 
			
 
				     // save to file
			
 
				-		for (int z = 0; z < zsizeVec[idx]; z++)
			
 
				+		for (int z = 0; z < zsize; z++)
			
 
				 		{
			
 
				 			std::string fname = StringTools::baseName( filelist[z], false );
			
 
				 
			
@@ -185,16 +195,16 @@ int main( int argc, char **argv )
 
				 				NICE::ColorImage rgb;
			
 
				 				NICE::ColorImage rgb_gt;
			
 
				 
			
 
				-				NICE::Image lm( segresult.width(), segresult.height() );
			
 
				-				NICE::Image lm_gt( segresult.width(), segresult.height() );
			
 
				 				for ( int y = 0 ; y < segresult.height(); y++ )
			
 
				 				{
			
 
				 					for ( int x = 0 ; x < segresult.width(); x++ )
			
 
				 					{
			
 
				 						lm.setPixel( x, y, segresult.get( x, y, (uint)z ) );
			
 
				-						lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
			
 
				+						if (run_3dseg)
			
 
				+							lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
			
 
				 					}
			
 
				 				}
			
 
				+
			
 
				 				classNames.labelToRGB( lm, rgb );
			
 
				 				classNames.labelToRGB( lm_gt, rgb_gt );
			
 
				 
			
@@ -223,16 +233,15 @@ int main( int argc, char **argv )
 
				 		}
			
 
				 
			
 
				 //#pragma omp critical
			
 
				-		for (int z = 0; z < zsizeVec[idx]; z++)
			
 
				+		for (int z = 0; z < zsize; z++)
			
 
				 		{
			
 
				-			NICE::Image lm( segresult.width(), segresult.height() );
			
 
				-			NICE::Image lm_gt( segresult.width(), segresult.height() );
			
 
				 			for ( int y = 0 ; y < segresult.height(); y++ )
			
 
				 			{
			
 
				 				for ( int x = 0 ; x < segresult.width(); x++ )
			
 
				 				{
			
 
				 					lm.setPixel( x, y, segresult.get( x, y, (uint)z ) );
			
 
				-					lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
			
 
				+					if (run_3dseg)
			
 
				+						lm_gt.setPixel( x, y, gt.get( x, y, (uint)z ) );
			
 
				 				}
			
 
				 			}
			
 
				 			NICE::Matrix M( classNames.getMaxClassno() + 1, classNames.getMaxClassno() + 1 );
			
--- a/semseg/SemSegContextTree.cpp
+++ b/semseg/SemSegContextTree.cpp
@@ -3,6 +3,8 @@
 
				 #include "vislearning/baselib/ProgressBar.h"
			
 
				 #include "core/basics/StringTools.h"
			
 
				 
			
 
				+#include "core/imagedisplay/ImageDisplay.h"
			
 
				+
			
 
				 #include "vislearning/cbaselib/CachedExample.h"
			
 
				 #include "vislearning/cbaselib/PascalResults.h"
			
 
				 #include "vislearning/baselib/ColorSpace.h"
			
@@ -21,7 +23,7 @@
 
				 #include <omp.h>
			
 
				 #include <iostream>
			
 
				 
			
 
				-#define DEBUG
			
 
				+//#define DEBUG
			
 
				 
			
 
				 using namespace OBJREC;
			
 
				 using namespace std;
			
@@ -86,7 +88,7 @@ SemSegContextTree::SemSegContextTree (const Config *conf, const MultiDataset *md
 
				 	else if (segmentationtype == "slic")
			
 
				 		segmentation = new RSSlic (conf);
			
 
				   else
			
 
				-    throw ("no valid segmenation_type\n please choose between none, meanshift and felzenszwalb\n");
			
 
				+    throw ("no valid segmenation_type\n please choose between none, meanshift, slic and felzenszwalb\n");
			
 
				 
			
 
				   ftypes = conf->gI (section, "features", 100);;
			
 
				 
			
@@ -328,7 +330,7 @@ double SemSegContextTree::getBestSplit (std::vector<NICE::MultiChannelImage3DT<d
 
				     if (ft > 1)
			
 
				     {
			
 
				       //use larger window size for context features
			
 
				-      tmpws *= 1;
			
 
				+      tmpws *= 3;
			
 
				     }
			
 
				     
			
 
				     if(ft == 1)
			
@@ -651,6 +653,7 @@ void SemSegContextTree::train (const MultiDataset *md)
 
				 
			
 
				 	vector<int> zsizeVec;
			
 
				 	getDepthVector( &train, zsizeVec );
			
 
				+	bool run_3dseg = conf->gB( "debug", "run_3dseg", true);
			
 
				 
			
 
				   ProgressBar pb ("compute feats");
			
 
				   pb.show();
			
@@ -748,10 +751,17 @@ void SemSegContextTree::train (const MultiDataset *md)
 
				 
			
 
				     fprintf (stderr, "SSContext: Collecting pixel examples from localization info: %s\n", file.c_str());
			
 
				 
			
 
				-		if ( depthCount < zsizeVec[imgcounter] ) continue;
			
 
				+		int depthBoundary = 0;
			
 
				+		if (run_3dseg)
			
 
				+		{
			
 
				+			depthBoundary = zsizeVec[imgcounter];
			
 
				+		}
			
 
				+
			
 
				+		if ( depthCount < depthBoundary ) continue;
			
 
				 
			
 
				 		// all image slices collected -> make a 3d image
			
 
				 		NICE::MultiChannelImage3DT<double> imgData;
			
 
				+		
			
 
				 		make3DImage( filelist, imgData );
			
 
				 
			
 
				     int xsize = imgData.width();
			
@@ -795,7 +805,10 @@ void SemSegContextTree::train (const MultiDataset *md)
 
				       {
			
 
				 				for (int z = 0; z < zsize; z++)
			
 
				 				{
			
 
				-					classno = pixelLabels(x, y, (uint)z);
			
 
				+					if (run_3dseg)
			
 
				+						classno = pixelLabels(x, y, (uint)z);
			
 
				+					else
			
 
				+						classno = pL.getPixelQuick(x,y);
			
 
				 					labels[imgcounter].set(x, y, classno, (uint)z);
			
 
				 
			
 
				 					if (forbidden_classes.find (classno) != forbidden_classes.end())
			
@@ -817,7 +830,10 @@ void SemSegContextTree::train (const MultiDataset *md)
 
				         {
			
 
				 					for (int z = 0; z < zsize; z++)
			
 
				 					{
			
 
				-						classno = pixelLabels(x, y, (uint)z);
			
 
				+						if (run_3dseg)
			
 
				+							classno = pixelLabels(x, y, (uint)z);
			
 
				+						else
			
 
				+							classno = pL.getPixelQuick(x,y);
			
 
				 
			
 
				 						if (forbidden_classes.find (classno) != forbidden_classes.end())
			
 
				 							continue;
			
@@ -1007,7 +1023,7 @@ void SemSegContextTree::train (const MultiDataset *md)
 
				       const int t = (int)forest[tree].size();
			
 
				       const int s = startnode[tree];
			
 
				       startnode[tree] = t;
			
 
				-#pragma omp parallel for
			
 
				+//#pragma omp parallel for
			
 
				       for (int i = s; i < t; i++)
			
 
				       {
			
 
				         if (!forest[tree][i].isleaf && forest[tree][i].left < 0)
			
@@ -1404,7 +1420,7 @@ void SemSegContextTree::extractBasicFeatures (NICE::MultiChannelImage3DT<double>
 
				 
			
 
				 		} else {
			
 
				 
			
 
				-			NICE::Image img = imgData.getChannel(z,0);
			
 
				+			NICE::ImageT<double> img = imgData.getChannelT(z,0);
			
 
				 			for (int x = 0; x < xsize; x++)
			
 
				 			{
			
 
				 				for (int y = 0; y < ysize; y++)
			
@@ -1753,7 +1769,7 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				   vector<int> useclass (allClasses, 1);
			
 
				 
			
 
				   vector<int> classesInImg;
			
 
				-  
			
 
				+
			
 
				   if(useCategorization)
			
 
				   {
			
 
				     if(cndir != "")
			
@@ -1880,7 +1896,11 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				   else
			
 
				   {
			
 
				     //using segmentation
			
 
				-    NICE::MultiChannelImageT<double> regions;
			
 
				+    NICE::MultiChannelImageT<int> regions;
			
 
				+		int xsize = feats.width();
			
 
				+		int ysize = feats.height();
			
 
				+		int zsize = feats.depth();
			
 
				+		regions.reInit(xsize, ysize, zsize);
			
 
				 
			
 
				     if (useRegionFeature)
			
 
				     {
			
@@ -1896,10 +1916,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 
			
 
				       assert(rchannel > -1);
			
 
				 
			
 
				-			int xsize = feats.width();
			
 
				-			int ysize = feats.height();
			
 
				-			int zsize = feats.depth();
			
 
				-			regions.reInit(xsize, ysize, zsize);
			
 
				 			for (int z = 0; z < zsize; z++)
			
 
				 			{
			
 
				 				for (int y = 0; y < ysize; y++)
			
@@ -1914,29 +1930,45 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 		else
			
 
				 		{
			
 
				 			amountRegions = 0;
			
 
				-			for ( int z = 0; z < zsize; z++ )
			
 
				+			vector<int> chanSelect;
			
 
				+			for (int i=0; i<3; i++)
			
 
				+				chanSelect.push_back(i);
			
 
				+			amountRegions = segmentation->segRegions( imgData, regions, chanSelect);
			
 
				+
			
 
				+#ifdef DEBUG
			
 
				+			for ( unsigned int z = 0; z < (uint)zsize; z++)
			
 
				 			{
			
 
				-				NICE::ColorImage img = imgData.getColor(z);
			
 
				-				Matrix regions_tmp;
			
 
				-				int aR_tmp = segmentation->segRegions (img, regions_tmp);
			
 
				-				if ( aR_tmp > amountRegions ) amountRegions = aR_tmp;
			
 
				-				int numChans = regions.channels();
			
 
				-				regions.addChannel( 1 );
			
 
				-				for ( int y = 0; y < ysize; y++ )
			
 
				+				NICE::Matrix regmask;
			
 
				+				NICE::ColorImage colorimg(xsize, ysize);
			
 
				+				NICE::ColorImage marked(xsize, ysize);
			
 
				+				regmask.resize(xsize, ysize);
			
 
				+				for ( int y = 0; y < ysize; y++)
			
 
				 				{
			
 
				-					for ( int x = 0; x < xsize; x++ )
			
 
				+					for ( int x = 0; x < xsize; x++)
			
 
				 					{
			
 
				-						regions.set( x, y, regions_tmp(x,y), numChans );
			
 
				+						regmask(x,y) = regions(x,y,z);
			
 
				+						colorimg.setPixelQuick( x, y, 0, imgData.get(x,y,z,0) );
			
 
				+						colorimg.setPixelQuick( x, y, 1, imgData.get(x,y,z,0) );
			
 
				+						colorimg.setPixelQuick( x, y, 2, imgData.get(x,y,z,0) );
			
 
				 					}
			
 
				 				}
			
 
				+				vector<int> colorvals;
			
 
				+				colorvals.push_back(255);
			
 
				+				colorvals.push_back(0);
			
 
				+				colorvals.push_back(0);
			
 
				+				segmentation->markContours( colorimg, regmask, colorvals, marked );
			
 
				+				std::vector<string> list;
			
 
				+				StringTools::split( filelist[z], '/', list );
			
 
				+				string savePath = StringTools::trim( filelist[z], list.back() ) + "marked/" + list.back();
			
 
				+				marked.write( savePath );
			
 
				 			}
			
 
				+#endif
			
 
				 		}
			
 
				 
			
 
				     regionProbs.clear();
			
 
				     regionProbs = vector<vector<double> >(amountRegions, vector<double> (classes, 0.0));
			
 
				 
			
 
				     vector<int> bestlabels (amountRegions, labelmapback[classesInImg[0]]);
			
 
				-
			
 
				 		for (int z = 0; z < zsize; z++)
			
 
				 		{
			
 
				 			for (int y = 0; y < ysize; y++)
			
@@ -1944,7 +1976,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 				for (int x = 0; x < xsize; x++)
			
 
				 				{
			
 
				 					int cregion = regions (x, y, (uint)z);
			
 
				-
			
 
				 					for (uint c = 0; c < classesInImg.size(); c++)
			
 
				 					{
			
 
				 						int d = classesInImg[c];
			
@@ -1953,6 +1984,7 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 				}
			
 
				 			}
			
 
				 		}
			
 
				+
			
 
				     for (int r = 0; r < amountRegions; r++)
			
 
				     {
			
 
				       double maxval = regionProbs[r][classesInImg[0]];
			
@@ -1969,7 +2001,6 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 
			
 
				       bestlabels[r] = labelmapback[bestlabels[r]];
			
 
				     }
			
 
				-
			
 
				 		for (int z = 0; z < zsize; z++)
			
 
				 		{
			
 
				 			for (int y = 0; y < ysize; y++)
			
@@ -1981,12 +2012,27 @@ void SemSegContextTree::semanticseg ( NICE::MultiChannelImage3DT<double> & imgDa
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-#define WRITEREGIONS
			
 
				+//#define WRITEREGIONS
			
 
				 #ifdef WRITEREGIONS
			
 
				 		for (int z = 0; z < zsize; z++)
			
 
				 		{
			
 
				 			RegionGraph rg;
			
 
				-			NICE::ColorImage img = imgData.getColor(z);
			
 
				+			NICE::ColorImage img(xsize,ysize);
			
 
				+			if (imagetype == IMAGETYPE_RGB)
			
 
				+			{
			
 
				+				img = imgData.getColor(z);
			
 
				+			} else {
			
 
				+				NICE::Image gray = imgData.getChannel(z);
			
 
				+				for (int y = 0; y < ysize; y++)
			
 
				+				{
			
 
				+					for (int x = 0; x < xsize; x++)
			
 
				+					{
			
 
				+						int val = gray.getPixelQuick(x,y);
			
 
				+						img.setPixelQuick(x, y, val, val, val);
			
 
				+					}
			
 
				+				}
			
 
				+			}
			
 
				+
			
 
				 			Matrix regions_tmp(xsize,ysize);
			
 
				 			for (int y = 0; y < ysize; y++)
			
 
				 			{
			
--- a/semseg/SemanticSegmentation.cpp
+++ b/semseg/SemanticSegmentation.cpp
@@ -217,7 +217,15 @@ void SemanticSegmentation::make3DImage( const std::vector<std::string> & filelis
 
				 				}
			
 
				 			}
			
 
				 		} else {
			
 
				-			NICE::Image img = Preprocess::ReadImgAdv( filelist[it] );
			
 
				+			NICE::ImageT<int> img;
			
 
				+			try {
			
 
				+				img.read ( filelist[it] );
			
 
				+			}
			
 
				+			catch(ImageException &)
			
 
				+			{
			
 
				+				fprintf (stderr, "Failed to open image file: %s\n", filelist[it].c_str() );
			
 
				+				exit(-1);
			
 
				+			}
			
 
				 			if (!isInit)
			
 
				 			{
			
 
				 				imgData.reInit(img.width(),img.height(),filelist.size(),1);