// Beispielhafter Aufruf: BUILD_x86_64/progs/testSemanticSegmentation -config /** * @file testSemanticSegmentation.cpp * @brief test semantic segmentation routines for 3d images and 2d images * @author Erik Rodner, Björn Fröhlich, Sven Sickert * @date 03/20/2008 */ #ifdef NICE_USELIB_OPENMP #include #endif #include "core/basics/Config.h" #include "core/basics/StringTools.h" #include "vislearning/baselib/ICETools.h" #include "core/image/MultiChannelImage3DT.h" #include "semseg/semseg/SemSegContextTree3D.h" #include "semseg/semseg/SemSegTools.h" #include "core/basics/ResourceStatistics.h" #include "core/image/Morph.h" #include #include #undef DEBUG using namespace OBJREC; using namespace NICE; using namespace std; void updateMatrix ( const NICE::ImageT & img, const NICE::ImageT & gt, NICE::Matrix & M, const set & forbidden_classes, map & classMapping ) { double subsamplex = gt.width() / ( double ) img.width(); double subsampley = gt.height() / ( double ) img.height(); for ( int y = 0 ; y < gt.height() ; y++ ) for ( int x = 0 ; x < gt.width() ; x++ ) { int xx = ( int ) ( x / subsamplex ); int yy = ( int ) ( y / subsampley ); if ( xx < 0 ) xx = 0; if ( yy < 0 ) yy = 0; if ( xx > img.width() - 1 ) xx = img.width() - 1; if ( yy > img.height() - 1 ) yy = img.height() - 1; int cimg = img.getPixel ( xx, yy ); int gimg = gt.getPixel ( x, y ); if ( forbidden_classes.find ( gimg ) == forbidden_classes.end() ) { M ( classMapping[gimg], classMapping[cimg] ) ++; } } } void startClassification (SemanticSegmentation *semseg, std::vector< NICE::Matrix > & M_vec, const Config & conf, const LabeledSet* testFiles, const ClassNames & classNames, const set & forbidden_classes, map & classMapping, const string & resultdir, const bool doCrossVal) { bool show_results = conf.gB ( "debug", "show_results", false ); bool write_results = conf.gB ( "debug", "write_results", false ); bool writeProbMaps = conf.gB ( "debug", "write_prob_maps", false ); if (doCrossVal) write_results = false; bool run_3Dseg = conf.gB( "SSContextTree", "run_3dseg", false); bool postProcessing = conf.gB( "main", "post_process", false); string output_type = conf.gS ( "debug", "output_type", "ppm" ); string output_postfix = conf.gS ( "debug", "output_postfix", "" ); vector< int > zsizeVec; semseg->getDepthVector ( testFiles, zsizeVec, run_3Dseg ); int depthCount = 0, idx = 0; vector< string > filelist; NICE::MultiChannelImageT segresult; NICE::MultiChannelImageT gt; for (LabeledSet::const_iterator it = testFiles->begin(); it != testFiles->end(); it++) { for (std::vector::const_iterator jt = it->second.begin(); jt != it->second.end(); jt++) { ImageInfo & info = *(*jt); std::string file = info.img(); filelist.push_back ( file ); depthCount++; NICE::ImageT lm; NICE::ImageT lm_gt; if ( info.hasLocalizationInfo() ) { const LocalizationResult *l_gt = info.localization(); lm.resize ( l_gt->xsize, l_gt->ysize ); lm.set ( 0 ); lm_gt.resize ( l_gt->xsize, l_gt->ysize ); lm_gt.set ( 0 ); l_gt->calcLabeledImage ( lm, classNames.getBackgroundClass() ); #ifdef DEBUG cout << "testSemanticSegmentation3D: Generating Labeled NICE::Image (Ground-Truth)" << endl; #endif l_gt->calcLabeledImage ( lm_gt, classNames.getBackgroundClass() ); } segresult.addChannel ( lm ); gt.addChannel ( lm_gt ); int depthBoundary = 0; if ( run_3Dseg ) { depthBoundary = zsizeVec[idx]; } if ( depthCount < depthBoundary ) continue; NICE::MultiChannelImage3DT probabilities; semseg->classify ( filelist, segresult, probabilities ); // save to file for ( int z = 0; z < segresult.channels(); z++ ) { std::string fname = StringTools::baseName ( filelist[z], false ); if ( show_results || write_results ) { NICE::ColorImage orig ( filelist[z] ); NICE::ColorImage rgb; NICE::ColorImage rgb_gt; NICE::ColorImage ov_rgb; NICE::ColorImage ov_rgb_gt; for ( int y = 0 ; y < orig.height(); y++ ) { for ( int x = 0 ; x < orig.width(); x++ ) { lm.setPixel ( x, y, segresult.get ( x, y, ( uint ) z ) ); if ( run_3Dseg ) lm_gt.setPixel ( x, y, gt.get ( x, y, ( uint ) z ) ); } } // confusion matrix NICE::Matrix M ( classMapping.size(), classMapping.size() ); M.set ( 0 ); SemSegTools::updateConfusionMatrix ( lm, lm_gt, M, forbidden_classes, classMapping ); M_vec.push_back ( M ); classNames.labelToRGB ( lm, rgb ); classNames.labelToRGB ( lm_gt, rgb_gt ); if (postProcessing) { // median filter for (int r = 0; r < 3; r++) { NICE::Image postIm(rgb.width(), rgb.height()); NICE::median(*(rgb.getChannel(r)), &postIm, 1); for (int y = 0; y < rgb.height(); y++) for (int x = 0; x < rgb.width(); x++) rgb.setPixel(x,y,r, postIm.getPixelQuick(x,y)); } } if ( write_results ) { SemSegTools::segmentToOverlay ( orig.getChannel(1), rgb, ov_rgb ); SemSegTools::segmentToOverlay ( orig.getChannel(1), rgb_gt, ov_rgb_gt ); std::stringstream out; if ( output_postfix.size() > 0 ) out << resultdir << "/" << fname << output_postfix; else out << resultdir << "/" << fname; #ifdef DEBUG cout << "Writing to file " << out.str() << "_*." << output_type << endl; #endif orig.write ( out.str() + "_orig." + output_type ); rgb.write ( out.str() + "_result." + output_type ); rgb_gt.write ( out.str() + "_groundtruth." + output_type ); ov_rgb.write ( out.str() + "_overlay_res." + output_type ); ov_rgb_gt.write ( out.str() + "_overlay_gt." + output_type ); // write Probability maps if (writeProbMaps) { NICE::ColorImage prob_map( probabilities.width(), probabilities.height() ); prob_map.set(0,0,0); int iNumChannels = probabilities.channels(); for ( int idxProbMap = 0; idxProbMap < iNumChannels; idxProbMap++) { for ( int y = 0 ; y < probabilities.height(); y++ ) { for ( int x = 0 ; x < probabilities.width(); x++ ) { double probVal = probabilities.get( x, y, z, idxProbMap ) * 255.0; int tmp = round(probVal); for ( int c = 0 ; c < 3 ; c++ ) prob_map.setPixel( x, y, c, tmp ); } } std::stringstream ssFileProbMap; //ssFileProbMap << out.str() << "_probs." << "c" << idxProbMap << "." << output_type; ssFileProbMap << out.str() << "_probs." << "c-" << classNames.code( idxProbMap ) << "." << output_type; //classNames prob_map.write ( ssFileProbMap.str() ); } } } } } // prepare for new 3d image filelist.clear(); segresult.reInit(0,0,0); gt.reInit(0,0,0); depthCount = 0; idx++; } } segresult.freeData(); } /** test semantic segmentation routines */ int main ( int argc, char **argv ) { std::set_terminate ( __gnu_cxx::__verbose_terminate_handler ); Config conf ( argc, argv ); ResourceStatistics rs; /*---------------CONFIGURATION---------------*/ bool doCrossVal = conf.gB ( "debug", "do_crossval", false ); string resultdir = conf.gS ( "debug", "resultdir", "." ); /*-------------------------------------------*/ #ifdef DEBUG cerr << "Writing Results to " << resultdir << endl; #endif std::vector< NICE::Matrix > M_vec; MultiDataset md ( &conf ); const ClassNames & classNames = md.getClassNames ( "train" ); set forbidden_classes; classNames.getSelection ( conf.gS ( "analysis", "forbidden_classes", "" ), forbidden_classes ); vector usedClasses ( classNames.numClasses(), true ); for ( set::const_iterator it = forbidden_classes.begin(); it != forbidden_classes.end(); ++it) { usedClasses [ *it ] = false; } map classMapping; int j = 0; for ( int i = 0; i < usedClasses.size(); i++ ) if (usedClasses[i]) { classMapping[i] = j; j++; } // initialize semantic segmentation method SemanticSegmentation *semseg = NULL; // TRAINING AND TESTING if (!doCrossVal) { semseg = new SemSegContextTree3D ( &conf, &classNames ); // STANDARD EVALUATION cout << "\nTRAINING" << endl; cout << "########\n" << endl; semseg->train( &md ); cout << "\nCLASSIFICATION" << endl; cout << "##############\n" << endl; const LabeledSet *testFiles = md["test"]; startClassification (semseg, M_vec, conf, testFiles, classNames, forbidden_classes, classMapping, resultdir, doCrossVal ); delete semseg; } else { // CROSS-VALIDATION for (int cval = 1; cval <= 10; cval++) { semseg = new SemSegContextTree3D ( &conf, &classNames ); stringstream ss; ss << cval; string cvaltrain = "train_cv" + ss.str(); string cvaltest = "test_cv" + ss.str(); cout << "\nTRAINING " << cval << endl; cout << "###########\n" << endl; const LabeledSet *trainFiles = md[cvaltrain]; semseg->train( trainFiles ); cout << "\nCLASSIFICATION " << cval << endl; cout << "#################\n" << endl; const LabeledSet *testFiles = md[cvaltest]; startClassification (semseg, M_vec, conf, testFiles, classNames, forbidden_classes, classMapping, resultdir, doCrossVal ); delete semseg; } } cout << "\nSTATISTICS" << endl; cout << "##########\n" << endl; long maxMemory; double userCPUTime, sysCPUTime; rs.getStatistics ( maxMemory, userCPUTime, sysCPUTime ); cout << "Memory (max): " << maxMemory << " KB" << endl; cout << "CPU Time (user): " << userCPUTime << " seconds" << endl; cout << "CPU Time (sys): " << sysCPUTime << " seconds" << endl; cout << "\nPERFORMANCE" << endl; cout << "###########\n" << endl; double overall = 0.0; double sumall = 0.0; NICE::Matrix M ( classMapping.size(), classMapping.size() ); M.set ( 0 ); for ( int s = 0; s < ( int ) M_vec.size(); s++ ) { NICE::Matrix M_tmp = M_vec[s]; for ( int r = 0; r < ( int ) M_tmp.rows(); r++ ) for ( int c = 0; c < ( int ) M_tmp.cols(); c++ ) { if ( r == c ) overall += M_tmp ( r, c ); sumall += M_tmp ( r, c ); M ( r, c ) += M_tmp ( r, c ); } } overall /= sumall; cout << "Confusion Matrix:" << endl; cout.precision(4); for (int r = 0; r < (int) M.rows(); r++) { for (int c = 0; c < (int) M.cols(); c++) cout << M(r,c)/sumall << " "; cout << endl; } // metrics for binary classification double precision, recall, f1score = -1.0; if (classNames.numClasses() == 2) { precision = (double)M(1,1) / (double)(M(1,1)+M(0,1)); recall = (double)M(1,1) / (double)(M(1,1)+M(1,0)); f1score = 2.0*(precision*recall)/(precision+recall); } // normalizing M using rows for ( int r = 0 ; r < ( int ) M.rows() ; r++ ) { double sum = 0.0; for ( int c = 0 ; c < ( int ) M.cols() ; c++ ) sum += M ( r, c ); if ( fabs ( sum ) > 1e-4 ) for ( int c = 0 ; c < ( int ) M.cols() ; c++ ) M ( r, c ) /= sum; } double avg_perf = 0.0; int classes_trained = 0; for ( int r = 0 ; r < ( int ) M.rows() ; r++ ) { if ( ( classNames.existsClassno ( r ) ) && ( forbidden_classes.find ( r ) == forbidden_classes.end() ) ) { avg_perf += M ( r, r ); double lsum = 0.0; for ( int r2 = 0; r2 < ( int ) M.rows(); r2++ ) { lsum += M ( r,r2 ); } if ( lsum != 0.0 ) { classes_trained++; } } } // print results of evaluation cout << "Overall Recogntion Rate: " << overall << endl; cout << "Average Recogntion Rate: " << avg_perf / ( classes_trained ) << endl; cout << "Lower Bound: " << 1.0 / classes_trained << endl; cout << "Precision: " << precision << endl; cout << "Recall: " << recall << endl; cout << "F1Score: " << f1score << endl; cout <<"\nClasses:" << endl; for ( int r = 0 ; r < ( int ) M.rows() ; r++ ) { if ( ( classNames.existsClassno ( r ) ) && ( forbidden_classes.find ( r ) == forbidden_classes.end() ) ) { std::string classname = classNames.text ( r ); cout << classname.c_str() << ": " << M ( r, r ) << endl; } } return 0; }