// SemSegNovelty.cpp

#include <sstream>
#include <iostream>

#include "SemSegNovelty.h"

#include "core/image/FilterT.h"
#include "gp-hik-exp/GPHIKClassifierNICE.h"
#include "vislearning/baselib/ICETools.h"
#include "vislearning/baselib/Globals.h"
#include "vislearning/features/fpfeatures/SparseVectorFeature.h"
#include "core/basics/StringTools.h"
#include "core/basics/Timer.h"
#include "segmentation/GenericRegionSegmentationMethodSelection.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
SemSegNovelty::SemSegNovelty ( const Config *conf,
                               const MultiDataset *md )
    : SemanticSegmentation ( conf, & ( md->getClassNames ( "train" ) ) )
{
  this->conf = conf;
  string section = "SemSegNovelty";

  featExtract = new LFColorWeijer ( conf );

  save_cache = conf->gB ( "FPCPixel", "save_cache", true );
  read_cache = conf->gB ( "FPCPixel", "read_cache", false );
  uncertdir  = conf->gS ( "debug", "uncertainty", "uncertainty" );
  cache      = conf->gS ( "cache", "root", "" );

  classifier = new GPHIKClassifierNICE ( conf, "ClassiferGPHIK" );

  whs       = conf->gI ( section, "window_size", 10 );
  featdist  = conf->gI ( section, "grid", 10 );
  testWSize = conf->gI ( section, "test_window_size", 10 );

  string rsMethode = conf->gS ( section, "segmentation", "none" );

  if ( rsMethode == "none" )
  {
    regionSeg = NULL;
  }
  else
  {
    RegionSegmentationMethod *tmpRegionSeg = GenericRegionSegmentationMethodSelection::selectRegionSegmentationMethod ( conf, rsMethode );
    if ( save_cache )
      regionSeg = new RSCache ( conf, tmpRegionSeg );
    else
      regionSeg = tmpRegionSeg;
  }
  cn = md->getClassNames ( "train" );

  if ( read_cache )
  {
    string classifierdst = "/classifier.data";
    fprintf ( stderr, "SemSegNovelty:: Reading classifier data from %s\n", ( cache + classifierdst ).c_str() );

    try
    {
      if ( classifier != NULL )
      {
        classifier->read ( cache + classifierdst );
      }
      fprintf ( stderr, "SemSegNovelty:: successfully read\n" );
    }
    catch ( char *str )
    {
      cerr << "error reading data: " << str << endl;
    }
  }
  else
  {
    train ( md );
  }
}
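
/* Illustrative config snippet for the options read in the constructor above. Section and key
 * names are taken from the gS/gI/gB calls, the values shown are the code defaults; the cache
 * path is a hypothetical example. The GP-HIK classifier additionally reads its own options
 * from the "ClassiferGPHIK" section passed to GPHIKClassifierNICE.
 *
 *   [SemSegNovelty]
 *   window_size      = 10
 *   grid             = 10
 *   test_window_size = 10
 *   segmentation     = none
 *
 *   [FPCPixel]
 *   save_cache = true
 *   read_cache = false
 *
 *   [debug]
 *   uncertainty = uncertainty
 *
 *   [cache]
 *   root = /tmp/semseg-cache
 *
 *   [GPHIK]
 *   noise = 0.01
 */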

SemSegNovelty::~SemSegNovelty()
{
  // clean-up
  if ( classifier != NULL )
    delete classifier;
  if ( featExtract != NULL )
    delete featExtract;
}

void SemSegNovelty::train ( const MultiDataset *md )
{
  const LabeledSet train = * ( *md ) ["train"];
  const LabeledSet *trainp = &train;

  ////////////////////////
  // feature extraction //
  ////////////////////////

  std::string forbidden_classes_s = conf->gS ( "analysis", "donttrain", "" );
  if ( forbidden_classes_s == "" )
  {
    forbidden_classes_s = conf->gS ( "analysis", "forbidden_classes", "" );
  }
  cn.getSelection ( forbidden_classes_s, forbidden_classes );

  // check the same thing for the training classes - this is very specific to our setup
  std::string forbidden_classesTrain_s = conf->gS ( "analysis", "donttrainTrain", "" );
  if ( forbidden_classesTrain_s == "" )
  {
    forbidden_classesTrain_s = conf->gS ( "analysis", "forbidden_classesTrain", "" );
  }
  cn.getSelection ( forbidden_classesTrain_s, forbidden_classesTrain );

  ProgressBar pb ( "Local Feature Extraction" );
  pb.show();

  int imgnb = 0;

  Examples examples;
  examples.filename = "training";

  int featdim = -1;
  classesInUse.clear();

  LOOP_ALL_S ( *trainp )
  {
    //EACH_S(classno, currentFile);
    EACH_INFO ( classno, info );
    std::string currentFile = info.img();

    CachedExample *ce = new CachedExample ( currentFile );

    const LocalizationResult *locResult = info.localization();
    if ( locResult->size() <= 0 )
    {
      fprintf ( stderr, "WARNING: NO ground truth polygons found for %s !\n",
                currentFile.c_str() );
      continue;
    }

    int xsize, ysize;
    ce->getImageSize ( xsize, ysize );

    Image labels ( xsize, ysize );
    labels.set ( 0 );
    locResult->calcLabeledImage ( labels, ( *classNames ).getBackgroundClass() );

    NICE::ColorImage img;
    try {
      img = ColorImage ( currentFile );
    } catch ( Exception ) {
      cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
      continue;
    }

    Globals::setCurrentImgFN ( currentFile );

    MultiChannelImageT<double> feats;

    // extract features
    featExtract->getFeats ( img, feats );
    featdim = feats.channels();
    feats.addChannel ( featdim );

    // append the gradient strength of every feature channel as an additional channel
    for ( int c = 0; c < featdim; c++ )
    {
      ImageT<double> tmp = feats[c];
      ImageT<double> tmp2 = feats[c+featdim];
      NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
    }
    featdim += featdim;

    // compute integral images
    for ( int c = 0; c < featdim; c++ )
    {
      feats.calcIntegral ( c );
    }
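
    // getIntegralValue(x-whs, y-whs, x+whs, y+whs, f) below uses these integral images to obtain
    // the sum of channel f over the (2*whs+1)x(2*whs+1) window around (x,y) in constant time,
    // via the usual identity I(x2,y2) - I(x1-1,y2) - I(x2,y1-1) + I(x1-1,y1-1);
    // clipping of window coordinates at the image border is presumably handled inside
    // MultiChannelImageT.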

    for ( int y = 0; y < ysize; y += featdist )
    {
      for ( int x = 0; x < xsize; x += featdist )
      {
        int classnoTmp = labels.getPixel ( x, y );

        if ( forbidden_classesTrain.find ( classnoTmp ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        if ( classesInUse.find ( classnoTmp ) == classesInUse.end() )
        {
          classesInUse.insert ( classnoTmp );
        }

        Example example;
        example.vec = NULL;
        example.svec = new SparseVector ( featdim );
        for ( int f = 0; f < featdim; f++ )
        {
          double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
          if ( val > 1e-10 )
            ( *example.svec ) [f] = val;
        }
        example.svec->normalize();

        example.position = imgnb;
        examples.push_back ( pair<int, Example> ( classnoTmp, example ) );
      }
    }

    delete ce;
    imgnb++;
    pb.update ( trainp->count() );
  }

  numberOfClasses = classesInUse.size();
  std::cerr << "numberOfClasses: " << numberOfClasses << std::endl;
  std::cerr << "classes in use: " << std::endl;
  for ( std::set<int>::const_iterator it = classesInUse.begin(); it != classesInUse.end(); it++ )
  {
    std::cerr << *it << " ";
  }
  std::cerr << std::endl;

  pb.hide();

  //////////////////////
  // train classifier //
  //////////////////////
  FeaturePool fp;

  Feature *f = new SparseVectorFeature ( featdim );
  f->explode ( fp );
  delete f;

  if ( classifier != NULL )
    classifier->train ( fp, examples );
  else
  {
    cerr << "no classifier selected?!" << endl;
    exit ( -1 );
  }

  fp.destroy();

  if ( save_cache )
  {
    if ( classifier != NULL )
      classifier->save ( cache + "/classifier.data" );
  }

  ////////////
  //clean up//
  ////////////
  for ( int i = 0; i < ( int ) examples.size(); i++ )
  {
    examples[i].second.clean();
  }
  examples.clear();

  cerr << "SemSeg training finished" << endl;
}

void SemSegNovelty::semanticseg ( CachedExample *ce, NICE::Image & segresult, NICE::MultiChannelImageT<double> & probabilities )
{
  Timer timer;
  timer.start();

  Examples examples;
  examples.filename = "testing";

  segresult.set ( 0 );

  int featdim = -1;

  std::string currentFile = Globals::getCurrentImgFN();

  int xsize, ysize;
  ce->getImageSize ( xsize, ysize );

  probabilities.reInit ( xsize, ysize, cn.getMaxClassno() + 1 );
  probabilities.set ( 0.0 );

  NICE::ColorImage img;
  try {
    img = ColorImage ( currentFile );
  } catch ( Exception ) {
    cerr << "SemSegNovelty: error opening image file <" << currentFile << ">" << endl;
    return;
  }

  MultiChannelImageT<double> feats;

  // extract features
  featExtract->getFeats ( img, feats );
  featdim = feats.channels();
  feats.addChannel ( featdim );

  // append the gradient strength of every feature channel as an additional channel
  for ( int c = 0; c < featdim; c++ )
  {
    ImageT<double> tmp = feats[c];
    ImageT<double> tmp2 = feats[c+featdim];
    NICE::FilterT<double, double, double>::gradientStrength ( tmp, tmp2 );
  }
  featdim += featdim;

  // compute integral images
  for ( int c = 0; c < featdim; c++ )
  {
    feats.calcIntegral ( c );
  }

  FloatImage uncert ( xsize, ysize );
  uncert.set ( 0.0 );

  FloatImage gpUncertainty ( xsize, ysize );
  FloatImage gpMean ( xsize, ysize );
  FloatImage gpMeanRatio ( xsize, ysize );
  FloatImage gpWeightAll ( xsize, ysize );
  FloatImage gpWeightRatio ( xsize, ysize );

  gpUncertainty.set ( 0.0 );
  gpMean.set ( 0.0 );
  gpMeanRatio.set ( 0.0 );
  gpWeightAll.set ( 0.0 );
  gpWeightRatio.set ( 0.0 );

  double maxunc           = -numeric_limits<double>::max();
  double maxGPUncertainty = -numeric_limits<double>::max();
  double maxGPMean        = -numeric_limits<double>::max();
  double maxGPMeanRatio   = -numeric_limits<double>::max();
  double maxGPWeightAll   = -numeric_limits<double>::max();
  double maxGPWeightRatio = -numeric_limits<double>::max();

  timer.stop();
  cout << "first: " << timer.getLastAbsolute() << endl;

  // we need this later on for the active learning measures
  double gpNoise = conf->gD ( "GPHIK", "noise", 0.01 );

  timer.start();
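
  // note: when OpenMP is enabled, the shared max* accumulators declared above and the result
  // images are written from several threads inside the loop below without synchronization;
  // a reduction or critical section would be needed for strictly deterministic parallel results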
#pragma omp parallel for
  for ( int y = 0; y < ysize; y += testWSize )
  {
    Example example;
    example.vec = NULL;
    example.svec = new SparseVector ( featdim );

    for ( int x = 0; x < xsize; x += testWSize )
    {
      for ( int f = 0; f < featdim; f++ )
      {
        double val = feats.getIntegralValue ( x - whs, y - whs, x + whs, y + whs, f );
        if ( val > 1e-10 )
          ( *example.svec ) [f] = val;
      }
      example.svec->normalize();

      ClassificationResult cr = classifier->classify ( example );

      // we need this if we want to compute the GP-AL measures later on
      double minMeanAbs ( numeric_limits<double>::max() );
      double maxMeanAbs ( 0.0 );
      double sndMaxMeanAbs ( 0.0 );
      double maxMean ( -numeric_limits<double>::max() );
      double sndMaxMean ( -numeric_limits<double>::max() );

      for ( int j = 0 ; j < cr.scores.size(); j++ )
      {
        if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
        {
          continue;
        }

        // check whether we found a class with a smaller abs mean than the current minimum
        if ( abs ( cr.scores[j] ) < minMeanAbs )
          minMeanAbs = abs ( cr.scores[j] );

        // check for a larger abs mean as well
        if ( abs ( cr.scores[j] ) > maxMeanAbs )
        {
          sndMaxMeanAbs = maxMeanAbs;
          maxMeanAbs = abs ( cr.scores[j] );
        }
        // and also for the second largest abs mean of all classes
        else if ( abs ( cr.scores[j] ) > sndMaxMeanAbs )
        {
          sndMaxMeanAbs = abs ( cr.scores[j] );
        }

        // check for a larger mean without abs as well
        if ( cr.scores[j] > maxMean )
        {
          sndMaxMean = maxMean;
          maxMean = cr.scores[j];
        }
        // and also for the second largest mean of all classes
        else if ( cr.scores[j] > sndMaxMean )
        {
          sndMaxMean = cr.scores[j];
        }
      }

      double firstTerm ( 1.0 / sqrt ( cr.uncertainty + gpNoise ) );

      // compute the heuristic GP-UNCERTAINTY, as proposed by Kapoor et al. in IJCV 2010
      // GP-UNCERTAINTY : |mean| / sqrt(predictive variance + GP noise)
      double gpUncertaintyVal = maxMeanAbs * firstTerm; // firstTerm = 1.0 / sqrt(cr.uncertainty + gpNoise)

      // compute results when we take the lowest absolute mean value of all classes
      double gpMeanVal = minMeanAbs;

      // look at the margin between the largest and the second largest mean value,
      // i.e., between the most plausible and the second most plausible class
      double gpMeanRatioVal = maxMean - sndMaxMean;

      double gpWeightAllVal ( 0.0 );
      double gpWeightRatioVal ( 0.0 );

      if ( numberOfClasses > 2 )
      {
        // compute the weight in the alpha-vector for every sample after assuming it to be
        // added to the training set.
        // Thereby, we measure its "importance" for the current model
        //
        // double firstTerm is already computed
        //
        // the second term is only needed when computing impacts
        // double secondTerm; //this is the nasty guy :/

        // --- compute the third term
        // this is the difference between predicted label and GT label
        std::vector<double> diffToPositive; diffToPositive.clear();
        std::vector<double> diffToNegative; diffToNegative.clear();
        double diffToNegativeSum ( 0.0 );

        for ( int j = 0 ; j < cr.scores.size(); j++ )
        {
          if ( forbidden_classesTrain.find ( j ) != forbidden_classesTrain.end() )
          {
            continue;
          }

          // look at the difference to +1
          diffToPositive.push_back ( abs ( cr.scores[j] - 1 ) );
          // look at the difference to -1
          diffToNegative.push_back ( abs ( cr.scores[j] + 1 ) );
          // sum up the differences to -1
          diffToNegativeSum += abs ( cr.scores[j] + 1 );
        }

        // let's subtract for every class its diffToNegative from the sum, add its diffToPositive,
        // and use this as the third term for this specific class.
        // the final value is obtained by minimizing over all classes
        //
        // originally, we minimize over all classes after building the final score
        // however, the first and the second term do not depend on the choice of
        // y*, therefore we minimize here already
        double thirdTerm ( numeric_limits<double>::max() );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          double tmpVal ( diffToPositive[tmpCnt] + ( diffToNegativeSum - diffToNegative[tmpCnt] ) );
          if ( tmpVal < thirdTerm )
            thirdTerm = tmpVal;
        }
        gpWeightAllVal = thirdTerm * firstTerm;
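
        // worked example with illustrative numbers: for scores (0.9, -0.7, -0.8) we get
        // diffToPositive = (0.1, 1.7, 1.8), diffToNegative = (1.9, 0.3, 0.2), diffToNegativeSum = 2.4;
        // the per-class candidates are 0.1+(2.4-1.9)=0.6, 1.7+(2.4-0.3)=3.8, 1.8+(2.4-0.2)=4.0,
        // so thirdTerm = 0.6 -- a sample whose most plausible class is already close to +1 and whose
        // remaining classes are close to -1 receives a small weight, i.e., low novelty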

        // now look at the ratio of the resulting weights for the most plausible
        // against the second most plausible class
        double thirdTermMostPlausible ( 0.0 );
        double thirdTermSecondMostPlausible ( 0.0 );
        for ( uint tmpCnt = 0; tmpCnt < diffToPositive.size(); tmpCnt++ )
        {
          if ( diffToPositive[tmpCnt] > thirdTermMostPlausible )
          {
            thirdTermSecondMostPlausible = thirdTermMostPlausible;
            thirdTermMostPlausible = diffToPositive[tmpCnt];
          }
          else if ( diffToPositive[tmpCnt] > thirdTermSecondMostPlausible )
          {
            thirdTermSecondMostPlausible = diffToPositive[tmpCnt];
          }
        }
        // compute the resulting score
        gpWeightRatioVal = ( thirdTermMostPlausible - thirdTermSecondMostPlausible ) * firstTerm;

        // finally, look for this feature how it would affect the whole model (summarized by the
        // weight vector alpha) if we used it as an additional training example
        // TODO this would be REALLY computationally demanding. Do we really want to do this?
        // gpImpactAll[s] ( pce[i].second.x, pce[i].second.y ) = thirdTerm*firstTerm*secondTerm;
        // gpImpactRatio[s] ( pce[i].second.x, pce[i].second.y ) = (thirdTermMostPlausible - thirdTermSecondMostPlausible)*firstTerm*secondTerm;
      }
      else // binary scenario
      {
        gpWeightAllVal = std::min ( abs ( cr.scores[*classesInUse.begin()] + 1 ), abs ( cr.scores[*classesInUse.begin()] - 1 ) );
        gpWeightAllVal *= firstTerm;
        gpWeightRatioVal = gpWeightAllVal;
      }

      int xs = std::max ( 0, x - testWSize / 2 );
      int xe = std::min ( xsize - 1, x + testWSize / 2 );
      int ys = std::max ( 0, y - testWSize / 2 );
      int ye = std::min ( ysize - 1, y + testWSize / 2 );

      // write the classification result and the novelty measures into the window around (x,y)
      for ( int yl = ys; yl <= ye; yl++ )
      {
        for ( int xl = xs; xl <= xe; xl++ )
        {
          for ( int j = 0 ; j < cr.scores.size(); j++ )
          {
            probabilities ( xl, yl, j ) = cr.scores[j];
          }
          segresult ( xl, yl ) = cr.classno;
          uncert ( xl, yl ) = cr.uncertainty;

          gpUncertainty ( xl, yl ) = gpUncertaintyVal;
          gpMean ( xl, yl )        = gpMeanVal;
          gpMeanRatio ( xl, yl )   = gpMeanRatioVal;
          gpWeightAll ( xl, yl )   = gpWeightAllVal;
          gpWeightRatio ( xl, yl ) = gpWeightRatioVal;
        }
      }

      if ( maxunc < cr.uncertainty )
        maxunc = cr.uncertainty;

      if ( maxGPUncertainty < gpUncertaintyVal )
        maxGPUncertainty = gpUncertaintyVal;
      if ( maxGPMean < gpMeanVal )
        maxGPMean = gpMeanVal;
      if ( maxGPMeanRatio < gpMeanRatioVal )
        maxGPMeanRatio = gpMeanRatioVal;
      if ( maxGPWeightAll < gpWeightAllVal )
        maxGPWeightAll = gpWeightAllVal;
      if ( maxGPWeightRatio < gpWeightRatioVal )
        maxGPWeightRatio = gpWeightRatioVal;

      example.svec->clear();
    }
    delete example.svec;
    example.svec = NULL;
  }

  // std::cerr << "uncertainty: " << gpUncertaintyVal << " minMean: " << gpMeanVal << " gpMeanRatio: " << gpMeanRatioVal << " weightAll: " << gpWeightAllVal << " weightRatio: "<< gpWeightRatioVal << std::endl;

  // determine regions
  if ( regionSeg != NULL )
  {
    NICE::Matrix mask;
    int regionsize = regionSeg->segRegions ( img, mask );
    // TODO: aggregate the pixel-wise scores and novelty values over the segmented regions
  }

  timer.stop();
  cout << "second: " << timer.getLastAbsolute() << endl;

  timer.start();

  ColorImage imgrgb ( xsize, ysize );

  std::stringstream out;
  std::vector< std::string > list2;
  StringTools::split ( Globals::getCurrentImgFN (), '/', list2 );
  out << uncertdir << "/" << list2.back();

  uncert.writeRaw ( out.str() + ".rawfloat" );

  // anchor the value range for the color coding (pixel (0,0) acts as minimum, (0,1) as maximum)
  uncert ( 0, 0 ) = 0.0;
  uncert ( 0, 1 ) = 1.0 + gpNoise;
  ICETools::convertToRGB ( uncert, imgrgb );
  imgrgb.write ( out.str() + "rough.png" );

  // invert the images such that large numbers correspond to high impact, high variance,
  // high importance, high novelty, ...
  for ( int y = 0; y < ysize; y++ )
  {
    for ( int x = 0; x < xsize; x++ )
    {
      gpUncertainty ( x, y ) = maxGPUncertainty - gpUncertainty ( x, y );
      gpMean ( x, y )        = maxGPMean - gpMean ( x, y );
      gpMeanRatio ( x, y )   = maxGPMeanRatio - gpMeanRatio ( x, y );
      gpWeightRatio ( x, y ) = maxGPWeightRatio - gpWeightRatio ( x, y );
    }
  }

  //
  gpUncertainty ( 0, 0 ) = 0.0;
  gpUncertainty ( 0, 1 ) = maxGPUncertainty;
  ICETools::convertToRGB ( gpUncertainty, imgrgb );
  imgrgb.write ( out.str() + "gpUncertainty.png" );

  //
  gpMean ( 0, 0 ) = 0.0;
  gpMean ( 0, 1 ) = maxGPMean;
  ICETools::convertToRGB ( gpMean, imgrgb );
  imgrgb.write ( out.str() + "gpMean.png" );

  //
  gpMeanRatio ( 0, 0 ) = 0.0;
  gpMeanRatio ( 0, 1 ) = maxGPMeanRatio;
  ICETools::convertToRGB ( gpMeanRatio, imgrgb );
  imgrgb.write ( out.str() + "gpMeanRatio.png" );

  //
  gpWeightAll ( 0, 0 ) = 0.0;
  gpWeightAll ( 0, 1 ) = maxGPWeightAll;
  ICETools::convertToRGB ( gpWeightAll, imgrgb );
  imgrgb.write ( out.str() + "gpWeightAll.png" );

  //
  gpWeightRatio ( 0, 0 ) = 0.0;
  gpWeightRatio ( 0, 1 ) = maxGPWeightRatio;
  ICETools::convertToRGB ( gpWeightRatio, imgrgb );
  imgrgb.write ( out.str() + "gpWeightRatio.png" );

  timer.stop();
  cout << "last: " << timer.getLastAbsolute() << endl;
}
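
/* Usage sketch (illustrative only; the config file name and the surrounding test loop are
 * hypothetical, the class and method signatures are the ones defined in this file):
 *
 *   Config conf ( "semsegnovelty.conf" );
 *   MultiDataset md ( &conf );
 *
 *   // trains the GP-HIK classifier on the "train" set, or reads it from the cache
 *   SemSegNovelty semseg ( &conf, &md );
 *
 *   // for each test image, wrapped in a CachedExample *ce with the current file name
 *   // registered via Globals::setCurrentImgFN():
 *   //   NICE::Image segresult ( xsize, ysize );
 *   //   NICE::MultiChannelImageT<double> probabilities;
 *   //   semseg.semanticseg ( ce, segresult, probabilities );
 *   // afterwards segresult holds the predicted class per pixel, probabilities the per-class
 *   // scores, and the novelty maps (*.rawfloat / *.png) have been written to uncertdir.
 */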