/** 
* @file FPCGPHIK.cpp
* @brief feature pool interface for our GP HIK classifier
* @author Alexander Freytag
* @date 02/01/2012

*/
#include <iostream>

#include "core/basics/numerictools.h"
#include <core/basics/Timer.h>

#include "FPCGPHIK.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;


/**
 * @brief Read all wrapper settings from the given config section and
 *        construct the wrapped GPHIK classifier.
 * @param conf        configuration object to read the settings from
 * @param confSection section of the config holding our settings
 */
FPCGPHIK::FPCGPHIK( const Config *conf, const string & confSection ) 
{
  this->verbose                           = conf->gB( confSection, "verbose", false );
  this->useSimpleBalancing                = conf->gB( confSection, "use_simple_balancing", false );
  this->minSamples                        = conf->gI( confSection, "min_samples", -1 );
  this->performOptimizationAfterIncrement = conf->gB( confSection, "performOptimizationAfterIncrement", true );

  this->classifier = new GPHIKClassifier( conf, confSection );
}

/** @brief Destructor -- releases the wrapped classifier. */
FPCGPHIK::~FPCGPHIK()
{
  // delete on a NULL pointer is well-defined, so no explicit guard is needed
  delete classifier;
}

/**
 * @brief Classify a single example given as an OBJREC Example struct.
 * @param pe example whose sparse feature vector (pe.svec) will be classified
 * @return classification result as produced by the SparseVector overload
 * @throws Exception if the example carries no sparse vector
 */
ClassificationResult FPCGPHIK::classify ( Example & pe )
{
  const SparseVector * featureVector = pe.svec;

  if ( featureVector == NULL )
    fthrow(Exception, "FPCGPHIK requires example.svec (SparseVector stored in an Example struct)");

  return this->classify( featureVector );
}

/**
 * @brief Classify a sparse feature vector with the wrapped GPHIK classifier.
 * @param example sparse feature vector to be classified (not owned)
 * @return ClassificationResult with dense per-class scores, the winning
 *         class index, and the predictive uncertainty
 * @throws Exception if the classifier produced an empty score vector
 */
ClassificationResult FPCGPHIK::classify ( const NICE::SparseVector * example )
{
  NICE::SparseVector scores;
  // fix: initialize result and uncertainty -- previously they were read
  // below without initialization, which is UB if the wrapped classifier
  // leaves them unset
  int result ( -1 );
  double uncertainty ( 0.0 );

  classifier->classify ( example,  result, scores, uncertainty);
  
  if ( scores.size() == 0 ) {
    fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  }

  // convert the sparse scores into a dense FullVector; classes without an
  // entry get -DBL_MAX so maxElement() can never pick them
  int classes = scores.getDim();
  FullVector fvscores(classes);
  
  NICE::SparseVector::const_iterator it;
  for(int c = 0; c < classes; c++)
  {
    it = scores.find(c);
    if ( it == scores.end() )
      fvscores[c] = -std::numeric_limits<double>::max();
    else
      fvscores[c] = it->second;
  }

  ClassificationResult r ( fvscores.maxElement(), fvscores );
  r.uncertainty = uncertainty;
  
  if (verbose)
  {
    std::cerr << " FPCGPHIK::classify scores" << std::endl;
    scores.store(std::cerr);
    std::cerr << " FPCGPHIK::classify fvscores" << std::endl;
    fvscores.store(std::cerr);
  }

  return r;
}

/** training process */
/**
 * @brief Training: flattens the OBJREC Examples into sparse vectors (with an
 *        optional simple random class-balancing) and trains the wrapped
 *        GPHIK classifier. The feature pool argument is ignored entirely.
 * @param fp       feature pool (unused)
 * @param examples labeled training examples; each must carry an svec
 * @throws Exception if an example carries no sparse vector
 */
void FPCGPHIK::train ( FeaturePool & fp, Examples & examples )
{
  // we completely ignore the feature pool :)
  //
  initRand(0);
  Vector classCounts;
  int minClass = -1;
  
  if (verbose) 
    std::cerr << "FPCGPHIK::train" << std::endl;

  if ( useSimpleBalancing)
  {
    classCounts.resize( examples.getMaxClassNo()+1 );
    classCounts.set( 0.0 );
    for ( uint i = 0 ; i < examples.size() ; i++ )
      classCounts[ examples[i].first ]++;
    // we need a probability distribution
    //classCounts.normalizeL1();
    // we need the class index of the class with the least non-zero examples
    for ( uint i = 0 ; i < classCounts.size(); i++ )
      if ( (classCounts[i] > 0) && ((minClass < 0) || (classCounts[i] < classCounts[minClass])) )
        minClass = i;
    if (verbose)
    {
      cerr << "Class distribution: " << classCounts << endl;
      cerr << "Class with the least number of examples: " << minClass << endl;
    }
    // fix: guard against minClass == -1 (no class with a positive count,
    // e.g. an empty example set), which previously caused an out-of-bounds
    // access classCounts[-1]
    if ( (minSamples < 0) && (minClass >= 0) )
      minSamples = classCounts[minClass];
  }

  // (multi-class) label vector
  Vector y ( examples.size() /* maximum size */ );

  // flat structure of our training data
  std::vector< SparseVector * > sparseExamples;

  if (verbose)
    cerr << "Converting (and sampling) feature vectors" << endl;
  for ( uint i = 0 ; i < examples.size() ; i++ )
  {
    const Example & example = examples[i].second;
    int classno = examples[i].first;
    
    // simple weird balancing method: keep an example of class c with
    // probability minSamples / classCounts[c]
    if ( useSimpleBalancing ) 
    {
      double t = randDouble() * classCounts[classno];
      if ( t >= minSamples ) continue;
    }

    // label is written before the push_back, so index == current size
    y[ sparseExamples.size() ] = classno;
    if ( example.svec == NULL )
      fthrow(Exception, "FPCGPHIK requires example.svec (SparseVector stored in an Example struct)");
    sparseExamples.push_back( example.svec );    
  }

  // we only use a subset for training
  y.resize( sparseExamples.size() );
  
  classifier->train(sparseExamples, y);
}

/** training process */
/**
 * @brief Training from already-flattened data: sparse feature vectors plus
 *        one binary label vector per class. Delegates to the wrapped
 *        GPHIK classifier.
 * @param examples  sparse training feature vectors (not owned)
 * @param binLabels map from class number to its binary label vector
 */
void FPCGPHIK::train ( const std::vector< SparseVector *> & examples, std::map<int, NICE::Vector> & binLabels )
{
  this->classifier->train( examples, binLabels );
}

/** @brief Drop the wrapped classifier; the wrapper is unusable until re-trained/restored. */
void FPCGPHIK::clear ()
{
  // delete on NULL is a no-op, so no guard is required
  delete classifier;
  classifier = NULL;
}

/**
 * @brief Cloning is not supported yet.
 * @throws Exception always
 */
FeaturePoolClassifier *FPCGPHIK::clone () const
{
  fthrow(Exception, "FPCGPHIK: clone() not yet implemented" );

  // unreachable; satisfies the non-void return type
  return NULL;
}

/**
 * @brief Predictive uncertainty for an example given as an Example struct.
 * @param pe            example whose sparse vector is evaluated
 * @param uncertainties output vector filled by the wrapped classifier
 * @throws Exception if the example carries no sparse vector
 */
void FPCGPHIK::predictUncertainty( Example & pe, NICE::Vector & uncertainties )
{
  const SparseVector * featureVector = pe.svec;
  if ( featureVector == NULL )
    fthrow(Exception, "FPCGPHIK requires example.svec (SparseVector stored in an Example struct)");
  this->classifier->predictUncertainty( featureVector, uncertainties );
}
   
/**
 * @brief Predictive uncertainty for a sparse feature vector; delegates to
 *        the wrapped GPHIK classifier.
 * @param example       sparse feature vector (not owned)
 * @param uncertainties output vector filled by the wrapped classifier
 */
void FPCGPHIK::predictUncertainty( const NICE::SparseVector * example, NICE::Vector & uncertainties )
{
  this->classifier->predictUncertainty( example, uncertainties );
}

//---------------------------------------------------------------------
//                           protected methods
//---------------------------------------------------------------------
void FPCGPHIK::restore ( std::istream & is, int format )
{
  if (is.good())
  {
    classifier->restore(is, format);  
    
    std::string tmp;
    is >> tmp; //"performOptimizationAfterIncrement: "
    is >> this->performOptimizationAfterIncrement;
  }
  else
  {
    std::cerr << "FPCGPHIK::restore -- InStream not initialized - restoring not possible!" << std::endl;
  }
}

void FPCGPHIK::store ( std::ostream & os, int format ) const
{
  if (os.good())
  {
    os.precision (numeric_limits<double>::digits10 + 1);
    
    classifier->store(os, format);
    
    os << "performOptimizationAfterIncrement: " << performOptimizationAfterIncrement << std::endl;
  }
  else
  {
    std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  }
}

void FPCGPHIK::addExample( const Example & pe, const double & label)
{
  const SparseVector *svec = pe.svec;
  classifier->addExample(svec, label, this->performOptimizationAfterIncrement);
}

/**
 * @brief Incrementally add a batch of labeled examples: flattens them into
 *        sparse vectors plus a label vector and forwards everything to the
 *        wrapped classifier. Does nothing for an empty batch.
 * @param newExamples labeled examples to add; each must carry an svec
 * @throws Exception if an example carries no sparse vector
 */
void FPCGPHIK::addMultipleExamples( Examples & newExamples)
{
  //are new examples available? If not, nothing has to be done
  if ( newExamples.size() < 1)
    return;

  const uint exampleCnt = newExamples.size();

  // (multi-class) label vector
  Vector labels ( exampleCnt );

  // flat structure of our training data
  std::vector< const SparseVector * > featureVectors;

  if (verbose)
    cerr << "Converting (and sampling) feature vectors" << endl;

  for ( uint idx = 0 ; idx < exampleCnt ; idx++ )
  {
    const Example & curExample = newExamples[idx].second;

    labels[ idx ] = newExamples[idx].first;
    if ( curExample.svec == NULL )
      fthrow(Exception, "FPCGPHIK requires example.svec (SparseVector stored in an Example struct)");
    featureVectors.push_back( curExample.svec );
  }

  classifier->addMultipleExamples(featureVectors, labels, this->performOptimizationAfterIncrement);
}