/**
* @file KCGPLaplace.cpp
* @brief Laplace approximation for Gaussian process classification
* @author Erik Rodner
* @date 12/03/2009
*/
#include <cmath>
#include <iostream>
#include <typeinfo>

#include "core/vector/Algorithms.h"

#include "KCGPLaplace.h"

#include "LHCumulativeGauss.h"

#include "vislearning/classifier/kernelclassifier/GPLaplaceOptimizationProblem.h"
#include "core/optimization/FirstOrderTrustRegion.h"
#include "core/optimization/FirstOrderRasmussen.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;


KCGPLaplace::KCGPLaplace( const Config *conf, Kernel *kernel, const string & section ) 
	: KernelClassifier ( conf, kernel ), laplaceApproximation(conf, section)
{
	optimizeParameters = conf->gB(section, "optimize_parameters", true );
	string optimizationMethod_s = conf->gS(section, "optimization_method", "rasmussen" );

	if ( optimizationMethod_s == "rasmussen" ) 
		optimizationMethod = OPTIMIZATION_METHOD_RASMUSSEN;
	else if ( optimizationMethod_s == "trustregion" )
		optimizationMethod = OPTIMIZATION_METHOD_TRUSTREGION;
	else
		fthrow(Exception, "Optimization method " << optimizationMethod_s << " is unknown.");

	ParameterizedKernel *pkernelFunction = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
	if ( optimizeParameters && (pkernelFunction == NULL) )
	{
		cerr << "KCGPLaplace: Unable to optimize hyperparameters with no specified kernel function" << endl;
		cerr << "KCGPLaplace: Switching to non-optimization mode" << endl;
		optimizeParameters = false;
	}

	// currently the only supported likelihood function
	likelihoodFunction = new LHCumulativeGauss( conf->gD(section, "likelihood_lengthscale", sqrt(2.0)) );

	verbose = conf->gB(section, "verbose", true );

}

KCGPLaplace::KCGPLaplace( const KCGPLaplace & src ) : KernelClassifier ( src ), laplaceApproximation ( src.laplaceApproximation )
{
	verbose = src.verbose;
	optimizeParameters = src.optimizeParameters;
	optimizationMethod = src.optimizationMethod;
	// we lose the likelihood length scale in this case
	likelihoodFunction = new LHCumulativeGauss(); // FIXME: clone() for likelihoods
}

KCGPLaplace::~KCGPLaplace()
{
	if ( likelihoodFunction != NULL )
		delete likelihoodFunction;
}


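// Train the classifier: optionally optimize the kernel hyperparameters and compute
// the Laplace approximation of the posterior, which is later used in classifyKernel().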
void KCGPLaplace::teach ( KernelData *kernelData, const NICE::Vector & _y )
{
	if ( _y.size() <= 0 ) {
		fthrow(Exception, "Number of training vectors is zero!");
	}
	
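	// map the labels from {0,1} to {-1,+1}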
	this->y.resize ( _y.size() );
	this->y = _y;
	this->y = 2*this->y;
	this->y += -1.0;
	
	if ( (this->y.Min() != -1) || (this->y.Max() != 1) ) {
		fthrow(Exception, "This classifier is suitable only for binary classification problems" );
	}

	if ( optimizeParameters ) 
	{
		if ( (kernelFunction != NULL) )
		{
			ParameterizedKernel *kernelPara = dynamic_cast< ParameterizedKernel * > ( kernelFunction );
			if ( kernelPara == NULL ) {
				fthrow(Exception, "KCGPLaplace: you have to specify a parameterized kernel !");
			}
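			// set up the hyperparameter optimization problem, whose objective is based on
			// the Laplace approximation of the marginal likelihood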
			GPLaplaceOptimizationProblem gpopt ( kernelData, this->y, kernelPara, likelihoodFunction, &laplaceApproximation, verbose );
			cout << "KCGPLaplace: Hyperparameter optimization ..." << endl;

			if ( optimizationMethod == OPTIMIZATION_METHOD_TRUSTREGION )
			{
				if ( verbose ) 
					cerr << "KCGPLaplace: using trust region optimizer" << endl;
				FirstOrderTrustRegion *optimizer = new FirstOrderTrustRegion();
				optimizer->setEpsilonG ( 0.01 );
				optimizer->setMaxIterations ( 200 );
				optimizer->optimizeFirst ( gpopt );
				delete optimizer;

			} else if ( optimizationMethod == OPTIMIZATION_METHOD_RASMUSSEN ) {
				if ( verbose ) 
					cerr << "KCGPLaplace: using conjugate gradient optimizer" << endl;

				FirstOrderRasmussen *optimizer = new FirstOrderRasmussen();
				optimizer->setEpsilonG ( 0.01 );
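				// the negative value presumably follows Rasmussen's minimize() convention:
				// the limit counts function evaluations rather than line searches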
				optimizer->setMaxIterations ( -200 );
				optimizer->optimizeFirst ( gpopt );
				delete optimizer;
			} else {
				fthrow(Exception, "Unknown optimization method " << optimizationMethod );
			}
			
			cout << "KCGPLaplace: Hyperparameter optimization ...done" << endl;
		
			gpopt.update();

			Vector parameters;
			kernelPara->getParameters ( parameters );
			cout << "KCGPLaplace: Optimization finished: " << parameters << endl << endl;
		} else {
			fthrow(Exception, "KCGPRegression: you have to specify a kernel function !" );
		}
	} else {
		laplaceApproximation.approximate ( kernelData, this->y, likelihoodFunction );
	}

}

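// Classify a single example from its kernel values: the score for class 1 is the
// predictive probability returned by the Laplace approximation, thresholded at 0.5.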
ClassificationResult KCGPLaplace::classifyKernel ( const NICE::Vector & kernelVector, double kernelSelf ) const
{
	double yEstimate = laplaceApproximation.predict ( kernelVector, kernelSelf, y, likelihoodFunction );

	FullVector scores ( 2 );
	scores[0] = 1.0 - yEstimate;
	scores[1] = yEstimate;
	ClassificationResult r ( (yEstimate < 0.5) ? 0 : 1, scores );

	return r;
}

KCGPLaplace *KCGPLaplace::clone() const
{
	return new KCGPLaplace ( *this );
}