/** 
* @file testKernelOptimization.cpp
* @brief test kernel optimization
* @author Erik Rodner
* @date 12/08/2009

*/

#include <sstream>
#include "core/basics/Config.h"
#include "vislearning/cbaselib/LabeledSet.h"
#include "vislearning/math/kernels/KernelExp.h"
#include "vislearning/regression/gpregression/GPRegressionOptimizationProblem.h"

using namespace std;
using namespace OBJREC;
using namespace NICE;

/**
 * Compare the closed-form gradient of the GP regression objective with a
 * forward finite-difference approximation while sweeping the first kernel
 * parameter over [iparameters[0]-interval, iparameters[0]+interval].
 *
 * @param kernelData     precomputed kernel data (updated per parameter setting)
 * @param y              label vectors (one per regression task)
 * @param kernelFunction parameterized kernel whose first parameter is swept
 * @param iparameters    center point of the parameter sweep
 * @param step           sweep increment and finite-difference step size
 * @param interval       half-width of the sweep around iparameters[0]
 */
void testOverallDeriv ( KernelData *kernelData, const VVector & y, ParameterizedKernel & kernelFunction, const Vector & iparameters, double step, double interval )
{
	GPRegressionOptimizationProblem gp_problem ( kernelData, y, &kernelFunction, true, NULL );

	double maxDifference = 0.0;
	double previousObjective = 0.0;
	bool havePrevious = false;

	for ( double offset = -interval ; offset <= interval ; offset += step )
	{
		// shift only the first parameter; the rest stay at the center point
		Vector shiftedParameters ( iparameters );
		shiftedParameters[0] += offset;

		gp_problem.setParameters ( shiftedParameters );

		double objective = gp_problem.computeObjective ( );

		Vector gradient;
		gp_problem.computeGradient ( gradient );
		double closedFormDeriv = gradient[0];

		if ( havePrevious )
		{
			// forward difference between consecutive sweep points
			double finiteDiff = ( objective - previousObjective ) / step;
			double diff = fabs ( closedFormDeriv - finiteDiff );

			if ( diff > maxDifference )
				maxDifference = diff;
			cerr << "EVAL: " << shiftedParameters[0] << " objective " << objective << " D(closedform) " << closedFormDeriv << " D(approximated) " << finiteDiff << endl;

		}

		havePrevious = true;
		previousObjective = objective;
	}

	cerr << "Maximum difference between real derivative and approximation: " << maxDifference << endl;
}

/**
 * Compare the closed-form kernel Jacobian (w.r.t. the first kernel
 * parameter) with a forward finite-difference approximation, using the
 * mean over all kernel-matrix entries as a scalar summary. The first
 * parameter is swept over [iparameters[0]-1, iparameters[0]+1].
 *
 * @param kernelData     kernel data, recomputed for every parameter setting
 * @param kernelFunction parameterized kernel whose first parameter is swept
 * @param iparameters    center point of the parameter sweep
 * @param step           sweep increment and finite-difference step size
 */
void testKernelDeriv ( KernelData *kernelData, ParameterizedKernel & kernelFunction, const Vector & iparameters, double step )
{
	double maxDifference = 0.0;
	double previousMean = 0.0;
	bool havePrevious = false;

	for ( double offset = -1.0 ; offset <= 1.0 ; offset += step )
	{
		// shift only the first parameter; the rest stay at the center point
		Vector shiftedParameters ( iparameters );
		shiftedParameters[0] += offset;

		kernelFunction.setParameters ( shiftedParameters );
		kernelFunction.updateKernelData ( kernelData );

		Matrix jacobiMatrix;
		kernelFunction.getKernelJacobi ( 0, shiftedParameters, kernelData, jacobiMatrix );

		// reduce kernel matrix and Jacobian to their mean entries
		const Matrix & kernelMatrix = kernelData->getKernelMatrix();
		double kernelMean = 0.0;
		double derivMean = 0.0;
		for ( uint r = 0 ; r < kernelMatrix.rows() ; r++ )
			for ( uint c = 0 ; c < kernelMatrix.cols() ; c++ )
			{
				kernelMean += kernelMatrix(r,c);
				derivMean += jacobiMatrix(r,c);
			}

		kernelMean /= kernelMatrix.rows() * kernelMatrix.cols();
		derivMean /= kernelMatrix.rows() * kernelMatrix.cols();

		if ( havePrevious )
		{
			// forward difference between consecutive sweep points
			double finiteDiff = ( kernelMean - previousMean ) / step;
			double diff = fabs ( derivMean - finiteDiff );

			if ( diff > maxDifference )
				maxDifference = diff;
		}

		havePrevious = true;
		previousMean = kernelMean;
	}

	cerr << "Maximum difference between real derivative and approximation: " << maxDifference << endl;

}

/** 
    
    test kernel optimization 
    
*/
/**
 * Entry point: loads a labeled training set, L2-normalizes the features,
 * builds the kernel data for an exponential (RBF) kernel, and then runs
 * either the kernel-Jacobian test (main:testkernelderiv = true, default)
 * or the full objective-gradient test on per-class +/-1 label vectors.
 */
int main (int argc, char **argv)
{
	std::set_terminate(__gnu_cxx::__verbose_terminate_handler);

	Config conf ( argc, argv );

	// Read rbf_gamma exactly once so the kernel object and the initial
	// parameter vector are guaranteed to agree (previously the config was
	// queried twice with a duplicated default value).
	double rbf_gamma = conf.gD("KCGPRegression", "rbf_gamma", -2.5);
	KernelExp kernelFunction ( rbf_gamma, 0.0 );

	LabeledSetVector train;
	string setfn = conf.gS("main", "train");
	int format = conf.gI("main", "format", 2 );
	train.read ( setfn, format );

	VVector trainUnlabeled;
	Vector y;
	train.getFlatRepresentation ( trainUnlabeled, y );

	KernelData kernelData ( &conf );
	// L2-normalize every feature vector before computing the kernel
	for ( VVector::iterator i = trainUnlabeled.begin(); i != trainUnlabeled.end(); i++ )
	{
		Vector & v = *i;
		v.normL2();
	}

	kernelFunction.calcKernelData ( trainUnlabeled, &kernelData );

	// initial kernel parameters: [ rbf_gamma, 0.0 ]
	Vector iparameters ( 2 );
	iparameters[0] = rbf_gamma;
	iparameters[1] = 0.0;

	double step = conf.gD("main", "step", 0.01 );
	double interval = conf.gD("main", "interval", 1.0 );

	bool testkernelderiv = conf.gB("main", "testkernelderiv", true );

	if ( testkernelderiv )
		testKernelDeriv ( &kernelData, kernelFunction, iparameters, step );
	else {
		// Build one +/-1 target vector per class (one-vs-all), or a single
		// vector for the binary {0,1} case, then test the objective gradient.
		VVector yy;
		if ( (y.Max() == 1) && (y.Min() == 0) )
		{
			cerr << "Binary classification problem" << endl;
			Vector by ( y.size() );
			for ( size_t j = 0 ; j < y.size() ; j++ )
				by[j] = 2*y[j] - 1;  // map {0,1} labels to {-1,+1}
			yy.push_back (by);
		} else {
			// assumes labels are 0..numClasses-1 — TODO confirm against the dataset format
			int numClasses = (int)y.Max() + 1;
			for ( int i = 0 ; i < numClasses ; i++ )  // int index: avoids signed/unsigned mismatch
			{
				Vector by ( y.size() );
				for ( size_t j = 0 ; j < y.size() ; j++ )
					if ( y[j] == i ) by[j] = 1;
					else by[j] = -1;

				yy.push_back(by);
			}
		}
		testOverallDeriv ( &kernelData, yy, kernelFunction, iparameters, step, interval );
	}

	return 0;
}