#ifdef NICE_USELIB_CPPUNIT
#include "TestLaplace.h"
#include <string>
#include <iostream>
#include <stdexcept>
#include <core/basics/cppunitex.h>
#include <core/basics/numerictools.h>
#include <vislearning/classifier/kernelclassifier/LikelihoodFunction.h>
#include <vislearning/classifier/kernelclassifier/GPLaplaceOptimizationProblem.h>
#include <vislearning/classifier/kernelclassifier/LaplaceApproximation.h>
#include <vislearning/classifier/kernelclassifier/LHCumulativeGauss.h>
#include <vislearning/math/kernels/KernelLinearCombination.h>


using namespace NICE;
using namespace OBJREC;
using namespace std;

CPPUNIT_TEST_SUITE_REGISTRATION( TestLaplace );

void TestLaplace::setUp() {
}

void TestLaplace::tearDown() {
}

void TestLaplace::testLikelihoodDiff()
{
	uint numSteps = 10000;
	double lengthScale = 1.3;
	double bias = 0.4;
	LHCumulativeGauss l (lengthScale, bias);    
    
	for ( uint degree = 0 ; degree < 3 ; degree++ )
	{
		// test gradient
		double oldv = 0.0;
		double error = 0.0;
		for ( uint i = 0 ; i < numSteps; i++ )
		{
			double f = i / (double)numSteps;
			double v;
			if ( degree == 2 )
				v = l.hessian(1.0,f);
			else if ( degree == 1 )
				v = l.gradient(1.0,f);
			else
				v = l.logLike(1.0,f);
			if ( i > 0 )
			{
				double d = 0.0;
				d = (v - oldv)*numSteps;
				double diff = 0.0;
				
				if ( degree == 2 )
					diff = l.thirdgrad(1.0,f);
				else if ( degree == 1 )
					diff = l.hessian(1.0,f);
				else
					diff = l.gradient(1.0,f);
				error += fabs(d-diff);
			}
			oldv = v;
		}
		error /= numSteps;
		CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, error, 1e-4);
	}
}

void TestLaplace::testCumGaussPredict()
{
    // test integral stuff
	uint numSteps = 100000;
	
	double lengthScale = 1.3;
	double bias = 0.4;
	LHCumulativeGauss l (lengthScale, bias);    

	double mean = 0.0;
	double variance = 1.0;
	double sum = 0.0;
	for ( uint i = 0 ; i < numSteps ; i++ )
	{
		double f = randGaussDouble( sqrt(variance) ) + mean;
		sum += l.likelihood(1.0,f);
	}

	sum /= numSteps;
    
	CPPUNIT_ASSERT_DOUBLES_EQUAL(sum, l.predictAnalytically( mean, variance ), 1e-2);
}

void TestLaplace::testHyperParameterOptGradients()
{
	double step = 0.00001;
	double interval = 1.0;
	double a_max = 0.0;
    bool firstIteration = true;
    double oldObj = 0.0;

	Vector iparameters (1);
	iparameters[0] = log(1.0);

	Vector y (6, 1.0);
	for ( uint i = 0 ; i < 3 ; i++ )
		y[i] = -1.0;

	Matrix X (6, 2);	
	X(0,0) = 1.0; X(0,1) = 0.0;
	X(1,0) = 0.3; X(1,1) = 0.4;
	X(2,0) = 0.8; X(2,1) = 0.1;
	X(3,0) = -1.0; X(3,1) = 0.2;
	X(4,0) = -0.8; X(4,1) = 0.1;
	X(5,0) = -0.3; X(5,1) = 0.5;
	Matrix kernelMatrix (6,6);
	for ( uint i = 0 ; i < X.rows(); i++ )
		for ( uint j = 0 ; j < X.rows(); j++ )
			kernelMatrix(i,j) = exp(- (X.getRow(i) - X.getRow(j)).normL2());
	
	KernelData kernelData;
	kernelData.setCachedMatrix ( 0, &kernelMatrix );
	KernelLinearCombination kernelFunction ( 1 );
	kernelFunction.updateKernelData( &kernelData );
	

	LHCumulativeGauss likelihood;
	LaplaceApproximation laplaceApproximation;
    GPLaplaceOptimizationProblem gp_problem ( &kernelData, y, &kernelFunction, &likelihood, &laplaceApproximation, false );
    for ( double s = -interval ; s <= interval ; s+=step )
    {
        Vector parameters ( iparameters );
        parameters[0] += s;

        gp_problem.setParameters( parameters );

        double obj = gp_problem.computeObjective ( );

        Vector gradient;
        gp_problem.computeGradient( gradient );
        double deriv = gradient[0];

        if ( ! firstIteration )
        {
            double derivApprox = ( obj - oldObj ) / step;
            double a = fabs(deriv - derivApprox);

            if ( a > a_max )
                a_max = a;
            // DEBUGGING:
			// cerr << "EVAL: " << parameters[0] << " objective " << obj << " D(closedform) " << deriv << " D(approximated) " << derivApprox << endl;
        }

        firstIteration = false;
        oldObj = obj;
    }

	CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, a_max, 1e-5);
	
}

void TestLaplace::testModeFinding()
{
	Config conf;
	//conf.sB("LaplaceApproximation", "laplace_verbose", true );
	LHCumulativeGauss likelihood;

	Vector y (6, 1.0);
	for ( uint i = 0 ; i < 3 ; i++ )
		y[i] = -1.0;

	/** Test 1: Check the special case of a diagonal kernel matrix */
	{
		Matrix K (6,6);
		K.setIdentity();
		KernelData kernelData ( &conf, K );

		/* Due to the diagonal kernel matrix
		 * mode finding is really simple
		 *
		 * objective function:
		 * log L(y_i, f_i) - 0.5 * f_i^2
		 * = log ( ( ::erf(y*f)+1 ) / 2.0 ) - 0.5 * f_i^2
		 * gnuplot:
		 * plot [-1:1] log ((erf(x)+1) / 2.0) - 0.5*x**2
		 *
		 *
		 * and the mode f should obey the
		 * equation:
		 * L'(y_i, f_i) - f_i  = 0
		 *
		 * L'(y_i, f_i) = y_i*exp(-f_i^2)/(L(y,f)*sqrt(Pi)) ;
		 * L(y_i, f_i) = ( ::erf(y*f)+1 ) / 2.0
		 *
		 * y_i = 1 ->
		 * exp(-f_i^2)/sqrt(PI) = f_i * (erf(f)+1) / 2.0
		 * gnuplot
		 * plot [-1:1] exp(-x**2)/sqrt(pi), x * (erf(x)+1) / 2.0
		 *
		 * -> intersection at f = 0.541132
		 *
		 */
		
		LaplaceApproximation laplaceApproximation ( &conf );
		laplaceApproximation.approximate ( &kernelData, y, &likelihood );
		const Vector & mode = laplaceApproximation.getMode();

		for ( uint k = 0 ; k < mode.size(); k++ )
			CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.541132, y[k] * mode[k], 1e-5 );
	}
	
	/** Test 2: Check the self-consistent equation of the Mode */
	{
		Matrix X (6, 2);	
		X(0,0) = 1.0; X(0,1) = 0.0;
		X(1,0) = 0.3; X(1,1) = 0.4;
		X(2,0) = 0.8; X(2,1) = 0.1;
		X(3,0) = -1.0; X(3,1) = 0.2;
		X(4,0) = -0.8; X(4,1) = 0.1;
		X(5,0) = -0.3; X(5,1) = 0.5;
		Matrix K (6,6);
		for ( uint i = 0 ; i < X.rows(); i++ )
			for ( uint j = 0 ; j < X.rows(); j++ )
				K(i,j) = exp(- (X.getRow(i) - X.getRow(j)).normL2());
		
		KernelData kernelData ( &conf, K );
		LaplaceApproximation laplaceApproximation ( &conf );
		laplaceApproximation.approximate ( &kernelData, y, &likelihood );
		const Vector & mode = laplaceApproximation.getMode();

		Vector gradLikelihood ( mode.size() );
		for ( uint i = 0; i < mode.size(); i++ )
			gradLikelihood[i] = likelihood.gradient ( y[i], mode[i] );

		// the mode should obey the following equation: mode = K gradLikelihood 
		// (cf. eq. 3.17 Rasmussen and Williams)
		CPPUNIT_ASSERT_DOUBLES_EQUAL ( 0.0, ( mode - K * gradLikelihood ).normL2(), 1e-15 );
	}
}

#endif