/** 
* @brief the cumulative gaussian likelihood function
* @author Erik Rodner
* @date 02/17/2010

*/
#include <iostream>
#include <math.h>

#include "LHCumulativeGauss.h"

using namespace OBJREC;
using namespace std;

static const double sqrt2pi = sqrt(2*M_PI);


LHCumulativeGauss::LHCumulativeGauss( double _lengthScale, double _bias )
{
	lengthScale = _lengthScale;
	bias = _bias;
}

double LHCumulativeGauss::stdNormPDF (double x)
{
	return exp(-x*x)/sqrt(M_PI);
}

double LHCumulativeGauss::thirdgrad ( double y, double f ) const
{
	/** Computing the third gradient of the LOG likelihood with
	 *  respect to f
	 *  dl / df hessian = hessian * (-2 * (f*lengthScale + bias)) *lengthScale
	 *  	+ gradient * (-2*lengthScale*lengthScale) - 2 * hessian * gradient
	 *
	 **/
	double gradient = this->gradient(y,f);
	double hessian = this->hessian(y,f);
	return -2* ( hessian*lengthScale*(f*lengthScale+bias) + gradient * lengthScale * lengthScale
		+ hessian * gradient );
}

double LHCumulativeGauss::hessian ( double y, double f ) const
{
	/** Computing the hessian of the LOG likelihood with respect to f 
	 * d^2 / (d^2 f) log l(x) = d/df (dl/df * 1/l(f)) 
	 *                        = (d^2 l)/(d^2 f) 1/l(f) - dl/df dl/df / (l(f)^2)
	 *                        = (d^2 l)/(d^2 f) 1/l(f) - gradient * gradient
	 *
	 * dl/df = y*lengthScale*stdNormPDF(f*lengthScale + bias)
	 * d/df dl/df = y*lengthScale*stdNormPDF(f*lengthScale + bias)*(-2 * (f*lengthScale + bias)) * lengthScale
	 * (d^2 l)/(d^2 f) / l(f) = gradient * (-2 * (f*lengthScale + bias)) * lengthScale 
	 * 
	 *
	 **/
	double gradient = this->gradient(y,f);
	return ( -2*(f*lengthScale + bias)*lengthScale*gradient - gradient*gradient );
}

double LHCumulativeGauss::gradient ( double y, double f ) const
{
	/* Computing the derivation of the LOG likelihood with respect to f
	 * erf'(x) = 2/pi e^{-x*x}
	 * phi'(x) = 1/pi e^{-x*x} = stdNormPDF(x)
	 * x = y*(f*lengthScale + bias)
	 * dx/dy = lengthScale*y
	 * d/dx log l(x) = dl/dx * 1/l(x)
	 * y is assumed to be in {-1, 1}
	 * stdNormPDF(y*(f*lengthScale + bias)) = stdNormPDF(f*lengthScale + bias)
	 * All of the above equations yield
	 */
	return ( y*lengthScale*stdNormPDF(f*lengthScale + bias)/likelihood(y,f) ); 
}

double LHCumulativeGauss::logLike ( double y, double f ) const
{
	return log ( likelihood(y, f) );
}

double LHCumulativeGauss::likelihood ( double y, double f ) const
{
	/*
	 * erf(x) = 2/pi \int_0^x e^{-t*t}
	 * The likelihood is denoted as phi(x) in the following.
	 * phi(x) = (erf(x) + 1)/2
	 */
	return ( ::erf(y*(f*lengthScale + bias))+1 ) / 2.0;
}

double LHCumulativeGauss::predictAnalytically ( double fmean, double fvariance ) const
{
	// Rasmussen, Williams page 74
	// \int_{-inf}^{inf} phi~((x-m)/v) N(x | mu, sigma^2) dx
	// = phi~( (mu-m)/(v*sqrt(1+sigma^2/v^2)) )
	// phi^(f*lengthScale + bias) = phi~(f*sqrt(2)*lengthScale + bias*sqrt(2)) 
	// 							v = 1 / (sqrt(2) * lengthScale)
	// 							m = - bias*sqrt(2) * v = - bias / lengthScale
	// 
	// phi~(f) = phi^( f/sqrt(2) ) = phi( (f/(sqrt(2)*lengthScale) - bias/lengthScale) )
	//
	// (mu-m)/(v*sqrt(1+sigma^2/v^2))
	// = (fmean + bias/lengthScale) * sqrt(2) * lengthScale / sqrt(1 + sigma^2 * 2 * lssqr)
	// -> we would like to put this into our likelihood
	// (fmean + bias/lengthScale) * sqrt(2) * lengthScale / sqrt(1 + sigma^2 * 2 * lssqr) / (sqrt(2) * lengthScale) - bias/lengthScale
	// = (fmean + bias/lengthScale) / sqrt(1 + sigma^2 * 2 * lssqr) - bias/lengthScale
	// 
	double lssqr = lengthScale * lengthScale;
	// variant without bias: return likelihood( 1.0, fmean / sqrt(1.0 +  2.0 * fvariance * lssqr) );
	return likelihood( 1.0, (fmean + bias/lengthScale) / sqrt(1.0 +  2.0 * fvariance * lssqr) - bias/lengthScale );
}