/** 
* @file TraceApproximation.cpp
* @brief Approximation of the trace term within the GP regression gradient
* @author Erik Rodner
* @date 01/29/2010

*/
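
/*
 * Background: the gradient of the GP log marginal likelihood contains a
 * term of the form trace( K^{-1} * J ), where J = dK/dtheta is the Jacobi
 * matrix of the kernel with respect to a hyperparameter. This file
 * approximates that term with the Hutchinson trace estimator,
 *
 *   trace( K^{-1} J ) = E[ r^T K^{-1} J r ]   with   E[ r r^T ] = I,
 *
 * averaged over numTraceSamples random vectors r.
 */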
#include <cmath>
#include <iostream>

#include "TraceApproximation.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
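
// Minimal self-contained sketch of the plain Hutchinson estimator this file
// builds on: trace(M) is estimated by the empirical mean of the quadratic
// forms r^T M r over random vectors r with identity covariance. This helper
// is purely illustrative (it is not called anywhere in the library) and only
// uses the NICE Vector/Matrix operations already employed below.
static double hutchinsonTraceSketch ( const Matrix & M, uint numSamples )
{
	if ( numSamples == 0 ) return 0.0;
	double traceEstimate = 0.0;
	for ( uint k = 0 ; k < numSamples ; k++ )
	{
		// draw a standard normal random vector r ~ N(0, I)
		Vector r = Vector::GaussRandom ( M.rows(), 0.0, 1.0 );
		// accumulate the quadratic form r^T M r
		traceEstimate += r.scalarProduct ( M * r );
	}
	return traceEstimate / numSamples;
}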



TraceApproximation::TraceApproximation( const Config *conf, const string & section )
{
	numTraceSamples = conf->gI(section, "num_trace_samples", 100 ); 
	displayPoints = conf->gI(section, "display_points", 100 );
	if ( displayPoints > numTraceSamples )
		displayPoints = numTraceSamples;
	// guard against division by zero in the progress display of the loops below
	if ( displayPoints < 1 )
		displayPoints = 1;
}
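
/*
 * Usage sketch (illustrative; "conf", "kernelData" and "jacobiMatrix" are
 * assumed to be set up elsewhere, and the config section name is just an
 * example):
 *
 *   TraceApproximation ta ( conf, "TraceApproximation" );
 *   double t = ta.getApproximateTraceTerm ( kernelData, jacobiMatrix );
 *   // t approximates trace( K^{-1} * jacobiMatrix )
 */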

TraceApproximation::~TraceApproximation()
{
}

void TraceApproximation::preSampling ( const KernelData *kernelData, Matrix & samples, Matrix & samplesPreMultiplied ) const
{
	if ( numTraceSamples == 0 ) return;

	uint kernelSize = kernelData->getKernelMatrixSize();
	samples.resize ( kernelSize, numTraceSamples );
	samplesPreMultiplied.resize ( kernelSize, numTraceSamples );

	cerr << "TraceApproximation: number of samples " << numTraceSamples << endl;
	cerr << "TraceApproximation: pre-sampling " << endl;
	for ( uint i = 0 ; i < kernelSize ; i++ )
		for ( uint k = 0 ; (int)k < numTraceSamples ; k++ )
			samples(i,k) = randGaussDouble ( 1.0 );

	cerr << "TraceApproximation: pre-multiplication ";
	for ( uint k = 0 ; (int)k < numTraceSamples; k++ )
	{
		if ( k % (numTraceSamples/displayPoints) == 0 )
		{
			cerr << ".";
			cerr.flush();
		}
		Vector rinv = samplesPreMultiplied.getColumnRef ( k );
		kernelData->computeInverseKernelMultiply ( samples.getColumn(k), rinv );
	}
	cerr << endl;
}
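
/*
 * Intended pairing (illustrative sketch): sample once, then reuse the vectors
 * and their pre-multiplications with K^{-1} for several Kronecker factors
 * A, B without touching the kernel matrix again:
 *
 *   Matrix samples, samplesPreMultiplied;
 *   ta.preSampling ( kernelData, samples, samplesPreMultiplied );
 *   double t = ta.getApproxTraceTermKroneckerPre ( samples, samplesPreMultiplied, A, B );
 */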

double TraceApproximation::getApproximateTraceTerm ( const KernelData *kernelData, const Matrix & jacobiMatrix ) const
{
	if ( numTraceSamples == 0 ) return 0.0;

	Vector r ( jacobiMatrix.rows() );
	Vector rinv;
	Vector rJacobi;

	double trace_approx = 0.0;
	double trace_approx_old = 0.0;
	double stddev = 0.0;

	cerr << "TraceApproximation: number of samples " << numTraceSamples << endl;
	cerr << "TraceApproximation: sampling ";
	for ( uint k = 0 ; (int)k < numTraceSamples; k++ )
	{
		if ( k % (numTraceSamples/displayPoints) == 0 )
		{
			cerr << ".";
			cerr.flush();
		}
		// Normality of the samples is not strictly required for the Hutchinson
		// estimator (e.g. Rademacher vectors work as well), but Gaussian
		// samples seem to yield better results in practice.
		r = Vector::GaussRandom ( r.size(), 0.0, 1.0 );
		kernelData->computeInverseKernelMultiply ( r, rinv );
		rJacobi = jacobiMatrix * r;
		double tau = rinv.scalarProduct ( rJacobi );

		// Welford-style running mean and sum of squared deviations
		// (cf. RunningStat)
		trace_approx = trace_approx + (tau - trace_approx) / (k+1);
		stddev += ( tau - trace_approx_old ) * ( tau - trace_approx );
		trace_approx_old = trace_approx;
	}
	cerr << endl;
	stddev /= (numTraceSamples-1);
	stddev = sqrt(stddev);
	double standard_error = stddev / sqrt(numTraceSamples);
	double rel_standard_error = standard_error / fabs(trace_approx);

	cerr << "TraceApproximation: relative error " << rel_standard_error << endl;

	return trace_approx;
}
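
// Note on accuracy: the standard error of this Monte Carlo estimate decays
// as 1/sqrt(numTraceSamples), so quadrupling the number of samples roughly
// halves the reported relative error.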

double TraceApproximation::getApproximateTraceTermKronecker ( const KernelData *kernelData, const Matrix & A, const Matrix & B ) const
{
	if ( numTraceSamples == 0 ) return 0.0;

	Vector r ( A.rows() * B.rows() );
	Vector rinv;
	Vector rJacobi;

	double trace_approx = 0.0;
	double trace_approx_old = 0.0;
	double stddev = 0.0;

	cerr << "TraceApproximation: number of samples " << numTraceSamples << endl;
	cerr << "TraceApproximation: sampling ";
	for ( uint k = 0 ; (int)k < numTraceSamples; k++ )
	{
		if ( k % (numTraceSamples/displayPoints) == 0 )
		{
			cerr << ".";
			cerr.flush();
		}
		// Normality of the samples is not strictly required for the Hutchinson
		// estimator (e.g. Rademacher vectors work as well), but Gaussian
		// samples seem to yield better results in practice.
		r = Vector::GaussRandom ( r.size(), 0.0, 1.0 );
		kernelData->computeInverseKernelMultiply ( r, rinv );
		// The only difference to getApproximateTraceTerm: multiply r with the
		// Kronecker product A (x) B instead of an explicit Jacobi matrix.
		kroneckerProductMultiplication ( A, B, r, rJacobi );
		double tau = rinv.scalarProduct ( rJacobi );

		// Welford-style running mean and sum of squared deviations
		// (cf. RunningStat)
		trace_approx = trace_approx + (tau - trace_approx) / (k+1);
		stddev += ( tau - trace_approx_old ) * ( tau - trace_approx );
		trace_approx_old = trace_approx;
	}
	cerr << endl;
	stddev /= (numTraceSamples-1);
	stddev = sqrt(stddev);
	double standard_error = stddev / sqrt(numTraceSamples);
	double rel_standard_error = standard_error / fabs(trace_approx);

	cerr << "TraceApproximation: relative error " << rel_standard_error << endl;

	return trace_approx;
}
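
/*
 * Why the Kronecker structure helps (assuming kroneckerProductMultiplication
 * exploits it): the product (A (x) B) * r can be evaluated without forming
 * A (x) B explicitly via the standard identity (column-major vec)
 *
 *   (A (x) B) * vec(X) = vec( B * X * A^T ),
 *
 * replacing one huge matrix-vector product by two ordinary matrix products.
 */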

double TraceApproximation::getApproxTraceTermKroneckerPre ( Matrix & samples, Matrix & samplesPreMultiplied, const Matrix & A, const Matrix & B ) const
{
	if ( numTraceSamples == 0 ) return 0.0;

	double trace_approx = 0.0;
	double trace_approx_old = 0.0;
	double stddev = 0.0;

	Vector rJacobi;
	cerr << "TraceApproximation: estimating ";
	for ( uint k = 0 ; (int)k < numTraceSamples; k++ )
	{
		if ( k % (numTraceSamples/displayPoints) == 0 )
		{
			cerr << ".";
			cerr.flush();
		}
		// The only difference to getApproximateTraceTermKronecker: reuse the
		// pre-sampled vectors and their pre-multiplications with K^{-1}.
		Vector r = samples.getColumnRef(k);
		Vector rinv = samplesPreMultiplied.getColumnRef(k);
		kroneckerProductMultiplication ( A, B, r, rJacobi );
		double tau = rinv.scalarProduct ( rJacobi );

		// Welford-style running mean and sum of squared deviations
		// (cf. RunningStat)
		trace_approx = trace_approx + (tau - trace_approx) / (k+1);
		stddev += ( tau - trace_approx_old ) * ( tau - trace_approx );
		trace_approx_old = trace_approx;
	}
	cerr << endl;
	stddev /= (numTraceSamples-1);
	stddev = sqrt(stddev);
	double standard_error = stddev / sqrt(numTraceSamples);
	double rel_standard_error = standard_error / fabs(trace_approx);

	cerr << "TraceApproximation: relative error " << rel_standard_error << endl;

	return trace_approx;
}
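
// Remark: reusing the same pre-sampled vectors for all hyperparameter
// directions (common random numbers) makes the individual trace estimates
// correlated, which typically stabilizes the resulting gradient compared to
// drawing fresh samples for every direction.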