/** 
 * @file VectorStatistics.cpp
 * @brief B. P. Welford Computation of Mean and Variance download at: http://www.johndcook.com/standard_deviation.html
 * @author Michael Koch
 * @date 18/02/2009
 */

#include "core/vector/VectorT.h"
#include "core/vector/MatrixT.h"

#include "vislearning/baselib/RunningStat.h"
#include "vislearning/baselib/VectorStatistics.h"

using namespace OBJREC;
using namespace std;
using namespace NICE;

//non-static

VectorStatistics::VectorStatistics(const NICE::Vector &data, bool compute)
{
	datavector = data;
	initVariables();
	if (compute)
	{
		calculateStatistics();
	}
}

VectorStatistics::~VectorStatistics()
{
}

void VectorStatistics::calculateStatistics()
{
	setMean();
	setMedian();
	setVariance();
	setSkewness();
	setKurtosis();
	setEntropy();
	setNormalizedEntropy();
}

void VectorStatistics::setData(const NICE::Vector &data)
{
	datavector = data;
	initVariables();
}
void VectorStatistics::initVariables()
{
	mean = 0.0;
	med = 0.0;
	var = 0.0;
	skew = 0.0;
	kurt = 0.0;
	entropy=0.0;
	meancalculated = false;
	mediancalculated = false;
	varcalculated = false;
	stdcalculated = false;
	skewcalculated = false;
	kurtcalculated = false;
	entropycalculated = false;
	normalizedentropy = false;
}
void VectorStatistics::setMean()
{
	if (!meancalculated)
	{
		mean = computeMean(datavector);
		meancalculated = true;
	}
}
void VectorStatistics::setMedian()
{
	if (!mediancalculated)
	{
		med = computeMedian(datavector);
		mediancalculated = true;
	}
}
void VectorStatistics::setVariance()
{
	if (!varcalculated)
	{
		var = computeVariance(datavector);
		varcalculated = true;
	}
}
void VectorStatistics::setSkewness()
{
	if (!skewcalculated)
	{
		skew = computeSkew(datavector);
		skewcalculated = true;
	}
}
void VectorStatistics::setKurtosis()
{
	if (!kurtcalculated)
	{
		kurt = computeKurtosis(datavector);
		kurtcalculated = true;
	}
}

void VectorStatistics::setEntropy()
{
	if (!entropycalculated)
	{
		entropy = computeEntropy(datavector);
		entropycalculated = true;
	}
}

void VectorStatistics::setNormalizedEntropy()
{
	if (!normalizedentropycalculated)
	{
		normalizedentropy = computeNormalizedEntropy(datavector);
		normalizedentropycalculated = true;
	}
}

double VectorStatistics::getMean()
{
	if (meancalculated)
	{
		return mean;
	}
	else
	{
		setMean();
		return mean;
	}
}
double VectorStatistics::getMedian()
{
	if (mediancalculated)
	{
		return med;
	}
	else
	{
		setMedian();
		return med;
	}
}
double VectorStatistics::getVariance()
{
	if (varcalculated)
	{
		return var;
	}
	else
	{
		setVariance();
		return var;
	}
}
double VectorStatistics::getStandardDeviation()
{
	if (varcalculated)
	{
		return sqrt(var);
	}
	else
	{
		setVariance();
		return sqrt(var);
	}
}
double VectorStatistics::getSkewness()
{
	if (skewcalculated)
	{
		return skew;
	}
	else
	{
		setSkewness();
		return skew;
	}
}
double VectorStatistics::getKurtosis()
{
	if (kurtcalculated)
	{
		return kurt;
	}
	else
	{
		setKurtosis();
		return kurt;
	}
}
double VectorStatistics::getEntropy()
{
	if (entropycalculated)
	{
		return entropy;
	}
	else
	{
		setEntropy();
		return entropy;
	}
}

double VectorStatistics::getNormalizedEntropy()
{
	if (normalizedentropycalculated)
	{
		return normalizedentropy;
	}
	else
	{
		setNormalizedEntropy();
		return normalizedentropy;
	}
}

//static
double VectorStatistics::computeMedian(const NICE::Vector &data)
{
	if (data.size() > 0)
	{
		NICE::Vector data_tmp(data);
		data_tmp.sortAscend();
		return data_tmp[data.size() / 2];
	}
	else
	{
		fprintf(stderr, "VectorStatistics: median - size of data is 0!\n");
		return 0.0;
	}
}
double VectorStatistics::computeNormalizedEntropy(const NICE::Vector &data)
{
	double entropy = computeEntropy(data);
	if (data.size()>1)
	{
	  double max_entropy = -log(1.0 / (double) data.size());
	  entropy/=max_entropy;
	}
	return entropy;
}
double VectorStatistics::computeEntropy(const NICE::Vector &data)
{
	double entropy = 0.0;
	double sum = 0.0;
	for (size_t i = 0; i < data.size(); i++)
	{
		double val = data[i];
		if (val <= 0.0)
			continue;
		entropy -= val * log(val);
		sum += val;
	}
	if (fabs(sum) > 1e-6)
	{
		entropy /= sum;
		entropy += log(sum);
	}
	else
	{
		fprintf(stderr,
				"VectorStatistics: entropy - sum of values is numerically small\n");
	}
	return entropy;
}

double VectorStatistics::computeSkew(const NICE::Vector &data)
{

	double skew = 0.0;

	double meanvalue = computeMean(data);
	double var = computeVariance(data);
	if (data.size() > 0 && var>1e-8)
	{
		for (size_t i = 0; i < data.size(); ++i)
		{

			skew += (data[i] - meanvalue) * (data[i] - meanvalue) * (data[i]
					- meanvalue);

		}

		skew *= sqrt(double(data.size())) / pow(var * double(data.size()), 1.5);
	}
	return skew;
}
double VectorStatistics::computeKurtosis(const NICE::Vector &data)
{

	double kurt = 0.0;

	double meanvalue = computeMean(data);
	double var = computeVariance(data);
	if (data.size() > 0 && var>1e-8)
	{
		for (size_t i = 0; i < data.size(); i++)
		{
			kurt += pow((data[i] - meanvalue), 4.0);
		}

		kurt /= double(data.size()) * pow(var, 2.0);

		kurt -= 3.0;
	}
	return kurt;
}

double VectorStatistics::computeMean(const NICE::Vector &data)
{
	RunningStat rs;
	for (size_t i = 0; i < data.size(); i++)
	{
		rs.Push(data[i]);
	}
	return rs.Mean();
}
double VectorStatistics::computeVariance(const NICE::Vector &data)
{
	RunningStat rs;
	for (size_t i = 0; i < data.size(); i++)
	{
		rs.Push(data[i]);
	}
	return rs.Variance();
}
double VectorStatistics::computeStandardDeviation(const NICE::Vector &data)
{
	RunningStat rs;
	for (size_t i = 0; i < data.size(); i++)
	{
		rs.Push(data[i]);
	}
	return rs.StandardDeviation();
}