#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <stdio.h>

#include <cassert>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <limits>

#include "GMM.h"
#include <core/vector/Algorithms.h>

#include "vislearning/math/cluster/KMeans.h"

#define DEBUGGMM // enable per-iteration EM debug output

using namespace OBJREC;
using namespace NICE;
using namespace std;
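
/*
 * GMM - gaussian mixture model with diagonal covariance matrices.
 * Training runs EM (optionally initialized by k-means) with a MAP
 * smoothing parameter tau; tau = 0 corresponds to plain ML estimation.
 *
 * Typical usage, as a rough sketch (the exact interface is declared in GMM.h):
 *
 *   GMM gmm ( conf, 5 );              // a mixture with 5 components
 *   gmm.computeMixture ( features );  // features: NICE::VVector of data vectors
 *   NICE::Vector probs;
 *   gmm.getProbs ( x, probs );        // posterior probabilities p(component | x)
 */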

GMM::GMM()
{
  gaussians = 3;
  maxiter = 200;
  featsperclass = 0;
  tau = 10.0;
  srand ( time ( NULL ) );
  pyramid = false;
  comp = false;
}

GMM::GMM ( int _gaussians ) : gaussians ( _gaussians )
{
  maxiter = 200;
  featsperclass = 0;
  tau = 0.0;
  srand ( time ( NULL ) );
  pyramid = false;
  comp = false;
}

GMM::GMM ( const Config *conf, int _gaussians ) : gaussians ( _gaussians )
{
  this->conf = conf;

  if ( gaussians < 2 )
    gaussians = conf->gI ( "GMM", "gaussians", 2 );

  maxiter = conf->gI ( "GMM", "maxiter", 200 );

  featsperclass = conf->gI ( "GMM", "featsperclass", 0 );

  tau = conf->gD ( "GMM", "tau", 100.0 );

  pyramid = conf->gB ( "GMM", "pyramid", false );

  comp = false;

  srand ( time ( NULL ) );
}

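// Build the training set for the mixture: if featsperclass is set, only a
// random subset of the examples (drawn with replacement) is used, otherwise
// all of them.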
void GMM::computeMixture ( Examples examples )
{
  int fsize = ( int ) examples.size();
  assert ( fsize >= gaussians );

  dim = examples[0].second.vec->size();

  int samples = fsize;
  if ( featsperclass > 0 )
  {
    samples = featsperclass * gaussians;
    samples = std::min ( samples, fsize );
  }

  // copy a random subset of the data (sampled with replacement) into a VVector
  VVector dataset;

  cout << "reduced training data for GMM from " << fsize << " features to " << samples << " features." << endl;
  for ( int i = 0; i < samples; i++ )
  {
    int k = rand() % fsize;
    NICE::Vector *vec = examples[k].second.vec;
    dataset.push_back ( *vec );
  }

  computeMixture ( dataset );
}

void GMM::computeMixture ( const VVector &DataSet )
{
  // Learn the GMM model
  assert ( DataSet.size() >= ( uint ) gaussians );
  //initEMkMeans(DataSet); // initialize the model
  srand ( time ( NULL ) );

  bool poweroftwo = false;
  int power = 1;
  while ( power <= gaussians )
  {
    if ( gaussians == power )
      poweroftwo = true;
    power *= 2;
  }

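  // Pyramid scheme: if the requested number of gaussians is a power of two,
  // EM is first run with a single component; the fitted components are then
  // duplicated with randomly perturbed means and EM is re-run, doubling the
  // component count until 'gaussians' is reached.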
  if ( poweroftwo && pyramid )
  {
    initEM ( DataSet ); // initialize the model
    int g = 1;

    while ( g <= gaussians )
    {
      cout << "g = " << g << endl;
      doEM ( DataSet, g );

      if ( 2*g <= gaussians )
      {
        for ( int i = g; i < g*2; i++ )
        {
          mu[i] = mu[i-g];
          sparse_sigma[i] = sparse_sigma[i-g];
          sparse_inv_sigma[i] = sparse_inv_sigma[i-g];
          log_det_sigma[i] = log_det_sigma[i-g];
          priors[i] = priors[i-g];

          double interval = 1.0;
          for ( int k = 0; k < ( int ) mu[i].size(); k++ )
          {
            interval = mu[i][k];
            interval = std::max ( interval, 0.1 );
            double r = ( interval * ( ( double ) rand() / ( double ) RAND_MAX ) ) - interval / 2.0;
            mu[i][k] += r;
          }
        }
      }
      g *= 2;
    }
  }
  else
  {
    initEMkMeans ( DataSet ); // initialize the model
    doEM ( DataSet, gaussians ); // perform EM
  }
}

// helpers for diagonal covariances stored as vectors:
// determinant, log-determinant and element-wise inverse
inline double diagDeterminant ( const NICE::Vector &sparse_mat )
{
  double det = 1.0;
  for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
  {
    det *= sparse_mat[i];
  }

  return det;
}

inline double logdiagDeterminant ( const NICE::Vector &sparse_mat )
{
  double det = 0.0;
  for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
  {
    det += log ( sparse_mat[i] );
  }

  return det;
}

inline NICE::Vector diagInverse ( const NICE::Vector &sparse_mat )
{
  NICE::Vector inv ( sparse_mat.size() );
  for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
  {
    inv[i] = 1.0 / sparse_mat[i];
  }
  return inv;
}

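// Initialize the mixture from a k-means clustering: the component means are
// the cluster centers, the priors are the cluster weights, and every
// component starts with the global (diagonal) covariance of the data.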
void GMM::initEMkMeans ( const VVector &DataSet )
{
  /* initialize the GMM with k-means cluster centers */
  KMeans k ( gaussians );
  VVector means;
  vector<double> weights;
  vector<int> assignment;
  k.cluster ( DataSet, means, weights, assignment );

  int nData = DataSet.size();
  this->dim = DataSet[0].size();
  cdimval = dim * log ( 2.0 * M_PI ); // cache the constant d*log(2*pi)
  priors.resize ( gaussians );
  mu = VVector ( gaussians, dim );
  sparse_sigma.clear();
  sparse_inv_sigma.clear();
  log_det_sigma.clear();

  NICE::Vector globmean ( dim );
  globmean.set ( 0.0 );

  for ( int n = 0; n < ( int ) DataSet.size(); n++ ) // accumulate the global mean
  {
    globmean = globmean + DataSet[n];
  }

  globmean *= ( 1.0 / nData );

  NICE::Vector sparse_globsigma;

  sparse_globsigma.resize ( dim );
  sparse_globsigma.set ( 0.0 );

  for ( int i = 0; i < ( int ) DataSet.size(); i++ ) // Covariances updates
  {
    // compute only the diagonal elements
    for ( int d = 0; d < dim; d++ )
    {
      double diff = ( DataSet[i][d] - globmean[d] );
      sparse_globsigma[d] += diff * diff;
    }
  }

  sparse_globsigma *= ( 1.0 / DataSet.size() );

  for ( int i = 0; i < gaussians; i++ )
  {
    NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );

    sparse_sigma.push_back ( sparse_globsigma );
    sparse_inv_sigma.push_back ( _inv_sigma );
    log_det_sigma.push_back ( logdiagDeterminant ( sparse_globsigma ) );
    mu[i] = means[i];
    //priors[i]=1.0/(double)gaussians; // set equi-probables states
    priors[i] = weights[i]; // set kMeans weights
  }
}

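// Initialize the mixture randomly: uniform priors, the global (diagonal)
// covariance of the data for every component, and means drawn as pairwise
// distinct random training samples.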
void GMM::initEM ( const VVector &DataSet )
{

  /* init the GaussianMixture using randomized mean vectors */
  int nData = DataSet.size();
  this->dim = DataSet[0].size();
  cdimval = dim * log ( 2.0 * M_PI ); // cache the constant d*log(2*pi)
  priors.resize ( gaussians );
  mu = VVector ( gaussians, dim );
  sparse_sigma.clear();
  sparse_inv_sigma.clear();
  log_det_sigma.clear();
  vector<int> allk; // data indices already chosen as initial means

  NICE::Vector globmean ( dim );
  globmean.set ( 0.0 );

  for ( int n = 0; n < ( int ) DataSet.size(); n++ ) // accumulate the global mean
  {
    globmean = globmean + DataSet[n];
  }

  globmean *= ( 1.0 / nData );

  NICE::Vector sparse_globsigma;

  sparse_globsigma.resize ( dim );
  sparse_globsigma.set ( 0.0 );

  for ( int i = 0; i < ( int ) DataSet.size(); i++ ) // Covariances updates
  {
    // compute only the diagonal elements
    for ( int d = 0; d < dim; d++ )
    {
      double diff = ( DataSet[i][d] - globmean[d] );
      sparse_globsigma[d] += diff * diff;
    }
  }

  sparse_globsigma *= ( 1.0 / DataSet.size() );

  for ( int i = 0; i < gaussians; i++ )
  {
    priors[i] = 1.0 / ( double ) gaussians; // set equi-probables states
    NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );

    sparse_sigma.push_back ( sparse_globsigma );
    sparse_inv_sigma.push_back ( _inv_sigma );
    log_det_sigma.push_back ( logdiagDeterminant ( sparse_globsigma ) );
    bool newv = false;
    int k;
    while ( !newv ) // rejection-sample a data index that has not been used yet
    {
      newv = true;
      k = rand() % nData;

      for ( int nk = 0; nk < ( int ) allk.size(); nk++ )
        if ( allk[nk] == k )
        {
          newv = false;
        }
      if ( newv )
        allk.push_back ( k );
    }
    mu[i] = DataSet[k];
  }
}

inline void sumRow ( const NICE::Matrix &mat, NICE::Vector &res )
{
  int row = mat.rows();
  int column = mat.cols();
  res = NICE::Vector ( column );
  res.set ( 1e-5f ); // start with a small epsilon so later divisions by res stay well-defined
//#pragma omp parallel for
  for ( int i = 0; i < column; i++ ) {
    for ( int j = 0; j < row; j++ ) {
      res[i] += mat ( j, i );
    }
  }
}

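// With a diagonal covariance the log density of component k evaluates to
//   log N(x; mu_k, Sigma_k)
//     = -0.5 * ( (x - mu_k)^T Sigma_k^{-1} (x - mu_k) + d*log(2*pi) + log|Sigma_k| ),
// where cdimval caches d*log(2*pi) and log_det_sigma[k] caches log|Sigma_k|.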
double GMM::logpdfState ( const NICE::Vector &Vin, int state )
{
  /* compute the log probability density of a vector under mixture component 'state' */
  double p;
  NICE::Vector dif;

  dif = Vin;
  dif -= mu[state];

  p = 0.0;
  for ( int i = 0; i < ( int ) dif.size(); i++ )
  {
    p += dif[i] * dif[i] * sparse_inv_sigma[state][i];
  }

  p = -0.5 * ( p + cdimval + log_det_sigma[state] );
  return p;
}

int GMM::doEM ( const VVector &DataSet, int nbgaussians )
{
  /* perform expectation maximization on the given dataset
  (a VVector with nSamples vectors of dimension dim).
  The gaussian mixture must be initialized beforehand
  (see the initEM and initEMkMeans methods). */

#ifdef DEBUG
  cerr << "GMM::start EM" << endl;
#endif

  int nData = DataSet.size();
  int iter = 0;
  double log_lik;
  double log_lik_threshold = 1e-6;
  double log_lik_old = -1e10;

  //EM loop
  while ( true )
  {
#ifdef DEBUGGMM
    cerr << "GMM::EM: iteration: " << iter << endl;
#endif

    vector<double> sum_p;
    sum_p.resize ( nData );

    for ( int i = 0; i < nData; i++ )
    {
      sum_p[i] = 0.0;
    }

    NICE::Matrix pxi ( nData, gaussians );
    pxi.set ( 0.0 );
    NICE::Matrix pix ( nData, gaussians );
    pix.set ( 0.0 );
    NICE::Vector E;

    iter++;
    if ( iter > maxiter ) {
      cerr << "EM stops here. Max number of iterations (" << maxiter << ") has been reached." << endl;
      return iter;
    }

    double sum_log = 0.0;

    // computing expectation
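    // For numerical stability the largest log density maxp is subtracted
    // before exponentiating (a log-sum-exp style shift); the common factor
    // exp(-maxp) cancels in the responsibilities pix, since numerator and
    // denominator both carry it.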
    double maxp = -numeric_limits<double>::max();
    vector<double> maxptmp ( nData, -numeric_limits<double>::max() );
#pragma omp parallel for
    for ( int i = 0; i < nData; i++ )
    {
      for ( int j = 0; j < nbgaussians; j++ )
      {
        double p = logpdfState ( DataSet[i], j ); // log p(x_i | j)
        maxptmp[i] = std::max ( maxptmp[i], p );
        pxi ( i, j ) = p;
      }
    }

    for ( int i = 0; i < nData; i++ )
    {
      maxp = std::max ( maxp, maxptmp[i] );
    }

#pragma omp parallel for
    for ( int i = 0; i < nData; i++ )
    {
      sum_p[i] = 0.0;
      for ( int j = 0; j < nbgaussians; j++ )
      {
        double p = pxi ( i, j ) - maxp; // shifted log p(x_i | j)
        p = exp ( p ); // exp(-maxp) * p(x_i | j)

        if ( p < 1e-40 )
          p = 1e-40;

        pxi ( i, j ) = p;
        sum_p[i] += p * priors[j];
      }
    }

    for ( int i = 0; i < nData; i++ )
    {
      sum_log += log ( sum_p[i] );
    }

#pragma omp parallel for
    for ( int j = 0; j < nbgaussians; j++ )
    {
      for ( int i = 0; i < nData; i++ )
      {
        pix ( i, j ) = ( pxi ( i, j ) * priors[j] ) / sum_p[i]; // responsibility P(j | x_i)
      }
    }

    // here we compute the log likelihood
    log_lik = sum_log / nData;
#ifdef DEBUGGMM
    cout << "diff: " << fabs ( ( log_lik / log_lik_old ) - 1 ) << " thresh: " << log_lik_threshold << " sum: " << sum_log <<  endl;
    //cout << "logold: " << log_lik_old << " lognew: " << log_lik << endl;
#endif

    if ( fabs ( ( log_lik / log_lik_old ) - 1 ) < log_lik_threshold )
    {
      // if the log likelihood has not moved enough, the algorithm has converged; exit the loop
      return iter;
    }

    log_lik_old = log_lik;

    // Update Step
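    // M-step with MAP smoothing: each update interpolates between the
    // expected sufficient statistics and the previous parameter values,
    // weighted by tau (tau = 0 gives plain ML-EM). E[j] is the effective
    // number of samples assigned to component j.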
    sumRow ( pix, E );

#pragma omp parallel for
    for ( int j = 0; j < nbgaussians; j++ )
    {
      priors[j] = ( E[j] + tau ) / ( nData + tau * nbgaussians ); // MAP-smoothed prior update

      NICE::Vector tmu ( dim );
      tmu.set ( 0.0 );

      NICE::Vector sparse_tmsigma ( dim );
      sparse_tmsigma.set ( 0.0 );

      for ( int i = 0; i < nData; i++ ) // Means update loop
      {
        tmu = tmu + ( DataSet[i] * pix ( i, j ) );
      }

      NICE::Vector tmu2 = mu[j];
      mu[j] = tmu + tau * tmu2;
      mu[j] = mu[j] * ( 1.0 / ( E[j] + tau ) );

      for ( int i = 0; i < nData; i++ ) // Covariances updates
      {
        // compute only the diagonal elements
        for ( int d = 0; d < dim; d++ )
        {
          sparse_tmsigma[d] += DataSet[i][d] * DataSet[i][d] * pix ( i, j );
        }
      }

      NICE::Vector sparse_tmsigma2 ( dim );
      for ( int d = 0; d < dim; d++ )
      {
        sparse_tmsigma[d] += 1e-5f; // small regularizer keeps the variances positive
        sparse_tmsigma2[d] = sparse_sigma[j][d] + tmu2[d] * tmu2[d];
        sparse_sigma[j][d] = ( sparse_tmsigma[d] + tau * sparse_tmsigma2[d] ) / ( E[j] + tau ) - ( mu[j][d] * mu[j][d] );
      }
      sparse_inv_sigma[j] = diagInverse ( sparse_sigma[j] );

      log_det_sigma[j] = logdiagDeterminant ( sparse_sigma[j] );
    }
    if ( comp )
    {
      double dist = compare();
      cout << "distance to the reference GMM after iteration " << iter << ": " << dist << endl;
    }
  }
  // not reached: the EM loop above always exits via return
  return iter;
}

int GMM::getBestClass ( const NICE::Vector &v, double *bprob )
{
  int bestclass = 0;
  double maxprob = logpdfState ( v, 0 ) + log ( priors[0] );  // log( p(x|0) * p(0) )

  for ( int i = 1; i < gaussians; i++ )
  {
    double prob = logpdfState ( v, i ) + log ( priors[i] );  // log( p(x|i) * p(i) )

    if ( prob > maxprob )
    {
      maxprob = prob;
      bestclass = i;
    }
  }

  if ( bprob != NULL )
    *bprob = maxprob;

  return bestclass;
}

void GMM::getProbs ( const NICE::Vector &vin, SparseVector &outprobs )
{
  outprobs.clear();
  outprobs.setDim ( gaussians );
  Vector probs;
  getProbs ( vin, probs );
  for ( int i = 0; i < gaussians; i++ )
  {
    if ( probs[i] > 1e-7f )
      outprobs[i] = probs[i];
  }
}

void GMM::getProbs ( const NICE::Vector &vin, Vector &outprobs )
{
  Vector probs;
  probs.resize ( gaussians );
  probs.set ( 0.0 );

  double probsum = 0.0;
  double maxp = -numeric_limits<double>::max();
  for ( int i = 0; i < gaussians; i++ )
  {
    probs[i] = logpdfState ( vin, i ) + log ( priors[i] );  // log( p(x|i) * p(i) )
    maxp = std::max ( maxp, probs[i] );
  }

  for ( int i = 0; i < gaussians; i++ )
  {
    probs[i] -= maxp;
    probs[i] = exp ( probs[i] );
    probsum += probs[i];
  }

  // normalize probs
#pragma omp parallel for
  for ( int i = 0; i < gaussians; i++ )
  {
    probs[i] /= probsum;
  }
  outprobs = probs;
}

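// Fisher-vector style encoding: for every component the gradients of the
// (weighted) likelihood with respect to the mean and the variance of each
// dimension are stacked into a vector of length gaussians * 2 * dim;
// near-zero entries are omitted from the sparse result.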
void GMM::getFisher ( const NICE::Vector &vin, SparseVector &outprobs )
{
  int size = gaussians * 2 * dim;
  outprobs.clear();
  outprobs.setDim ( size );

  int counter = 0;

  NICE::Vector classprobs;
  classprobs.resize ( gaussians );
  classprobs.set ( 0.0 );

  double maxp = -numeric_limits<double>::max();
  for ( int i = 0; i < gaussians; i++ )
  {
    classprobs[i] = logpdfState ( vin, i ) + log ( priors[i] );  // log( p(x|i) * p(i) )

    maxp = std::max ( maxp, classprobs[i] );
  }

  for ( int i = 0; i < gaussians; i++ )
  {
    double p = classprobs[i] - maxp;

    p = exp ( p );

    for ( int d = 0; d < dim; d++ )
    {
      double diff = vin[d] - mu[i][d];

      double sigma2 = sparse_sigma[i][d] * sparse_sigma[i][d];
      double sigma3 = sigma2 * sparse_sigma[i][d];
      double normmu = sqrt ( priors[i] / sigma2 );
      double normsig = sqrt ( ( 2.0 * priors[i] ) / sigma2 );

      double gradmu = ( p * ( diff / sigma2 ) ) / normmu;
      double gradsig = ( p * ( ( diff * diff ) / sigma3 - 1.0 / sparse_sigma[i][d] ) ) / normsig;
      if ( fabs ( gradmu ) > 1e-7f )
        outprobs[counter] = gradmu;
      counter++;

      if ( fabs ( gradsig ) > 1e-7f )
        outprobs[counter] = gradsig;
      counter++;
    }
  }
}

void GMM::cluster ( const VVector & features, VVector & prototypes, vector<double> & weights, vector<int> & assignment )
{
  computeMixture ( features );

  prototypes.clear();
  weights.clear();
  assignment.clear();

  for ( int i = 0; i < ( int ) features.size(); i++ )
  {
    int c = getBestClass ( features[i] );
    assignment.push_back ( c );

    weights.push_back ( priors[c] );
  }
  for ( int c = 0; c < gaussians; c++ )
    prototypes.push_back ( mu[c] );

  cout << "tau: " << tau << endl;
}

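// Plain-text model file layout: one line "gaussians dim", followed by the
// mean vectors, then one diagonal covariance vector per component, then the
// priors. loadData below reads exactly this format.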
void GMM::saveData ( const std::string filename )
{
  ofstream fout ( filename.c_str() );

  fout << gaussians << " " << dim << endl;

  mu >> fout;
  fout << endl;

  for ( int n = 0; n < gaussians; n++ )
  {
    fout << sparse_sigma[n] << endl;
  }

  for ( int n = 0; n < gaussians; n++ )
  {
    fout << priors[n] << " ";
  }

  fout.close();
}

bool GMM::loadData ( const std::string filename )
{
  cerr << "read GMM Data" << endl;
  ifstream fin ( filename.c_str() );

  if ( fin.fail() )
  {
    cerr << "GMM: Datei " << filename << " nicht gefunden!" << endl;
    return false;
  }

  fin >> gaussians;
  fin >> dim;
  cdimval = dim * log ( 2.0 * M_PI ); // cache the constant d*log(2*pi)
  mu.clear();

  for ( int i = 0; i < gaussians; i++ )
  {
    NICE::Vector tmp;
    fin >> tmp;
    mu.push_back ( tmp );
  }

  sparse_sigma.clear();
  sparse_inv_sigma.clear();
  log_det_sigma.clear();

  for ( int n = 0; n < gaussians; n++ )
  {
    NICE::Vector _sparse_sigma ( dim );
    fin >> _sparse_sigma;
    sparse_sigma.push_back ( _sparse_sigma );

    sparse_inv_sigma.push_back ( diagInverse ( sparse_sigma[n] ) );
    log_det_sigma.push_back ( logdiagDeterminant ( sparse_sigma[n] ) );
  }

  priors.clear();
  for ( int n = 0; n < gaussians; n++ )
  {
    double tmpd;
    fin >> tmpd;
    priors.push_back ( tmpd );
  }
  fin.close();

  cerr << "reading GMM Data finished" << endl;
  return true;
}

void GMM::getParams ( VVector &mean, VVector &sSigma, vector<double> &p )
{
  mean = mu;
  sSigma.resize ( gaussians );
  p.clear();
  for ( int i = 0; i < gaussians; i++ )
  {
    sSigma[i] = sparse_sigma[i];
    p.push_back ( priors[i] );
  }
}

void GMM::setCompareGM ( VVector mean, VVector sSigma, vector<double> p )
{
  mu2 = mean;
  sparse_sigma2 = sSigma;
  priors2 = p;
}

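// Closed-form probability product kernel between two gaussians with
// diagonal covariances,
//   K = \int N(x; mu1, sigma1)^p * N(x; mu2, sigma2)^p dx,
// where sigma1 and sigma2 hold the variances; p = 0.5 yields the
// Bhattacharyya kernel.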
double GMM::kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p )
{
  int d = ( int ) mu1.size();

  double det1 = 1.0;
  double det2 = 1.0;
  double det3 = 1.0;
  double eval = 0.0;
  for ( int i = 0; i < d; i++ )
  {
    det1 *= sigma1[i];
    det2 *= sigma2[i];
    double sigma = 1.0 / ( ( 1.0 / sigma1[i] + 1.0 / sigma2[i] ) * p );
    det3 *= sigma;
    double mu = p * ( mu1[i] / sigma1[i] + mu2[i] / sigma2[i] ); // precision-weighted mean term
    eval += 0.5 * mu * sigma * mu - ( p * mu1[i] * mu1[i] ) / ( 2.0 * sigma1[i] ) - ( p * mu2[i] * mu2[i] ) / ( 2.0 * sigma2[i] );
  }

  return ( pow ( 2.0*M_PI, ( ( 1.0 - 2.0*p ) *d ) / 2.0 ) * pow ( det1, -p / 2.0 ) * pow ( det2, -p / 2.0 ) * pow ( det3, 0.5 ) * exp ( eval ) );
}

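// Normalized similarity between this mixture and the reference mixture set
// via setCompareGM:
//   d = K(g1, g2) / sqrt( K(g1, g1) * K(g2, g2) ),
// where K is the prior-weighted sum of pairwise component kernels kPPK.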
double GMM::compare()
{
  double distkij = 0.0;
  double distkjj = 0.0;
  double distkii = 0.0;
  for ( int i = 0; i < gaussians; i++ )
  {
    for ( int j = 0; j < gaussians; j++ )
    {
      double kij = kPPK ( sparse_sigma[i], sparse_sigma2[j], mu[i], mu2[j], 0.5 );
      double kii = kPPK ( sparse_sigma[i], sparse_sigma[j], mu[i], mu[j], 0.5 );
      double kjj = kPPK ( sparse_sigma2[i], sparse_sigma2[j], mu2[i], mu2[j], 0.5 );
      kij = priors[i] * priors2[j] * kij;
      kii = priors[i] * priors[j] * kii;
      kjj = priors2[i] * priors2[j] * kjj;
      distkij += kij;
      distkii += kii;
      distkjj += kjj;
    }
  }
  return ( distkij / ( sqrt ( distkii ) *sqrt ( distkjj ) ) );
}

void GMM::comparing ( bool c )
{
  comp = c;
}