/**
 * @file KMedian.cpp
 * @brief KMedian (aka K-medoid)
 * @author Alexander Freytag
 * @date 23-04-2013 (dd-mm-yyyy)
 */

#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <iostream>
#include <map>
#include <algorithm> //to easily find the smallest value in a map

#include "vislearning/math/cluster/KMedian.h"
#include "vislearning/math/distances/genericDistance.h"

#include <set>

using namespace OBJREC;

using namespace std;

using namespace NICE;


typedef std::pair<int, double> MyPairType;
struct CompareSecond
{
    bool operator()(const MyPairType& left, const MyPairType& right) const
    {
        return left.second < right.second;
    }
};

#undef DEBUG_KMEDIAN_ASSIGNMENTS
// #define DEBUG_KMEDIAN_ASSIGNMENTS

#undef DEBUG_KMEDIAN_PROTOCOMP
// #define DEBUG_KMEDIAN_PROTOCOMP


KMedian::KMedian(const int & _noClusters, const std::string & _distanceType) :
  noClusters(_noClusters), distanceType(_distanceType)
{
  //srand(time(NULL));
  distancefunction = GenericDistanceSelection::selectDistance(distanceType);
  
  this->d_minDelta  = 1e-5;
  this->i_maxIterations = 200;
}

KMedian::KMedian( const NICE::Config *conf, const std::string & _section)
{       
  this->distanceType = conf->gS( _section, "distanceType", "euclidean" );
  this->distancefunction = GenericDistanceSelection::selectDistance(distanceType);
  
  this->d_minDelta  = conf->gD( _section, "minDelta", 1e-5 );
  this->i_maxIterations = conf->gI( _section, "maxIterations", 200);
  
  this->noClusters = conf->gI( _section, "noClusters", 20);
}

KMedian::~KMedian()
{
}

void KMedian::initial_guess(const VVector & features, VVector & prototypes)
{
  int j = 0;
  std::set<int, std::greater<int> > mark;

  for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++)
  {
    int k;

    do
    {
      k = rand() % features.size();
    } while (mark.find(k) != mark.end());

    mark.insert(mark.begin(), k);

    *i = features[k];
  }
}

int KMedian::compute_prototypes(const VVector & features, VVector & prototypes,
    std::vector<double> & weights, const std::vector<int> & assignment)
{
  
  #ifdef DEBUG_KMEDIAN_PROTOCOMP  
    std::cerr << "initial assignments: ";
    for (std::vector<int>::const_iterator assignIt = assignment.begin(); assignIt != assignment.end(); assignIt++)
    { 
      std::cerr << " " << *assignIt;
    } 
    std::cerr << std::endl;
  #endif
  
  //initialization
  for (int k = 0; k < noClusters; k++)
  {
    prototypes[k].set(0);
    weights[k] = 0;
  }
  
  NICE::VectorT<int> numberOfCurrentAssignments ( noClusters ) ;
  numberOfCurrentAssignments.set ( 0 );
  
  int exCnt = 0;  
  //how many examples are assigned to the current clusters?
  for (VVector::const_iterator i = features.begin(); i != features.end(); i++, exCnt++)
  {
    int k = assignment[exCnt];
    //increase counter for assigned cluster
    numberOfCurrentAssignments[ k ] ++;    
  }
    
  #ifdef DEBUG_KMEDIAN_PROTOCOMP    
    std::cerr << "k-median -- current assignmens: " << numberOfCurrentAssignments << std::endl << "noClusters: " << noClusters << std::endl;
  #endif
  
  //compute the median for every cluster
  #pragma omp parallel for
  for (int clusterCnt = 0; clusterCnt < noClusters; clusterCnt++)
  {    
    NICE::Vector overallDistances ( numberOfCurrentAssignments[ clusterCnt ] );
    VVector::const_iterator lastExampleWorkedOn = features.begin();
    int i_idxOfLastExampleWorkedOn ( 0 );
    uint i_exCntInt ( 0 );

    //this map will contain overall distances of all examples within the current clusters
    //we need separate maps for every cluster to allow parallelization
    std::map<int,double> distancesWithinCluster;
    for (VVector::const_iterator featIt = features.begin(); featIt != features.end(); featIt++, i_exCntInt++)
    {
      int k = assignment[i_exCntInt];
      
      //only considere examples currently assigned to cluster clusterCnt
      if ( k != clusterCnt)
      {
        continue;      
      }
      
      uint exCntIntTmp ( i_idxOfLastExampleWorkedOn ); //idx going over all features 
      for (VVector::const_iterator j = lastExampleWorkedOn ; j != features.end(); j++, exCntIntTmp++)
      {
        int kTmp;
        if ( exCntIntTmp < assignment.size() )
          kTmp = assignment[exCntIntTmp];
        else
        {
          //actually, this will be never be reached :)
          std::cerr << "ERROR: exCntIntTmp >= assignment.size() " << exCntIntTmp << " " << assignment.size() << std::endl;
        }
        
        //only considere examples currently assigned to cluster clusterCnt
        if ( kTmp != clusterCnt)
          continue;         
       
        
        double dist ( distancefunction->calculate( *featIt, *j) );
        if ( i_exCntInt < features.size() )
        {
          distancesWithinCluster[ i_exCntInt ] += dist;
          #ifdef DEBUG_KMEDIAN_PROTOCOMP
            std::cerr << "increase " << i_exCntInt << " by " << dist << " for " <<*featIt << " and " << *j << std::endl;
          #endif
        }
        else
        {
          //actually, this will be never be reached :)          
          std::cerr << "ERROR: i_exCntInt >= features.size() " << i_exCntInt << " " << features.size() << std::endl;
        }

        if ( i_exCntInt != exCntIntTmp )
        {
          if (exCntIntTmp < features.size() )
          {
            distancesWithinCluster[ exCntIntTmp ] += dist;
            #ifdef DEBUG_KMEDIAN_PROTOCOMP
              std::cerr << "increase also " << exCntIntTmp << " by " << dist << std::endl;
            #endif
          }
          else
            std::cerr << "ERROR: exCntIntTmp >= features.size() " << exCntIntTmp << " " << features.size() << std::endl;
        }
        
      }      
      
      //inc by one to avoid calculating some distances twice
      if ( ( featIt != features.end()) && ( (featIt +1 ) != features.end()) )
      {
        lastExampleWorkedOn = ( featIt + 1 );      
        i_idxOfLastExampleWorkedOn = i_exCntInt+1;
      }
    }
       
    #ifdef DEBUG_KMEDIAN_PROTOCOMP
      std::cerr << "distances for cluster " << clusterCnt << " ";
      for(std::map<int,double>::const_iterator distIt = distancesWithinCluster.begin(); distIt != distancesWithinCluster.end(); distIt++)
      {
        std::cerr << distIt->first << " " << distIt->second << " ";
      }
      std::cerr << std::endl;
    #endif
      
    //now compute the index of example with min overall distance
    int idxOfClusterMedian ( (min_element(distancesWithinCluster.begin(), distancesWithinCluster.end(), CompareSecond()))->first );
        
    #pragma omp critical
    prototypes[clusterCnt] = features[idxOfClusterMedian]; 
 
    //finished computations for cluster k
  }
  
  #ifdef DEBUG_KMEDIAN_PROTOCOMP
    std::cerr << " ----   prototypes after current iteration:  --- " << std::endl;
    for (NICE::VVector::const_iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); protoIt++)
    {
      std::cerr << *protoIt << " ";
    }
    
    std::cerr << std::endl;
  #endif

  return 0;
}

double KMedian::compute_delta(const VVector & oldprototypes,
    const VVector & prototypes)
{
  double distance = 0;

  for (uint k = 0; k < oldprototypes.size(); k++)
  {
    distance += distancefunction->calculate(oldprototypes[k], prototypes[k]);
    
    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
      fprintf(stderr, "KMedian::compute_delta: Distance:",
          distancefunction->calculate(oldprototypes[k], prototypes[k]));
    #endif
  }
  return distance;
}

double KMedian::compute_assignments(const VVector & features,
                                    const VVector & prototypes,
                                    std::vector<int> & assignment)
{
  int index = 0;
  for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
  {

    const NICE::Vector & x = *i;
    double mindist = std::numeric_limits<double>::max();
    int minclass = 0;

    int c = 0;
    
    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS

        fprintf(stderr, "computing nearest prototype for std::vector %d\n",
            index);
    #endif
        
    for (VVector::const_iterator j = prototypes.begin(); j
        != prototypes.end(); j++, c++)
    {

      const NICE::Vector & p = *j;
      double distance = distancefunction->calculate(p, x);
      
      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
        fprintf(stderr, "KMedian::compute_delta: Distance: %f\n",
            distancefunction->calculate(p, x));
      #endif

      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
            cerr << p << endl;
            cerr << x << endl;
            fprintf(stderr, "distance to prototype %d is %f\n", c, distance);
      #endif

      if (distance < mindist)
      {
        minclass = c;
        mindist = distance;
      }
    }

    assignment[index] = minclass;
  }

  return 0.0;
}

double KMedian::compute_weights(const VVector & features,
                                std::vector<double> & weights,
                                std::vector<int> & assignment)
{
  for (int k = 0; k < noClusters; k++)
    weights[k] = 0;

  int j = 0;

  for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  {
    int k = assignment[j];
    weights[k]++;
  }

  for (int k = 0; k < noClusters; k++)
    weights[k] = weights[k] / features.size();

  return 0.0;
}

void KMedian::cluster(const NICE::VVector & features,
                      NICE::VVector & prototypes,
                      std::vector<double> & weights,
                      std::vector<int> & assignment)
{
  NICE::VVector oldprototypes;

  prototypes.clear();
  weights.clear();
  assignment.clear();
  weights.resize(noClusters, 0);
  assignment.resize(features.size(), 0);

  int dimension;

  if ((int) features.size() >= noClusters)
    dimension = features[0].size();
  else
  {
    fprintf(stderr,
        "FATAL ERROR: Not enough feature vectors provided for kMedians -- number of Features: %i - number of clusters: %i\n", (int) features.size(), noClusters);
    exit(-1);
  }

  for (int k = 0; k < noClusters; k++)
  {
    prototypes.push_back( NICE::Vector(dimension) );
    prototypes[k].set(0);
  }
 
  bool successKMedian ( false );
  int iterations ( 0 );
  double delta ( std::numeric_limits<double>::max() );
  
  while ( !successKMedian )
  {
    //we assume that this run will be successful
    successKMedian = true;
    
    this->initial_guess(features, prototypes);

    iterations = 0;
    delta =  std::numeric_limits<double>::max();

    //until-loop over iterations
    do
    {
      iterations++;
      
//       #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
        std::cerr << "k-median iteration " << iterations << std::endl;
//       #endif
        
      this->compute_assignments( features, prototypes, assignment );

      if (iterations > 1)
        oldprototypes = prototypes;

      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
          fprintf(stderr, "KMedian::cluster compute_prototypes\n");
      #endif
      
      if ( this->compute_prototypes( features, prototypes, weights, assignment ) < 0 )
      {
        fprintf(stderr, "KMedian::cluster restart\n");
        successKMedian = false;
        break;
      }

      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
          fprintf(stderr, "KMedian::cluster compute_delta\n");
      #endif
      
      if (iterations > 1)
        delta = this->compute_delta( oldprototypes, prototypes );

      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
          this->print_iteration( iterations, prototypes, delta );
      #endif

    } while ((delta > d_minDelta) && (iterations < i_maxIterations));
    
  }

  std::cerr << "ended optimization  -- delta: " << delta  << " of d_minDelta: " << d_minDelta << " --- and iterations: " << iterations << " of i_maxIterations: " << i_maxIterations << std::endl;
    
  #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
    fprintf(stderr, "KMedian::cluster: iterations = %d, delta = %f\n",
        iterations, delta);
  #endif

  this->compute_weights( features, weights, assignment );
}

void KMedian::print_iteration( int iterations, VVector & prototypes, double delta )
{
  if (iterations > 1)
    fprintf(stderr, "KMedian::cluster: iteration=%d delta=%f\n", iterations,
        delta);
  else
    fprintf(stderr, "KMedian::cluster: iteration=%d\n", iterations);

  int k = 0;

  for (VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++)
  {
    fprintf(stderr, "class (%d)\n", k);
    cerr << "prototype = " << (*i) << endl;
  }
}