/** 
* @file VCSVMLight.cpp
* @brief Binary SVM classifier based on the SVMLight library by T. Joachims
* @author Erik Rodner
* @date 10/25/2007
*/
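/*
  Minimal configuration sketch (keys as parsed in the constructor below; the
  section name and the INI-style syntax are assumptions about the
  application's NICE::Config setup):

    [VCSVMLight]
    kernel = rbf
    normalization_type = euclidean
    rbf_gamma = 1.0
    C = 10.0
    optimize_parameters = true
    use_crossvalidation = true
    calibrate_probabilities = true

  "kernel" accepts none/linear, poly, rbf, sigmoid and custom;
  "normalization_type" accepts none, euclidean and maxmin; C = 0.0 keeps
  SVMLight's internal default.
*/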
#ifdef NICE_USELIB_SVMLIGHT

#include <iostream>

#include "vislearning/classifier/vclassifier/VCSVMLight.h"

// local copies of SVMLight's kernel-type constants (the original macros
// clash with other headers)
# define SVM_LINEAR  0           /* linear kernel type */
# define SVM_POLY    1           /* polynomial kernel type */
# define SVM_RBF     2           /* rbf kernel type */
# define SVM_SIGMOID 3           /* sigmoid kernel type */
# define SVM_CUSTOM  4           /* custom kernel type */


using namespace OBJREC;

using namespace std;
using namespace NICE;

VCSVMLight::VCSVMLight ( const Config *_conf, const string & section ) 
    : VecClassifier ( _conf )
{
    finalModel = NULL;

    std::string kernel_type_s = _conf->gS(section, "kernel", "rbf");

    if ( (kernel_type_s == "none") || (kernel_type_s == "linear") )
    {
		kernel_type = SVM_LINEAR;
    } else if ( (kernel_type_s == "poly") ) {
		kernel_type = SVM_POLY;
    } else if ( (kernel_type_s == "rbf") ) {
		kernel_type = SVM_RBF;
    } else if ( (kernel_type_s == "sigmoid") ) {
		kernel_type = SVM_SIGMOID;
	} else if ( (kernel_type_s == "custom") ) {
		kernel_type = SVM_CUSTOM;
    } else {
		fthrow ( Exception, "Kernel method " << kernel_type_s << " not supported." );
    }

    std::string normalization_type_s = _conf->gS(section, "normalization_type", "euclidean" );
    if ( normalization_type_s == "none" ) {
		normalization_type = SVM_NORMALIZATION_NONE;
    } else if ( normalization_type_s == "euclidean" ) {
		normalization_type = SVM_NORMALIZATION_EUCLIDEAN;
    } else if ( normalization_type_s == "maxmin" ) {
		normalization_type = SVM_NORMALIZATION_01;
    } else {
		fthrow ( Exception, "Normalization method " << normalization_type << " not supported." );
    }
    fprintf (stderr, "VCSVMLight: kernel %d (%s) normalization %d (%s)\n", kernel_type, kernel_type_s.c_str(),
		normalization_type, normalization_type_s.c_str() );

    poly_degree = _conf->gI(section, "poly_degree", 1);
    rbf_gamma = _conf->gD(section, "rbf_gamma", 1.0);
    sigmoidpoly_scale = _conf->gD(section, "sigmoidpoly_scale", 1.0);
    sigmoidpoly_bias = _conf->gD(section, "sigmoidpoly_bias", 1.0);
    svm_c = _conf->gD(section, "C", 0.0 );
	use_crossvalidation = _conf->gB(section, "use_crossvalidation", true );
    optimize_parameters = _conf->gB(section, "optimize_parameters", true );

    rbf_gamma_min = _conf->gD(section, "rbf_gamma_min", 0.1 );
    rbf_gamma_max = _conf->gD(section, "rbf_gamma_max", 2.0 );
    rbf_gamma_step = _conf->gD(section, "rbf_gamma_step", 0.3 );
    svm_c_min = _conf->gD(section, "svm_c_min", 0.0 );
    svm_c_max = _conf->gD(section, "svm_c_max", 9.0 );
    svm_c_step = _conf->gD(section, "svm_c_step", 1.0 );

    bool calibrate_probabilities = _conf->gB(section, "calibrate_probabilities", true );
    if ( calibrate_probabilities )
    {
		logreg = new VCLogisticRegression ( _conf );
    } else {
		logreg = NULL;
    }

    // 'verbosity' is a global variable defined by SVMLight
    verbosity = _conf->gI(section, "verbose", 1 );

    if ( optimize_parameters && use_crossvalidation )
    {
		fprintf (stderr, "VCSVMLight: SVMLight verbosity activated as a workaround for a bug within SVMLight\n");
		verbosity = 1;
    }

    maxClassNo = 1;
}

VCSVMLight::VCSVMLight ( const VCSVMLight & classifier ) : VecClassifier() 
{
	this->maxClassNo = classifier.maxClassNo;
	this->max = classifier.max;
	this->min = classifier.min;
	this->normalization_type = classifier.normalization_type;
	this->kernel_type = classifier.kernel_type;

	this->poly_degree = classifier.poly_degree;
	this->rbf_gamma = classifier.rbf_gamma;
	this->sigmoidpoly_scale = classifier.sigmoidpoly_scale;
	this->sigmoidpoly_bias = classifier.sigmoidpoly_bias;

	this->svm_c = classifier.svm_c;
	this->use_crossvalidation = classifier.use_crossvalidation;
	this->optimize_parameters = classifier.optimize_parameters;

	this->rbf_gamma_min = classifier.rbf_gamma_min;
	this->rbf_gamma_max = classifier.rbf_gamma_max;
	this->rbf_gamma_step = classifier.rbf_gamma_step;

	this->svm_c_min = classifier.svm_c_min;
	this->svm_c_max = classifier.svm_c_max;
	this->svm_c_step = classifier.svm_c_step;

	if ( classifier.logreg != NULL )
		this->logreg = classifier.logreg->clone();
	else
		this->logreg = NULL;

	if ( classifier.finalModel != NULL )
		this->finalModel = copy_model ( classifier.finalModel );
	else
		this->finalModel = NULL;

}


VCSVMLight::~VCSVMLight()
{
    // release the SVMLight model and the optional calibration model
    clear();
    if ( logreg != NULL )
		delete logreg;
}

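/* Input normalization: "euclidean" rescales each vector to unit L2 norm,
   "maxmin" scales every dimension to [0,1] using the per-dimension ranges
   estimated from the training set in estimateMaxMin(). */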
void VCSVMLight::normalizeVector ( int normalization_type, NICE::Vector & x ) const
{
    assert ( x.size() > 0 );
    if ( normalization_type == SVM_NORMALIZATION_EUCLIDEAN )
    {
		double length = x.normL2();
		if ( fabs(length) > 1e-12 )
			for ( uint i = 0 ; i < x.size() ; i++ )
				x[i] /= length;
    } else if ( normalization_type == SVM_NORMALIZATION_01 ) {
		for ( uint i = 0 ; i < x.size() ; i++ )
			if ( fabs(max[i]-min[i]) > 1e-20 )
				x[i] = (x[i] - min[i]) / (max[i] - min[i]);
    }
}

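/* SVMLight stores feature vectors as sparse WORD arrays with one-based
   feature indices (wnum), terminated by an entry with wnum == 0; the method
   below converts a dense NICE::Vector accordingly before calling
   classify_example(). */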
double VCSVMLight::getSVMScore ( const NICE::Vector & x ) const
{
    if ( finalModel != NULL ) {
		DOC *example; 
		NICE::Vector x_normalized ( x );
		normalizeVector ( normalization_type, x_normalized );
		WORD *words = (WORD *)my_malloc(sizeof(WORD)*(x.size()+10));

		long wc;
		for(wc=0; wc<(long)x.size(); wc++){
		   words[wc].weight = (FVAL)(x_normalized[wc]);
		   words[wc].wnum = wc+1;
		}
		words[wc].wnum = 0;
		
		long queryid=0;
		long slackid=0; 
		long costfactor=1;
		char comment [2] = "";

		example = create_example(0,queryid,slackid,costfactor,
					 create_svector(words,comment,1.0));

		double score = classify_example ( finalModel, example );

		free (words);
		free_example (example,1);

		return score;
    } else {
		fthrow ( Exception, "VCSVMLight: no model available, call teach() or read() first !" );
    }
    return 0.0;
}

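/* The raw SVM score is either passed through the logistic regression model
   (calibrated, probabilistic output) or simply thresholded at zero: scores
   below zero vote for class 0, all others for class 1. */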
ClassificationResult VCSVMLight::classify ( const NICE::Vector & x ) const
{
    double score = getSVMScore ( x );

    if ( logreg != NULL )
    {
		NICE::Vector y (1);
		y[0] = score;
		return logreg->classify ( y );
    } else {
		FullVector scores ( maxClassNo+1 );
		scores[0] = 0.0;
		scores[1] = score;
		// we are dealing only with binary classification
		if ( score < 0.0 )
			return ClassificationResult (0, scores);
		else
			return ClassificationResult (1, scores);
    }
}

void VCSVMLight::estimateMaxMin ( const LabeledSetVector & train )
{
    max.resize(0);
    min.resize(0);
    LOOP_ALL(train)
    {
		EACH(classno,v);
		UNUSED_PARAMETER(classno);
		if ( (max.size()<=0) && (min.size()<=0) )
		{
			max = Vector(v);
			min = Vector(v);
		} else {
			for ( uint i = 0 ; i < v.size() ; i++ )
			{
				if ( v[i] > max[i] ) max[i] = v[i];
				if ( v[i] < min[i] ) min[i] = v[i];
			}
		}
    }
}

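/* Conversion of the labeled training set into SVMLight's DOC array:
   class 1 is mapped to target +1, class 0 to target -1 (the classifier
   only handles binary problems, see teach() below). */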
void VCSVMLight::readDocuments(
		    DOC ***docs, 
		    double **label, 
		    long int *totwords, 
		    long int *totdoc, 
		    const LabeledSetVector & train )
{
  if ( normalization_type == SVM_NORMALIZATION_01 )
      estimateMaxMin ( train );

  char comment [2] = "";
  WORD *words;
  long vectorcount = train.count();
  long vectorsize = train.dimension();

  long dnum=0,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs=vectorcount+2;
  long max_words_doc=vectorsize;
  double costfactor;

  long int wc=0;

  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);    /* feature vectors */
  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */

  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
  (*totwords) = vectorsize+1;
  queryid=0; slackid=0; costfactor=1.;
  dnum=0;
  LOOP_ALL(train)
  {
      EACH(classno,v);
      NICE::Vector v_normalized ( v );

      normalizeVector ( normalization_type, v_normalized );

      for(wc=0; wc<vectorsize; wc++){
		  words[wc].weight = (FVAL)(v_normalized[wc]);
		  words[wc].wnum = wc+1;
      }
      words[wc].wnum = 0;
      (*label)[dnum]= (classno==1) ? 1 : -1;
      // class 0 examples get target -1, i.e. they are treated as negatives;
      // transduction (unlabeled examples) is not used by this wrapper
      if (classno == 1) dpos++;
      else dneg++;
      if((*totwords) > MAXFEATNUM) {
		  printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
		  exit(1);
      }
      (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
				   create_svector(words,comment,1.0));
      dnum++;  
  } 

  free(words);
  (*totdoc)=dnum;
}

void VCSVMLight::initMainParameters ( LEARN_PARM *learn_parm,
    KERNEL_PARM *kernel_parm )
{
  learn_parm->svm_c=svm_c;

  kernel_parm->kernel_type=kernel_type;
  kernel_parm->poly_degree=poly_degree;
  kernel_parm->rbf_gamma=rbf_gamma;
  kernel_parm->coef_lin=sigmoidpoly_scale;
  kernel_parm->coef_const=sigmoidpoly_bias;
  strcpy(kernel_parm->custom,"empty");

  if(learn_parm->svm_c<0) {
    printf("\nThe C parameter must not be negative (C = 0 selects SVMLight's internal default)!\n\n");
    exit(-1);
  }
}

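/* The defaults and sanity checks below appear to be taken over from
   SVMLight's svm_learn_main; only compute_loo is controlled by the
   use_crossvalidation setting of this wrapper. */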
void VCSVMLight::initParameters ( 
    LEARN_PARM *learn_parm,
    KERNEL_PARM *kernel_parm )
{
  /* set default */
  strcpy (learn_parm->predfile, "");
  strcpy (learn_parm->alphafile, "");
  learn_parm->biased_hyperplane=1;
  learn_parm->sharedslack=0;
  learn_parm->remove_inconsistent=0;
  learn_parm->skip_final_opt_check=0;
  learn_parm->svm_maxqpsize=10;
  learn_parm->svm_newvarsinqp=0;
  
  if( kernel_type == SVM_LINEAR ) 
      learn_parm->svm_iter_to_shrink=2;
  else
      learn_parm->svm_iter_to_shrink=100;

  learn_parm->maxiter=100000;
  learn_parm->kernel_cache_size=40;
  learn_parm->eps=0.1;
  learn_parm->transduction_posratio=-1.0;
  learn_parm->svm_costratio=1.0;
  learn_parm->svm_costratio_unlab=1.0;
  learn_parm->svm_unlabbound=1E-5;
  learn_parm->epsilon_crit=0.001;
  learn_parm->epsilon_a=1E-15;
  learn_parm->rho=1.0;
  learn_parm->xa_depth=0;

  // cross validation
  learn_parm->compute_loo = (use_crossvalidation) ? 1 : 0;
    
  learn_parm->type=CLASSIFICATION;
  
  if((learn_parm->skip_final_opt_check) 
     && (kernel_parm->kernel_type == SVM_LINEAR)) {
    printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
    learn_parm->skip_final_opt_check=0;
  }    
  if((learn_parm->skip_final_opt_check) 
     && (learn_parm->remove_inconsistent)) {
    printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
    exit(-1);
  }    
  if((learn_parm->svm_maxqpsize<2)) {
    printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize); 
    exit(-1);
  }
  if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) {
    printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize); 
    printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp); 
    exit(-1);
  }
  if(learn_parm->svm_iter_to_shrink<1) {
    printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink);
    exit(-1);
  }
  if(learn_parm->transduction_posratio>1) {
    printf("\nThe fraction of unlabeled examples to classify as positives must\n");
    printf("be less than 1.0 !!!\n\n");
    exit(-1);
  }
  if(learn_parm->svm_costratio<=0) {
    printf("\nThe COSTRATIO parameter must be greater than zero!\n\n");
    exit(-1);
  }
  if(learn_parm->epsilon_crit<=0) {
    printf("\nThe epsilon parameter must be greater than zero!\n\n");
    exit(-1);
  }
  if(learn_parm->rho<0) {
    printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
    printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
    printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
    exit(-1);
  }
  if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
    printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
    printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
    printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
    exit(-1);
  }
}

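/* Coordinate-descent grid search for good hyperparameters: each tunable
   parameter (C and, for the RBF kernel, rbf_gamma) is swept over its
   configured range, the SVM is retrained, and the setting with the lowest
   leave-one-out (or xi/alpha) error estimate is kept; the sweeps are
   repeated until a full pass yields no further improvement. */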
MODEL *VCSVMLight::optimizeParameters ( DOC **docs, 
    double *target, long int totwords, long int totdoc, 
    MODEL *model, KERNEL_PARM *kernel_parm,
    LEARN_PARM *learn_parm )
{
    double best_error = numeric_limits<double>::max();
    double old_best_error = best_error;

    // at most two parameters are tuned (C and, for RBF kernels, rbf_gamma),
    // so fixed-size arrays avoid the variable-length array GCC extension
    const int numberOfParameters = (kernel_type == SVM_RBF ? 2 : 1 );

    KERNEL_PARM *currentKernelParm = (KERNEL_PARM *)my_malloc(sizeof(KERNEL_PARM));
    LEARN_PARM *currentLearnParm = (LEARN_PARM *)my_malloc(sizeof(LEARN_PARM));
    MODEL *currentModel = (MODEL *)my_malloc(sizeof(MODEL));
    // my_malloc() does not zero the struct, so clear it to keep free_model()
    // below safe even if no training run fills the model
    memset ( currentModel, 0, sizeof(MODEL) );

    initParameters ( learn_parm, kernel_parm );
    initMainParameters ( learn_parm, kernel_parm );

    double *parametersToOptimize [ 2 ];
    double minValue [ 2 ];
    double maxValue [ 2 ];
    double steps [ 2 ];

    parametersToOptimize[0] = &(currentLearnParm->svm_c);
    minValue[0] = svm_c_min;
    maxValue[0] = svm_c_max;
    steps[0] = svm_c_step;
    if ( kernel_type == SVM_RBF ) {
		parametersToOptimize[1] = &(currentKernelParm->rbf_gamma);
		minValue[1] = rbf_gamma_min;
		maxValue[1] = rbf_gamma_max;
		steps[1] = rbf_gamma_step;
    }

    if ( use_crossvalidation )
    {
		fprintf (stderr, "VCSVMLight: error estimate type: leave-one-out error\n");
    } else {
		fprintf (stderr, "VCSVMLight: error estimate type: Xi/Alpha error\n");
    }

    do {
		old_best_error = best_error;
		for ( int i = 0 ; i < numberOfParameters ; i++ )
		{
			for ( double value = minValue[i] ; value < maxValue[i] ; value += steps[i] )
			{
				memcpy ( currentKernelParm, kernel_parm, sizeof(KERNEL_PARM) );
				memcpy ( currentLearnParm, learn_parm, sizeof(LEARN_PARM) );
				double *parameter = parametersToOptimize[i];
				*parameter = value;

				singleTraining ( docs, target, totwords, totdoc, currentModel, 
					currentKernelParm, currentLearnParm );

				fprintf (stderr, "VCSVMLight: optimize rbf_gamma=%f C=%f err=%f xa_err=%f (%f, %f)\n", 
					currentKernelParm->rbf_gamma, currentLearnParm->svm_c, currentModel->loo_error,
					currentModel->xa_error,
					kernel_parm->rbf_gamma, learn_parm->svm_c );

				double currentError = (use_crossvalidation) ? currentModel->loo_error : currentModel->xa_error;
				if ( currentError < 0 ) {
					fthrow ( Exception, "Error < 0; check your settings; SVMLight verbosity bug ?");
				}

				if ( currentError < best_error )
				{
					fprintf (stderr, "VCSVMLight: new optimum found !\n");
					best_error = currentError;
					memcpy ( kernel_parm, currentKernelParm, sizeof(KERNEL_PARM) );
					memcpy ( learn_parm, currentLearnParm, sizeof(LEARN_PARM) );
					memcpy ( model, currentModel, sizeof(MODEL) );
				}
			}
		}
    } while ( old_best_error > best_error );

    free_model(currentModel,0);
    free (currentLearnParm);
    free (currentKernelParm);

    // write the tuned values back to the member variables
    rbf_gamma = kernel_parm->rbf_gamma;
    svm_c = learn_parm->svm_c;
    fprintf (stderr, "VCSVMLight: optimum rbf_gamma=%f C=%f !\n", rbf_gamma, svm_c );

    return model;
}

MODEL *VCSVMLight::singleTraining ( DOC **docs, 
    double *target, long int totwords, long int totdoc, 
    MODEL *model, KERNEL_PARM *kernel_parm,
    LEARN_PARM *learn_parm )
{
    double *alpha_in=NULL;
    KERNEL_CACHE *kernel_cache;

    if(kernel_parm->kernel_type == SVM_LINEAR) {
		/* don't need the cache for a linear kernel */
		kernel_cache=NULL;
    } else {
		/* Always get a new kernel cache. It is not possible to use the
		   same cache for two different training runs */
		kernel_cache = kernel_cache_init(totdoc,learn_parm->kernel_cache_size);
    }

    svm_learn_classification(docs,target,totdoc,totwords,learn_parm,
			     kernel_parm,kernel_cache,model,alpha_in);

    if(kernel_cache) {
		/* Free the memory used for the cache. */
		kernel_cache_cleanup(kernel_cache);
    }
    free(alpha_in);

    return model;
}

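/* Full training pipeline: convert the training set, set up the SVMLight
   parameter structs, train (with or without the parameter search above)
   and keep a deep copy of the resulting model in finalModel. */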
void VCSVMLight::svmLightTraining ( 
	const LabeledSetVector & trainSet )
{
    double *target;
    long int totwords,totdoc,i;

    DOC **docs;  /* training examples */
    LEARN_PARM learn_parm;
    KERNEL_PARM kernel_parm;
    MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

    readDocuments( &docs,&target,&totwords,&totdoc,
	           trainSet);
    initParameters(&learn_parm, &kernel_parm);
    initMainParameters(&learn_parm, &kernel_parm);

    if ( optimize_parameters )
		optimizeParameters ( docs, target, totwords, totdoc, model, &kernel_parm, &learn_parm );
    else
		singleTraining ( docs, target, totwords, totdoc, model, &kernel_parm, &learn_parm );

    finalModel = copy_model(model); 

    free_model(model,0);

    for(i=0;i<totdoc;i++) 
	free_example(docs[i],1);

    free(docs);
    free(target);
}

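/* Training entry point: runs SVMLight on the binary problem and, if
   probability calibration is enabled, fits a logistic regression on the
   resulting SVM scores (Platt-style scaling). */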
void VCSVMLight::teach ( const LabeledSetVector & _teachSet )
{
    maxClassNo = _teachSet.getMaxClassno();

    if ( _teachSet.getMaxClassno() != 1 ) 
		fthrow ( Exception, "This classifier is only suitable for binary classification problems!");

    svmLightTraining ( _teachSet );

    if ( logreg != NULL )
    {
		LabeledSetVector scoreSet;
		LOOP_ALL ( _teachSet )
		{
			EACH(classno,x);
			double score = getSVMScore ( x );
			Vector y (1);
			y[0] = score;
			scoreSet.add ( classno, y );
		}
		logreg->teach ( scoreSet );
		scoreSet.clear();
    }

}

void VCSVMLight::finishTeaching()
{
}

/** clone this object */
VCSVMLight *VCSVMLight::clone(void) const
{
	return new VCSVMLight( *this );
}

void VCSVMLight::clear ()
{
    if ( finalModel != NULL )
    {
		// finalModel stems from copy_model() or read_model(), which
		// deep-copy the support vectors, so free those as well
		free_model(finalModel, 1);
		finalModel = NULL;
    }
}

void VCSVMLight::read (const string& s, int format)
{
    clear();
    finalModel = read_model ( const_cast<char *>(s.c_str()) );
}

void VCSVMLight::save (const string& s, int format) const
{
    if ( finalModel == NULL )
		fthrow ( Exception, "VCSVMLight: no model to save !" );
    write_model ( const_cast<char *>(s.c_str()), finalModel );
}


void VCSVMLight::store ( std::ostream & os, int format ) const
{
    fprintf (stderr, "VCSVMLight: unable to write to a stream! please use save()\n");
}

void VCSVMLight::restore ( std::istream & is, int format )
{
    fprintf (stderr, "VCSVMLight: unable to read from a stream! please use read()\n");
    exit (-1);
}


#endif