/**
* @file RTBLinear.cpp
* @brief random regression tree, which learns a LSE-model in every inner node during training
* @author Frank Prüfer
* @date 09/17/2013

*/
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

#include "RTBLinear.h"
#include "vislearning/regression/linregression/LinRegression.h"

using namespace OBJREC;

#undef DEBUGTREE
#undef DETAILTREE

using namespace std;

using namespace NICE;

// Reads all tree-growing hyper-parameters from the given config section.
RTBLinear::RTBLinear( const Config *conf, std::string section )
{
  // number of random thresholds tried per sampled feature during split search
  random_split_tests = conf->gI(section, "random_split_tests", 10 );
  // number of feature dimensions sampled (with replacement) at every inner node
  random_features = conf->gI(section, "random_features", 500 );
  // nodes deeper than this become leaves
  max_depth = conf->gI(section, "max_depth", 10 );
  // a split is rejected if either side would hold fewer examples than this
  min_examples = conf->gI(section, "min_examples", 50);
  // NOTE(review): read from the hard-coded "RandomForest" section, unlike every
  // other option which uses `section` — looks unintentional; confirm against the
  // config files before changing.
  minimum_error_reduction = conf->gD("RandomForest", "minimum_error_reduction", 10e-3 );
  // whether leaves keep the indices of their training examples
  save_indices = conf->gB(section, "save_indices", false);
  
  // optionally seed the global RNG used by buildRecursive's rand() calls
  if ( conf->gB(section, "start_random_generator", false ) )
    srand(time(NULL));
}

RTBLinear::~RTBLinear()
{
  // nothing to release here; tree nodes returned by build() are owned by the caller
}

/**
 * Fits a linear least-squares model to (x, y) and returns the mean squared
 * residual of that fit in @p lsError.
 *
 * @param x       input vectors, one NICE::Vector per example
 * @param y       regression targets, parallel to x
 * @param numEx   number of examples to evaluate (callers pass the set size)
 * @param lsError output: mean of the squared residuals; 0.0 if numEx <= 0
 */
void RTBLinear::computeLinearLSError( const VVector& x,
          const Vector& y,
          const int& numEx,
          double& lsError)
{
  // Guard the degenerate case: Mean() of a zero-length vector is undefined.
  if ( numEx <= 0 )
  {
    lsError = 0.0;
    return;
  }

  // Stack allocation instead of new/delete: no leak even if teach() throws.
  LinRegression lreg;
  lreg.teach ( x, y );

  // squared residual per example
  NICE::Vector diff ( numEx );
  for ( int i = 0; i < numEx; i++ ){
    diff[i] = y[i] - lreg.predict ( x[i] );
    diff[i] *= diff[i];
  }

  lsError = diff.Mean();
}

bool RTBLinear::errorReductionLeftRight(const vector< pair< double, int > > values,
          const Vector & y,
          double threshold,
          double& error_left,
          double& error_right,
          int& count_left,
          int& count_right)
{
  count_left = 0;
  count_right = 0;
  vector<int> selection_left;
  vector<int> selection_right;
  
  NICE::VVector xLeft;
  NICE::VVector xRight;
  
  for ( vector< pair< double, int > >::const_iterator it = values.begin();
        it != values.end(); it++ )
  {
    double value = it->first;
    if ( value < threshold )
    {
      count_left++;
      selection_left.push_back( it->second );
      NICE::Vector tmp(1,value);
      xLeft.push_back( tmp );
    }
    else
    {
      count_right++;
      selection_right.push_back( it->second );
      NICE::Vector tmp2(1,value);
      xRight.push_back( tmp2 );
    }
  }

  if ( (count_left == 0) || (count_right == 0) )
    return false; // no split
  
  if ( (count_left < min_examples)  || (count_right < min_examples) )
    return false; // no split
  

  NICE::Vector yLeft (count_left);
  for ( int i = 0; i < count_left; i++ ){
    yLeft[i] = y[selection_left[i]];
  }
  computeLinearLSError(xLeft, yLeft, count_left, error_left);

  NICE::Vector yRight (count_right);
  for ( int i = 0; i < count_right; i++ ){
    yRight[i] = y[selection_right[i]];
  }
  computeLinearLSError(xRight, yRight, count_right, error_right);
  
  return true;
}

/**
 * Recursively grows the regression tree for the examples in @p selection.
 *
 * At each node: samples random_features feature dimensions, tries
 * random_split_tests random thresholds per feature, and keeps the split with
 * the largest error reduction. Recursion stops at max_depth, when fewer than
 * min_examples examples remain, or when the best reduction falls below
 * minimum_error_reduction.
 *
 * @param x         all training input vectors
 * @param y         all training targets
 * @param selection indices into x/y that belong to this node
 * @param depth     current depth (root is 0)
 * @return newly allocated node; caller takes ownership
 */
RegressionNode *RTBLinear::buildRecursive ( const NICE::VVector & x,
          const NICE::Vector & y,
          std::vector<int> & selection,
          int depth)
{
#ifdef DEBUGTREE
    fprintf (stderr, "Examples: %d (depth %d)\n", (int)selection.size(),
    (int)depth);
#endif
    
  RegressionNode *node = new RegressionNode ();
  // NOTE(review): nodePrediction is commented out, so leaves created here carry
  // no prediction value — confirm it is set elsewhere before relying on leaves.
//  node->nodePrediction( y, selection );
  double lsError;
  // NOTE(review): the node error is computed over ALL examples (x.size()), not
  // over `selection`, so lsError is identical at every node of the tree — this
  // looks unintentional; confirm against the intended split criterion.
  computeLinearLSError( x, y, (int)x.size(), lsError);
  
  // leaf case 1: maximum depth reached
  if ( depth > max_depth )
  {
#ifdef DEBUGTREE
   fprintf (stderr, "RTBLinear: maxmimum depth reached !\n");
#endif
   node->trainExamplesIndices = selection;
   return node;
  }
  
  // leaf case 2: not enough examples left to split
  if ( (int)selection.size() < min_examples )
  {
#ifdef DEBUGTREE
    fprintf (stderr, "RTBLinear: minimum examples reached %d < %d !\n",
      (int)selection.size(), min_examples );
#endif
    node->trainExamplesIndices = selection;
    return node;
  }

  // best split found so far (best_reduct = -1 means "none yet")
  int best_feature = 0;
  double best_threshold = 0.0;
  double best_reduct = -1.0;
//  vector<pair<double, int> > best_values;
  vector<pair<double, int> > values;
  double lsError_left = 0.0;
  double lsError_right = 0.0;
  
  for ( int k = 0; k < random_features; k++ )
  {
#ifdef DETAILTREE
    fprintf (stderr, "calculating random feature %d\n", k );
#endif
    // sample one feature dimension uniformly (with replacement across k)
    int f = rand() % x[0].size();
    
    values.clear();
    collectFeatureValues ( x, selection, f, values );
    
    // value range of this feature over the current selection
    double minValue = (min_element ( values.begin(), values.end() ))->first;
    double maxValue = (max_element ( values.begin(), values.end() ))->first;
    
#ifdef DETAILTREE
    fprintf (stderr, "max %f min %f\n", maxValue, minValue );
#endif
    // a (near-)constant feature cannot produce a useful split
    if ( maxValue - minValue < 1e-7 ) continue;
    
    for ( int i = 0; i < random_split_tests; i++ )
    {
      // draw a threshold uniformly from [minValue, maxValue]
      double threshold;
      threshold = rand() * (maxValue -minValue ) / RAND_MAX + minValue;
      
#ifdef DETAILTREE
      fprintf (stderr, "calculating split f/s(f) %d/%d %f\n", k, i, threshold );
#endif
      lsError_left = 0.0;
      lsError_right = 0.0;
      
      int count_left, count_right;
      // skip thresholds that give an empty or too-small side
      if ( ! errorReductionLeftRight( values, y, threshold, lsError_left,
          lsError_right, count_left, count_right) )
        continue;
      
      // reduction = parent error minus the (unweighted) sum of child errors;
      // the size-weighted variant is kept commented out below
      //double pl = (count_left) / (count_left +count_right);
      //double errorReduction = lsError - pl*lsError_left - (1-pl)*lsError_right;
      double errorReduction = lsError - lsError_left - lsError_right;
      
      if ( errorReduction > best_reduct )
      {
        best_reduct = errorReduction;
        best_threshold =  threshold;
        best_feature = f;
#ifdef DETAILTREE
        fprintf (stderr, "t %f for feature %i\n", best_threshold, best_feature );
#endif
      }
    }
  }
  
  // leaf case 3: no split improves the error enough
  if ( best_reduct < minimum_error_reduction )
  {
#ifdef DEBUGTREE
    fprintf (stderr, "RTBLinear: error reduction to small !\n");
#endif
    node->trainExamplesIndices = selection;
    return node;
  }
  
  node->f = best_feature;
  node->threshold = best_threshold;
  
  // re calculating examples_left and examples_right
  // (cheaper than caching the value lists of every candidate feature)
  vector<int> best_examples_left;
  vector<int> best_examples_right;
  values.clear();
  collectFeatureValues( x, selection, best_feature, values);
  
  best_examples_left.reserve ( values.size() / 2 );
  best_examples_right.reserve ( values.size() / 2 );
  
  for ( vector< pair < double, int > >::const_iterator it = values.begin();
        it != values.end(); it++ )
  {
    double value = it->first;
    if ( value < best_threshold )
      best_examples_left.push_back( it->second );
    else
      best_examples_right.push_back( it->second );
  }
  
  node->left = buildRecursive( x, y, best_examples_left, depth+1 );
  node->right = buildRecursive( x, y, best_examples_right, depth+1 );
  
  return node;
}

/**
 * Trains the regression tree on the complete data set (x, y).
 *
 * @param x training input vectors
 * @param y training targets, parallel to x
 * @return root of the learned tree; caller takes ownership
 */
RegressionNode *RTBLinear::build( const NICE::VVector & x,
          const NICE::Vector & y )
{
  // the root node starts from a selection containing every example index
  vector<int> all;
  all.reserve ( y.size() );
  for ( uint i = 0; i < y.size(); i++ )
    all.push_back( (int)i );
  
  return buildRecursive( x, y, all, 0 );
}