//////////////////////////////////////////////////////////////////////
//
//  GradientDescentOptimizer.cpp: Implementation of the
//                                GradientDescentOptimizer class.
//
//	Written by Matthias Wacker
//  edited by Johannes Ruehle, 2012-10-11
//////////////////////////////////////////////////////////////////////

#include "GradientDescentOptimizer.h"

namespace OPTIMIZATION
{

GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
  : SuperClass(loger)
{
  m_stepLength = -1;
  m_MinimalGradientMagnitude = 1e-7;
}


GradientDescentOptimizer::GradientDescentOptimizer( const GradientDescentOptimizer &opt) : SuperClass(opt)
{
  m_stepSize = opt.m_stepSize;
  m_stepLength = opt.m_stepLength;
  m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
}

GradientDescentOptimizer::~GradientDescentOptimizer()
{
}

void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
{
  m_stepSize = stepSize;
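  // note: the scalar step length is stored negated (pointing downhill) so that the
  // legacy update "m_parameters + m_gradient * stepLength" descends; the current
  // per-dimension update in optimize() uses m_stepSize directly and only rescales
  // this value on direction changes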
  m_stepLength = -m_stepSize.Max();
}

void GradientDescentOptimizer::init()
{
  SuperClass::init();

  if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
  {
    m_stepSize = m_scales; 

        std::cout << "GradientDescentOptimizer::init(): warning: using optimizer scales as steps, since no steps were specified! Consider, if this is desired behavoir!" << std::endl;

  }
  else
  {
    // check that at least one step size is positive (non-zero)
    bool hasPositiveStepSize = false;
    for(int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
    {
      if(m_stepSize(i,0) > 0)
      {
        hasPositiveStepSize = true;
      }
    }
    if(hasPositiveStepSize == false)
    {
      std::cerr << "GradientDescentOptimizer::init: no positive step size found - check your code!" << std::endl;
      exit(1);
    }
  }

  
  m_numIter = 0;

  m_analyticalGradients = m_costFunction->hasAnalyticGradient();
  
  /*
    compute the initial cost function value and gradient
  */
  m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

  m_gradient = (m_analyticalGradients && m_costFunction->hasAnalyticGradient()) ?
                 getAnalyticalGradient(m_parameters) :
                 getNumericalGradient(m_parameters, m_stepSize);
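  /*
    note: when no analytical gradient is available, the base class falls back to a
    numerical gradient; presumably a finite-difference approximation of the form
      df/dx_i ~ ( f(x + h_i * e_i) - f(x) ) / h_i
    with the h_i taken from the step sizes (this is an assumption - see the
    base-class implementation of getNumericalGradient() for the exact scheme).
  */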


}

int GradientDescentOptimizer::optimize()
{
  /*
    call init
  */
  init();


  if(m_loger)
    m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");



  /*
    start time criteria
  */
  m_startTime = clock();


  bool abort = false;
//	double stepLength = -1.0;
  double cosAngle = 0.0;
  double downScaleFactor = 0.5;

  matrix_type stepSize = m_stepSize;
  double stepLength = m_stepLength;
  
  /*
    check abort criteria for gradient
  */	
  if(m_gradientTolActive)
  {
    if(m_gradient.frobeniusNorm() < m_gradientTol){
      m_returnReason = SUCCESS_GRADIENTTOL;
      abort = true;

      if(m_verbose == true)
      {
        std::cout << "# Gradient Descenct :: aborting because of GradientTol" << std::endl;
      }
    }
  }	


  /* do the optimization */
  while(abort == false)
  {
    /*
      increase iteration counter
    */
    m_numIter++;
    
    if(m_verbose == true)
    {
      std::cout << "# Gradient Descent :: Iteration: " << m_numIter << " : current parameters :\n ";
      for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
      {
        std::cout << m_parameters(r,0) << " ";
      }
      std::cout << "\n current function value : " << m_currentCostFunctionValue << std::endl;

      std::cout << " current gradient :\n ";
      for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
      {
        std::cout << m_gradient(r,0) << " ";
      }
      std::cout << std::endl;

      std::cout << " current stepsize :\n ";
      for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
      {
        std::cout << stepSize(r,0) << " ";
      }
      std::cout << std::endl;
    }

    /*
      Check iteration limit
    */
    if(m_maxNumIterActive)
    {
      if(m_numIter >= m_maxNumIter)
      {
        if(m_verbose == true)
        {
          std::cout << "# Gradient Descenct :: aborting because of numIter" << std::endl;
        }


        /* set according return status and return */
        m_returnReason = SUCCESS_MAXITER;
        abort = true;
        continue;
      }
    }


    /*
      store last results for comparison
    */
    matrix_type oldParams = m_parameters;
    matrix_type oldGradient = m_gradient;
    double oldFuncValue = m_currentCostFunctionValue;
    

    /*
      get gradient
    */
    m_gradient = (m_analyticalGradients && m_costFunction->hasAnalyticGradient()) ?
                   getAnalyticalGradient(m_parameters) :
                   getNumericalGradient(m_parameters, stepSize);

    /*
      weight with scalings
    */
    //m_gradient = m_gradient.ElMul(m_scales);
    for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
    {
      m_gradient(k,0) *= m_scales(k,0);
    }

    /*
      check gradient tol
    */
    if(m_gradientTolActive)
    {
      if(m_gradient.frobeniusNorm() < m_gradientTol)
      {
        if(m_verbose == true)
        {
          std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
        }

        m_returnReason = SUCCESS_GRADIENTTOL;
        abort = true;
        continue;
      }
    }

    /*
      normalize the gradient to length 1; if its magnitude is too small, abort
      and return SUCCESS_PARAMTOL
      (this can happen if gradientTol is not active)
      FIXME: WACKER think about a "useful" limit
             ruehle: now adjustable via the member m_MinimalGradientMagnitude;
             a gradient this small is treated as having reached a local/global
             optimum (guaranteed to be the global one only for convex functions)
    */
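    /*
      normalization: g_hat = g / ||g||_F, so that the displacement per iteration
      is governed by the step sizes alone and not by the (possibly badly scaled)
      magnitude of the gradient
    */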
    double fGradientLength = m_gradient.frobeniusNorm();
    if (fGradientLength > m_MinimalGradientMagnitude)
    {
      for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
      {
        m_gradient(k,0) /= fGradientLength;
      }
    }
    else
    {
      if(m_verbose == true)
      {
        std::cout << "# Gradient Descent :: aborting because the gradient is too small: L2 norm = " << fGradientLength
                  << " with minimum gradient magnitude = " << m_MinimalGradientMagnitude
                  << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
                  << std::endl;
      }

      /* set according return status and the last parameters and return */
      m_returnReason = SUCCESS_PARAMTOL;

      abort = true;
      continue;
    }
    
    /*
      check direction change to scale down for convergence!
    */
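    /*
      both gradients are normalized to unit length (except oldGradient on the very
      first iteration, where it still comes unnormalized from init()), so the scalar
      (!m_gradient * oldGradient)(0,0) - assuming operator! is the transpose of
      matrix_type - is the cosine of the angle between the current and the previous
      descent direction; with cosAngle = 0 a scaledown is triggered whenever that
      angle exceeds 90 degrees
    */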
    if(( !m_gradient * oldGradient)(0,0) < cosAngle)
    {
      /*
        the direction changed by more than acos(cosAngle) degrees:
        scale the step sizes down by downScaleFactor
      */
      for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
        stepSize(k,0) *= downScaleFactor;

      stepLength *= downScaleFactor;
      /*FIXME: WACKER: only as long
      as there is no steplength computation!*/

      if(m_verbose == true)
      {
        std::cout << "# Gradient Descent :: direction change detected -> performing scaledown" << std::endl;
      }
    }
    
        
    /*
      set the next iteration step
    */
        // weighting the step sizes for the next step by the gradient was tried,
        // but it destroys convergence:
        //     for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
        //         stepSize(k,0) = stepSize(k,0) * m_gradient(k,0);

        // previous version used a single scalar step length for all dimensions:
        //     m_parameters = m_parameters + m_gradient * stepLength;
        // current version: each gradient component is weighted by its dimension's
        // individual step size
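        // i.e., for each dimension k:  x_k <- x_k - stepSize_k * g_hat_k,
        // a componentwise-scaled step along the normalized descent direction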
        for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
            m_parameters(k,0) = m_parameters(k,0) - stepSize(k,0) * m_gradient(k,0);

    /*
      check whether the new parameters are within the parameter bounds, if active
    */
    if(m_lowerParameterBoundActive || m_upperParameterBoundActive)
    {
      for(int i=0; i <static_cast<int>(m_numberOfParameters); i++)
      {
        if( m_upperParameterBoundActive)
        {
          if(m_parameters(i,0) >= m_upperParameterBound(i,0))
          {
            if(m_verbose == true)
            {
              std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
            }

            /* set according return status and the last parameters and return */
            m_returnReason = ERROR_XOUTOFBOUNDS;
            m_parameters = oldParams;
            abort = true;
            continue;
          }
        }
        if( m_lowerParameterBoundActive)
        {

          if(m_parameters(i,0) <= m_lowerParameterBound(i,0))
          {
            if(m_verbose == true)
            {
              std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
            }

            /* set according return status and return the last parameter*/
            m_returnReason = ERROR_XOUTOFBOUNDS;
            m_parameters = oldParams;
            abort = true;
            continue;
          }
        }
      }
    }

    /*
      Get new Costfunction Value
    */
    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

    /*
      Check ParamTol
    */
    if(m_paramTolActive)
    {
      if( (m_parameters - oldParams).frobeniusNorm() < m_paramTol)
      {
        if(m_verbose == true)
        {
          std::cout << "Gradient Descenct :: aborting because of param Tol" << std::endl;
        }

        /* set according return status and the last parameters and return */
        m_returnReason = SUCCESS_PARAMTOL;
        /*m_parameters = oldParams;*/
        abort = true;
        continue;
      }

    }
  
    if(m_funcTolActive)
    {
      if( fabs((m_currentCostFunctionValue - oldFuncValue)) < m_funcTol)
      {
        if(m_verbose == true)
        {
          std::cout << "# Gradient Descent :: aborting because of Func Tol" << std::endl;
        }

        /* set according return status and the last parameters and return */
        m_returnReason = SUCCESS_FUNCTOL;
        /* m_parameters = oldParams; */
        abort = true;
        continue;
      }
    }

    /*
      check the optimization time limit, if active
    */
    if(m_maxSecondsActive)
    {
      m_currentTime = clock();
    
      //std::cout << (_startTime - _currentTime) << std::endl;
      //std::cout << CLOCKS_PER_SEC << std::endl;
      /* time limit exceeded ? */
      if(((float)(m_currentTime - m_startTime )/CLOCKS_PER_SEC) >= m_maxSeconds )
      {
        if(m_verbose == true)
        {
          std::cout << "# Gradient Descent :: aborting because of Time Limit" << std::endl;
        }

        /* set according return status and the last parameters and return */
        m_returnReason = SUCCESS_TIMELIMIT;
        m_parameters = oldParams;
        abort = true;
        continue;
      }
    }

  }

  if(m_verbose)
  {
    std::cout << "# Gradient Descenct :: RESULT: ";
    for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
    {
      std::cout<< m_parameters(r,0) << " ";
    }
    std::cout << "    value:   " << m_currentCostFunctionValue  << std::endl;
  }

  return m_returnReason;

}

}
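
/*
  Minimal usage sketch (illustration only, not part of the implementation).
  Apart from setStepSize(), setMinimalGradientMagnitude() and optimize(), the
  setter names below are assumptions about the base-class interface and may
  differ in this library:

    OPTIMIZATION::GradientDescentOptimizer opt(NULL);   // no logger
    opt.setCostFunction(&myCostFunction);                // assumed base-class setter
    opt.setParameters(startParameters);                  // assumed: initial parameter column vector
    opt.setMaxNumIter(true, 1000);                       // assumed: activate the iteration limit
    opt.setStepSize(stepSizes);                          // per-dimension step sizes (column vector)
    opt.setMinimalGradientMagnitude(1e-9);
    int returnReason = opt.optimize();                   // one of the SUCCESS_* / ERROR_* codes
*/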