//////////////////////////////////////////////////////////////////////
//
//	GradientDescentOptimizer.cpp: Implementation of the GradientDescentOptimizer
//	Optimizer class.
//
//	Written by Matthias Wacker
//	edited by Johannes Ruehle, 2012-10-11
//////////////////////////////////////////////////////////////////////

#include "GradientDescentOptimizer.h"

#include <iostream>
#include <cmath>
#include <cstdlib>
#include <ctime>

using namespace OPTIMIZATION;

GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
	: SuperClass(loger)
{
	m_stepLength = -1;
	m_MinimalGradientMagnitude = 1e-7;
}

GradientDescentOptimizer::GradientDescentOptimizer(const GradientDescentOptimizer &opt)
	: SuperClass(opt)
{
	m_stepSize = opt.m_stepSize;
	m_stepLength = opt.m_stepLength;
	m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
}

GradientDescentOptimizer::~GradientDescentOptimizer()
{
}

void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
{
	m_stepSize = stepSize;
	m_stepLength = -m_stepSize.Max();
}

void GradientDescentOptimizer::init()
{
	SuperClass::init();

	if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
	{
		m_stepSize = m_scales;
		std::cout << "GradientDescentOptimizer::init(): warning: using the optimizer scales as step sizes, since no step sizes were specified! Check whether this is the desired behavior!" << std::endl;
	}
	else
	{
		// check that at least one step size is non-zero
		bool tmp = false;
		for(int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
		{
			if(m_stepSize(i,0) > 0)
			{
				tmp = true;
			}
		}
		if(tmp == false)
		{
			std::cerr << "GradientDescentOptimizer::init(): all step sizes are zero - check your code!" << std::endl;
			exit(1);
		}
	}

	m_numIter = 0;

	m_analyticalGradients = m_costFunction->hasAnalyticGradient();

	/*
		FIXME: WACKER: compute first value and first gradient!
	*/
	m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

	m_gradient = (m_analyticalGradients == true &&
				(m_costFunction->hasAnalyticGradient() == true)) ?
						getAnalyticalGradient(m_parameters) :
						getNumericalGradient(m_parameters, m_stepSize);
}

int GradientDescentOptimizer::optimize()
{
	/*
		call init
	*/
	init();

	if(m_loger)
		m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");
\n"); /* start time criteria */ m_startTime = clock(); bool abort = false; // double stepLength = -1.0; double cosAngle = 0.0; double downScaleFactor = 0.5; matrix_type stepSize = m_stepSize; double stepLength = m_stepLength; /* check abort criteria for gradient */ if(m_gradientTolActive) { if(m_gradient.frobeniusNorm() < m_gradientTol){ m_returnReason = SUCCESS_GRADIENTTOL; abort = true; if(m_verbose == true) { std::cout << "# Gradient Descenct :: aborting because of GradientTol" << std::endl; } } } /* do the optimization */ while(abort == false) { /* increase iteration counter */ m_numIter++; if(m_verbose == true) { std::cout << "# Gradient Descenct :: Iteration: " << m_numIter << " : current parameters :\n "; for(int r = 0; r < static_cast(m_numberOfParameters); r++) { std::cout<< m_parameters(r,0) << " "; } std::cout << m_currentCostFunctionValue << std::endl; std::cout << " current gradient :\n "; for(int r = 0; r < static_cast(m_numberOfParameters); r++) { std::cout<< m_gradient(r,0) << " "; } std::cout << std::endl; std::cout << " current stepsize :\n "; for(int r = 0; r < static_cast(m_numberOfParameters); r++) { std::cout<< stepSize(r,0) << " "; } std::cout << std::endl; } /* Check iteration limit */ if(m_maxNumIterActive) { if(m_numIter >= m_maxNumIter) { if(m_verbose == true) { std::cout << "# Gradient Descenct :: aborting because of numIter" << std::endl; } /* set according return status and return */ m_returnReason = SUCCESS_MAXITER; abort = true; continue; } } /* store last results for comparison */ matrix_type oldParams = m_parameters; matrix_type oldGradient = m_gradient; double oldFuncValue = m_currentCostFunctionValue; /* get gradient */ m_gradient = (m_analyticalGradients == true && (m_costFunction->hasAnalyticGradient() == true) ) ? getAnalyticalGradient(m_parameters) : getNumericalGradient(m_parameters, stepSize); /* weight with scalings */ //m_gradient = m_gradient.ElMul(m_scales); for(int k=0; k < static_cast(m_numberOfParameters); ++k) { m_gradient(k,0) *= m_scales(k,0); } /* check gradient tol */ if(m_gradientTolActive) { if(m_gradient.frobeniusNorm() < m_gradientTol) { if(m_verbose == true) { std::cout << "# Gradient Descenct :: aborting because of GradientTol" << std::endl; } m_returnReason = SUCCESS_GRADIENTTOL; abort = true; continue; } } /* set gradient length to 1, if length of gradient is too small .. return ERROR_COMPUTATION_UNSTABLE (this can happen if gradienTol is not active..) FIXME: WACKER think about a "usefull" limit ruehle: now adjustable via variable m_MinimalGradientMagnitude It considers a small gradient as having reached the local/global optimum, hello convex function... */ double fGradientLength = m_gradient.frobeniusNorm(); if (fGradientLength > m_MinimalGradientMagnitude) { for(int k=0; k < static_cast(m_numberOfParameters); ++k) { m_gradient(k,0) /= fGradientLength; } } else { if(m_verbose == true) { std::cout << "Gradient Descenct :: aborting because gradient is too small L2 norm = " << fGradientLength << " with set minimum gradient magnitude = " << m_MinimalGradientMagnitude << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()." 
						<< std::endl;
			}

			/* a gradient below the minimal magnitude is treated as convergence;
			   set the according return status and abort (status constant assumed from context) */
			m_returnReason = SUCCESS_PARAMTOL;
			abort = true;
			continue;
		}

		/*
			check for a direction change to scale down for convergence!
			(the exact original condition is missing here; comparing the dot product of the
			normalized new gradient and the old gradient against cosAngle is assumed)
		*/
		double directionCheck = 0.0;
		for(int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
		{
			directionCheck += m_gradient(k,0) * oldGradient(k,0);
		}
		if(directionCheck < cosAngle)
		{
			/* direction change detected ==> scale down the step sizes */
			for(int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
			{
				stepSize(k,0) *= downScaleFactor;
			}
			stepLength *= downScaleFactor;
			/* FIXME: WACKER: only as long as there is no step length computation! */

			if(m_verbose == true)
			{
				std::cout << "# Gradient Descent :: direction change detected -> performing scaledown" << std::endl;
			}
		}

		/*
			set the next iteration step
		*/
		// weight the step size for the next grid search by the gradient;
		// FIXME: using this thought destroys convergence... somehow..
		//	for(int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
		//		stepSize(k,0) = stepSize(k,0) * m_gradient(k,0);

		// old but silly version:
		//	m_parameters = m_parameters + m_gradient * stepLength;
		// new version, where each gradient entry is weighted by the dimension's individual step size
		// (not one-size-fits-all, as before)
		for(int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
		{
			m_parameters(k,0) = m_parameters(k,0) - stepSize(k,0) * m_gradient(k,0);
		}

		/*
			Check if it is in bounds, paramTol, funcTol, numIter, gradientTol, maxSeconds
		*/
		if(m_lowerParameterBoundActive || m_upperParameterBoundActive)
		{
			for(int i = 0; i < static_cast<int>(m_numberOfParameters); i++)
			{
				if(m_upperParameterBoundActive)
				{
					if(m_parameters(i,0) >= m_upperParameterBound(i,0))
					{
						if(m_verbose == true)
						{
							std::cout << "# Gradient Descent :: aborting because of the parameter bounds" << std::endl;
						}

						/* set the according return status, restore the last parameters and return */
						m_returnReason = ERROR_XOUTOFBOUNDS;
						m_parameters = oldParams;
						abort = true;
						continue;
					}
				}
				if(m_lowerParameterBoundActive)
				{
					if(m_parameters(i,0) <= m_lowerParameterBound(i,0))
					{
						if(m_verbose == true)
						{
							std::cout << "# Gradient Descent :: aborting because of the parameter bounds" << std::endl;
						}

						/* set the according return status, restore the last parameters and return */
						m_returnReason = ERROR_XOUTOFBOUNDS;
						m_parameters = oldParams;
						abort = true;
						continue;
					}
				}
			}
		}

		/*
			get the new cost function value
		*/
		m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

		/*
			check paramTol
		*/
		if(m_paramTolActive)
		{
			if((m_parameters - oldParams).frobeniusNorm() < m_paramTol)
			{
				if(m_verbose == true)
				{
					std::cout << "Gradient Descent :: aborting because of paramTol" << std::endl;
				}

				/* set the according return status and return */
				m_returnReason = SUCCESS_PARAMTOL;
				/*m_parameters = oldParams;*/
				abort = true;
				continue;
			}
		}

		if(m_funcTolActive)
		{
			if(fabs(m_currentCostFunctionValue - oldFuncValue) < m_funcTol)
			{
				if(m_verbose == true)
				{
					std::cout << "# Gradient Descent :: aborting because of funcTol" << std::endl;
				}

				/* set the according return status and return */
				m_returnReason = SUCCESS_FUNCTOL;
				/* m_parameters = oldParams; */
				abort = true;
				continue;
			}
		}

		/*
			check the optimization time limit, if active
		*/
		if(m_maxSecondsActive)
		{
			m_currentTime = clock();

			//std::cout << (_startTime - _currentTime) << std::endl;
			//std::cout << CLOCKS_PER_SEC << std::endl;

			/* time limit exceeded? */
			if(((float)(m_currentTime - m_startTime) / CLOCKS_PER_SEC) >= m_maxSeconds)
			{
				if(m_verbose == true)
				{
					std::cout << "# Gradient Descent :: aborting because of the time limit" << std::endl;
				}

				/* set the according return status, restore the last parameters and return */
				m_returnReason = SUCCESS_TIMELIMIT;
				m_parameters = oldParams;
				abort = true;
				continue;
			}
		}
	}

	if(m_verbose)
	{
		std::cout << "# Gradient Descent :: RESULT: ";
		for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
		{
			std::cout << m_parameters(r,0) << " ";
		}
		std::cout << " value: " << m_currentCostFunctionValue << std::endl;
	}

	return m_returnReason;
}
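
////////////////////////////////////////////////////////////////////////
// Usage sketch (illustrative only, not part of the implementation):
// a minimal example of how this optimizer might be driven. The cost
// function type ExampleCostFunction and the setter names used below
// (setCostFunction, setParameters, setMaxNumIter) are assumptions about
// the optimizer base class interface and may differ from the actual headers.
////////////////////////////////////////////////////////////////////////
#if 0
void gradientDescentUsageSketch()
{
	ExampleCostFunction costFunction;          // hypothetical CostFunction subclass
	GradientDescentOptimizer optimizer(NULL);  // construct without a logger

	optimizer.setCostFunction(&costFunction);  // assumed setter from the optimizer base class

	matrix_type initialParams(2, 1);           // two parameters, starting point
	optimizer.setParameters(initialParams);    // assumed setter from the optimizer base class

	matrix_type stepSize(2, 1);                // per-dimension step sizes
	stepSize(0,0) = 0.1;
	stepSize(1,0) = 0.01;
	optimizer.setStepSize(stepSize);

	optimizer.setMaxNumIter(true, 1000);       // assumed iteration-limit setter
	int returnReason = optimizer.optimize();   // run and inspect the return reason
	(void)returnReason;
}
#endif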