- //////////////////////////////////////////////////////////////////////
- //
- // GradientDescentOptimizer.cpp: Implementation of the GradientDescentOptimizer class.
- //
- // Written by Matthias Wacker
- // edited by Johannes Ruehle, 2012-10-11
- //////////////////////////////////////////////////////////////////////
- #include "GradientDescentOptimizer.h"
- using namespace OPTIMIZATION;
- GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
- : SuperClass(loger)
- {
- m_stepLength = -1;
- m_MinimalGradientMagnitude = 1e-7;
- }
- GradientDescentOptimizer::GradientDescentOptimizer( const GradientDescentOptimizer &opt) : SuperClass(opt)
- {
- m_stepSize = opt.m_stepSize;
- m_stepLength = opt.m_stepLength;
- m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
- }
- GradientDescentOptimizer::~GradientDescentOptimizer()
- {
- }
- void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
- {
- m_stepSize = stepSize;
- m_stepLength = -m_stepSize.Max();
- }
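- /*
- Minimal usage sketch (illustrative only): set per-dimension step sizes and run the
- optimizer. How the cost function, start parameters and abort criteria are handed
- over belongs to the base optimizer interface and is only hinted at here; the name
- numParams and the matrix_type(rows, cols) constructor are assumptions.
-
- matrix_type steps(numParams, 1);
- for(int i = 0; i < numParams; ++i)
- steps(i,0) = 0.01; // illustrative per-dimension step size
-
- GradientDescentOptimizer opt(NULL); // run without a logger
- opt.setStepSize(steps);
- // ... configure the cost function, start parameters and abort criteria via the base class ...
- // int returnReason = opt.optimize();
- */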
- void GradientDescentOptimizer::init()
- {
- SuperClass::init();
- if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
- {
- m_stepSize = m_scales;
- std::cout << "GradientDescentOptimizer::init(): warning: using optimizer scales as steps, since no steps were specified! Consider, if this is desired behavoir!" << std::endl;
- }
- else
- {
- // check that at least one step size is positive
- bool hasPositiveStep = false;
- for(int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
- {
- if(m_stepSize(i,0) > 0 )
- {
- hasPositiveStep = true;
- }
- }
- if(!hasPositiveStep)
- {
- std::cerr << "GradientDescentOptimizer::init(): all step sizes are zero - check your code!" << std::endl;
- exit(1);
- }
- }
-
- m_numIter = 0;
- m_analyticalGradients = m_costFunction->hasAnalyticGradient();
-
- /*
- compute the initial cost function value and the initial gradient
- */
- m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
- m_gradient = (m_analyticalGradients == true &&
- (m_costFunction->hasAnalyticGradient() == true) ) ?
- getAnalyticalGradient(m_parameters) :
- getNumericalGradient(m_parameters, m_stepSize);
- }
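- /*
- init() falls back to a numerical gradient whenever the cost function provides no
- analytic one. Purely as an illustration (the actual implementation is the base
- class' getNumericalGradient), a forward-difference estimate that uses the
- per-dimension step sizes as perturbations would look roughly like this:
-
- // matrix_type grad(m_numberOfParameters, 1);
- // double f0 = evaluateCostFunction(params);
- // for(int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
- // {
- //     matrix_type perturbed = params;
- //     perturbed(k,0) += stepSizes(k,0);
- //     grad(k,0) = (evaluateCostFunction(perturbed) - f0) / stepSizes(k,0);
- // }
- */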
- int GradientDescentOptimizer::optimize()
- {
- /*
- call init
- */
- init();
- if(m_loger)
- m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");
- /*
- start time criteria
- */
- m_startTime = clock();
- bool abort = false;
- // double stepLength = -1.0;
- double cosAngle = 0.0;
- double downScaleFactor = 0.5;
- matrix_type stepSize = m_stepSize;
- double stepLength = m_stepLength;
-
- /*
- check abort criteria for gradient
- */
- if(m_gradientTolActive)
- {
- if(m_gradient.frobeniusNorm() < m_gradientTol){
- m_returnReason = SUCCESS_GRADIENTTOL;
- abort = true;
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: aborting because of GradientTol" << std::endl;
- }
- }
- }
- /* do the optimization */
- while(abort == false)
- {
- /*
- increase iteration counter
- */
- m_numIter++;
-
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: Iteration: " << m_numIter << " : current parameters :\n ";
- for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
- {
- std::cout<< m_parameters(r,0) << " ";
- }
- std::cout << m_currentCostFunctionValue << std::endl;
- std::cout << " current gradient :\n ";
- for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
- {
- std::cout<< m_gradient(r,0) << " ";
- }
- std::cout << std::endl;
- std::cout << " current stepsize :\n ";
- for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
- {
- std::cout<< stepSize(r,0) << " ";
- }
- std::cout << std::endl;
- }
- /*
- Check iteration limit
- */
- if(m_maxNumIterActive)
- {
- if(m_numIter >= m_maxNumIter)
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: aborting because of numIter" << std::endl;
- }
- /* set according return status and return */
- m_returnReason = SUCCESS_MAXITER;
- abort = true;
- continue;
- }
- }
- /*
- store last results for comparison
- */
- matrix_type oldParams = m_parameters;
- matrix_type oldGradient = m_gradient;
- double oldFuncValue = m_currentCostFunctionValue;
-
- /*
- get gradient
- */
- m_gradient = (m_analyticalGradients == true &&
- (m_costFunction->hasAnalyticGradient() == true) ) ?
- getAnalyticalGradient(m_parameters) :
- getNumericalGradient(m_parameters, stepSize);
- /*
- weight with scalings
- */
- //m_gradient = m_gradient.ElMul(m_scales);
- for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
- {
- m_gradient(k,0) *= m_scales(k,0);
- }
- /*
- check gradient tol
- */
- if(m_gradientTolActive)
- {
- if(m_gradient.frobeniusNorm() < m_gradientTol)
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: aborting because of GradientTol" << std::endl;
- }
- m_returnReason = SUCCESS_GRADIENTTOL;
- abort = true;
- continue;
- }
- }
- /*
- normalize the gradient to unit length; if its magnitude is too small, stop instead
- (this can happen if gradientTol is not active..)
- FIXME: WACKER think about a "useful" limit
- ruehle: the limit is now adjustable via the member m_MinimalGradientMagnitude;
- a gradient below this magnitude is treated as having reached a local/global optimum
- (which, strictly speaking, only holds for convex functions)
- */
- double fGradientLength = m_gradient.frobeniusNorm();
- if (fGradientLength > m_MinimalGradientMagnitude)
- {
- for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
- {
- m_gradient(k,0) /= fGradientLength;
- }
- }
- else
- {
- if(m_verbose == true)
- {
- std::cout << "Gradient Descenct :: aborting because gradient is too small L2 norm = " << fGradientLength
- << " with set minimum gradient magnitude = " << m_MinimalGradientMagnitude
- << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
- <<std::endl;
- }
- /* set according return status and the last parameters and return */
- m_returnReason = SUCCESS_PARAMTOL;
- abort =true;
- continue;
- }
-
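- // from here on m_gradient holds only the unit-length (scaled) descent direction;
- // the actual step magnitude is governed by the per-dimension step sizes below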
- /*
- check for a direction change: (!m_gradient * oldGradient)(0,0) is the scalar
- product of the current and the previous gradient direction; if it falls below
- cosAngle, the step sizes are scaled down to aid convergence
- */
- if(( !m_gradient * oldGradient)(0,0) < cosAngle)
- {
- /*
- the direction change exceeds acos(cosAngle) degrees ->
- scale down by downScaleFactor
- */
- for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
- stepSize(k,0) *= downScaleFactor;
- stepLength *= downScaleFactor;
- /*FIXME: WACKER: only as long
- as there is no steplength computation!*/
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: direction change detected ->perfoming scaledown" << std::endl;
- }
- }
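- // with downScaleFactor = 0.5 every detected direction change halves all step sizes,
- // so oscillating around a minimum successively shrinks the steps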
-
-
- /*
- take the next iteration step
- */
- //weighting the stepSize for the next grid search by the gradient
- //FIXME: doing so destroys convergence... somehow:
- // for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
- // stepSize(k,0) = stepSize(k,0) * m_gradient(k,0);
- //old version (one step length for all dimensions):
- // m_parameters = m_parameters + m_gradient * stepLength;
- //current version: each gradient component is weighted by its dimension's individual step size
- for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
- m_parameters(k,0) = m_parameters(k,0) - stepSize(k,0) * m_gradient(k,0);
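- // since m_gradient has unit length, the change in parameter k is bounded by stepSize(k,0)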
- /*
- Check if it is in bounds, paramTol, funcTol, NumIter, gradienttol, maxSeconds
- */
- if(m_lowerParameterBoundActive || m_upperParameterBoundActive)
- {
- for(int i=0; i <static_cast<int>(m_numberOfParameters); i++)
- {
- if( m_upperParameterBoundActive)
- {
- if(m_parameters(i,0) >= m_upperParameterBound(i,0))
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
- }
- /* set according return status, restore the last parameters and stop */
- m_returnReason = ERROR_XOUTOFBOUNDS;
- m_parameters = oldParams;
- abort = true;
- break;
- }
- }
- if( m_lowerParameterBoundActive)
- {
- if(m_parameters(i,0) <= m_lowerParameterBound(i,0))
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
- }
- /* set according return status, restore the last parameters and stop */
- m_returnReason = ERROR_XOUTOFBOUNDS;
- m_parameters = oldParams;
- abort = true;
- break;
- }
- }
- }
- // leave this iteration as soon as a bound is violated, so that the tolerance
- // checks below cannot overwrite the return reason
- if(abort == true)
- continue;
- }
- /*
- Get new cost function value
- */
- m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
- /*
- Check ParamTol
- */
- if(m_paramTolActive)
- {
- if( (m_parameters - oldParams).frobeniusNorm() < m_paramTol)
- {
- if(m_verbose == true)
- {
- std::cout << "Gradient Descenct :: aborting because of param Tol" << std::endl;
- }
- /* set according return status and the last parameters and return */
- m_returnReason = SUCCESS_PARAMTOL;
- /*m_parameters = oldParams;*/
- abort = true;
- continue;
- }
- }
-
- if(m_funcTolActive)
- {
- if( fabs((m_currentCostFunctionValue - oldFuncValue)) < m_funcTol)
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: aborting because of Func Tol" << std::endl;
- }
- /* set according return status and the last parameters and return */
- m_returnReason = SUCCESS_FUNCTOL;
- /* m_parameters = oldParams; */
- abort = true;
- continue;
- }
- }
- /*
- Check the optimization time limit, if active
- */
- if(m_maxSecondsActive)
- {
- m_currentTime = clock();
-
- //std::cout << (_startTime - _currentTime) << std::endl;
- //std::cout << CLOCKS_PER_SEC << std::endl;
- /* time limit exceeded ? */
- if(((float)(m_currentTime - m_startTime )/CLOCKS_PER_SEC) >= m_maxSeconds )
- {
- if(m_verbose == true)
- {
- std::cout << "# Gradient Descenct :: aborting because of Time Limit" << std::endl;
- }
- /* set according return status and the last parameters and return */
- m_returnReason = SUCCESS_TIMELIMIT;
- m_parameters = oldParams;
- abort = true;
- continue;
- }
- }
- }
- if(m_verbose)
- {
- std::cout << "# Gradient Descenct :: RESULT: ";
- for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
- {
- std::cout<< m_parameters(r,0) << " ";
- }
- std::cout << " value: " << m_currentCostFunctionValue << std::endl;
- }
- return m_returnReason;
- }
- //}