@@ -4,20 +4,18 @@
 // Optimizer class.
 //
 // Written by Matthias Wacker
-//
+// edited by Johannes Ruehle, 2012-10-11
 //////////////////////////////////////////////////////////////////////

-#include "optimization/GradientDescentOptimizer.h"
+#include "GradientDescentOptimizer.h"

 using namespace optimization;

-//#include <iostream>
-
-
 GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
     : SuperClass(loger)
 {
     m_stepLength = -1;
+    m_MinimalGradientMagnitude = 1e-7;
 }

@@ -25,6 +23,7 @@ GradientDescentOptimizer::GradientDescentOptimizer( const GradientDescentOptimiz
 {
     m_stepSize = opt.m_stepSize;
     m_stepLength = opt.m_stepLength;
+    m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
 }

 GradientDescentOptimizer::~GradientDescentOptimizer()
@@ -44,6 +43,9 @@ void GradientDescentOptimizer::init()
     if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
     {
         m_stepSize = m_scales;
+
+        std::cout << "GradientDescentOptimizer::init(): warning: using optimizer scales as steps, since no steps were specified! Consider whether this is the desired behavior!" << std::endl;
+
     }
     else
     {
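The warning added above fires whenever init() has to fall back to the optimizer scales because no per-dimension steps were specified. A minimal sketch of how a caller could avoid that fallback by providing explicit step sizes follows; the setter name setStepSize, the matrix_type(rows, cols) constructor, and the column-vector indexing are assumptions inferred from the members used in this file, not a confirmed API.

// Sketch only: assumes a setStepSize(matrix_type) setter and a column-vector
// matrix_type indexed as m[row][0], mirroring the m_stepSize / m_scales usage above.
#include "GradientDescentOptimizer.h"

using namespace optimization;

void configureSteps(GradientDescentOptimizer &opt, int numParams)
{
    matrix_type steps(numParams, 1);            // assumed (rows, cols) constructor
    for (int k = 0; k < numParams; ++k)
    {
        steps[k][0] = 0.01;                     // explicit step per dimension, instead of reusing the scales
    }
    opt.setStepSize(steps);                     // hypothetical setter; prevents the scales-as-steps fallback
}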
@@ -108,19 +110,6 @@ int GradientDescentOptimizer::optimize()
     matrix_type stepSize = m_stepSize;
     double stepLength = m_stepLength;

-
-    /*
-        compute start value and first gradient!
-    */
-    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
-
-    m_gradient = (m_analyticalGradients == true &&
-        (m_costFunction->hasAnalyticGradient() == true) ) ?
-        getAnalyticalGradient(m_parameters) :
-        getNumericalGradient(m_parameters, m_stepSize);
-
-
-
     /*
         check abort criteria for gradient
     */
@@ -160,6 +149,13 @@ int GradientDescentOptimizer::optimize()
         {
             std::cout<< m_gradient[r][0] << " ";
         }
+        std::cout << std::endl;
+
+        std::cout << " current stepsize :\n ";
+        for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
+        {
+            std::cout<< stepSize[r][0] << " ";
+        }
         std::cout << std::endl;

     }
@@ -196,7 +192,6 @@ int GradientDescentOptimizer::optimize()
         /*
             get gradient
         */
-        //m_gradient = (m_analyticalGradients == true) ? getAnalyticalGradient(m_parameters) : getNumericalGradient(m_parameters, stepSize);
         m_gradient = (m_analyticalGradients == true &&
             (m_costFunction->hasAnalyticGradient() == true) ) ?
             getAnalyticalGradient(m_parameters) :
@@ -234,21 +229,31 @@ int GradientDescentOptimizer::optimize()
             return ERROR_COMPUTATION_UNSTABLE
             (this can happen if gradienTol is not active..)
             FIXME: WACKER think about a "usefull" limit
+            ruehle: now adjustable via the variable m_MinimalGradientMagnitude;
+            a gradient this small is treated as having reached the local/global optimum (hello, convex function...)
         */
-        if (m_gradient.Norm(0) > 1.0e-50)
+        double fGradientLength = m_gradient.Norm(0);
+        if (fGradientLength > m_MinimalGradientMagnitude)
         {
             for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
             {
-                m_gradient[k][0] /= m_gradient.Norm(0);
+                m_gradient[k][0] /= fGradientLength;
+
             }
         }
         else
         {
-            m_returnReason = ERROR_COMPUTATION_UNSTABLE;
-            if(m_verbose == true)
-            {
-                std::cout << "# Gradient Descenct :: aborting because of ERROR_COMPUTATION_UNSTABLE " << std::endl;
-            }
+
+            if(m_verbose == true)
+            {
+                std::cout << "Gradient Descent :: aborting because the gradient is too small, L2 norm = " << fGradientLength
+                    << " with set minimum gradient magnitude = " << m_MinimalGradientMagnitude
+                    << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
+                    << std::endl;
+            }
+
+            /* set the according return status and the last parameters, and return */
+            m_returnReason = SUCCESS_PARAMTOL;
+
             abort =true;
             continue;

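The hunk above replaces the hard-coded 1.0e-50 cut-off with the configurable m_MinimalGradientMagnitude (1e-7 by default, see the constructor change above) and reports SUCCESS_PARAMTOL instead of ERROR_COMPUTATION_UNSTABLE when the gradient norm falls below it. A rough usage sketch follows; the setter setMinimalGradientMagnitude() is named in the patch's log message, but its exact signature, the CostFunction type name, and the base-class setters used around it are assumptions.

// Sketch only: the threshold setter's name is taken from the log message above; its
// signature and the helpers used here (CostFunction, setCostFunction, setParameters)
// are assumptions for illustration.
#include "GradientDescentOptimizer.h"

using namespace optimization;

int runDescent(CostFunction *costFunction, const matrix_type &startParams)
{
    GradientDescentOptimizer opt(NULL);        // construct without a logger
    opt.setCostFunction(costFunction);         // assumed setter from the optimizer base class
    opt.setParameters(startParams);            // assumed setter from the optimizer base class
    opt.setMinimalGradientMagnitude(1e-5);     // stop earlier than the 1e-7 default

    int returnReason = opt.optimize();
    // with this patch a vanishing gradient yields SUCCESS_PARAMTOL
    // instead of ERROR_COMPUTATION_UNSTABLE
    return returnReason;
}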
@@ -265,18 +270,32 @@ int GradientDescentOptimizer::optimize()
             */

             for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
-            stepSize[k][0] *= downScaleFactor;
+                stepSize[k][0] *= downScaleFactor;

             stepLength *= downScaleFactor;
             /*FIXME: WACKER: only as long
             as there is no steplength computation!*/
+
+            if(m_verbose == true)
+            {
+                std::cout << "# Gradient Descent :: direction change detected -> performing scale-down" << std::endl;
+            }
         }


         /*
             set next iteration step
         */
-        m_parameters = m_parameters + m_gradient * stepLength ;
+        // weight the stepSize for the next grid search by the gradient;
+        // FIXME: using this idea destroys convergence... somehow..
+        //  for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
+        //      stepSize[k][0] = stepSize[k][0] * m_gradient[k][0];
+
+        // old but silly version:
+        // m_parameters = m_parameters + m_gradient * stepLength ;
+        // new version, where each gradient component is weighted by that dimension's individual step size (not one-fits-all, as before)
+        for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
+            m_parameters[k][0] = m_parameters[k][0] - stepSize[k][0] * m_gradient[k][0];

         /*
             Check if it is in bounds, paramTol, funcTol, NumIter, gradienttol, maxSeconds
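The new update above moves each parameter along the normalized gradient by that dimension's own step size, rather than by one shared stepLength for every dimension. Below is a small self-contained sketch of the same update rule, p_k <- p_k - s_k * g_k / ||g||, written with plain std::vector so it does not depend on the optimizer class; all names are illustrative.

// Self-contained illustration of the per-dimension update used in the hunk above:
// p_k <- p_k - s_k * g_k / ||g||, where s_k is the step size of dimension k.
#include <cmath>
#include <cstddef>
#include <vector>

void gradientStep(std::vector<double> &params,
                  const std::vector<double> &gradient,
                  const std::vector<double> &stepSize,
                  double minGradientMagnitude = 1e-7)
{
    // L2 norm of the gradient (the patch logs m_gradient.Norm(0) as an L2 norm)
    double norm = 0.0;
    for (std::size_t k = 0; k < gradient.size(); ++k)
        norm += gradient[k] * gradient[k];
    norm = std::sqrt(norm);

    // below the threshold the patch treats the point as (locally) optimal and aborts
    if (norm <= minGradientMagnitude)
        return;

    for (std::size_t k = 0; k < params.size(); ++k)
        params[k] -= stepSize[k] * (gradient[k] / norm);  // individual step per dimension
}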
@@ -407,3 +426,5 @@ int GradientDescentOptimizer::optimize()
     return m_returnReason;

 }
+
+//}
|