
Bug fixing gradient descent optimization

Johannes Ruehle 12 years ago
parent
commit
081f39a359

+ 49 - 28
GradientDescentOptimizer.cpp

@@ -4,20 +4,18 @@
 //								  Optimizer class.
 //
 //	Written by Matthias Wacker
-//
+//  edited by Johannes Ruehle, 2012-10-11
 //////////////////////////////////////////////////////////////////////
 
-#include "optimization/GradientDescentOptimizer.h"
+#include "GradientDescentOptimizer.h"
 
 using namespace optimization;
 
-//#include <iostream>
-
-
 GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
 	: SuperClass(loger)
 {
 	m_stepLength = -1;
+    m_MinimalGradientMagnitude = 1e-7;
 }
 
 
@@ -25,6 +23,7 @@ GradientDescentOptimizer::GradientDescentOptimizer( const GradientDescentOptimiz
 {
 	m_stepSize = opt.m_stepSize;
 	m_stepLength = opt.m_stepLength;
+    m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
 }
 
 GradientDescentOptimizer::~GradientDescentOptimizer()
@@ -44,6 +43,9 @@ void GradientDescentOptimizer::init()
 	if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
 	{
 		m_stepSize = m_scales; 
+
+        std::cout << "GradientDescentOptimizer::init(): warning: using optimizer scales as steps, since no steps were specified! Consider whether this is the desired behavior!" << std::endl;
+
 	}
 	else
 	{
@@ -108,19 +110,6 @@ int GradientDescentOptimizer::optimize()
 	matrix_type stepSize = m_stepSize;
 	double stepLength = m_stepLength;
 	
-
-	/*
-		compute start value and first gradient!
-	*/
-	m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
-
-	m_gradient = (m_analyticalGradients == true && 
-				(m_costFunction->hasAnalyticGradient() == true) ) ?
-							getAnalyticalGradient(m_parameters) : 
-							getNumericalGradient(m_parameters, m_stepSize);
-
-
-
 	/*
 		check abort criteria for gradient
 	*/	
@@ -160,6 +149,13 @@ int GradientDescentOptimizer::optimize()
             {
                 std::cout<< m_gradient[r][0] << " ";
             }
+            std::cout << std::endl;
+
+            std::cout << " current stepsize :\n ";
+            for(int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
+            {
+                std::cout<< stepSize[r][0] << " ";
+            }
             std::cout << std::endl;
 
 		}
@@ -196,7 +192,6 @@ int GradientDescentOptimizer::optimize()
 		/*
 			get gradient
 		*/
-		//m_gradient = (m_analyticalGradients == true) ? getAnalyticalGradient(m_parameters) : getNumericalGradient(m_parameters, stepSize);
 		m_gradient = (m_analyticalGradients == true && 
 				(m_costFunction->hasAnalyticGradient() == true) ) ?
 							getAnalyticalGradient(m_parameters) : 
@@ -234,21 +229,31 @@ int GradientDescentOptimizer::optimize()
 			return ERROR_COMPUTATION_UNSTABLE
 			(this can happen if gradienTol is not active..)
 			FIXME: WACKER think about a "usefull" limit
+                ruehle: now adjustable via variable m_MinimalGradientMagnitude
+            A small gradient is treated as having reached the local/global optimum (which strictly holds only for convex functions).
 		*/
-		if (m_gradient.Norm(0) > 1.0e-50)
+        double fGradientLength = m_gradient.Norm(0);
+        if (fGradientLength > m_MinimalGradientMagnitude)
 		{
 			for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
 			{
-				m_gradient[k][0] /= m_gradient.Norm(0);
+                m_gradient[k][0] /= fGradientLength;
+
 			}
 		}
 		else
 		{
-			m_returnReason = ERROR_COMPUTATION_UNSTABLE;
-			if(m_verbose == true)
-			{
-				std::cout << "# Gradient Descenct :: aborting because of ERROR_COMPUTATION_UNSTABLE " << std::endl;
-			}
+
+            if(m_verbose == true)
+            {
+                std::cout << "Gradient Descent :: aborting because gradient is too small, L2 norm = " << fGradientLength
+                          << " with set minimum gradient magnitude = " << m_MinimalGradientMagnitude
+                          << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
+                          << std::endl;
+            }
+
+            /* set the corresponding return status, keep the last parameters, and return */
+            m_returnReason = SUCCESS_PARAMTOL;
 
 			abort =true;
 			continue;
@@ -265,18 +270,32 @@ int GradientDescentOptimizer::optimize()
 			*/
 
 			for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
-				stepSize[k][0] *= downScaleFactor;
+                stepSize[k][0] *= downScaleFactor;
 
 			stepLength *= downScaleFactor;
 			/*FIXME: WACKER: only as long
 			as there is no steplength computation!*/
+
+            if(m_verbose == true)
+            {
+                std::cout << "# Gradient Descent :: direction change detected -> performing scaledown" << std::endl;
+            }
 		}
 		
 				
 		/*
 			set next iteration step
 		*/
-		m_parameters = m_parameters + m_gradient * stepLength ; 
+        //weight the stepSize for the next grid search by the gradient;
+        //FIXME: using this thought destroys convergence...somehow..
+        //     for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
+        //         stepSize[k][0] = stepSize[k][0] * m_gradient[k][0];
+
+        //old but silly version:
+        // m_parameters = m_parameters + m_gradient * stepLength ;
+        //new version where each gradient component is weighted by that dimension's individual step size (not one step size for all, as before)
+        for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
+            m_parameters[k][0] = m_parameters[k][0] - stepSize[k][0] * m_gradient[k][0];
 
 		/*
 			Check if it is in bounds, paramTol, funcTol, NumIter, gradienttol, maxSeconds
@@ -407,3 +426,5 @@ int GradientDescentOptimizer::optimize()
 	return m_returnReason;
 
 }
+
+//}
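
The behavioral core of this commit is twofold: the abort criterion now compares the gradient's L2 norm against the configurable m_MinimalGradientMagnitude instead of a hard-coded 1.0e-50 (and reports SUCCESS_PARAMTOL rather than ERROR_COMPUTATION_UNSTABLE), and the parameter update applies each dimension's own step size to the normalized gradient instead of one global step length. Below is a minimal standalone sketch of that update rule; it uses plain std::vector instead of the library's matrix_type, omits the step-size downscaling on direction change, and is an illustration rather than the optimizer's actual interface.

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// One gradient-descent step mirroring the commit's new update rule.
// Returns false once the gradient's L2 norm drops below minGradientMagnitude,
// i.e. the point at which the optimizer now stops with SUCCESS_PARAMTOL.
bool descentStep(std::vector<double>& params,
                 const std::vector<double>& gradient,
                 const std::vector<double>& stepSize,
                 double minGradientMagnitude = 1e-7)
{
    double norm = 0.0;
    for (double g : gradient)
        norm += g * g;
    norm = std::sqrt(norm);

    // Abort criterion: a gradient this small is treated as convergence.
    if (norm <= minGradientMagnitude)
        return false;

    // Per-dimension update: each coordinate moves by its own step size
    // along the normalized (unit-length) gradient.
    for (std::size_t k = 0; k < params.size(); ++k)
        params[k] -= stepSize[k] * (gradient[k] / norm);

    return true;
}

int main()
{
    // Toy example: minimize f(x, y) = x^2 + y^2 starting from (3, 4).
    std::vector<double> x = {3.0, 4.0};
    std::vector<double> step = {0.5, 0.5};

    for (int i = 0; i < 100; ++i)
    {
        std::vector<double> grad = {2.0 * x[0], 2.0 * x[1]};
        if (!descentStep(x, grad, step))
            break;
    }
    std::cout << "final point: " << x[0] << ", " << x[1] << std::endl;
    return 0;
}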

+ 7 - 3
GradientDescentOptimizer.h

@@ -3,7 +3,7 @@
 //	GradientDescentOptimizer.h: interface of the optimizer GradientDescent.
 //
 //	Written by: Matthias Wacker
-//
+//  edited by Johannes Ruehle, 2012-10-11
 //////////////////////////////////////////////////////////////////////
 
 #ifndef _GRADIENT_DESCENT_OPTIMIZER_
@@ -12,7 +12,6 @@
 #include <cmath>
 #include "optimization/DerivativeBasedOptimizer.h"
 
-
 ///
 ///	Class GradientDescentOptimizer
 ///
@@ -93,7 +92,7 @@ class GradientDescentOptimizer : public DerivativeBasedOptimizer
 
 		inline void setStepLength(double stepLength){m_stepLength=stepLength;}
 
-
+        inline void setMinimalGradientMagnitude(double minGradientMag){m_MinimalGradientMagnitude=minGradientMag;}
 	private:
 
 		///
@@ -106,6 +105,11 @@ class GradientDescentOptimizer : public DerivativeBasedOptimizer
 		///
 		double m_stepLength;
 
+        ///
+        ///	Minimal threshold on the L2 norm of the gradient; once the gradient's norm falls
+        /// below this value, the gradient descent is aborted.
+        ///
+        double m_MinimalGradientMagnitude;
 		
 };
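
The header change exposes the new abort threshold through a setter. A hypothetical configuration sketch follows; only setMinimalGradientMagnitude() and setStepLength() are taken from this diff, while the helper name and the way the optimizer instance is created (OptLogBase, cost function, scales) belong to the surrounding library and are assumed here.

#include "GradientDescentOptimizer.h"

// Hypothetical helper: tune the thresholds touched by this commit on an
// already constructed optimizer instance.
void configureDescent(GradientDescentOptimizer& optimizer)
{
    // Descent now stops with SUCCESS_PARAMTOL once the gradient's L2 norm
    // falls below this threshold (constructor default: 1e-7).
    optimizer.setMinimalGradientMagnitude(1e-9);

    // Global step length, unchanged by this commit.
    optimizer.setStepLength(0.05);
}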