//////////////////////////////////////////////////////////////////////
//
// GradientDescentOptimizer.cpp: Implementation of the GradientDescentOptimizer
// Optimizer class.
//
// Written by Matthias Wacker
// edited by Johannes Ruehle, 2012-10-11
//////////////////////////////////////////////////////////////////////

#include "GradientDescentOptimizer.h"

#include <iostream>  // std::cout, std::cerr
#include <cstdlib>   // exit()
#include <cmath>     // fabs()
#include <ctime>     // clock(), CLOCKS_PER_SEC

using namespace OPTIMIZATION;
GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
    : SuperClass(loger)
{
    m_stepLength = -1;
    m_MinimalGradientMagnitude = 1e-7;
}
GradientDescentOptimizer::GradientDescentOptimizer(const GradientDescentOptimizer &opt)
    : SuperClass(opt)
{
    m_stepSize = opt.m_stepSize;
    m_stepLength = opt.m_stepLength;
    m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
}

GradientDescentOptimizer::~GradientDescentOptimizer()
{
}
void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
{
    m_stepSize = stepSize;
    m_stepLength = -m_stepSize.Max();
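    // the negative sign follows the classic update "m_parameters + stepLength * m_gradient"
    // (see the commented-out variant in optimize()): a negative length yields a descent step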
}
void GradientDescentOptimizer::init()
{
    SuperClass::init();

    if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
    {
        m_stepSize = m_scales;
        std::cout << "GradientDescentOptimizer::init(): warning: using the optimizer scales as step sizes, since no step sizes were specified! Consider whether this is the desired behavior!" << std::endl;
    }
    else
    {
        // check for a nonzero step size
        bool tmp = false;
        for (int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
        {
            if (m_stepSize(i,0) > 0)
            {
                tmp = true;
            }
        }
        if (tmp == false)
        {
            std::cerr << "GradientDescentOptimizer::init(): all step sizes are zero - check your code!" << std::endl;
            exit(1);
        }
    }
    m_numIter = 0;
    m_analyticalGradients = m_costFunction->hasAnalyticGradient();

    /*
        FIXME: WACKER: compute first value and first gradient!
    */
    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
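    // initial gradient: analytic if the cost function provides one, otherwise numeric;
    // getNumericalGradient() (base class, not part of this file) presumably uses the
    // per-dimension step sizes as finite-difference offsets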
    m_gradient = (m_analyticalGradients == true &&
                  (m_costFunction->hasAnalyticGradient() == true)) ?
                     getAnalyticalGradient(m_parameters) :
                     getNumericalGradient(m_parameters, m_stepSize);
}
int GradientDescentOptimizer::optimize()
{
    /*
        call init
    */
    init();

    if (m_loger)
        m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");

    /*
        start time criteria
    */
    m_startTime = clock();

    bool abort = false;
    // double stepLength = -1.0;
    double cosAngle = 0.0;
    double downScaleFactor = 0.5;

    matrix_type stepSize = m_stepSize;
    double stepLength = m_stepLength;

    /*
        check abort criteria for gradient
    */
    if (m_gradientTolActive)
    {
        if (m_gradient.frobeniusNorm() < m_gradientTol)
        {
            m_returnReason = SUCCESS_GRADIENTTOL;
            abort = true;

            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
            }
        }
    }
    /* do the optimization */
    while (abort == false)
    {
        /*
            increase iteration counter
        */
        m_numIter++;

        if (m_verbose == true)
        {
            std::cout << "# Gradient Descent :: Iteration: " << m_numIter << " : current parameters :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_parameters(r,0) << " ";
            }
            std::cout << m_currentCostFunctionValue << std::endl;

            std::cout << " current gradient :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_gradient(r,0) << " ";
            }
            std::cout << std::endl;

            std::cout << " current stepsize :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << stepSize(r,0) << " ";
            }
            std::cout << std::endl;
        }
        /*
            check iteration limit
        */
        if (m_maxNumIterActive)
        {
            if (m_numIter >= m_maxNumIter)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of numIter" << std::endl;
                }

                /* set according return status and return */
                m_returnReason = SUCCESS_MAXITER;
                abort = true;
                continue;
            }
        }
        /*
            store last results for comparison
        */
        matrix_type oldParams = m_parameters;
        matrix_type oldGradient = m_gradient;
        double oldFuncValue = m_currentCostFunctionValue;

        /*
            get gradient
        */
        m_gradient = (m_analyticalGradients == true &&
                      (m_costFunction->hasAnalyticGradient() == true)) ?
                         getAnalyticalGradient(m_parameters) :
                         getNumericalGradient(m_parameters, stepSize);

        /*
            weight with scalings
        */
        //m_gradient = m_gradient.ElMul(m_scales);
        for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
        {
            m_gradient(k,0) *= m_scales(k,0);
        }
        /*
            check gradient tol
        */
        if (m_gradientTolActive)
        {
            if (m_gradient.frobeniusNorm() < m_gradientTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
                }
                m_returnReason = SUCCESS_GRADIENTTOL;
                abort = true;
                continue;
            }
        }
        /*
            Normalize the gradient to unit length; if the gradient is too small, abort
            instead (originally meant to return ERROR_COMPUTATION_UNSTABLE; this can
            happen if gradientTol is not active).
            FIXME: WACKER: think about a "useful" limit
            ruehle: now adjustable via the variable m_MinimalGradientMagnitude;
            a small gradient is taken as having reached the local/global optimum
            (hello, convex functions...)
        */
        double fGradientLength = m_gradient.frobeniusNorm();
        if (fGradientLength > m_MinimalGradientMagnitude)
        {
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            {
                m_gradient(k,0) /= fGradientLength;
            }
        }
        else
        {
            if (m_verbose == true)
            {
                std::cout << "Gradient Descent :: aborting because the gradient is too small: L2 norm = " << fGradientLength
                          << " with set minimum gradient magnitude = " << m_MinimalGradientMagnitude
                          << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
                          << std::endl;
            }

            /* set according return status and the last parameters and return */
            m_returnReason = SUCCESS_PARAMTOL;
            abort = true;
            continue;
        }
        /*
            check direction change to scale down for convergence!
        */
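        // (!m_gradient * oldGradient)(0,0) is the inner product of the current and the
        // previous gradient (assuming operator! denotes the matrix transpose here); as the
        // gradients are normalized to unit length above, this is essentially the cosine of
        // the angle between successive descent directions, so with cosAngle = 0.0 a
        // scale-down is triggered once the direction changes by more than 90 degrees.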
        if ((!m_gradient * oldGradient)(0,0) < cosAngle)
        {
            /*
                direction change is more than acos(cosAngle) deg.
                scale down by downScaleFactor
            */
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
                stepSize(k,0) *= downScaleFactor;

            stepLength *= downScaleFactor;
            /* FIXME: WACKER: only as long as there is no step length computation! */

            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: direction change detected -> performing scale-down" << std::endl;
            }
        }
        /*
            set next iteration step
        */
        // weight the stepSize for the next grid search by the gradient;
        // FIXME: using this thought destroys convergence... somehow...
        //for(int k=0; k < static_cast<int>(m_numberOfParameters); ++k)
        //    stepSize(k,0) = stepSize(k,0) * m_gradient(k,0);

        // old but silly version:
        //m_parameters = m_parameters + m_gradient * stepLength;

        // new version, where each gradient entry is weighted by the dimension's individual
        // step size (instead of one step length for all dimensions, as before)
        for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            m_parameters(k,0) = m_parameters(k,0) - stepSize(k,0) * m_gradient(k,0);
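        // since the gradient has unit L2 norm at this point, |m_gradient(k,0)| <= 1 and
        // stepSize(k,0) bounds the per-dimension displacement of this update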
        /*
            Check if it is in bounds, paramTol, funcTol, numIter, gradientTol, maxSeconds
        */
        if (m_lowerParameterBoundActive || m_upperParameterBoundActive)
        {
            for (int i = 0; i < static_cast<int>(m_numberOfParameters); i++)
            {
                if (m_upperParameterBoundActive)
                {
                    if (m_parameters(i,0) >= m_upperParameterBound(i,0))
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter bounds" << std::endl;
                        }

                        /* set according return status and the last parameters and return */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        continue;
                    }
                }
                if (m_lowerParameterBoundActive)
                {
                    if (m_parameters(i,0) <= m_lowerParameterBound(i,0))
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter bounds" << std::endl;
                        }

                        /* set according return status and return the last parameters */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        continue;
                    }
                }
            }
        }
        /*
            Get new cost function value
        */
        m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

        /*
            Check paramTol
        */
        if (m_paramTolActive)
        {
            if ((m_parameters - oldParams).frobeniusNorm() < m_paramTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "Gradient Descent :: aborting because of paramTol" << std::endl;
                }

                /* set according return status and the last parameters and return */
                m_returnReason = SUCCESS_PARAMTOL;
                /*m_parameters = oldParams;*/
                abort = true;
                continue;
            }
        }
        if (m_funcTolActive)
        {
            if (fabs(m_currentCostFunctionValue - oldFuncValue) < m_funcTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of funcTol" << std::endl;
                }

                /* set according return status and the last parameters and return */
                m_returnReason = SUCCESS_FUNCTOL;
                /* m_parameters = oldParams; */
                abort = true;
                continue;
            }
        }
        /*
            Check optimization time limit, if active
        */
        if (m_maxSecondsActive)
        {
            m_currentTime = clock();

            //std::cout << (_startTime - _currentTime) << std::endl;
            //std::cout << CLOCKS_PER_SEC << std::endl;

            /* time limit exceeded? */
            if (((float)(m_currentTime - m_startTime) / CLOCKS_PER_SEC) >= m_maxSeconds)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of time limit" << std::endl;
                }

                /* set according return status and the last parameters and return */
                m_returnReason = SUCCESS_TIMELIMIT;
                m_parameters = oldParams;
                abort = true;
                continue;
            }
        }
    }
    if (m_verbose)
    {
        std::cout << "# Gradient Descent :: RESULT: ";
        for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
        {
            std::cout << m_parameters(r,0) << " ";
        }
        std::cout << " value: " << m_currentCostFunctionValue << std::endl;
    }

    return m_returnReason;
}
//}
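
/*
    Usage sketch (illustrative only): the setters for the cost function and the start
    parameters are assumed to come from the Optimizer base class; only setStepSize(),
    setMinimalGradientMagnitude() and optimize() are defined or referenced in this file.

        MyCostFunction costFunction;                    // hypothetical CostFunction subclass
        matrix_type start(dim, 1), steps(dim, 1);       // assuming a (rows, cols) constructor
        // ... fill start and steps ...

        GradientDescentOptimizer optimizer(NULL);       // NULL: no logger (m_loger is checked before use)
        optimizer.setStepSize(steps);                   // per-dimension step sizes
        optimizer.setMinimalGradientMagnitude(1e-7);    // abort threshold for the gradient norm
        // optimizer.setCostFunction(&costFunction);    // assumed base-class API
        // optimizer.setParameters(start);              // assumed base-class API
        int returnReason = optimizer.optimize();        // one of the SUCCESS_/ERROR_ codes
*/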