GradientDescentOptimizer.cpp

//////////////////////////////////////////////////////////////////////
//
// GradientDescentOptimizer.cpp: Implementation of the
// GradientDescentOptimizer class.
//
// Written by Matthias Wacker
// edited by Johannes Ruehle, 2012-10-11
//////////////////////////////////////////////////////////////////////

#include "GradientDescentOptimizer.h"

namespace OPTIMIZATION
{

GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
    : SuperClass(loger)
{
    m_stepLength = -1;
    m_MinimalGradientMagnitude = 1e-7;
}
GradientDescentOptimizer::GradientDescentOptimizer(const GradientDescentOptimizer &opt) : SuperClass(opt)
{
    m_stepSize = opt.m_stepSize;
    m_stepLength = opt.m_stepLength;
    m_MinimalGradientMagnitude = opt.m_MinimalGradientMagnitude;
}

GradientDescentOptimizer::~GradientDescentOptimizer()
{
}

void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
{
    m_stepSize = stepSize;
    m_stepLength = -m_stepSize.Max();
}
void GradientDescentOptimizer::init()
{
    SuperClass::init();

    if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
    {
        m_stepSize = m_scales;
        std::cout << "GradientDescentOptimizer::init(): warning: no step sizes were specified, using the optimizer scales as step sizes instead. Check that this is the desired behavior!" << std::endl;
    }
    else
    {
        // check for at least one nonzero step size
        bool tmp = false;
        for (int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
        {
            if (m_stepSize(i,0) > 0)
            {
                tmp = true;
            }
        }
        if (tmp == false)
        {
            std::cerr << "GradientDescentOptimizer::init(): all step sizes are zero - check your code!" << std::endl;
            exit(1);
        }
    }

    m_numIter = 0;
    m_analyticalGradients = m_costFunction->hasAnalyticGradient();

    /*
        FIXME: WACKER: compute first value and first gradient!
    */
    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

    m_gradient = (m_analyticalGradients == true &&
                  (m_costFunction->hasAnalyticGradient() == true)) ?
                      getAnalyticalGradient(m_parameters) :
                      getNumericalGradient(m_parameters, m_stepSize);
}
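
/*
    Note on getNumericalGradient(): the base-class implementation is not part of
    this file. Presumably it approximates the gradient with finite differences,
    using the per-dimension step sizes h_k that are passed in, roughly along the
    lines of (sketch only, not the actual base-class code):

        grad(k,0) = ( f(x + h_k * e_k) - f(x - h_k * e_k) ) / (2 * h_k)

    where e_k is the k-th unit vector and f is the cost function.
*/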
int GradientDescentOptimizer::optimize()
{
    /*
        call init
    */
    init();

    if (m_loger)
        m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");

    /*
        start time criterion
    */
    m_startTime = clock();

    bool abort = false;
    // double stepLength = -1.0;
    double cosAngle = 0.0;
    double downScaleFactor = 0.5;

    matrix_type stepSize = m_stepSize;
    double stepLength = m_stepLength;

    /*
        check the abort criterion for the gradient
    */
    if (m_gradientTolActive)
    {
        if (m_gradient.frobeniusNorm() < m_gradientTol)
        {
            m_returnReason = SUCCESS_GRADIENTTOL;
            abort = true;

            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
            }
        }
    }
    /* do the optimization */
    while (abort == false)
    {
        /*
            increase the iteration counter
        */
        m_numIter++;

        if (m_verbose == true)
        {
            std::cout << "# Gradient Descent :: Iteration: " << m_numIter << " : current parameters :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_parameters(r,0) << " ";
            }
            std::cout << m_currentCostFunctionValue << std::endl;

            std::cout << " current gradient :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_gradient(r,0) << " ";
            }
            std::cout << std::endl;

            std::cout << " current stepsize :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << stepSize(r,0) << " ";
            }
            std::cout << std::endl;
        }
        /*
            check the iteration limit
        */
        if (m_maxNumIterActive)
        {
            if (m_numIter >= m_maxNumIter)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of numIter" << std::endl;
                }

                /* set the according return status and return */
                m_returnReason = SUCCESS_MAXITER;
                abort = true;
                continue;
            }
        }
        /*
            store the last results for comparison
        */
        matrix_type oldParams = m_parameters;
        matrix_type oldGradient = m_gradient;
        double oldFuncValue = m_currentCostFunctionValue;

        /*
            get the gradient
        */
        m_gradient = (m_analyticalGradients == true &&
                      (m_costFunction->hasAnalyticGradient() == true)) ?
                          getAnalyticalGradient(m_parameters) :
                          getNumericalGradient(m_parameters, stepSize);

        /*
            weight with the scalings
        */
        //m_gradient = m_gradient.ElMul(m_scales);
        for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
        {
            m_gradient(k,0) *= m_scales(k,0);
        }

        /*
            check the gradient tolerance
        */
        if (m_gradientTolActive)
        {
            if (m_gradient.frobeniusNorm() < m_gradientTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
                }
                m_returnReason = SUCCESS_GRADIENTTOL;
                abort = true;
                continue;
            }
        }
        /*
            set the gradient length to 1; if the length of the gradient is too small,
            return ERROR_COMPUTATION_UNSTABLE instead
            (this can happen if gradientTol is not active..)
            FIXME: WACKER: think about a "useful" limit
            ruehle: now adjustable via the variable m_MinimalGradientMagnitude.
            A small gradient is treated as having reached a local/global optimum
            (hello, convex function...).
        */
        double fGradientLength = m_gradient.frobeniusNorm();
        if (fGradientLength > m_MinimalGradientMagnitude)
        {
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            {
                m_gradient(k,0) /= fGradientLength;
            }
        }
        else
        {
            if (m_verbose == true)
            {
                std::cout << "Gradient Descent :: aborting because the gradient is too small, L2 norm = " << fGradientLength
                          << " with the set minimum gradient magnitude = " << m_MinimalGradientMagnitude
                          << ". Consider decreasing the limit with GradientDescentOptimizer::setMinimalGradientMagnitude()."
                          << std::endl;
            }

            /* set the according return status and the last parameters and return */
            m_returnReason = SUCCESS_PARAMTOL;
            abort = true;
            continue;
        }
        /*
            check for a direction change, to scale down for convergence!
        */
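        // (!m_gradient * oldGradient)(0,0) is the scalar product (transposed gradient
        // times old gradient) of the current and the previous descent direction.
        // With cosAngle = 0.0 this triggers whenever the direction turns by more than
        // 90 degrees, and the step sizes are then halved (downScaleFactor).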
        if ((!m_gradient * oldGradient)(0,0) < cosAngle)
        {
            /*
                the direction change is more than acos(cosAngle) degrees:
                scale down by downScaleFactor
            */
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
                stepSize(k,0) *= downScaleFactor;

            stepLength *= downScaleFactor;
            /* FIXME: WACKER: only as long
               as there is no step length computation! */

            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: direction change detected -> performing scaledown" << std::endl;
            }
        }
        /*
            set the next iteration step
        */
        // weight the stepSize for the next grid search by the gradient;
        // FIXME: using this idea destroys convergence... somehow..
        //for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
        //    stepSize(k,0) = stepSize(k,0) * m_gradient(k,0);

        // old but silly version:
        //m_parameters = m_parameters + m_gradient * stepLength;

        // new version, where each gradient component is weighted by the dimension's
        // individual step size (not one size fits all, as before)
        for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            m_parameters(k,0) = m_parameters(k,0) - stepSize(k,0) * m_gradient(k,0);
        /*
            check whether the new parameters are within bounds; afterwards check
            paramTol, funcTol and maxSeconds
        */
        if (m_lowerParameterBoundActive || m_upperParameterBoundActive)
        {
            for (int i = 0; i < static_cast<int>(m_numberOfParameters); i++)
            {
                if (m_upperParameterBoundActive)
                {
                    if (m_parameters(i,0) >= m_upperParameterBound(i,0))
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter bounds" << std::endl;
                        }

                        /* set the according return status, restore the last parameters and abort */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        break;
                    }
                }
                if (m_lowerParameterBoundActive)
                {
                    if (m_parameters(i,0) <= m_lowerParameterBound(i,0))
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter bounds" << std::endl;
                        }

                        /* set the according return status, restore the last parameters and abort */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        break;
                    }
                }
            }
            if (abort)
            {
                /* skip the remaining checks of this iteration, so that the
                   out-of-bounds return status is not overwritten below */
                continue;
            }
        }
        /*
            get the new cost function value
        */
        m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

        /*
            check paramTol
        */
        if (m_paramTolActive)
        {
            if ((m_parameters - oldParams).frobeniusNorm() < m_paramTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "Gradient Descent :: aborting because of paramTol" << std::endl;
                }

                /* set the according return status and the last parameters and return */
                m_returnReason = SUCCESS_PARAMTOL;
                /*m_parameters = oldParams;*/
                abort = true;
                continue;
            }
        }
        if (m_funcTolActive)
        {
            if (fabs(m_currentCostFunctionValue - oldFuncValue) < m_funcTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of funcTol" << std::endl;
                }

                /* set the according return status and the last parameters and return */
                m_returnReason = SUCCESS_FUNCTOL;
                /* m_parameters = oldParams; */
                abort = true;
                continue;
            }
        }
        /*
            check the optimization time limit, if active
        */
        if (m_maxSecondsActive)
        {
            m_currentTime = clock();

            //std::cout << (_startTime - _currentTime) << std::endl;
            //std::cout << CLOCKS_PER_SEC << std::endl;
            /* time limit exceeded ? */
            if (((float)(m_currentTime - m_startTime) / CLOCKS_PER_SEC) >= m_maxSeconds)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of the time limit" << std::endl;
                }

                /* set the according return status and the last parameters and return */
                m_returnReason = SUCCESS_TIMELIMIT;
                m_parameters = oldParams;
                abort = true;
                continue;
            }
        }
    }

    if (m_verbose)
    {
        std::cout << "# Gradient Descent :: RESULT: ";
        for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
        {
            std::cout << m_parameters(r,0) << " ";
        }
        std::cout << " value: " << m_currentCostFunctionValue << std::endl;
    }

    return m_returnReason;
}

} // namespace OPTIMIZATION
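
/*
    Usage sketch (illustrative only): the cost function and the start parameters
    are configured through the Optimizer base class, whose setters are not part
    of this file, so the commented setter names below are assumptions.

        OPTIMIZATION::GradientDescentOptimizer optimizer(&myLogger);  // myLogger: some OptLogBase instance
        // base-class configuration (setter names assumed, see the Optimizer base class):
        //   optimizer.setCostFunction(&myCostFunction);
        //   optimizer.setParameters(initialParameters);
        optimizer.setStepSize(stepSizes);                 // one step size per parameter (n x 1 matrix_type)
        optimizer.setMinimalGradientMagnitude(1e-9);      // optional; defaults to 1e-7
        int returnReason = optimizer.optimize();
*/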