//////////////////////////////////////////////////////////////////////
//
// GradientDescentOptimizer.cpp: Implementation of the GradientDescentOptimizer
// Optimizer class.
//
// Written by Matthias Wacker
//
//////////////////////////////////////////////////////////////////////

#include "optimization/GradientDescentOptimizer.h"

#include <iostream>  // std::cout, std::cerr
#include <cmath>     // fabs
#include <cstdlib>   // exit
#include <ctime>     // clock(), CLOCKS_PER_SEC

using namespace optimization;

GradientDescentOptimizer::GradientDescentOptimizer(OptLogBase *loger)
    : SuperClass(loger)
{
    m_stepLength = -1;
}

GradientDescentOptimizer::GradientDescentOptimizer(const GradientDescentOptimizer &opt)
    : SuperClass(opt)
{
    m_stepSize = opt.m_stepSize;
    m_stepLength = opt.m_stepLength;
}

GradientDescentOptimizer::~GradientDescentOptimizer()
{
}

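/*
    setStepSize stores the per-parameter step sizes and derives the scalar
    step length as the negative of the largest entry; the negative sign is
    what turns the parameter update in optimize() into a descent step.
*/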
void GradientDescentOptimizer::setStepSize(const matrix_type & stepSize)
{
    m_stepSize = stepSize;
    m_stepLength = -m_stepSize.MaxVal();
}

void GradientDescentOptimizer::init()
{
    SuperClass::init();

    if (m_stepSize.rows() != static_cast<int>(m_numberOfParameters))
    {
        m_stepSize = m_scales;
    }
    else
    {
        // check for a nonzero step size
        bool tmp = false;
        for (int i = 0; i < static_cast<int>(m_numberOfParameters); ++i)
        {
            if (m_stepSize[i][0] > 0)
            {
                tmp = true;
            }
        }
        if (tmp == false)
        {
            std::cerr << "GradientDescentOptimizer::init all stepsizes zero - check your code!" << std::endl;
            exit(1);
        }
    }

    m_numIter = 0;
    m_analyticalGradients = m_costFunction->hasAnalyticGradient();

    /*
        FIXME: WACKER: compute first value and first gradient!
    */
    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

    m_gradient = (m_analyticalGradients == true &&
        (m_costFunction->hasAnalyticGradient() == true)) ?
            getAnalyticalGradient(m_parameters) :
            getNumericalGradient(m_parameters, m_stepSize);
}

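/*
    optimize() performs plain steepest descent: in every iteration the
    gradient is evaluated (analytically if the cost function provides one,
    numerically otherwise), weighted with m_scales and normalised to unit
    length, and the parameters are moved along it by the (negative) step
    length. The step size is halved whenever the descent direction reverses.
    The loop stops on the gradient tolerance, the iteration limit, the
    parameter/function tolerances, a violated parameter bound, or the time
    limit, and the corresponding return reason is reported.
*/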
int GradientDescentOptimizer::optimize()
{
    /*
        call init
    */
    init();

    if (m_loger)
        m_loger->logTrace("GradientDescentOptimizer: starting optimization ... \n");

    /*
        start time criterion
    */
    m_startTime = clock();

    bool abort = false;
    //double stepLength = -1.0;
    double cosAngle = 0.0;
    double downScaleFactor = 0.5;

    matrix_type stepSize = m_stepSize;
    double stepLength = m_stepLength;

    /*
        compute start value and first gradient!
    */
    m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

    m_gradient = (m_analyticalGradients == true &&
        (m_costFunction->hasAnalyticGradient() == true)) ?
            getAnalyticalGradient(m_parameters) :
            getNumericalGradient(m_parameters, m_stepSize);

    /*
        check abort criteria for gradient
    */
    if (m_gradientTolActive)
    {
        if (m_gradient.Norm(0) < m_gradientTol)
        {
            m_returnReason = SUCCESS_GRADIENTTOL;
            abort = true;
            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
            }
        }
    }

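    /*
        Main loop: each iteration checks the iteration limit, recomputes the
        gradient, weights it with m_scales, checks the gradient tolerance,
        normalises the gradient, shrinks the step size if the direction has
        reversed, takes a step, and finally checks the parameter bounds,
        paramTol, funcTol and the time limit.
    */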
    /* do the optimization */
    while (abort == false)
    {
        /*
            increase iteration counter
        */
        m_numIter++;

        if (m_verbose == true)
        {
            std::cout << "# Gradient Descent :: Iteration: " << m_numIter << " : current parameters :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_parameters[r][0] << " ";
            }
            std::cout << m_currentCostFunctionValue << std::endl;

            std::cout << " current gradient :\n ";
            for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
            {
                std::cout << m_gradient[r][0] << " ";
            }
            std::cout << std::endl;
        }

        /*
            check iteration limit
        */
        if (m_maxNumIterActive)
        {
            if (m_numIter >= m_maxNumIter)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of numIter" << std::endl;
                }
                /* set according return status and return */
                m_returnReason = SUCCESS_MAXITER;
                abort = true;
                continue;
            }
        }

        /*
            store last results for comparison
        */
        matrix_type oldParams = m_parameters;
        matrix_type oldGradient = m_gradient;
        double oldFuncValue = m_currentCostFunctionValue;

        /*
            get gradient
        */
        //m_gradient = (m_analyticalGradients == true) ? getAnalyticalGradient(m_parameters) : getNumericalGradient(m_parameters, stepSize);
        m_gradient = (m_analyticalGradients == true &&
            (m_costFunction->hasAnalyticGradient() == true)) ?
                getAnalyticalGradient(m_parameters) :
                getNumericalGradient(m_parameters, stepSize);

        /*
            weight with scalings
        */
        //m_gradient = m_gradient.ElMul(m_scales);
        for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
        {
            m_gradient[k][0] *= m_scales[k][0];
        }

        /*
            check gradient tol
        */
        if (m_gradientTolActive)
        {
            if (m_gradient.Norm(0) < m_gradientTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of GradientTol" << std::endl;
                }
                m_returnReason = SUCCESS_GRADIENTTOL;
                abort = true;
                continue;
            }
        }

        /*
            set gradient length to 1; if the length of the gradient is too small,
            return ERROR_COMPUTATION_UNSTABLE
            (this can happen if gradientTol is not active)
            FIXME: WACKER think about a "useful" limit
        */
        double gradientNorm = m_gradient.Norm(0);
        if (gradientNorm > 1.0e-50)
        {
            // divide by the norm captured above, so every entry is scaled by
            // the same value and the result actually has unit length
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            {
                m_gradient[k][0] /= gradientNorm;
            }
        }
        else
        {
            m_returnReason = ERROR_COMPUTATION_UNSTABLE;
            if (m_verbose == true)
            {
                std::cout << "# Gradient Descent :: aborting because of ERROR_COMPUTATION_UNSTABLE " << std::endl;
            }
            abort = true;
            continue;
        }

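        /*
            Note: the test below relies on operator! of matrix_type being the
            transpose (as it appears to be used here), so
            (!m_gradient * oldGradient)[0][0] is the inner product of the
            unit-length new gradient with the previous gradient. With
            cosAngle = 0.0, a negative value means the direction turned by
            more than 90 degrees.
        */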
        /*
            check direction change to scale down for convergence!
        */
        if ((!m_gradient * oldGradient)[0][0] < cosAngle)
        {
            /*
                direction change is more than acos(cosAngle) degrees:
                scale down by downScaleFactor
            */
            for (int k = 0; k < static_cast<int>(m_numberOfParameters); ++k)
            {
                stepSize[k][0] *= downScaleFactor;
            }
            stepLength *= downScaleFactor;

            /* FIXME: WACKER: only as long as there is no step-length computation! */
        }

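        /*
            m_gradient has unit norm at this point and stepLength is negative
            (-1 by default, or -m_stepSize.MaxVal() after setStepSize), so the
            update below moves the parameters by |stepLength| in the negative
            gradient direction, i.e. a steepest-descent step.
        */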
        /*
            set next iteration step
        */
        m_parameters = m_parameters + m_gradient * stepLength;

        /*
            check if it is in bounds, paramTol, funcTol, numIter, gradientTol, maxSeconds
        */
        if (m_lowerParameterBoundActive || m_upperParameterBoundActive)
        {
            for (int i = 0; i < static_cast<int>(m_numberOfParameters); i++)
            {
                if (m_upperParameterBoundActive)
                {
                    if (m_parameters[i][0] >= m_upperParameterBound[i][0])
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
                        }
                        /* set according return status, restore the last parameters and return */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        break;
                    }
                }
                if (m_lowerParameterBoundActive)
                {
                    if (m_parameters[i][0] <= m_lowerParameterBound[i][0])
                    {
                        if (m_verbose == true)
                        {
                            std::cout << "# Gradient Descent :: aborting because of parameter Bounds" << std::endl;
                        }
                        /* set according return status, restore the last parameters and return */
                        m_returnReason = ERROR_XOUTOFBOUNDS;
                        m_parameters = oldParams;
                        abort = true;
                        break;
                    }
                }
            }
            // leave this while iteration once a bound violation has been detected,
            // so the return reason is not overwritten by the criteria below
            if (abort == true)
            {
                continue;
            }
        }

        /*
            get new cost function value
        */
        m_currentCostFunctionValue = evaluateCostFunction(m_parameters);

        /*
            check paramTol
        */
        if (m_paramTolActive)
        {
            if ((m_parameters - oldParams).Norm(0) < m_paramTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of param Tol" << std::endl;
                }
                /* set according return status and return */
                m_returnReason = SUCCESS_PARAMTOL;
                /*m_parameters = oldParams;*/
                abort = true;
                continue;
            }
        }

        /*
            check funcTol
        */
        if (m_funcTolActive)
        {
            if (fabs(m_currentCostFunctionValue - oldFuncValue) < m_funcTol)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of Func Tol" << std::endl;
                }
                /* set according return status and return */
                m_returnReason = SUCCESS_FUNCTOL;
                /*m_parameters = oldParams;*/
                abort = true;
                continue;
            }
        }

        /*
            check optimization time limit, if active
        */
        if (m_maxSecondsActive)
        {
            m_currentTime = clock();

            //std::cout << (_startTime - _currentTime) << std::endl;
            //std::cout << CLOCKS_PER_SEC << std::endl;

            /* time limit exceeded? */
            if (((float)(m_currentTime - m_startTime) / CLOCKS_PER_SEC) >= m_maxSeconds)
            {
                if (m_verbose == true)
                {
                    std::cout << "# Gradient Descent :: aborting because of Time Limit" << std::endl;
                }
                /* set according return status, restore the last parameters and return */
                m_returnReason = SUCCESS_TIMELIMIT;
                m_parameters = oldParams;
                abort = true;
                continue;
            }
        }
    }

    if (m_verbose)
    {
        std::cout << "# Gradient Descent :: RESULT: ";
        for (int r = 0; r < static_cast<int>(m_numberOfParameters); r++)
        {
            std::cout << m_parameters[r][0] << " ";
        }
        std::cout << " value: " << m_currentCostFunctionValue << std::endl;
    }

    return m_returnReason;
}
  335. }