/**
 * @file FirstOrderRasmussen.cpp
 * @brief C/C++ implementation of Rasmussen's optimization method
 * @author Erik Rodner
 * @date 01/25/2010
 */
#include <iostream>
#include <cmath>
#include <algorithm>
#include <limits>

#include "FirstOrderRasmussen.h"
#include <core/basics/Log.h>

using namespace std;
using namespace NICE;
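/* Usage sketch (illustrative only; MyProblem stands for any hypothetical
 * concrete subclass of OptimizationProblemFirst providing the objective and
 * its gradient):
 *
 *   MyProblem problem ( initialParameters );
 *   FirstOrderRasmussen optimizer ( true );   // verbose output
 *   optimizer.doOptimizeFirst ( problem );
 *   Vector solution = problem.position();
 */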
FirstOrderRasmussen::FirstOrderRasmussen( bool _verbose )
    : verbose(_verbose)
{
  c_int = 0.1;    // don't reevaluate within 0.1 of the limit of the current bracket
  c_ext = 3.0;    // extrapolate maximum 3 times the current step-size
  c_max = 20;     // max 20 function evaluations per line search
  c_ratio = 10;   // maximum allowed slope ratio

  /* SIG and RHO are the constants controlling the Wolfe-Powell conditions.
   * SIG is the maximum allowed absolute ratio between previous and new slopes
   * (derivatives in the search direction); setting SIG to low (positive)
   * values thus forces higher precision in the line searches. RHO is the
   * minimum allowed fraction of the expected decrease (from the slope at the
   * initial point of the line search). The constants must satisfy
   * 0 < RHO < SIG < 1. Tuning SIG (depending on the nature of the function
   * to be optimized) may speed up the minimization; it is probably not worth
   * playing much with RHO. */
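  /* Concretely, with step size a along search direction s, initial slope
   * d0 = s' * g(0) and new slope d = s' * g(a), the two conditions checked
   * in doOptimizeFirst() below read
   *     f(a) <= f(0) + a * RHO * d0   (sufficient decrease)
   *     |d|  <= -SIG * d0             (curvature),
   * where d0 < 0 along a descent direction. */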
  c_sig = 0.1;
  c_rho = c_sig / 2.0;

  // a negative length limits the number of function evaluations
  // (instead of line searches); here: at most 100 evaluations
  length = -100;

  epsilonG = 1e-5; // exit if the gradient norm drops below this threshold
}
FirstOrderRasmussen::~FirstOrderRasmussen()
{
}
void FirstOrderRasmussen::doOptimizeFirst(OptimizationProblemFirst& problem)
{
  /** Comments of Carl Edward Rasmussen (2006-09-08).
   *
   * The code falls naturally into 3 parts, after the initial line search is
   * started in the direction of steepest descent. 1) we first enter a while
   * loop which uses point 1 (p1) and point 2 (p2) to compute an extrapolation
   * (p3), until we have extrapolated far enough (Wolfe-Powell conditions).
   * 2) if necessary, we enter the second loop which takes p2, p3 and p4 and
   * chooses the subinterval containing a (local) minimum, and interpolates
   * it, until an acceptable point is found (Wolfe-Powell conditions). Note
   * that points are always maintained in order p0 <= p1 <= p2 < p3 < p4.
   * 3) compute a new search direction using conjugate gradients
   * (Polak-Ribiere flavour), or revert to steepest descent if there was a
   * problem in the previous line search. Return the best value so far if two
   * consecutive line searches fail, or whenever we run out of function
   * evaluations or line searches. During extrapolation, the "f" function may
   * fail, either with an error or by returning NaN or Inf, and minimize
   * should handle this gracefully. */

  /** Reimplementation comments (Erik)
   * I manually converted the MATLAB code to this nasty piece of C/C++ code.
   * The only changes are due to the apply/unapply mechanism of the NICE
   * optimization interface. I also skipped some unnecessary gradient
   * computations. */
  double red = 1.0;
  uint i = 0;
  bool ls_failed = false;

  double f0 = problem.objective();
  if ( verbose )
    cerr << "FirstOrderRasmussen: initial value of the objective function is " << f0 << endl;
  problem.computeGradient();
  Vector df0 = problem.gradientCached();

  if ( length < 0 ) i++;
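  // initial search direction: steepest descent; d0 is the (negative) slope
  // along s and x3 the initial step size, as in the MATLAB original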
  Vector s = -1.0 * df0;
  double d0 = - s.scalarProduct(s);
  double d1, d2, d3;
  double d4 = d0;
  double x1, x2;
  double x4 = 0.0;
  double x3 = red / (1 - d0);
  double f1, f2, f3;
  double f4 = f0;
  Vector df3 (df0.size());
  Vector df2 (df0.size());
  Vector df1 (df0.size());
  Vector dF0 (df0.size());
  double F0;
  Vector X0delta ( problem.position().size() );
  while ( i < (uint)abs(length) )
  {
    if ( verbose )
      std::cerr << "Iteration " << i << " / " << abs(length)
                << " objective function = " << f0 << std::endl;
    i = i + (length > 0);

    if ( df0.normL2() < epsilonG ) {
      if ( verbose )
        Log::debug() << "FirstOrderRasmussen: low gradient " << df0.normL2()
                     << " < " << epsilonG << " = epsilonG" << std::endl;
      break;
    }

    X0delta.set(0.0);
    F0 = f0;
    dF0 = df0;

    double M;
    if ( length > 0 )
      M = c_max;
    else
      M = std::min(c_max, -length - i);
    // line search: first an extrapolation phase, then (if necessary)
    // an interpolation phase on the bracketed interval
    while (1)
    {
      x2 = 0;
      f2 = f0;
      d2 = d0;
      f3 = f0;
      df3 = df0;

      bool success = false;
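      // robust function evaluation: if the objective is NaN/Inf, undo the
      // step and bisect the step size before trying again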
      while ( !success && (M > 0) )
      {
        M = M - 1;
        i = i + (length < 0);
        problem.applyStep ( x3 * s );
        f3 = problem.objective();
        if ( NICE::isFinite(f3) ) {
          success = true;
        } else if ( M > 0 ) {
          // undo the failed step and bisect; when the evaluation budget is
          // exhausted (M == 0) the step stays applied, so x3 must keep its
          // value for the unapplyStep below
          problem.unapplyStep ( x3 * s );
          x3 = (x2 + x3) / 2.0;
        }
      }
      problem.computeGradient();
      df3 = problem.gradientCached();
      problem.unapplyStep ( x3 * s );
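      // keep track of the best point seen so far; X0delta is the step
      // relative to the starting point of the current line search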
      if ( f3 < F0 ) {
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }
      d3 = df3.scalarProduct (s);

      // done extrapolating, or out of function evaluations?
      if ( (d3 > c_sig * d0) || (f3 > f0 + x3*c_rho*d0) || (M == 0) )
        break;

      x1 = x2; f1 = f2; d1 = d2;
      x2 = x3; f2 = f3; d2 = d3;
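      // cubic extrapolation from (x1, f1, d1) and (x2, f2, d2): A and B are
      // coefficients of the fitted cubic, x3 its analytic minimizer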
      double A = 6*(f1-f2) + 3*(d2+d1)*(x2-x1);
      double B = 3*(f2-f1) - (2*d1+d2)*(x2-x1);
      x3 = x1 - d1*pow((x2-x1), 2) / (B + sqrt(B*B - A*d1*(x2-x1)));

      if ( !NICE::isFinite(x3) || (x3 < 0) )
      {
        // numerical problem or wrong sign: extrapolate the maximum amount
        x3 = x2 * c_ext;
      } else if ( x3 > x2 * c_ext ) {
        // new point beyond the extrapolation limit
        x3 = x2 * c_ext;
      } else if ( x3 < x2 + c_int*(x2 - x1) ) {
        // new point too close to the previous point
        x3 = x2 + c_int*(x2 - x1);
      }
    }
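    // interpolation phase: shrink the bracket [x2, x4] until the
    // Wolfe-Powell conditions hold or the evaluation budget M is exhausted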
    while ( ( (abs(d3) > -c_sig*d0) || (f3 > f0 + x3*c_rho*d0) ) && (M > 0) )
    {
      if ( (d3 > 0) || (f3 > f0 + x3*c_rho*d0) )
      {
        // the minimum lies in the lower subinterval
        x4 = x3; f4 = f3; d4 = d3;
      } else {
        // the minimum lies in the upper subinterval
        x2 = x3; f2 = f3; d2 = d3;
      }
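      // quadratic interpolation if the upper bracket point is worse than the
      // starting point, otherwise a cubic fit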
      if ( f4 > f0 ) {
        x3 = x2 - (0.5*d2*pow((x4-x2), 2)) / (f4 - f2 - d2*(x4-x2));
      } else {
        // we observed that this computation can be critical
        // and numerically unstable
        double A = 6*(f2-f4)/(x4-x2) + 3*(d4+d2);
        double B = 3*(f4-f2) - (2*d2+d4)*(x4-x2);
        // small values of A (around 1e-6) can have a big influence on x3
        x3 = x2 + (sqrt(B*B - A*d2*pow(x4-x2, 2)) - B) / A;
      }

      if ( !NICE::isFinite(x3) ) {
        // numerical problem: bisect the bracket instead
        x3 = (x2 + x4) / 2.0;
      }

      // keep x3 away from the bracket limits by a fraction c_int
      x3 = std::max(std::min(x3, x4 - c_int*(x4-x2)), x2 + c_int*(x4-x2));
      problem.applyStep ( x3 * s );
      f3 = problem.objective();
      problem.computeGradient();
      df3 = problem.gradientCached();
      problem.unapplyStep ( x3 * s );

      if ( f3 < F0 ) {
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }

      M = M - 1;
      i = i + (length < 0);
      d3 = df3.scalarProduct ( s );
    }
    if ( (abs(d3) < -c_sig * d0) && (f3 < f0 + x3 * c_rho * d0) )
    {
      // the line search succeeded: accept the new point
      problem.applyStep ( x3 * s );
      if ( verbose )
        cerr << "FirstOrderRasmussen: new objective value " << f3 << endl;
      f0 = f3;
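      // Polak-Ribiere conjugate gradient update of the search direction:
      // s <- beta * s - g_new, beta = (g_new'*g_new - g_old'*g_new) / (g_old'*g_old)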
      s = ( df3.scalarProduct(df3) - df0.scalarProduct(df3) ) / ( df0.scalarProduct(df0) ) * s - df3;
      df0 = df3;
      d3 = d0;
      d0 = df0.scalarProduct(s);

      // if the new direction is not a descent direction, revert to steepest descent
      if ( d0 > 0 ) {
        s = -1.0 * df0;
        d0 = - s.scalarProduct(s);
      }
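      // scale the initial step of the next line search by the slope ratio,
      // capped at c_ratio; the tiny offset in the denominator guards against
      // division by zero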
      x3 = x3 * std::min(c_ratio, d3 / (d0 - std::numeric_limits<float>::min()) );
      ls_failed = false;
    } else {
      // the line search failed: fall back to the best point seen so far
      problem.applyStep ( X0delta );
      f0 = F0;
      df0 = dF0;

      // give up if the maximum number of iterations is exceeded or the line
      // search failed twice in a row
      if ( ls_failed || (i > (uint)abs(length)) )
      {
        if ( (i > (uint)abs(length)) && verbose )
          cerr << "FirstOrderRasmussen: maximum number of iterations reached" << endl;
        else if ( verbose )
          cerr << "FirstOrderRasmussen: line search failed twice" << endl;
        break;
      }

      // try steepest descent again
      s = -1.0 * df0;
      d0 = - s.scalarProduct(s);
      x3 = 1.0 / (1.0 - d0);
      ls_failed = true;
    }
  }
}
  217. }