/** 
* @file FirstOrderRasmussen.cpp
* @brief C/C++ implementation of Rasmussen's optimization method
* @author Erik Rodner
* @date 01/25/2010
*/
#include <iostream>
#include <cmath>
#include <limits>
#include <algorithm>

#include "FirstOrderRasmussen.h"

using namespace std;
using namespace NICE;

FirstOrderRasmussen::FirstOrderRasmussen( bool _verbose )
  : verbose(_verbose)
{
  c_int = 0.1;    // don't reevaluate within 0.1 of the limit of the current bracket
  c_ext = 3.0;    // extrapolate maximum 3 times the current step size
  c_max = 20;     // max 20 function evaluations per line search
  c_ratio = 10;   // maximum allowed slope ratio

  /* SIG and RHO are the constants controlling the Wolfe-Powell conditions.
   * SIG is the maximum allowed absolute ratio between previous and new slopes
   * (derivatives in the search direction); setting SIG to low (positive)
   * values forces higher precision in the line searches. RHO is the minimum
   * allowed fraction of the expected decrease (estimated from the slope at
   * the initial point of the line search). The constants must satisfy
   * 0 < RHO < SIG < 1. Tuning SIG (depending on the nature of the function
   * to be optimized) may speed up the minimization; it is probably not worth
   * playing much with RHO. */
  c_sig = 0.1;
  c_rho = c_sig / 2.0;

  // set the maximum number of function evaluations to 100
  // (a negative length counts function evaluations instead of line searches)
  length = -100;

  epsilonG = 1e-5;  // if the gradient norm is lower than this threshold -> exit
}

FirstOrderRasmussen::~FirstOrderRasmussen()
{
}

void FirstOrderRasmussen::doOptimizeFirst(OptimizationProblemFirst& problem)
{
  /** Comments of Carl Edward Rasmussen (2006-09-08).
   *
   * The code falls naturally into 3 parts, after the initial line search is
   * started in the direction of steepest descent. 1) we first enter a while
   * loop which uses point 1 (p1) and (p2) to compute an extrapolation (p3),
   * until we have extrapolated far enough (Wolfe-Powell conditions). 2) if
   * necessary, we enter the second loop which takes p2, p3 and p4, chooses
   * the subinterval containing a (local) minimum, and interpolates it, until
   * an acceptable point is found (Wolfe-Powell conditions). Note that points
   * are always maintained in order p0 <= p1 <= p2 < p3 < p4. 3) compute a new
   * search direction using conjugate gradients (Polack-Ribiere flavour), or
   * revert to steepest descent if there was a problem in the previous line
   * search. Return the best value so far, if two consecutive line searches
   * fail, or whenever we run out of function evaluations or line searches.
   * During extrapolation, the "f" function may fail either with an error or
   * returning NaN or Inf, and minimize should handle this gracefully.
   */

  /** Reimplementation comments (Erik)
   * I just converted the Matlab code manually to this nasty piece of C/C++
   * code. The only changes are due to the apply/unapply mechanism of the NICE
   * optimization interface. I also skipped some unnecessary gradient
   * computations.
   */
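  /* The line search below accepts a step of size x3 along the search
   * direction s once both Wolfe-Powell tests hold (cf. the constants set in
   * the constructor):
   *   f3 < f0 + x3 * c_rho * d0    (sufficient decrease of the objective)
   *   |d3| < -c_sig * d0           (sufficient reduction of the slope)
   * where d0 and d3 are the directional derivatives df0'*s and df3'*s at the
   * starting point and at the trial point, respectively. */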
  double red = 1.0;
  uint i = 0;
  bool ls_failed = false;

  double f0 = problem.objective();
  if ( verbose )
    cerr << "FirstOrderRasmussen: initial value of the objective function is " << f0 << endl;
  problem.computeGradient();
  Vector df0 = problem.gradientCached();

  if ( length < 0 ) i++;

  Vector s = - 1.0 * df0;
  double d0 = - s.scalarProduct(s);

  double d1, d2, d3;
  double d4 = d0;
  double x1, x2;
  double x4 = 0.0;
  double x3 = red / (1-d0);
  double f1, f2, f3;
  double f4 = f0;
  Vector df3 (df0.size());
  Vector df2 (df0.size());
  Vector df1 (df0.size());
  Vector dF0 (df0.size());
  double F0;
  Vector X0delta ( problem.position().size() );

  while ( i < (uint)abs(length) )
  {
    if ( verbose )
      std::cerr << "Iteration " << i << " / " << abs(length)
                << " objective function = " << f0 << std::endl;

    i = i + (length>0);

    if ( df0.normL2() < epsilonG ) {
      if ( verbose )
        Log::debug() << "FirstOrderRasmussen: low gradient " << df0.normL2()
                     << " < " << epsilonG << " = epsilonG" << std::endl;
      break;
    }

    // remember the best point seen so far
    X0delta.set(0.0);
    F0 = f0;
    dF0 = df0;

    double M;
    if ( length > 0 )
      M = c_max;
    else
      M = std::min(c_max, -length-i);

    // perform a line search method (as far as I know)
    while (1)
    {
      x2 = 0; f2 = f0; d2 = d0;
      f3 = f0; df3 = df0;

      bool success = false;
      while ( !success && (M > 0) )
      {
        M = M - 1;
        i = i + (length<0);
        problem.applyStep ( x3*s );
        f3 = problem.objective();
        if ( NICE::isFinite(f3) )
          success = true;
        if ( !success && (M > 0) )
          problem.unapplyStep ( x3*s );
        if ( !NICE::isFinite(f3) ) {
          // the objective failed: bisect and try again
          x3 = (x2 + x3) / 2.0;
        }
      }
      problem.computeGradient();
      df3 = problem.gradientCached();
      problem.unapplyStep ( x3 * s );

      if ( f3 < F0 ) {
        // keep the best values so far
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }

      d3 = df3.scalarProduct (s);

      // are we done extrapolating?
      if ( (d3 > c_sig * d0) || (f3 > f0 + x3*c_rho*d0) || (M == 0) )
        break;

      x1 = x2; f1 = f2; d1 = d2;    // move point 2 to point 1
      x2 = x3; f2 = f3; d2 = d3;    // move point 3 to point 2

      // cubic extrapolation
      double A = 6*(f1-f2) + 3*(d2+d1)*(x2-x1);
      double B = 3*(f2-f1) - (2*d1+d2)*(x2-x1);
      x3 = x1 - d1*pow((x2-x1),2) / (B + sqrt(B*B - A*d1*(x2-x1)));

      if ( !NICE::isFinite(x3) || (x3 < 0) ) {
        x3 = x2*c_ext;                          // extrapolate maximum amount
      } else if ( x3 > x2 * c_ext ) {
        x3 = x2*c_ext;                          // extrapolate maximum amount
      } else if ( x3 < x2 + c_int*(x2 - x1) ) {
        x3 = x2 + c_int*(x2-x1);                // new point too close to the previous one
      }
    }

    // interpolation
    while ( ( (abs(d3) > -c_sig*d0) || (f3 > f0 + x3*c_rho*d0) ) && (M > 0) )
    {
      if ( (d3 > 0) || (f3 > f0 + x3*c_rho*d0) ) {
        x4 = x3; f4 = f3; d4 = d3;    // move point 3 to point 4
      } else {
        x2 = x3; f2 = f3; d2 = d3;    // move point 3 to point 2
      }

      if ( f4 > f0 ) {
        // quadratic interpolation
        x3 = x2 - (0.5*d2*pow((x4-x2),2)) / (f4 - f2 - d2*(x4-x2));
      } else {
        // we observed that this computation can be critical and unstable
        // cubic interpolation
        double A = 6*(f2-f4)/(x4-x2) + 3*(d4+d2);
        double B = 3*(f4-f2) - (2*d2+d4)*(x4-x2);
        // small values of A (1e-6) have a big influence on x3
        x3 = x2 + (sqrt(B*B - A*d2*pow(x4-x2,2)) - B) / A;
      }

      if ( !NICE::isFinite(x3) ) {
        // if we had a numerical problem, bisect
        x3 = (x2 + x4) / 2.0;
      }

      // don't accept a point too close to the limits of the bracket
      x3 = std::max(std::min(x3, x4 - c_int*(x4-x2)), x2 + c_int*(x4-x2));

      problem.applyStep ( x3*s );
      f3 = problem.objective();
      problem.computeGradient();
      df3 = problem.gradientCached();
      problem.unapplyStep ( x3*s );

      if ( f3 < F0 ) {
        // keep the best values so far
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }

      M = M - 1;
      i = i + (length < 0);
      d3 = df3.scalarProduct ( s );
    }

    if ( (abs(d3) < - c_sig * d0) && (f3 < f0 + x3 * c_rho * d0) )
    {
      // the line search succeeded
      problem.applyStep ( x3 * s );
      if ( verbose )
        cerr << "FirstOrderRasmussen: new objective value " << f3 << endl;
      f0 = f3;

      // Polack-Ribiere conjugate gradient direction
      s = ( df3.scalarProduct(df3) - df0.scalarProduct(df3) ) / ( df0.scalarProduct(df0) ) * s - df3;
      df0 = df3;
      d3 = d0;
      d0 = df0.scalarProduct(s);

      if ( d0 > 0 ) {
        // the new slope must be negative, otherwise use steepest descent
        s = -1.0 * df0;
        d0 = - s.scalarProduct(s);
      }

      // limit the growth of the step size by the slope ratio (at most c_ratio)
      x3 = x3 * std::min(c_ratio, d3/(d0 - std::numeric_limits<double>::min()) );
      ls_failed = false;
    }
    else
    {
      // the line search failed: restore the best point found so far
      problem.applyStep ( X0delta );
      f0 = F0;
      df0 = dF0;
      // if the maximum number of iterations was exceeded or the line
      // search failed twice in a row, give up
      if ( ls_failed || (i > (uint)abs(length)) )
      {
        if ( (i > (uint)abs(length)) && verbose )
          cerr << "FirstOrderRasmussen: maximum number of iterations reached" << endl;
        else if ( verbose )
          cerr << "FirstOrderRasmussen: line search failed twice" << endl;
        break;
      }

      // otherwise try again with steepest descent
      s = - 1.0 * df0;
      d0 = - s.scalarProduct(s);
      x3 = 1.0 / (1.0 - d0);
      ls_failed = true;
    }
  }
}
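
/* Usage sketch (illustrative): "MyProblem" stands for a user-defined subclass
 * of OptimizationProblemFirst that supplies the objective value and gradient;
 * the public entry point optimizeFirst() of the optimization-algorithm base
 * class is assumed to forward to doOptimizeFirst() above, so adjust the call
 * if the actual interface differs.
 *
 *   MyProblem problem ( initialParameters );      // hypothetical problem subclass
 *   FirstOrderRasmussen optimizer ( true );       // verbose output on std::cerr
 *   optimizer.optimizeFirst ( problem );          // assumed public wrapper of doOptimizeFirst()
 *   NICE::Vector solution = problem.position();   // optimized parameters
 */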