/**
 * @file FirstOrderRasmussen.cpp
 * @brief C/C++ implementation of Rasmussen's conjugate gradient optimization method
 * @author Erik Rodner
 * @date 01/25/2010
 */
#include <iostream>
#include <algorithm>
#include <cmath>
#include <limits>

#include "FirstOrderRasmussen.h"
#include <core/basics/Log.h>

using namespace std;
using namespace NICE;
FirstOrderRasmussen::FirstOrderRasmussen( bool _verbose )
  : verbose(_verbose)
{
  c_int = 0.1;   // do not reevaluate within 0.1 of the limit of the current bracket
  c_ext = 3.0;   // extrapolate at most 3 times the current step size
  c_max = 20;    // at most 20 function evaluations per line search
  c_ratio = 10;  // maximum allowed slope ratio

  /* SIG and RHO are the constants controlling the Wolfe-Powell conditions.
   * SIG is the maximum allowed absolute ratio between previous and new slopes
   * (derivatives in the search direction); setting SIG to low (positive)
   * values forces higher precision in the line searches. RHO is the minimum
   * allowed fraction of the expected decrease (predicted from the slope at the
   * initial point of the line search). The constants must satisfy
   * 0 < RHO < SIG < 1. Tuning SIG (depending on the nature of the function to
   * be optimized) may speed up the minimization; it is probably not worth
   * playing much with RHO. */
  c_sig = 0.1;
  c_rho = c_sig / 2.0;
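
  /* In the notation used below (see doOptimizeFirst), a step of length x along
   * the search direction s, with initial directional derivative d0 < 0, is
   * accepted when both Wolfe-Powell conditions hold:
   *   f(x) < f(0) + c_rho * x * d0     (sufficient decrease)
   *   |d(x)| < -c_sig * d0             (curvature / slope condition)
   * which is exactly the test performed after each line search. */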
  // negative value: the limit counts function evaluations (here 100)
  // instead of line searches / iterations
  length = -100;
  epsilonG = 1e-5; // exit if the gradient norm falls below this threshold
}
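
/* Hypothetical usage sketch (not part of this file), assuming MyProblem is a
 * user-defined subclass of OptimizationProblemFirst that provides the
 * objective value and gradient:
 *
 *   MyProblem problem ( initialParameters );
 *   FirstOrderRasmussen optimizer ( true );   // verbose output to cerr
 *   optimizer.doOptimizeFirst ( problem );
 *   NICE::Vector solution = problem.position();
 */
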
FirstOrderRasmussen::~FirstOrderRasmussen()
{
}

void FirstOrderRasmussen::doOptimizeFirst(OptimizationProblemFirst& problem)
{
  /** Comments of Carl Edward Rasmussen (2006-09-08).
   *
   * The code falls naturally into 3 parts, after the initial line search is
   * started in the direction of steepest descent. 1) we first enter a while loop
   * which uses point 1 (p1) and point 2 (p2) to compute an extrapolation (p3), until
   * we have extrapolated far enough (Wolfe-Powell conditions). 2) if necessary, we
   * enter the second loop which takes p2, p3 and p4 and chooses the subinterval
   * containing a (local) minimum, and interpolates it, until an acceptable point
   * is found (Wolfe-Powell conditions). Note that points are always maintained
   * in order p0 <= p1 <= p2 < p3 < p4. 3) compute a new search direction using
   * conjugate gradients (Polack-Ribiere flavour), or revert to steepest descent
   * if there was a problem in the previous line search. Return the best value so
   * far, if two consecutive line searches fail, or whenever we run out of function
   * evaluations or line searches. During extrapolation, the "f" function may fail
   * either with an error or returning NaN or Inf, and minimize should handle this
   * gracefully. */
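
  /* Mapping of Rasmussen's points to the local variables below: point i is
   * described by the step length xi along the search direction s, the
   * objective value fi at that step, and the directional derivative
   * di = gradient' * s evaluated there. X0delta, F0 and dF0 record the best
   * step found during the current line search, so that it can be restored
   * if the line search fails. */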

  /** Reimplementation comments (Erik)
   * I just converted the MATLAB code manually to this piece of C/C++ code.
   * The only changes are due to the apply/unapply mechanism of the NICE optimization
   * interface. I also skipped some unnecessary gradient computations. */
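
  /* Concretely, a candidate point is evaluated by temporarily moving the
   * problem with problem.applyStep(x*s), reading problem.objective() and
   * problem.gradientCached(), and then undoing the move with
   * problem.unapplyStep(x*s); only an accepted line search result stays applied. */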
  double red = 1.0;
  uint i = 0;                // counts iterations or function evaluations (see length)
  bool ls_failed = false;
  double f0 = problem.objective();
  if ( verbose )
    cerr << "FirstOrderRasmussen: initial value of the objective function is " << f0 << endl;
  problem.computeGradient();
  Vector df0 = problem.gradientCached();
  if ( length < 0 ) i++;     // count the initial function evaluation
  Vector s = - 1.0 * df0;    // initial search direction: steepest descent
  double d0 = - s.scalarProduct(s);   // slope along the search direction
  double d1, d2, d3;
  double d4 = d0;
  double x1, x2;
  double x4 = 0.0;
  double x3 = red / (1 - d0);         // initial step size guess
  double f1, f2, f3;
  double f4 = f0;
  Vector df3 (df0.size());
  Vector dF0 (df0.size());
  double F0;
  Vector X0delta ( problem.position().size());

  while ( i < (uint)abs(length) )
  {
    if ( verbose )
      std::cerr << "Iteration " << i << " / " << abs(length) << " objective function = " << f0 << std::endl;
    i = i + (length > 0);   // count this line search / iteration
    if ( df0.normL2() < epsilonG ) {
      if ( verbose )
        Log::debug() << "FirstOrderRasmussen: low gradient " << df0.normL2() << " < " << epsilonG << " = epsilonG" << std::endl;
      break;
    }

    // remember the best point of this line search to allow a restore on failure
    X0delta.set(0.0);
    F0 = f0;
    dF0 = df0;
    double M;
    if ( length > 0 )
      M = c_max;
    else
      M = std::min(c_max, -length-i);

    // line search along the current search direction s
    while (1)  // keep extrapolating as long as necessary
    {
      // point 2 starts at the current position (step length 0)
      x2 = 0;
      f2 = f0;
      d2 = d0;
      f3 = f0;
      df3 = df0;
      bool success = false;
      while ( !success && (M > 0) )
      {
        M = M - 1; i = i + (length < 0);  // count one function evaluation
        problem.applyStep ( x3*s );
        f3 = problem.objective();
        if ( NICE::isFinite(f3) ) {
          success = true;
        } else {
          // objective is NaN/Inf: undo the step, bisect the step size and retry
          problem.unapplyStep ( x3*s );
          x3 = (x2 + x3) / 2.0;
        }
      }
      if ( success ) {
        // gradient at the candidate point, then move back to the current position
        problem.computeGradient();
        df3 = problem.gradientCached();
        problem.unapplyStep ( x3 * s );
      }
      // keep track of the best point seen so far in this line search
      if ( f3 < F0 ) {
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }
      d3 = df3.scalarProduct (s);   // directional derivative at point 3
      // are we done extrapolating?
      if ( (d3 > c_sig * d0) || (f3 > f0 + x3*c_rho*d0) || (M == 0) )
        break;
      // shift: point 2 becomes point 1, point 3 becomes point 2
      x1 = x2; f1 = f2; d1 = d2;
      x2 = x3; f2 = f3; d2 = d3;
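      /* Cubic extrapolation: fit a cubic polynomial to the function values and
       * slopes at points 1 and 2, i.e. (x1, f1, d1) and (x2, f2, d2), and move
       * x3 to the minimizer of that cubic. The square root may receive a
       * negative argument and produce NaN; this case is caught right below. */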
      double A = 6*(f1-f2) + 3*(d2+d1)*(x2-x1);
      double B = 3*(f2-f1) - (2*d1+d2)*(x2-x1);
      x3 = x1 - d1*pow((x2-x1), 2) / (B + sqrt(B*B - A*d1*(x2-x1)));
      if ( !NICE::isFinite(x3) || (x3 < 0) )
      {
        // numerical problem or wrong sign: extrapolate the maximum amount
        x3 = x2*c_ext;
      } else if ( x3 > x2*c_ext ) {
        // new point beyond the extrapolation limit
        x3 = x2*c_ext;
      } else if ( x3 < x2 + c_int*(x2 - x1) ) {
        // new point too close to the previous point
        x3 = x2 + c_int*(x2 - x1);
      }
    }
    // interpolation phase: keep interpolating until an acceptable point is found
    while ( ( (fabs(d3) > -c_sig*d0) || (f3 > f0 + x3*c_rho*d0) ) && (M > 0) )
    {
      if ( (d3 > 0) || (f3 > f0 + x3*c_rho*d0) )
      {
        x4 = x3; f4 = f3; d4 = d3;  // choose subinterval: move point 3 to point 4
      } else {
        x2 = x3; f2 = f3; d2 = d3;  // move point 3 to point 2
      }
      if ( f4 > f0 ) {
        // quadratic interpolation
        x3 = x2 - (0.5*d2*pow((x4-x2), 2)) / (f4 - f2 - d2*(x4-x2));
      } else {
        // cubic interpolation; we observed that this computation can be
        // critical and unstable
        double A = 6*(f2-f4)/(x4-x2) + 3*(d4+d2);
        double B = 3*(f4-f2) - (2*d2+d4)*(x4-x2);
        // small values of A (around 1e-6) have a big influence on x3
        x3 = x2 + (sqrt(B*B - A*d2*pow(x4-x2, 2)) - B) / A;
      }
      if ( !NICE::isFinite(x3) ) {
        x3 = (x2 + x4) / 2.0;   // numerical problem: bisect
      }
      // do not accept a point too close to either end of the bracket
      x3 = std::max(std::min(x3, x4 - c_int*(x4-x2)), x2 + c_int*(x4-x2));
      // evaluate the new interpolation point
      problem.applyStep ( x3*s );
      f3 = problem.objective();
      problem.computeGradient();
      df3 = problem.gradientCached();
      problem.unapplyStep ( x3*s );
      if ( f3 < F0 ) {
        // keep track of the best point seen so far
        X0delta = x3 * s;
        F0 = f3;
        dF0 = df3;
      }
      M = M - 1;
      i = i + (length < 0);           // count one function evaluation
      d3 = df3.scalarProduct ( s );   // new slope
    }
    // line search succeeded: both Wolfe-Powell conditions are satisfied
    if ( (fabs(d3) < - c_sig * d0) && (f3 < f0 + x3 * c_rho * d0) )
    {
      problem.applyStep ( x3 * s );   // accept the step
      if ( verbose )
        cerr << "FirstOrderRasmussen: new objective value " << f3 << endl;
      f0 = f3;
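      /* Polack-Ribiere update of the conjugate gradient search direction:
       * with g_old = df0 and g_new = df3,
       *   beta = (g_new' * g_new - g_old' * g_new) / (g_old' * g_old)
       *   s    = beta * s - g_new
       * which is exactly the expression below. */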
      s = ( df3.scalarProduct(df3) - df0.scalarProduct(df3)) / (df0.scalarProduct(df0)) * s - df3;
      df0 = df3;                           // swap gradients
      d3 = d0; d0 = df0.scalarProduct(s);  // slope along the new direction
      if ( d0 > 0 ) {
        // the new direction is not a descent direction: revert to steepest descent
        s = -1.0 * df0; d0 = - s.scalarProduct(s);
      }
      // guess for the next step size, with the slope ratio limited to c_ratio;
      // the tiny offset avoids a division by zero (realmin in the MATLAB code)
      x3 = x3 * std::min(c_ratio, d3 / (d0 - std::numeric_limits<float>::min()) );
      ls_failed = false;                   // this line search did not fail
    } else {
      // line search failed: restore the best point seen so far
      problem.applyStep ( X0delta );
      f0 = F0;
      df0 = dF0;
      // give up if the maximum number of iterations is exceeded or the line
      // search failed twice in a row
      if ( ls_failed || (i > (uint)abs(length)) )
      {
        if ( (i > (uint)abs(length)) && verbose )
          cerr << "FirstOrderRasmussen: maximum number of iterations reached" << endl;
        else if ( verbose )
          cerr << "FirstOrderRasmussen: line search failed twice" << endl;
        break;
      }
      // try again with steepest descent
      s = - 1.0 * df0;
      d0 = - s.scalarProduct(s);
      x3 = 1.0 / (1.0 - d0);
      ls_failed = true;
    }
  }
}