DownhillSimplexOptimizer.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /**
  2. *
  3. * @file: DownhillSimplexOptimizer.cpp: implementation of the downhill Simplex Optimier
  4. *
  5. * @author: Matthias Wacker, Esther Platzer, Alexander Freytag
  6. *
  7. */
  8. #include <iostream>
  9. #include "mateigen.h" // for SVD
  10. #include "optimization/DownhillSimplexOptimizer.h"
  11. #include "optimization/Opt_Namespace.h"
  12. using namespace optimization;
  13. DownhillSimplexOptimizer::DownhillSimplexOptimizer(OptLogBase *loger): SuperClass(loger)
  14. {
  15. m_abort = false;
  16. m_simplexInitialized = false;
  17. //note that we use the negative value of alpha in amoeba
  18. m_alpha = 1.0;
  19. m_beta = 0.5;
  20. // m_gamma was previously 1.0, but this actually deactivates extrapolation. We use 2.0 as suggested in Num. Recipes
  21. m_gamma = 2.0;
  22. m_rankdeficiencythresh = 0.01;
  23. m_rankcheckenabled = false;
  24. }
  25. DownhillSimplexOptimizer::DownhillSimplexOptimizer(const DownhillSimplexOptimizer &opt) : SuperClass(opt)
  26. {
  27. m_abort = opt.m_abort;
  28. m_simplexInitialized = opt.m_simplexInitialized;
  29. m_vertices = opt.m_vertices;
  30. m_y = opt.m_y;
  31. m_alpha = opt.m_alpha;
  32. m_beta = opt.m_beta;
  33. m_gamma = opt.m_gamma;
  34. m_rankdeficiencythresh= 0.01;
  35. m_rankcheckenabled= false;
  36. }
  37. DownhillSimplexOptimizer::~DownhillSimplexOptimizer()
  38. {
  39. }
  40. bool DownhillSimplexOptimizer::setWholeSimplex(const matrix_type &simplex)
  41. {
  42. if(simplex.rows() == static_cast<int>(m_numberOfParameters) && simplex.cols() == static_cast<int>(m_numberOfParameters + 1))
  43. {
  44. //Check if simplex has rank(simplex)=numberOfParameters
  45. //otherwise return false because of bad posed simplex
  46. if(m_rankcheckenabled)
  47. {
  48. int rank = getRank(simplex, m_rankdeficiencythresh);
  49. if(rank < static_cast<int>(m_numberOfParameters))
  50. {
  51. m_simplexInitialized = false;
  52. return false;
  53. }
  54. }
  55. m_vertices = simplex;
  56. m_simplexInitialized = true;
  57. for (int k = 0; k < static_cast<int>(m_numberOfParameters +1); k++)
  58. {
  59. m_y[k][0] = evaluateCostFunction(m_vertices(0,k,m_numberOfParameters-1,k));
  60. }
  61. return true;
  62. }
  63. else
  64. {
  65. m_simplexInitialized = false;
  66. return false;
  67. }
  68. }
  69. void DownhillSimplexOptimizer::init()
  70. {
  71. SuperClass::init();
  72. // allocate matrices
  73. m_vertices = matrix_type(m_numberOfParameters, m_numberOfParameters + 1);
  74. m_y = matrix_type(m_numberOfParameters + 1,1);
  75. m_abort = false;
  76. //this forces a re-initialization in all cases
  77. //it is even more advisable, if we would perform a random initialization
  78. m_simplexInitialized == false;
  79. // previously, we did a random initialization
  80. // right now, we rely on our first guess and move it in one dimension by m_scales[i]
  81. // as suggested in Numerical Recipes
  82. if (m_simplexInitialized == false)
  83. {
  84. //this aborting stuff was formerly needed to ensure a valid simplex for random initializations
  85. bool abort = false;
  86. while(abort == false)
  87. {
  88. matrix_type simplex(m_numberOfParameters,m_numberOfParameters+1);
  89. //move every point in a single dimension by m_scales[i]
  90. //we first iterate over dimensions
  91. for(int i = 0; i < static_cast<int>(m_numberOfParameters); i++)
  92. {
  93. //and internally over the different points
  94. for(int j = 0; j < static_cast<int>(m_numberOfParameters+1); j++)
  95. {
  96. simplex[i][j] = m_parameters[i][0];
  97. if( j == i+1 )
  98. {
  99. double tmpRand = m_scales[i][0];// * ((double)rand()) / RAND_MAX;
  100. simplex[i][j] += tmpRand;
  101. }
  102. }
  103. }
  104. //evaluate function values in simplex corners and
  105. //check if simplex is degenerated (less than d dimensions)
  106. if(this->setWholeSimplex(simplex) == true)
  107. {
  108. // simplexInitializen is only to indicate manual initialization ..
  109. //actually, this is not needed anymore.
  110. m_simplexInitialized = false;
  111. //we computed a valid solution, so break the while loop
  112. //actually, this is always the case, since we de-activated the random init
  113. abort =true;
  114. }
  115. }
  116. }
  117. // if the simplex was already initialized, we only have to compute the function values
  118. // of its corners
  119. else
  120. {
  121. //compute the function values of the initial simplex points
  122. for (int k = 0; k < static_cast<int>(m_numberOfParameters +1); k++)
  123. {
  124. m_y[k][0] = evaluateCostFunction(m_vertices(0,k,m_numberOfParameters-1,k));
  125. }
  126. }
  127. }
  128. int DownhillSimplexOptimizer::optimize()
  129. {
  130. //before optimizing, we initialize our simplex in all cases
  131. // if you are pretty sure, that you already have a suitable initial
  132. // simplex, you can skip this part
  133. //if(m_simplexInitialized == false)
  134. //{
  135. this->init();
  136. //}
  137. if(m_loger)
  138. m_loger->logTrace("Starting Downhill Simplex Optimization\n");
  139. //tmp contains the index of the best vertex point after running DHS
  140. int tmp=amoeba();
  141. m_parameters = m_vertices(0,tmp,m_numberOfParameters-1,tmp);
  142. //final evaluation of the cost function with our optimal parameter settings
  143. m_currentCostFunctionValue = evaluateCostFunction(m_parameters);
  144. return m_returnReason;
  145. }
  146. /*
  147. * Taken from Numerical Recipies in C
  148. */
  149. int DownhillSimplexOptimizer::amoeba()
  150. {
  151. //define the desired tolerance of our solution
  152. double tol = m_funcTol;
  153. //how many parameters do we optimize?
  154. const int ndim=m_numberOfParameters;
  155. if (m_verbose)
  156. {
  157. std::cerr << "ndim: " << ndim << std::endl;
  158. }
  159. // number of vertices
  160. const int spts=ndim+1;
  161. int i,j, max_val;
  162. // index of worst (lowest) vertex-point
  163. int ilo;
  164. // index of best (hightest) vertex point
  165. int ihi;
  166. // index of second worst (next hightest) vertex point
  167. int inhi;
  168. double rtol,ytry;
  169. // buffer to save a function value
  170. double ysave;
  171. matrix_type psum(1,ndim);
  172. //start time criteria
  173. m_startTime = clock();
  174. // get sum of vertex-coordinates
  175. for (j=0;j<ndim;j++)
  176. {
  177. double sum(0.0);
  178. for (i=0;i<spts;i++)
  179. sum += m_vertices[j][i];
  180. psum[0][j]=sum;
  181. }
  182. if (m_verbose)
  183. {
  184. std::cerr << "initial psum: ";
  185. for (j=0;j<ndim;j++)
  186. {
  187. std::cerr << psum[0][j] << " " ;
  188. }
  189. std::cerr << std::endl;
  190. }
  191. // loop until terminating
  192. //this is our main loop for the whole optimization
  193. for (;;)
  194. {
  195. //what is our current solution?
  196. if(m_verbose)
  197. {
  198. for(int u = 0; u < ndim+1; u++)
  199. {
  200. for(int v = 0; v < ndim ; v++)
  201. {
  202. std::cerr << m_vertices[v][u] << " ";
  203. }
  204. std::cerr<< " " << m_y[u][0] << std::endl;
  205. }
  206. std::cerr << std::endl;
  207. }
  208. // first we must determine
  209. // which point is highest,
  210. // next-highest
  211. // and lowest
  212. // by looping over the simplex
  213. ilo = 0;
  214. //this works only, if m_numberOfParameters=ndim >= 2
  215. if(ndim >= 2)
  216. {
  217. ihi = m_y[1][0]>m_y[2][0] ? (inhi=1,2) : (inhi=2,1);
  218. for (i=0;i<spts;i++)
  219. {
  220. if (m_y[i][0] <= m_y[ilo][0]) ilo=i;
  221. if (m_y[i][0] > m_y[ihi][0]) {
  222. inhi=ihi;
  223. ihi=i;
  224. }
  225. else
  226. if (m_y[i][0] > m_y[inhi][0])
  227. if (i != ihi)
  228. inhi=i;
  229. }
  230. }
  231. //if we have less than 3 dimensions, the solution is straight forward
  232. else
  233. {
  234. if(m_y[0][0]>m_y[1][0])
  235. {
  236. ilo = 1;
  237. inhi = 1;
  238. ihi = 0;
  239. }
  240. else
  241. {
  242. ilo = 0;
  243. inhi = 0;
  244. ihi = 1;
  245. }
  246. }
  247. // log parameters if parameter logger is given (does nothing for other loggers!)
  248. if(m_loger)
  249. {
  250. // extract optimal parameters from simplex
  251. matrix_type optparams(m_vertices.rows(),1,0);
  252. for(int i= 0; i< m_vertices.rows(); ++i)
  253. {
  254. optparams[i][0]= m_vertices[i][ilo];
  255. }
  256. matrix_type fullparams= m_costFunction->getFullParamsFromSubParams(optparams);
  257. m_loger->writeParamsToFile(fullparams);
  258. }
  259. //Check m_abort .. outofbounds may have occurred in amotry() of last iteration
  260. if(m_abort == true)
  261. {
  262. break;
  263. }
  264. //Check time criterion
  265. //in the default setting, this is deactivated
  266. if(m_maxSecondsActive)
  267. {
  268. m_currentTime = clock();
  269. /* time limit exceeded ? */
  270. if(((float)(m_currentTime - m_startTime )/CLOCKS_PER_SEC) >= m_maxSeconds )
  271. {
  272. /* set according return status and end optimization */
  273. m_returnReason = SUCCESS_TIMELIMIT;
  274. break;
  275. }
  276. }
  277. //check functol criterion
  278. if(m_funcTolActive == true)
  279. {
  280. // compute the fractional
  281. // range from highest to lowest
  282. // avoid division by zero
  283. rtol=2.0*fabs(m_y[ihi][0]-m_y[ilo][0])/(fabs(m_y[ihi][0])+fabs(m_y[ilo][0])+1e-10);
  284. #ifdef OPT_DEBUG
  285. std::cout<<"rtol"<<" "<<rtol<< std::endl;
  286. #endif
  287. // if the found solution is satisfactory, than terminate
  288. if (rtol<tol)
  289. {
  290. // save lowest value, which is our desired solution
  291. max_val=(int)m_y[ilo][0];
  292. m_returnReason = SUCCESS_FUNCTOL;
  293. //break the main loop and end this method
  294. break;
  295. }
  296. }
  297. //check param tol criterion
  298. if (m_paramTolActive == true)
  299. {
  300. // get norm of
  301. //matrix_type tmp = m_vertices(0,ihi,m_numberOfParameters-1,ihi) - m_vertices(0,ilo,m_numberOfParameters,ilo);
  302. //double norm)
  303. if ( (m_vertices(0,ihi,m_numberOfParameters-1,ihi) -
  304. m_vertices(0,ilo,m_numberOfParameters-1,ilo)).Norm(0) < m_paramTol)
  305. {
  306. /*
  307. set according return reason and end optimization
  308. */
  309. m_returnReason = SUCCESS_PARAMTOL;
  310. break;
  311. }
  312. }
  313. m_numIter++;
  314. //check max num iter criterion
  315. if(m_maxNumIterActive == true)
  316. {
  317. if (m_numIter >= m_maxNumIter)
  318. {
  319. //max_val=(int)m_y[ilo][0];
  320. m_returnReason = SUCCESS_MAXITER;
  321. break;
  322. }
  323. }
  324. //informative output
  325. if (m_verbose)
  326. {
  327. std::cerr << "start new iteration with amotry -alpha, i.e., reflect worst point through simplex" << std::endl;
  328. }
  329. // Begin a new iteration.
  330. // First extrapolate by the
  331. // factor alpha through the
  332. // face of the simplex across
  333. // from the high point, i.e.
  334. // reflect the simplex from the
  335. // high point:
  336. ytry=amotry(psum,ihi,-m_alpha);
  337. //did we got a new point better than anything else so far?
  338. if (ytry < m_y[ilo][0])
  339. {
  340. //informative output
  341. if (m_verbose)
  342. {
  343. std::cerr << "reflected point is better than best point, perform further extrapolation with gamma" << std::endl;
  344. }
  345. // result is better than best
  346. // point, try additional
  347. // extrapolation
  348. ytry=amotry(psum,ihi,m_gamma);
  349. #ifdef OPT_DEBUG
  350. std::cout<<"Case one .. reflected highest through simplex" << std::endl;
  351. #endif
  352. }
  353. //new point is not better as anything else
  354. else
  355. {
  356. //perhaps it is better than our second worst point
  357. if (ytry >= m_y[inhi][0])
  358. {
  359. //informative output
  360. if (m_verbose)
  361. {
  362. std::cerr << "reflected point is worse then second worst, looking for intermediate point with beta" << std::endl;
  363. }
  364. // The reflected point is worse
  365. // than the second worst
  366. ysave=m_y[ihi][0];
  367. // let's look for an intermediate lower point.
  368. ytry=amotry(psum,ihi,m_beta);
  369. #ifdef OPT_DEBUG
  370. std::cout<<"Case two .. looking for intermediate point" << std::endl;
  371. #endif
  372. //unfortunately, the intermediate point is still worse
  373. //then the original one
  374. if (ytry >= ysave)
  375. {
  376. #ifdef OPT_DEBUG
  377. std::cout<<"Case three .. contract around lowest point" << std::endl;
  378. #endif
  379. //informative output
  380. if (m_verbose)
  381. {
  382. std::cerr << "Intermediate point is also worse, contract around current best point with factor 0.5." << std::endl;
  383. }
  384. // Since we can't get rid
  385. // of that bad point, we better
  386. // our simplex around the current best point
  387. // note: contraction is performed by a factor of 0.5
  388. // as suggested in Numerical Recipes
  389. //contract all points
  390. for (i=0;i<spts;i++)
  391. {
  392. // but the best point has not to be contracted
  393. if (i!=ilo)
  394. {
  395. //contract in every dimension
  396. for (j=0;j<ndim;j++)
  397. {
  398. psum[0][j]=0.5*(m_vertices[j][i]+m_vertices[j][ilo]);
  399. #ifdef OPT_DEBUG
  400. printf("psum(%d)=%f\n",j,psum[0][j]);
  401. #endif
  402. m_vertices[j][i]=psum[0][j];
  403. }
  404. //TODO what does this?
  405. if (checkParameters(!psum))
  406. {
  407. m_y[i][0]= evaluateCostFunction(!psum);
  408. // count function evaluations
  409. //not needed in the current implementation
  410. //eval_count++;
  411. }
  412. else
  413. {
  414. m_returnReason = ERROR_XOUTOFBOUNDS; // out of domain !!!
  415. break;
  416. }
  417. }
  418. // update psum: get sum of vertex-coordinates
  419. for (j=0;j<ndim;j++)
  420. {
  421. double sum=0.0;
  422. for (int ii=0;ii<spts;ii++)
  423. sum += m_vertices[j][ii];
  424. psum[0][j]=sum;
  425. }//for
  426. }//for-loop for contracting all points
  427. }//if (ytry >= ysave)
  428. }//if (ytry >= m_y(inhi))
  429. }//else
  430. }// next iteration
  431. // finally, return index of best point
  432. return ilo;
  433. }
  434. double DownhillSimplexOptimizer::amotry(matrix_type & psum, int ihi, double fac)
  435. {
  436. // extrapolate by a factor fac through the face of the simplex across
  437. // from the low point, try this point it, and replace the low point if
  438. // the new point is better
  439. const double maxreal= (m_maximize == true)? -1.0e+300 : 1.0e+300;
  440. double fac1,fac2,ytry;
  441. int ndim=m_numberOfParameters;
  442. matrix_type ptry(1,ndim);
  443. //compute the factors as suggested in Numerical Recipes
  444. fac1=(1.0-fac)/ndim;
  445. fac2=fac1-fac;
  446. //compute the new point by performing a weighted superposition of the previous point and the surface of our simplex
  447. for (int j=0;j<ndim;j++)
  448. {
  449. ptry[0][j]=psum[0][j]*fac1-m_vertices[j][ihi]*fac2;
  450. }
  451. //informative output
  452. if (m_verbose)
  453. {
  454. std::cerr << "amotry fac: " << fac << std::endl;
  455. std::cerr << "fac1: " << fac1 << " fac2: " << fac2 << std::endl;
  456. std::cerr << "ptry: ";
  457. for (int j=0;j<ndim;j++)
  458. {
  459. std::cerr << ptry[0][j] << " ";
  460. }
  461. std::cerr << std::endl;
  462. }
  463. //is the new point valid?
  464. if (checkParameters(!ptry))
  465. {
  466. // evaluate function at the
  467. // new trial point
  468. ytry=evaluateCostFunction(!ptry);
  469. // count function evaluations
  470. //not needed in the current implementation
  471. // eval_count++;
  472. // if the new point is better than the old one
  473. // we replace the old one with the new point
  474. if (ytry<m_y[ihi][0])
  475. {
  476. //save function value of the new point
  477. m_y[ihi][0]=ytry;
  478. //update the surface of our simplex
  479. for (int j=0; j<ndim;j++)
  480. {
  481. // update psum
  482. psum[0][j] = psum[0][j] + ptry[0][j]-m_vertices[j][ihi];
  483. // replace lowest point
  484. m_vertices[j][ihi]=ptry[0][j];
  485. }
  486. }
  487. }
  488. // out of domain
  489. else
  490. {
  491. ytry=maxreal;
  492. m_abort = true;
  493. m_returnReason = ERROR_XOUTOFBOUNDS;
  494. }
  495. return ytry;
  496. }
  497. void DownhillSimplexOptimizer::setDownhillParams(const double alpha, const double beta, const double gamma)
  498. {
  499. m_alpha = alpha;
  500. m_beta = beta;
  501. m_gamma = gamma;
  502. }
  503. void DownhillSimplexOptimizer::setRankDeficiencyThresh(float rankdeficiencythresh)
  504. {
  505. m_rankdeficiencythresh= rankdeficiencythresh;
  506. }
  507. void DownhillSimplexOptimizer::setRankCheckStatus(bool status)
  508. {
  509. m_rankcheckenabled= status;
  510. }
  511. double DownhillSimplexOptimizer::getDownhillParameterAlpha() const
  512. {
  513. return m_alpha;
  514. }
  515. double DownhillSimplexOptimizer::getDownhillParameterBeta() const
  516. {
  517. return m_beta;
  518. }
  519. double DownhillSimplexOptimizer::getDownhillParameterGamma() const
  520. {
  521. return m_gamma;
  522. }
  523. bool DownhillSimplexOptimizer::getRankCheckStatus()
  524. {
  525. return m_rankcheckenabled;
  526. }
  527. unsigned int getRank(const matrix_type &A,double numZero)
  528. {
  529. unsigned int tmpCount = 0;
  530. matrix_type U,s,Vt;
  531. if(A.rows() < A.cols())
  532. {
  533. // call of the singular value decomp.
  534. SingularValueDcmp(!A, U, s, Vt);
  535. }
  536. else
  537. {
  538. // call of the singular value decomp.
  539. SingularValueDcmp(A, U, s, Vt);
  540. }
  541. // count singular values > numZero
  542. for(int i= 0; i < s.rows();i++)
  543. {
  544. if( s[i][i] > numZero )
  545. {
  546. tmpCount++;
  547. }
  548. }
  549. return tmpCount;
  550. }