DiagonalMatrixApprox.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /**
  2. * @file DiagonalMatrixApprox.cpp
  3. * @brief find a diagonal matrix to approximate bilinear products
  4. * @author Erik Rodner
  5. * @date 05/31/2012
  6. */
  7. #include <iostream>
  8. #include <core/optimization/gradientBased/FirstOrderRasmussen.h>
  9. #include <core/vector/Eigen.h>
  10. #include "DiagonalMatrixApprox.h"
  11. #include "core/basics/numerictools.h"
  12. using namespace NICE;
  13. using namespace std;
  14. DiagonalMatrixApprox::DiagonalMatrixApprox( bool verbose, int maxIterations )
  15. {
  16. this->epsilonStart = 1.0;
  17. this->epsilonShrinkFactor = 0.8;
  18. this->minFDelta = 1e-8;
  19. this->minSolDelta = 1e-8;
  20. this->maxEpsilonIterations = 100;
  21. this->verbose = verbose;
  22. this->optimizer = new FirstOrderRasmussen( /*false*/ this->verbose );
  23. ( dynamic_cast< FirstOrderRasmussen * > (optimizer) )->setMaxIterations(maxIterations);
  24. }
  25. DiagonalMatrixApprox::~DiagonalMatrixApprox()
  26. {
  27. }
  28. void DiagonalMatrixApprox::approx ( const Matrix & A, Vector & D ) const
  29. {
  30. double f0 = std::numeric_limits<double>::max();
  31. // not so proper intialization with the diagonal matrix
  32. /*
  33. D.resize(A.rows());
  34. for ( uint i = 0; i < D.size(); i++ )
  35. D[i] = A(i,i);
  36. */
  37. Vector D0 ( D );
  38. double epsilon = epsilonStart;
  39. for ( uint i = 0; i < maxEpsilonIterations; i++ )
  40. {
  41. epsilon = epsilonShrinkFactor * epsilon;
  42. // perform minimization with some gradient based method (this is a convex optimization problem)
  43. DiagonalMatrixApproxOptimizationProblem opt ( &A, D0, epsilon, false /*verbose*/ );
  44. optimizer->optimizeFirst ( opt );
  45. D = opt.position();
  46. double f = opt.computeObjective ();
  47. if ( verbose )
  48. {
  49. cerr << "DiagonalMatrixApprox [" << i << " / " << maxEpsilonIterations << "]: " << f << " (epsilon=" << epsilon << ")" << endl;
  50. cerr << D << endl;
  51. }
  52. if ( !NICE::isFinite(f) )
  53. {
  54. f = f0;
  55. D = D0;
  56. if ( verbose )
  57. cerr << "DiagonalMatrixApprox ended in iteration " << i << " due to numerical problems when decreasing epsilon..." << endl;
  58. return;
  59. }
  60. double eig_small = opt.getSmallestEigenvalue();
  61. if ( eig_small < 0.0 )
  62. {
  63. if ( verbose )
  64. cerr << "DiagonalMatrixApprox: resetting current value due to negative eigenvalue: " << eig_small << endl;
  65. D = f0;
  66. D = D0;
  67. } else {
  68. // otherwise check for convergence
  69. double solDelta = ( D - D0 ).normInf();
  70. double fDelta = f0 - f; // this value should be positive, otherwise convergence is likely
  71. if ( fDelta < minFDelta || solDelta < minSolDelta )
  72. {
  73. if ( verbose )
  74. cerr << "DiagonalMatrixApprox: convergence detected delta_f=" << fDelta << " x_delta=" << solDelta << endl;
  75. return;
  76. }
  77. }
  78. f0 = f;
  79. D0 = D;
  80. }
  81. }
  82. DiagonalMatrixApproxOptimizationProblem::DiagonalMatrixApproxOptimizationProblem ( const Matrix *A, const Vector & D0, double epsilon, bool verbose )
  83. : OptimizationProblemFirst( D0.size() )
  84. {
  85. this->A = A;
  86. this->parameters() = D0;
  87. this->epsilon = epsilon;
  88. this->verbose = verbose;
  89. }
  90. double DiagonalMatrixApproxOptimizationProblem::computeObjective()
  91. {
  92. // Theoretically, we have to compute lambda_max(A - diag(D)). However, we want to solve
  93. // the regularized and relaxed optimization problem, which involves all eigenvalues
  94. const Vector & D = parameters();
  95. Matrix M = (*A);
  96. M.addDiagonal ( (-1.0) * D );
  97. if ( verbose ) {
  98. cerr << "M = " << M << endl;
  99. cerr << "D = " << D << endl;
  100. cerr << "A = " << *A << endl;
  101. }
  102. //if ( verbose )
  103. // cerr << "Computing the eigenvalue decomposition ..." << endl;
  104. try {
  105. eigenvectorvalues ( M, eigenvectors, eigenvalues );
  106. } catch ( ... ) {
  107. // the matrix seems to be singular, maybe this is a good sign.
  108. // Does not have to be: only the smallest eigenvalue can be zero
  109. return 0.0;
  110. }
  111. //if ( verbose )
  112. // cerr << "Computing the objective ..." << endl;
  113. double sumExp = 0.0;
  114. for ( uint i = 0 ; i < eigenvalues.size(); i++ )
  115. sumExp += exp( eigenvalues[i] / epsilon );
  116. double fval = epsilon * log( sumExp ) + 0.5 * D.scalarProduct(D);
  117. //if ( verbose ) {
  118. cerr << "DiagonalMatrixApprox: maximum eigenvalue is " << eigenvalues.Max() << endl;
  119. //}
  120. if ( !NICE::isFinite(fval) )
  121. {
  122. // some numerical problems occured
  123. fval = numeric_limits<double>::infinity();
  124. }
  125. //if ( verbose )
  126. // cerr << "Objective value of the sub-problem is: " << fval << endl;
  127. return fval;
  128. }
  129. void DiagonalMatrixApproxOptimizationProblem::computeGradient(Vector& newGradient)
  130. {
  131. // inefficient but straightforward implementation with matrices P (denoted as A in the MATLAB code)
  132. const Vector & D = parameters();
  133. uint n = D.size();
  134. Matrix P ( n, n, 0.0 );
  135. Matrix W ( n, n, 0.0 );
  136. if ( verbose ) {
  137. cerr << "Eigenvectors are: " << eigenvectors << endl;
  138. cerr << "Eigenvalues are: " << eigenvalues << endl;
  139. }
  140. for ( uint i = 0 ; i < n ; i++ )
  141. {
  142. P(i,i) = 1.0;
  143. Matrix Ptilde = eigenvectors.transpose() * P * eigenvectors;
  144. for ( uint j = 0 ; j < n ; j++ )
  145. W(j,i) = Ptilde(j,j);
  146. P(i,i) = 0.0;
  147. }
  148. double eigmax = eigenvalues.Max();
  149. Vector mu (n);
  150. for ( uint i = 0 ; i < n ; i++ )
  151. mu[i] = exp( (eigenvalues[i] - eigmax)/epsilon );
  152. mu.normalizeL1();
  153. if ( verbose ) {
  154. cerr << "W = " << W << endl;
  155. cerr << "mu = " << mu << endl;
  156. }
  157. newGradient = - 1.0 * W.transpose() * mu + D;
  158. if ( verbose ) {
  159. cerr << "gradient = " << newGradient << endl;
  160. }
  161. }