/** 
* @file DiagonalMatrixApprox.cpp
* @brief find a diagonal matrix to approximate bilinear products
* @author Erik Rodner
* @date 05/31/2012
*/

#include <iostream>
#include <limits>
#include <cmath>

// FirstOrderRasmussen and eigenvectorvalues() are expected to be declared by
// the NICE headers pulled in through DiagonalMatrixApprox.h
#include "DiagonalMatrixApprox.h"

using namespace NICE;
using namespace std;

DiagonalMatrixApprox::DiagonalMatrixApprox( bool verbose, int maxIterations )
{
  this->epsilonStart = 1.0;
  this->epsilonShrinkFactor = 0.8;
  this->minFDelta = 1e-8;
  this->minSolDelta = 1e-8;

  this->maxEpsilonIterations = 100;
  this->verbose = verbose;
  this->optimizer = new FirstOrderRasmussen( /*false*/ this->verbose );
  ( dynamic_cast< FirstOrderRasmussen * > (optimizer) )->setMaxIterations(maxIterations);
}

DiagonalMatrixApprox::~DiagonalMatrixApprox()
{
}

void DiagonalMatrixApprox::approx ( const Matrix & A, Vector & D ) const
{
  double f0 = std::numeric_limits<double>::max();

  // not so proper initialization with the diagonal matrix
  /*
  D.resize(A.rows());
  for ( uint i = 0; i < D.size(); i++ )
    D[i] = A(i,i);
  */

  Vector D0 ( D );

  double epsilon = epsilonStart;
  for ( uint i = 0; i < maxEpsilonIterations; i++ )
  {
    epsilon = epsilonShrinkFactor * epsilon;

    // perform minimization with some gradient based method (this is a convex optimization problem)
    DiagonalMatrixApproxOptimizationProblem opt ( &A, D0, epsilon, false /*verbose*/ );

    optimizer->optimizeFirst ( opt );

    D = opt.position();
    double f = opt.computeObjective ();

    if ( verbose ) {
      cerr << "DiagonalMatrixApprox [" << i << " / " << maxEpsilonIterations << "]: " << f << " (epsilon=" << epsilon << ")" << endl;
      cerr << D << endl;
    }

    if ( !finite(f) )
    {
      f = f0;
      D = D0;
      if ( verbose )
        cerr << "DiagonalMatrixApprox ended in iteration " << i << " due to numerical problems when decreasing epsilon..." << endl;
      return;
    }

    double eig_small = opt.getSmallestEigenvalue();
    if ( eig_small < 0.0 )
    {
      if ( verbose )
        cerr << "DiagonalMatrixApprox: resetting current value due to negative eigenvalue: " << eig_small << endl;
      f = f0;
      D = D0;
    } else {
      // otherwise check for convergence
      double solDelta = ( D - D0 ).normInf();
      double fDelta = f0 - f; // this value should be positive, otherwise convergence is likely

      if ( fDelta < minFDelta || solDelta < minSolDelta )
      {
        if ( verbose )
          cerr << "DiagonalMatrixApprox: convergence detected delta_f=" << fDelta << " x_delta=" << solDelta << endl;
        return;
      }
    }

    f0 = f;
    D0 = D;
  }
}

DiagonalMatrixApproxOptimizationProblem::DiagonalMatrixApproxOptimizationProblem ( const Matrix *A, const Vector & D0, double epsilon, bool verbose )
  : OptimizationProblemFirst( D0.size() )
{
  this->A = A;
  this->parameters() = D0;
  this->epsilon = epsilon;
  this->verbose = verbose;
}

double DiagonalMatrixApproxOptimizationProblem::computeObjective()
{
  // Theoretically, we have to compute lambda_max(A - diag(D)). However, we want to solve
  // the regularized and relaxed optimization problem, which involves all eigenvalues.
  const Vector & D = parameters();
  Matrix M = (*A);
  M.addDiagonal ( (-1.0) * D );

  if ( verbose ) {
    cerr << "M = " << M << endl;
    cerr << "D = " << D << endl;
    cerr << "A = " << *A << endl;
  }

  //if ( verbose )
  //  cerr << "Computing the eigenvalue decomposition ..." << endl;

  try {
    eigenvectorvalues ( M, eigenvectors, eigenvalues );
  } catch ( ... ) {
    // the matrix seems to be singular, maybe this is a good sign.
    // Does not have to be: only the smallest eigenvalue can be zero
    return 0.0;
  }

  //if ( verbose )
  //  cerr << "Computing the objective ..." << endl;
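  // Log-sum-exp ("soft max") relaxation of the largest eigenvalue: for the
  // eigenvalues lambda_i of M = A - diag(D) the smoothed objective computed below is
  //
  //   f_eps(D) = epsilon * log( sum_i exp( lambda_i / epsilon ) ) + 0.5 * <D,D>
  //
  // which tends to lambda_max(A - diag(D)) + 0.5 * <D,D> as epsilon -> 0, so
  // shrinking epsilon in approx() gradually recovers the original problem.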
  double sumExp = 0.0;
  for ( uint i = 0 ; i < eigenvalues.size(); i++ )
    sumExp += exp( eigenvalues[i] / epsilon );

  double fval = epsilon * log( sumExp ) + 0.5 * D.scalarProduct(D);

  //if ( verbose ) {
    cerr << "DiagonalMatrixApprox: maximum eigenvalue is " << eigenvalues.Max() << endl;
  //}

  if ( !finite(fval) )
  {
    // some numerical problems occurred
    fval = numeric_limits<double>::infinity();
  }

  //if ( verbose )
  //  cerr << "Objective value of the sub-problem is: " << fval << endl;

  return fval;
}

void DiagonalMatrixApproxOptimizationProblem::computeGradient(Vector& newGradient)
{
  // inefficient but straightforward implementation with matrices P (denoted as A in the MATLAB code)
  const Vector & D = parameters();
  uint n = D.size();
  Matrix P ( n, n, 0.0 );
  Matrix W ( n, n, 0.0 );

  if ( verbose ) {
    cerr << "Eigenvectors are: " << eigenvectors << endl;
    cerr << "Eigenvalues are: " << eigenvalues << endl;
  }

  // W(j,i) = (v_j)_i^2, the squared i-th component of the j-th eigenvector,
  // obtained as the j-th diagonal entry of V^T e_i e_i^T V
  for ( uint i = 0 ; i < n ; i++ )
  {
    P(i,i) = 1.0;
    Matrix Ptilde = eigenvectors.transpose() * P * eigenvectors;
    for ( uint j = 0 ; j < n ; j++ )
      W(j,i) = Ptilde(j,j);
    P(i,i) = 0.0;
  }

  // soft-max weights mu_j = exp(lambda_j/epsilon) / sum_k exp(lambda_k/epsilon),
  // shifted by the largest eigenvalue for numerical stability
  double eigmax = eigenvalues.Max();
  Vector mu (n);
  for ( uint i = 0 ; i < n ; i++ )
    mu[i] = exp( (eigenvalues[i] - eigmax)/epsilon );

  mu.normalizeL1();

  if ( verbose ) {
    cerr << "W = " << W << endl;
    cerr << "mu = " << mu << endl;
  }

  // gradient of the smoothed objective: -W^T mu from the log-sum-exp term
  // plus D from the regularizer 0.5 * <D,D>
  newGradient = - 1.0 * W.transpose() * mu + D;

  if ( verbose ) {
    cerr << "gradient = " << newGradient << endl;
  }
}
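
// Usage sketch (illustrative only; it relies solely on the interface implemented
// above and on the NICE Matrix/Vector operations already used in this file).
// The vector D passed to approx() serves as the starting point and receives the
// resulting diagonal:
//
//   NICE::Matrix A ( 3, 3, 0.0 );
//   A(0,0) = 2.0; A(1,1) = 3.0; A(2,2) = 1.0;
//   A(0,1) = 0.5; A(1,0) = 0.5;
//
//   NICE::Vector D ( A.rows() );
//   for ( uint i = 0; i < D.size(); i++ )
//     D[i] = A(i,i);                        // start from the diagonal of A
//
//   DiagonalMatrixApprox dma ( true /*verbose*/, 50 /*maxIterations*/ );
//   dma.approx ( A, D );
//   // D now holds the diagonal approximation of A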