/**
 * @file LaplaceApproximation.cpp
 * @brief Laplace approximation of the posterior for Gaussian process models
 * @author Erik Rodner
 * @date 02/17/2010
 */
#include <iostream>
#include <cmath>

#include "core/vector/Algorithms.h"
#include "LaplaceApproximation.h"
#include "LHCumulativeGauss.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

LaplaceApproximation::LaplaceApproximation()
{
    maxiterations = 40;
    minimumDelta = 1e-14;
    noiseTerm = 0.0;
    verbose = false;
}

LaplaceApproximation::LaplaceApproximation( const Config *conf, const string & section )
{
    maxiterations = conf->gI(section, "laplace_max_iterations", 40 );
    minimumDelta = conf->gD(section, "laplace_minimum_delta", 1e-14 );
    // in my experiments (scene classification) changing this
    // additional noise parameter to a non-zero value is
    // not a good idea at all
    noiseTerm = conf->gD(section, "laplace_noise_term", 0.0);
    verbose = conf->gB(section, "laplace_verbose", false );
}

LaplaceApproximation::~LaplaceApproximation()
{
}

void LaplaceApproximation::updateCache ( const Matrix & kernelMatrix, const Vector & y, const LikelihoodFunction *likelihoodFunction )
{
    // compute hessianW = - nabla nabla log p(y|f)
    for ( uint i = 0 ; i < mode.size(); i++ )
        hessianW[i] = - likelihoodFunction->hessian ( y[i], mode[i] );

    // B = I + hessianW^{1/2} K hessianW^{1/2}
    Matrix B ( kernelMatrix );

    // this additional noise term should not be necessary
    // according to the bible (Rasmussen & Williams, GPML)
    B.addIdentity(noiseTerm);

    for ( uint i = 0 ; i < B.rows() ; i++ )
        for ( uint j = 0 ; j < B.cols(); j++ )
            if ( i == j )
                B(i,i) *= hessianW[i];
            else
                B(i,j) *= sqrt(hessianW[i] * hessianW[j]);
    B.addIdentity(1.0);

    // calculate the Cholesky decomposition of B
    choleskyDecompLargeScale ( B, cholB );

    // calculate gradientL = nabla log p(y|f)
    for ( uint i = 0 ; i < mode.size(); i++ )
        gradientL[i] = likelihoodFunction->gradient ( y[i], mode[i] );

    // calculate b = Wf + gradientL
    Vector b = gradientL;
    for ( uint i = 0 ; i < b.size(); i++ )
        b[i] = hessianW[i] * mode[i] + b[i];

    // calculate bb = W^{1/2} K b
    Vector bb = kernelMatrix * b;
    for ( uint i = 0 ; i < bb.size(); i++ )
        bb[i] = sqrt( hessianW[i] ) * bb[i];

    // calculate a = b - W^{1/2} B^{-1} bb
    choleskySolveLargeScale ( cholB, bb, a );
    for ( uint i = 0 ; i < a.size(); i++ )
        a[i] = b[i] - sqrt( hessianW[i] ) * a[i];
}
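/*
 * Summary of the Newton step assembled by updateCache() and iterated in
 * approximate() below (cf. Rasmussen & Williams, "Gaussian Processes for
 * Machine Learning", Algorithm 3.1):
 *
 *   W     = - nabla nabla log p(y|f)    (diagonal, stored in hessianW)
 *   B     = I + W^{1/2} K W^{1/2}
 *   b     = W f + nabla log p(y|f)
 *   a     = b - W^{1/2} B^{-1} W^{1/2} K b
 *   f_new = K a
 *
 * At convergence, f is the mode of p(f|y) and a = K^{-1} f, which is why the
 * objective in approximate() can be evaluated as -0.5 * f^T a + log p(y|f)
 * without ever inverting K explicitly.
 */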
void LaplaceApproximation::approximate ( KernelData *kernelData, const Vector & y, const LikelihoodFunction *likelihoodFunction )
{
    mode.resize ( y.size() );
    mode.set(0.0);

    // FIXME: we always start with zero, but we could also
    // warm-start with a previous estimate of the mode
    // and compare it with our zero estimate
    objective = 0.0;
    double oldobjective = objective;
    uint iteration = 0;
    hessianW.resize(y.size());
    gradientL.resize(y.size());

    const Matrix & kernelMatrix = kernelData->getKernelMatrix();

    while ( iteration < maxiterations )
    {
        oldobjective = objective;

        // compute the gradient, the Hessian, the Cholesky factor of B, etc.
        updateCache ( kernelMatrix, y, likelihoodFunction );

        // the gradient of the objective function is gradientL - a
        if ( verbose )
            cerr << "findModeLaplace: gradient norm = " << (gradientL - a).normL2() << endl;

        mode = kernelMatrix * a;

        // compute the log likelihood at the current mode estimate
        double loglikelihood = 0.0;
        for ( uint i = 0 ; i < mode.size(); i++ )
            loglikelihood += likelihoodFunction->logLike ( y[i], mode[i] );

        if ( NICE::isNaN(loglikelihood) )
            fthrow(Exception, "Log-likelihood p(y|f) could not be calculated numerically...check your parameters!");

        // checking the sign of the value directly is more portable than
        // relying on isinf() returning +1/-1 (a glibc extension)
        if ( isinf(loglikelihood) && loglikelihood > 0.0 )
        {
            if ( verbose )
            {
                cerr << "findModeLaplace: log likelihood is positive infinity...we cannot optimize any more in this case." << endl;
                cerr << "findModeLaplace: mode = " << mode << endl;
            }
            break;
        }

        if ( isinf(loglikelihood) && loglikelihood < 0.0 )
        {
            fthrow(Exception, "Log likelihood is negative infinity...this seems to be a really improbable setting.");
        }

        // compute the objective: -0.5 * f^T K^{-1} f + log p(y|f), with f = K a
        objective = - 0.5 * mode.scalarProduct(a);

        if ( NICE::isNaN(objective) )
            fthrow(Exception, "Objective function of the Laplace approximation could not be calculated numerically...check your parameters!");

        objective += loglikelihood;

        if ( verbose )
            cerr << "findModeLaplace: objective = " << objective << endl;

        // relative change of the objective; stop as soon as it stagnates
        double delta = fabs(oldobjective - objective) / (fabs(objective) + 1);
        if ( verbose )
            cerr << "findModeLaplace: delta = " << delta << endl;
        if ( delta < minimumDelta )
            break;

        iteration++;
    }

    // re-evaluate the cached quantities (gradientL, hessianW, cholB, a)
    // at the final mode
    updateCache ( kernelMatrix, y, likelihoodFunction );
}

double LaplaceApproximation::predict ( const Vector & kernelVector, double kernelSelf, const Vector & y, const LikelihoodFunction *likelihoodFunction ) const
{
    // predictive mean: k_*^T nabla log p(y|f), evaluated at the mode
    double fmean = kernelVector.scalarProduct ( gradientL );

    Vector wKernelVector ( kernelVector );
    for ( uint i = 0 ; i < wKernelVector.size(); i++ )
        wKernelVector[i] *= sqrt(hessianW[i]);
    Vector v;
    triangleSolve ( cholB, wKernelVector, v );

    // predictive variance: k(x_*, x_*) - v^T v
    double fvariance = kernelSelf - v.scalarProduct(v);

    if ( fvariance <= 0.0 )
        fthrow(Exception, "There seems to be something odd here: the variance is negative!" <<
            " Did you use a normalized kernel?");

    double prob;
    const LHCumulativeGauss *likecumgauss = dynamic_cast<const LHCumulativeGauss *> ( likelihoodFunction );
    if ( likecumgauss ) {
        prob = likecumgauss->predictAnalytically ( fmean, fvariance );
    } else {
        fthrow(Exception, "Prediction for likelihood functions which are not cumulative Gaussians is not yet implemented!");
    }

    return prob;
}
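/*
 * Usage sketch (illustrative only): how this class is typically wired up.
 * The KernelData setter name and the constructor signatures below are
 * assumptions and may differ in your version of the codebase; only
 * approximate() and predict() are taken from this file.
 *
 *   Config conf ( "settings.conf" );        // assumed Config construction
 *   KernelData kernelData ( &conf );        // assumed constructor
 *   kernelData.setKernelMatrix ( K );       // K: n x n kernel matrix (assumed setter)
 *   LHCumulativeGauss likelihood;           // cumulative Gaussian (probit) likelihood
 *
 *   LaplaceApproximation laplace ( &conf, "GP" );
 *   laplace.approximate ( &kernelData, y, &likelihood );   // y: labels, e.g. {-1,+1}
 *
 *   // kStar: kernel values between the test point and all training points,
 *   // kSelf: k(x_*, x_*); returns the predictive class probability
 *   double p = laplace.predict ( kStar, kSelf, y, &likelihood );
 */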