/**
 * @file LaplaceApproximation.cpp
 * @brief Utility functions for the Laplace approximation
 * @author Erik Rodner
 * @date 02/17/2010
 */

#include <iostream>
#include <cmath>

#include "core/vector/Algorithms.h"

#include "LaplaceApproximation.h"
#include "LHCumulativeGauss.h"

using namespace NICE;
using namespace OBJREC;

LaplaceApproximation::LaplaceApproximation()
{
    maxiterations = 40;
    minimumDelta = 1e-14;
    noiseTerm = 0.0;
    verbose = false;
}

LaplaceApproximation::LaplaceApproximation( const Config *conf, const std::string & section )
{
    maxiterations = conf->gI(section, "laplace_max_iterations", 40 );
    minimumDelta = conf->gD(section, "laplace_minimum_delta", 1e-14 );
    // in my experiments (scene classification), setting this additional
    // noise parameter to a non-zero value was not a good idea at all
    noiseTerm = conf->gD(section, "laplace_noise_term", 0.0);
    verbose = conf->gB(section, "laplace_verbose", false );
}

LaplaceApproximation::~LaplaceApproximation()
{
}
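
/** Recompute all quantities needed for a single Newton step of the
 *  Laplace approximation (this follows the standard scheme, cf.
 *  Rasmussen & Williams, "Gaussian Processes for Machine Learning",
 *  Algorithm 3.1): the Hessian W of the negative log likelihood,
 *  the Cholesky factor of B = I + W^{1/2} K W^{1/2}, the gradient of
 *  the log likelihood, and the Newton direction a. */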
void LaplaceApproximation::updateCache ( const Matrix & kernelMatrix, const Vector & y, const LikelihoodFunction *likelihoodFunction )
{
    // compute hessianW = - nabla nabla log p(y|f)
    for ( uint i = 0 ; i < mode.size(); i++ )
        hessianW[i] = - likelihoodFunction->hessian ( y[i], mode[i] );

    // B = I + hessianW^{1/2} K hessianW^{1/2}
    Matrix B ( kernelMatrix );
    // adding this extra noise term should not be necessary according to
    // the "bible" (Rasmussen & Williams)
    B.addIdentity(noiseTerm);
    for ( uint i = 0 ; i < B.rows() ; i++ )
        for ( uint j = 0 ; j < B.cols(); j++ )
            if ( i == j )
                B(i,i) *= hessianW[i];
            else
                B(i,j) *= sqrt(hessianW[i] * hessianW[j]);
    B.addIdentity(1.0);

    // calculate the Cholesky decomposition of B
    choleskyDecompLargeScale ( B, cholB );

    // calculate gradientL = nabla log p(y|f)
    for ( uint i = 0 ; i < mode.size(); i++ )
        gradientL[i] = likelihoodFunction->gradient( y[i], mode[i] );

    // calculate b = W f + gradientL
    Vector b = gradientL;
    for ( uint i = 0 ; i < b.size(); i++ )
        b[i] = hessianW[i] * mode[i] + b[i];

    // calculate bb = W^{1/2} K b
    Vector bb = kernelMatrix * b;
    for ( uint i = 0 ; i < bb.size(); i++ )
        bb[i] = sqrt( hessianW[i] ) * bb[i];

    // calculate a = b - W^{1/2} B^{-1} bb
    choleskySolveLargeScale ( cholB, bb, a );
    for ( uint i = 0 ; i < a.size(); i++ )
        a[i] = b[i] - sqrt( hessianW[i] ) * a[i];
}
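
/** Find the mode of the posterior p(f|y) by Newton iterations.
 *  Each step computes the direction a in updateCache() and sets the new
 *  mode to f = K a; the iteration stops when the relative change of the
 *  objective Psi(f) = log p(y|f) - 0.5 * a^T f drops below minimumDelta
 *  or when maxiterations is reached. */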
void LaplaceApproximation::approximate ( KernelData *kernelData, const Vector & y,
                                         const LikelihoodFunction *likelihoodFunction )
{
    mode.resize ( y.size() );
    mode.set(0.0);

    // FIXME: we always start at zero, but we could also warm-start with a
    // previously computed approximation and compare it with the zero estimate
    objective = 0.0;
    double oldobjective = objective;
    uint iteration = 0;
    hessianW.resize(y.size());
    gradientL.resize(y.size());

    const Matrix & kernelMatrix = kernelData->getKernelMatrix();

    while (iteration < maxiterations)
    {
        oldobjective = objective;

        // compute the gradient, the Hessian, and the Newton direction a
        updateCache( kernelMatrix, y, likelihoodFunction );

        // the gradient of the objective function is gradientL - a
        if ( verbose )
            std::cerr << "findModeLaplace: gradient norm = " << (gradientL - a).normL2() << std::endl;

        // Newton update of the mode: f = K a
        mode = kernelMatrix * a;

        // compute the log likelihood of the current mode
        double loglikelihood = 0.0;
        for ( uint i = 0 ; i < mode.size(); i++ )
            loglikelihood += likelihoodFunction->logLike ( y[i], mode[i] );

        if ( NICE::isNaN(loglikelihood) )
            fthrow(Exception, "Log-likelihood p(y|f) could not be calculated numerically ... check your parameters!");

        // std::isinf only reports infinity; check the sign explicitly
        // (the former comparison with +/-1 relied on non-portable glibc behavior)
        if ( std::isinf(loglikelihood) && loglikelihood > 0 )
        {
            if ( verbose )
            {
                std::cerr << "findModeLaplace: log likelihood is positive infinity ... we cannot optimize any further in this case." << std::endl;
                std::cerr << "findModeLaplace: mode = " << mode << std::endl;
            }
            break;
        }

        if ( std::isinf(loglikelihood) && loglikelihood < 0 )
        {
            fthrow(Exception, "Log likelihood is negative infinity ... this seems to be a really improbable setting.");
        }

        // compute the objective Psi(f) = log p(y|f) - 0.5 * a^T f
        objective = - 0.5 * mode.scalarProduct(a);
        if ( NICE::isNaN(objective) )
            fthrow(Exception, "Objective function of the Laplace approximation could not be calculated numerically ... check your parameters!");
        objective += loglikelihood;

        if ( verbose )
            std::cerr << "findModeLaplace: objective = " << objective << std::endl;

        // relative change of the objective value
        double delta = fabs(oldobjective - objective) / (fabs(objective) + 1);
        if ( verbose )
            std::cerr << "findModeLaplace: delta = " << delta << std::endl;
        if ( delta < minimumDelta ) {
            break;
        }
        iteration++;
    }

    // re-evaluate the cached quantities (gradient, Hessian, Cholesky factor)
    // at the final mode
    updateCache( kernelMatrix, y, likelihoodFunction );
}
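
/** Predict the class probability for a test point (cf. Algorithm 3.2 of
 *  Rasmussen & Williams): the latent mean is f_* = k_*^T gradientL and the
 *  latent variance is V = k(x_*, x_*) - v^T v with v = L^{-1} W^{1/2} k_*,
 *  where L is the Cholesky factor of B. The probability itself is only
 *  available analytically for the cumulative Gaussian likelihood. */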
double LaplaceApproximation::predict ( const Vector & kernelVector, double kernelSelf, const Vector & y,
                                       const LikelihoodFunction *likelihoodFunction ) const
{
    double fmean = kernelVector.scalarProduct ( gradientL );

    Vector wKernelVector ( kernelVector );
    for ( uint i = 0 ; i < wKernelVector.size(); i++ )
        wKernelVector[i] *= sqrt(hessianW[i]);
    Vector v;
    triangleSolve ( cholB, wKernelVector, v );

    double fvariance = kernelSelf - v.scalarProduct(v);
    if ( fvariance <= 0.0 )
        fthrow(Exception, "The predictive variance is not positive, something seems to be odd here! " <<
            "Did you use a normalized kernel?");

    double prob;
    const LHCumulativeGauss *likecumgauss = dynamic_cast<const LHCumulativeGauss *> ( likelihoodFunction );
    if ( likecumgauss ) {
        prob = likecumgauss->predictAnalytically ( fmean, fvariance );
    } else {
        fthrow(Exception, "Prediction for likelihood functions other than the cumulative Gaussian is not yet implemented!");
    }
    return prob;
}
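
/* A minimal usage sketch (hypothetical setup: how kernelData, y,
 * kernelVector, and kernelSelf are obtained depends on the surrounding
 * framework and is not shown here):
 *
 *   LHCumulativeGauss likelihood;
 *   LaplaceApproximation laplace;
 *   laplace.approximate ( kernelData, y, &likelihood );
 *   // kernelVector holds k(x_*, x_i) for all training points,
 *   // kernelSelf holds k(x_*, x_*)
 *   double prob = laplace.predict ( kernelVector, kernelSelf, y, &likelihood );
 */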