/**
* @file LaplaceApproximation.cpp
* @brief some utility functions for laplace approximation
* @author Erik Rodner
* @date 02/17/2010
*/
#include <cmath>
#include <iostream>
#include <limits>

#include "core/vector/Algorithms.h"

#include "LaplaceApproximation.h"
#include "LHCumulativeGauss.h"

using namespace NICE;
using namespace OBJREC;
  14. LaplaceApproximation::LaplaceApproximation()
  15. {
  16. maxiterations = 40;
  17. minimumDelta = 1e-14;
  18. noiseTerm = 0.0;
  19. verbose = false;
  20. }
  21. LaplaceApproximation::LaplaceApproximation( const Config *conf, const std::string & section )
  22. {
  23. maxiterations = conf->gI(section, "laplace_max_iterations", 40 );
  24. minimumDelta = conf->gD(section, "laplace_minimum_delta", 1e-14 );
  25. // in my experiments (scene classification) changing this
  26. // additional noise parameter to a non-zero value is
  27. // not a good idea at all
  28. noiseTerm = conf->gD(section, "laplace_noise_term", 0.0);
  29. verbose = conf->gB(section, "laplace_verbose", false );
  30. }
  31. LaplaceApproximation::~LaplaceApproximation()
  32. {
  33. }
  34. void LaplaceApproximation::updateCache ( const Matrix & kernelMatrix, const Vector & y, const LikelihoodFunction *likelihoodFunction )
  35. {
  36. // compute hessianW = - nabla nable log p(y|f)
  37. for ( uint i = 0 ; i < mode.size(); i++ )
  38. hessianW[i] = - likelihoodFunction->hessian ( y[i], mode[i] );
  39. // B = I + hessianW^{1/2} K hessianW^{1/2}
  40. Matrix B ( kernelMatrix );
  41. // this should not be necessary according to the bible
  42. B.addIdentity(noiseTerm);
  43. for ( uint i = 0 ; i < B.rows() ; i++ )
  44. for ( uint j = 0 ; j < B.cols(); j++ )
  45. if ( i == j )
  46. B(i,i) *= hessianW[i];
  47. else
  48. B(i,j) *= sqrt(hessianW[i] * hessianW[j]);
  49. B.addIdentity(1.0);
  50. // calculate cholesky decomposition of B
  51. choleskyDecompLargeScale ( B, cholB );
  52. // calculate gradientL = nabla log p(y|f)
  53. for ( uint i = 0 ; i < mode.size(); i++ )
  54. gradientL[i] = likelihoodFunction->gradient( y[i], mode[i] );
  55. // calculate b = Wf + gradientL
  56. Vector b = gradientL;
  57. for ( uint i = 0 ; i < b.size(); i++ )
  58. b[i] = hessianW[i] * mode[i] + b[i];
  59. // calculate bb = W^{1/2} K b
  60. Vector bb = kernelMatrix * b;
  61. for ( uint i = 0 ; i < bb.size(); i++ )
  62. bb[i] = sqrt( hessianW[i] ) * bb[i];
  63. // calculate a = b - W^{1/2} B^{-1} bb
  64. choleskySolveLargeScale ( cholB, bb, a );
  65. for ( uint i = 0 ; i < a.size(); i++ )
  66. a[i] = b[i] - sqrt( hessianW[i] ) * a[i];
  67. }
  68. void LaplaceApproximation::approximate ( KernelData *kernelData, const Vector & y,
  69. const LikelihoodFunction *likelihoodFunction )
  70. {
  71. mode.resize ( y.size() );
  72. mode.set(0.0);
  73. // FIXME: we always start with zero, but we could
  74. // also start with an estimate of the approximation before
  75. // and compare with our zero estimate
  76. objective = 0.0;
  77. double oldobjective = objective;
  78. uint iteration = 0;
  79. hessianW.resize(y.size());
  80. gradientL.resize(y.size());
  81. const Matrix & kernelMatrix = kernelData->getKernelMatrix();
  82. while (iteration < maxiterations)
  83. {
  84. oldobjective = objective;
  85. // compute gradient hessian etc.
  86. updateCache( kernelMatrix, y, likelihoodFunction );
  87. // gradient of the objective function is gradientL - a
  88. if ( verbose )
  89. std::cerr << "findModeLaplace: gradient norm = " << (gradientL - a).normL2() << std::endl;
  90. mode = kernelMatrix * a;
  91. // compute loglikelihood
  92. double loglikelihood = 0.0;
  93. for ( uint i = 0 ; i < mode.size(); i++ )
  94. loglikelihood += likelihoodFunction->logLike ( y[i], mode[i] );
  95. if ( NICE::isNaN(loglikelihood) )
  96. fthrow(Exception, "Log-Likelihood p(y|f) could not be calculated numerically...check your parameters!");
  97. if ( isinf(loglikelihood) == 1 )
  98. {
  99. if ( verbose )
  100. {
  101. std::cerr << "findModeLaplace: log likelihood is positive infinity...we cannot optimize any more in this case." << std::endl;
  102. std::cerr << "findModeLaplace: mode = " << mode << std::endl;
  103. }
  104. break;
  105. }
  106. if ( isinf(loglikelihood) == -1 )
  107. {
  108. fthrow(Exception, "Log likelihood is negative infinity...this seems to be a really improbable setting.");
  109. }
  110. // compute objective
  111. objective = - 0.5 * mode.scalarProduct(a);
  112. if ( NICE::isNaN(objective) )
  113. fthrow(Exception, "Objective function of Laplace-Approximation could not be calculated numerically...check your parameters!");
  114. objective += loglikelihood;
  115. if ( verbose )
  116. std::cerr << "findModeLaplace: objective = " << objective << std::endl;
  117. double delta = fabs(oldobjective-objective)/(fabs(objective)+1);
  118. if ( verbose )
  119. std::cerr << "findModeLaplace: delta = " << delta << std::endl;
  120. if ( delta < minimumDelta ) {
  121. break;
  122. }
  123. iteration++;
  124. }
  125. // reevaluate
  126. updateCache( kernelMatrix, y, likelihoodFunction );
  127. }
  128. double LaplaceApproximation::predict ( const Vector & kernelVector, double kernelSelf, const Vector & y,
  129. const LikelihoodFunction *likelihoodFunction ) const
  130. {
  131. double fmean = kernelVector.scalarProduct ( gradientL );
  132. Vector wKernelVector ( kernelVector );
  133. for ( uint i = 0 ; i < wKernelVector.size(); i++ )
  134. wKernelVector[i] *= sqrt(hessianW[i]);
  135. Vector v;
  136. triangleSolve ( cholB, wKernelVector, v );
  137. double fvariance = kernelSelf - v.scalarProduct(v);
  138. if ( fvariance <= 0.0 )
  139. fthrow(Exception, "There seems to be something odd here: the variance is negative !" <<
  140. "Do you have used a normalized kernel ?");
  141. double prob;
  142. const LHCumulativeGauss *likecumgauss = dynamic_cast<const LHCumulativeGauss *> ( likelihoodFunction );
  143. if ( likecumgauss ) {
  144. prob = likecumgauss->predictAnalytically ( fmean, fvariance );
  145. } else {
  146. fthrow(Exception, "Prediction for likelihood functions which are not cumulative gaussians is not yet implemented!");
  147. }
  148. return prob;
  149. }