/**
 * @file GPLaplaceOptimizationProblem.cpp
 * @brief Hyperparameter Optimization Problem for GP Regression
 * @author Erik Rodner
 * @date 12/09/2009
 */
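
/* This class provides the objective of GP hyperparameter optimization with a
 * Laplace approximation, the negative log marginal likelihood
 * (computeObjective()), together with its gradient with respect to the kernel
 * parameters (computeGradient()), in the form expected by first-order
 * optimizers operating on OptimizationProblemFirst. */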

#include <iostream>
#include <limits>

#include "core/vector/Algorithms.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/CholeskyRobustAuto.h"
#include "vislearning/math/kernels/OptimKernelParameterGradInterface.h"

#include "GPLaplaceOptimizationProblem.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

GPLaplaceOptimizationProblem::GPLaplaceOptimizationProblem ( KernelData *_kernelData,
    const NICE::Vector & _y,
    ParameterizedKernel *_kernel,
    const LikelihoodFunction *_likelihoodFunction,
    LaplaceApproximation *_laplaceApproximation,
    bool verbose )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData(_kernelData),
    kernel(_kernel),
    likelihoodFunction(_likelihoodFunction)
{
  this->verbose = verbose;
  this->y.push_back ( _y );
  this->kernel->getParameters ( parameters() );
  this->laplaceApproximation.push_back ( _laplaceApproximation );

  if ( verbose ) {
    cerr.precision(20);
    cerr << "GPLaplaceOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}

GPLaplaceOptimizationProblem::GPLaplaceOptimizationProblem ( KernelData *_kernelData,
    const VVector & _y,
    ParameterizedKernel *_kernel,
    const LikelihoodFunction *_likelihoodFunction,
    const vector<LaplaceApproximation *> & _laplaceApproximation,
    bool verbose )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData(_kernelData),
    y(_y),
    kernel(_kernel),
    likelihoodFunction(_likelihoodFunction),
    laplaceApproximation(_laplaceApproximation)
{
  this->verbose = verbose;
  this->kernel->getParameters ( parameters() );

  if ( verbose ) {
    cerr.precision(20);
    cerr << "GPLaplaceOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  if ( y.size() != laplaceApproximation.size() )
    fthrow(Exception, "Number of label vectors and number of Laplace approximation objects do not match.");

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}

GPLaplaceOptimizationProblem::~GPLaplaceOptimizationProblem()
{
}

void GPLaplaceOptimizationProblem::update ()
{
  // push the current parameter values into the kernel function and
  // recompute the kernel matrix and its Cholesky factorization
  kernel->setParameters ( parameters() );
  kernel->updateKernelData ( kernelData );
  kernelData->updateCholeskyFactorization();

  // redo the Laplace approximation for every task
  for ( uint i = 0 ; i < y.size(); i++ )
    laplaceApproximation[i]->approximate ( kernelData, y[i], likelihoodFunction );
}

double GPLaplaceOptimizationProblem::computeObjective()
{
  if ( verbose ) {
    cerr << "GPLaplaceOptimizationProblem: computeObjective: " << parameters() << endl;
  }

  try {
    update();
  } catch ( const Exception & ) {
    // the Cholesky factorization failed, i.e. the kernel matrix is numerically
    // singular for the current parameters; return a huge objective value so
    // that the optimizer retreats from this part of the parameter space
    if ( verbose )
      cerr << "GPLaplaceOptimizationProblem: kernel matrix might be singular" << endl;
    return numeric_limits<double>::max();
  }

  double loglikelihood = 0.0;
  /* Objective = - Laplace objective + 0.5 logdet(B)
   * with Laplace objective = - 0.5 a^T f + log p(y|f)
   * and B = I + W^0.5 K W^0.5
   */
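  /* Note on the logdet term: with the Cholesky factor L (L L^T = B) we have
   * 0.5 logdet(B) = sum_i log L_ii, which is exactly the value that
   * triangleMatrixLogDet ( getCholeskyB() ) returns below, therefore no
   * explicit factor of 0.5 appears in the code. */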
  for ( uint k = 0 ; k < laplaceApproximation.size() ; k++ )
    loglikelihood += - laplaceApproximation[k]->getObjective()
                     + triangleMatrixLogDet ( laplaceApproximation[k]->getCholeskyB() );

  if ( verbose ) {
    cerr << "GPLaplaceOptimizationProblem: negative log marginal likelihood = " << loglikelihood << endl;
  }

  return loglikelihood;
}

void GPLaplaceOptimizationProblem::computeGradient ( NICE::Vector & newGradient )
{
  if ( verbose )
    cerr << "GPLaplaceOptimizationProblem: gradient computation: " << parameters() << endl;

  newGradient.resize ( parameters().size() );
  newGradient.set ( 0.0 );

  for ( uint task = 0 ; task < laplaceApproximation.size(); task++ )
  {
    // NOT OPTIMIZED: some quantities could be hoisted out of this loop.
    // The following code is a direct re-implementation of the MATLAB
    // pseudo-code of Algorithm 5.1 in Rasmussen and Williams, "Gaussian
    // Processes for Machine Learning", 2006; the line numbers in the
    // comments below refer to the lines of that algorithm.

    // negative Hessian (a diagonal matrix) of log p(y|f)
    const Vector & w = laplaceApproximation[task]->getHessian();
    // Cholesky factor of B: L L^T = B = I + W^0.5 K W^0.5
    const Matrix & L = laplaceApproximation[task]->getCholeskyB();
    // posterior mode found by the Laplace approximation
    const Vector & f = laplaceApproximation[task]->getMode();
    // our kernel matrix
    const Matrix & kernelMatrix = kernelData->getKernelMatrix();
    //cerr << "K size: " << kernelMatrix.rows() << " x " << kernelMatrix.cols() << endl;

    // ------------- line 7: R = W^0.5 (L^T \ (L \ W^0.5))
    // W is a diagonal matrix, R is therefore a scaled variant of the
    // inverse of B, in fact: R = (W^-1 + K)^-1
    // matlab: Z = repmat(sW,1,n).*solve_chol(L, diag(sW));
    //cerr << "Hessian size: " << w.size() << endl;
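    /* Why R = (W^-1 + K)^-1 holds: with L L^T = B = I + W^0.5 K W^0.5,
     *   W^0.5 B^-1 W^0.5 = ( W^-0.5 B W^-0.5 )^-1
     *                    = ( W^-0.5 (I + W^0.5 K W^0.5) W^-0.5 )^-1
     *                    = ( W^-1 + K )^-1 . */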
    Matrix R ( w.size(), w.size(), 0.0 );
    for ( uint i = 0 ; i < w.size(); i++ )
      R(i,i) = sqrt( w[i] );

    // solve B X = W^0.5 with the Cholesky factor L, i.e. X = L^T \ (L \ W^0.5)
    choleskySolveMatrixLargeScale ( L, R );

    // the leading W^0.5 * ... corresponds to a scaling of all rows
    for ( uint i = 0 ; i < R.rows(); i++ )
      for ( uint j = 0 ; j < R.cols(); j++ )
        R(i,j) *= sqrt( w[i] );

    // ------------ line 8: C = L \ (W^0.5 K)
    // matlab: C = L'\(repmat(sW,1,n).*K);
    Matrix C ( kernelMatrix );
    for ( uint i = 0 ; i < C.rows(); i++ )
      for ( uint j = 0 ; j < C.cols(); j++ )
        C(i,j) *= sqrt( w[i] );
    triangleSolveMatrix ( L, C );

    // ----------- line 9: s2 = - 0.5 diag( diag(K) - diag(C^T C) ) nabla^3 log p(y|f)
    // note: there is a sign error in the book's line 9, the correct sign is +0.5
    // matlab: s2 = 0.5*(diag(K)-sum(C.^2,1)').*d3lp;
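    /* Interpretation: C^T C = K W^0.5 B^-1 W^0.5 K = K (K + W^-1)^-1 K, hence
     * diag(K) - diag(C^T C) is the diagonal of the approximate posterior
     * covariance K - K (K + W^-1)^-1 K of f. Each entry of s2 thus weights the
     * third derivative of the log likelihood with the posterior variance of
     * the corresponding training example. */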
    Vector s2 ( w.size() );
    for ( uint i = 0 ; i < w.size() ; i++ )
    {
      double nabla3 = likelihoodFunction->thirdgrad ( y[task][i], f[i] );
      Vector cColumn = C.getColumnRef(i);
      s2[i] = 0.5 * ( kernelMatrix(i,i) - cColumn.scalarProduct(cColumn) ) * nabla3;
    }

    // a = K^{-1} * f (already computed by the Laplace approximation)
    const Vector & a = laplaceApproximation[task]->getAVector();
    // Since f is a stationary point of the Laplace objective, it satisfies the
    // consistency equation f = K * gradientL with gradientL = nabla log p(y|f).
    const Vector & gradientL = laplaceApproximation[task]->getGradient();
    // Combining both equations shows that a = gradientL should hold.
    /* DEBUG: if ( (f - kernelMatrix * a).normL2() > 1e-10 ) */
    /* DEBUG: if ( (f - kernelMatrix * gradientL).normL2() > 1e-10 ) */
    /* DEBUG: if ( (a - gradientL).normL2() > 1e-10 )
         fthrow(Exception, "Oops! The a vector seems to be wrong!"); */

    // loop over all hyperparameters: for k = 1 .. dim(theta)
    for ( uint k = 0 ; k < parameters().size() ; k++ )
    {
      // ------------ line 12
      // matlab: dK = feval(covfunc{:}, hyper, x, j);
      // C is reused here as a buffer for the Jacobi matrix dK/dtheta_k;
      // hoisting this computation out of the task loop would be a possible
      // optimization
      kernel->getKernelJacobi ( k, parameters(), kernelData, C );

      // matlab: s1 = alpha'*dK*alpha/2-sum(sum(Z.*dK))/2;
      double s1 = C.bilinear ( a );
      // tr(R C) is the sum of the element-wise products,
      // because both R and dK (stored in C) are symmetric
      for ( uint i = 0 ; i < R.rows(); i++ )
        for ( uint j = 0 ; j < R.cols(); j++ )
          s1 -= R(i,j) * C(i,j);
      s1 *= 0.5;

      // ------------ line 13
      // matlab: b = dK*dlp;
      Vector b = C * gradientL;

      // ------------ line 14
      // matlab: s3 = b-K*(Z*b);
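      /* The term s3 equals the derivative of the mode f with respect to
       * theta_k: the gradient of the log marginal likelihood consists of an
       * explicit part s1, which differentiates the objective through dK
       * directly, and an implicit part s2^T s3, which accounts for the
       * movement of the mode f when theta_k changes. */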
      Vector s3 = b - kernelMatrix*(R*b);

      // In contrast to Rasmussen and Williams we have to flip the sign,
      // because we are minimizing the negative log marginal likelihood
      // instead of maximizing the log marginal likelihood.
      // matlab: dnlZ(j) = -s1-s2'*s3;
      newGradient[k] += - s1 - s2.scalarProduct(s3);
    }
  }

  if ( verbose )
    cerr << "GPLaplaceOptimizationProblem: gradient = " << newGradient << endl;
}

void GPLaplaceOptimizationProblem::useLooParameters ( )
{
  // switch to the best hyperparameters found by leave-one-out model selection
  // and bring the kernel and the Laplace approximation in sync with them
  parameters() = bestLooParameters;
  update();
}
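
/* Usage sketch (illustration only, under the assumption that a gradient-based
 * solver operating on OptimizationProblemFirst is available;
 * "FirstOrderRasmussen" is one such solver in this library, any other
 * first-order optimizer works the same way):
 *
 *   GPLaplaceOptimizationProblem problem ( &kernelData, y, &kernel,
 *                                          &likelihood, &laplace, false );
 *   FirstOrderRasmussen optimizer;
 *   optimizer.optimizeFirst ( problem ); // minimizes computeObjective()
 *   problem.update();                    // sync kernel with the final parameters
 */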