/**
 * @file GPRegressionOptimizationProblem.cpp
 * @brief Hyperparameter Optimization Problem for GP Regression
 * @author Erik Rodner
 * @date 12/09/2009
 */
#include <iostream>
#include <limits>

#include "core/vector/Algorithms.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/CholeskyRobustAuto.h"
#include "vislearning/math/kernels/OptimKernelParameterGradInterface.h"

#include "GPRegressionOptimizationProblem.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

GPRegressionOptimizationProblem::GPRegressionOptimizationProblem (
    KernelData *_kernelData,
    const NICE::Vector & _y,
    ParameterizedKernel *_kernel,
    bool verbose,
    const GPMSCLooLikelihoodRegression *_modelselcrit,
    const TraceApproximation *_traceApproximation )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData ( _kernelData ),
    kernel ( _kernel ),
    modelselcrit ( _modelselcrit ),
    traceApproximation ( _traceApproximation )
{
  this->verbose = verbose;
  this->y.push_back ( _y );
  this->kernel->getParameters ( parameters() );

  if ( verbose ) {
    cerr.precision ( 20 );
    cerr << "GPRegressionOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}

GPRegressionOptimizationProblem::GPRegressionOptimizationProblem (
    KernelData *_kernelData,
    const VVector & _y,
    ParameterizedKernel *_kernel,
    bool verbose,
    const GPMSCLooLikelihoodRegression *_modelselcrit,
    const TraceApproximation *_traceApproximation )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData ( _kernelData ),
    y ( _y ),
    kernel ( _kernel ),
    modelselcrit ( _modelselcrit ),
    traceApproximation ( _traceApproximation )
{
  this->verbose = verbose;
  this->kernel->getParameters ( parameters() );

  if ( verbose ) {
    cerr.precision ( 20 );
    cerr << "GPRegressionOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}
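
/*
 * Minimal usage sketch. The concrete KernelData setup and the
 * FirstOrderRasmussen optimizer from NICE's gradient-based optimization
 * module are assumptions and may differ in the surrounding code base:
 *
 *   KernelData kernelData ( &conf );
 *   kernel.updateKernelData ( &kernelData );
 *   GPRegressionOptimizationProblem problem ( &kernelData, y, &kernel,
 *                                             true, &modelselcrit, NULL );
 *   FirstOrderRasmussen optimizer;
 *   optimizer.optimizeFirst ( problem );  // minimizes the negative log-likelihood
 *   problem.useLooParameters ();          // optionally switch to the LOO-best parameters
 */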
void GPRegressionOptimizationProblem::update ()
{
  kernel->setParameters ( parameters() );
  kernel->updateKernelData ( kernelData );
  kernelData->updateCholeskyFactorization();
}

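/*
 * The objective is the negative log marginal likelihood of GP regression
 * (Rasmussen & Williams, eq. 5.8), summed over all T target vectors y_i
 * that share the same kernel matrix K:
 *
 *   -log p(y|X) = 0.5 * sum_i y_i^T K^{-1} y_i + (T/2) * log det K  (+ const.)
 *
 * The constant (n*T/2) * log(2*pi) is omitted, since it does not affect the
 * location of the minimum.
 */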
double GPRegressionOptimizationProblem::computeObjective()
{
  if ( verbose ) {
    cerr << "GPRegressionOptimizationProblem: computeObjective: " << parameters() << endl;
    // cerr << "GPRegressionOptimizationProblem: matrix size: " << this->kernelData->getKernelMatrixSize() << endl;
  }

  try {
    update();
  } catch ( const Exception & ) {
    // the Cholesky factorization failed: the kernel matrix is (numerically) singular
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: kernel matrix is singular" << endl;
    return numeric_limits<double>::max();
  }

  double loglikelihood = 0.0;
  uint kernelsize = this->kernelData->getKernelMatrixSize();
  if ( kernelsize == 0 )
    fthrow(Exception, "GPRegressionOptimizationProblem: kernel size is zero!");

  // data fit term: 0.5 * y_i^T K^{-1} y_i for every target vector y_i
  Vector invX ( kernelsize );
  for ( VVector::const_iterator i = y.begin(); i != y.end(); i++ )
  {
    this->kernelData->computeInverseKernelMultiply ( *i, invX );
    loglikelihood += 0.5 * i->scalarProduct ( invX );
  }

  // complexity term: (T/2) * log det K, shared by all T target vectors
  loglikelihood += y.size() * 0.5 * this->kernelData->getLogDetKernelMatrix();

  if ( NICE::isNaN ( loglikelihood ) )
  {
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: loglikelihood is undefined (logdet=" << this->kernelData->getLogDetKernelMatrix() << ")" << endl;
    loglikelihood = std::numeric_limits<double>::max();
  }

  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: negative log likelihood = " << loglikelihood << endl;

  return loglikelihood;
}

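/*
 * Gradient of the negative log marginal likelihood with respect to a kernel
 * parameter theta_k (Rasmussen & Williams, eq. 5.9), with alpha_i = K^{-1} y_i:
 *
 *   d(-log p)/d(theta_k) = 0.5 * tr( (T * K^{-1} - sum_i alpha_i alpha_i^T) * dK/d(theta_k) )
 *
 * The code below accumulates W = - sum_i alpha_i alpha_i^T once and reuses it
 * for every parameter; the trace term tr(K^{-1} dK/d(theta_k)) is either
 * computed exactly or replaced by a stochastic trace approximation.
 */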
void GPRegressionOptimizationProblem::computeGradient ( NICE::Vector & newGradient )
{
  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: gradient computation : " << parameters() << endl;

  OptimKernelParameterGradInterface *optimKernel =
    dynamic_cast<OptimKernelParameterGradInterface *> ( kernel );

  if ( optimKernel == NULL )
  {
    newGradient.resize ( kernel->getParameterSize() );

    if ( traceApproximation == NULL )
    {
      kernelData->updateInverseKernelMatrix();
      if ( verbose ) {
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        cerr << "GPRegressionOptimizationProblem: max(K^{-1}) = " << invKernelMatrix.Max() << endl;
        cerr << "GPRegressionOptimizationProblem: min(K^{-1}) = " << invKernelMatrix.Min() << endl;
      }
    }

    Matrix W;
    // Compute the parts of the gradient that are shared among all
    // derivatives in advance:
    //   alpha_i = K^{-1} y_i   and   W = - sum_i alpha_i alpha_i^T
    uint task = 0;
    double modelSelCritVal = 0.0;
    for ( VVector::const_iterator i = y.begin(); i != y.end(); i++, task++ )
    {
      const Vector & ySingle = *i;
      Vector alpha ( ySingle.size() );
      this->kernelData->computeInverseKernelMultiply ( ySingle, alpha );

      if ( (W.rows() == 0) || (W.cols() == 0) )
      {
        W.resize ( alpha.size(), alpha.size() );
        W.set ( 0.0 );
      }
      W.addTensorProduct ( -1.0, alpha, alpha );

      // We cannot compute LOO estimates if the inverse kernel matrix was
      // not computed (this happens with the trace approximation).
      if ( (traceApproximation == NULL) && (modelselcrit != NULL) )
      {
        // compute the additional LOO model selection criterion,
        // e.g. the LOO error (cf. page 116, section 5.4.2, Rasmussen & Williams)
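        // The LOO predictive moments can be read off K^{-1} and alpha
        // (Rasmussen & Williams, eqs. 5.10-5.12):
        //   mu_i      = y_i - alpha_i / [K^{-1}]_{ii}
        //   sigma_i^2 = 1 / [K^{-1}]_{ii}
        // hence only the diagonal of K^{-1} is passed to the criterion.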
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        Vector invKernelMatrixDiag ( invKernelMatrix.rows() );
        for ( uint z = 0 ; z < invKernelMatrixDiag.size() ; z++ )
          invKernelMatrixDiag[z] = invKernelMatrix(z,z);

        double modelSelCritValSingle =
          modelselcrit->computeObjective ( invKernelMatrixDiag, alpha, ySingle );
        if ( verbose )
          cerr << "GPRegressionOptimizationProblem: model selection criterion (task " << task << ") = "
               << modelSelCritValSingle << endl;
        modelSelCritVal += modelSelCritValSingle;
      }
    }

    // update the best LOO parameters
    if ( (traceApproximation == NULL) && (modelselcrit != NULL) )
    {
      modelSelCritVal /= y.size();
      modelSelCritVal = - modelSelCritVal;
      if ( verbose )
        cerr << "GPRegressionOptimizationProblem: loo error = " << modelSelCritVal << endl;
      // We use <= instead of <: among parameters with an equal model selection
      // criterion value, the ones visited later during the optimization tend to
      // have a higher likelihood, so ties are resolved in favor of the current
      // parameters.
      if ( modelSelCritVal <= bestAvgLooError )
      {
        bestAvgLooError = modelSelCritVal;
        bestLooParameters = parameters();
      }
    }

    // now compute the gradient for every kernel parameter
    for ( unsigned int k = 0 ; k < kernel->getParameterSize() ; k++ )
    {
      Matrix kernelJacobi;
      kernel->getKernelJacobi ( k, parameters(), kernelData, kernelJacobi );

      // data fit part: tr(W * dK/dtheta_k) = - sum_i alpha_i^T (dK/dtheta_k) alpha_i
      double volumeDerivative = 0.0;
      for ( unsigned int i = 0 ; i < kernelJacobi.rows(); i++ )
        for ( unsigned int j = 0 ; j < kernelJacobi.cols(); j++ )
          volumeDerivative += kernelJacobi(i,j) * W(i,j);

      // complexity part: tr(K^{-1} * dK/dtheta_k), exact or approximated
      double traceterm = 0.0;
      if ( traceApproximation != NULL ) {
        traceterm = traceApproximation->getApproximateTraceTerm ( kernelData, kernelJacobi );
      } else {
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        for ( unsigned int i = 0 ; i < kernelJacobi.rows(); i++ )
          for ( unsigned int j = 0 ; j < kernelJacobi.cols(); j++ )
            traceterm += kernelJacobi(i,j) * invKernelMatrix(i,j);
      }

      newGradient[k] = 0.5 * ( volumeDerivative + y.size() * traceterm );
    }
  } else {
    // This branch exists for efficiency only: the kernel computes all
    // gradients for the regression case itself.
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: efficient gradient computation" << endl;
    optimKernel->calculateGPRegGradientOptimized ( parameters(), kernelData, y, newGradient );
  }

  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: gradient = " << newGradient << endl;
}

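/*
 * Replace the current parameters by the ones that achieved the best average
 * LOO error seen during the optimization, and refresh the Cholesky
 * factorization accordingly. Only available if the inverse kernel matrix was
 * computed (no trace approximation) and a model selection criterion was given.
 */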
void GPRegressionOptimizationProblem::useLooParameters ()
{
  if ( traceApproximation != NULL )
    fthrow(Exception, "LOO parameter estimate is not available: the trace approximation was used, so no inverse kernel matrix has been computed.");
  if ( modelselcrit == NULL )
    fthrow(Exception, "No additional model selection criterion was specified (pass one to the constructor of this class).");
  parameters() = bestLooParameters;
  update();
}