/** 
 * @file GPRegressionOptimizationProblem.cpp
 * @brief Hyperparameter Optimization Problem for GP Regression
 * @author Erik Rodner
 * @date 12/09/2009
 */
#include <iostream>
#include <cmath>
#include <limits>

#include "core/vector/Algorithms.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/algebra/CholeskyRobustAuto.h"
#include "vislearning/math/kernels/OptimKernelParameterGradInterface.h"

#include "GPRegressionOptimizationProblem.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
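
// This class wraps GP hyperparameter optimization as a first-order
// optimization problem: parameters() holds the current kernel
// hyperparameters, computeObjective() returns the negative log likelihood
// of the training targets, and computeGradient() its derivatives with
// respect to the hyperparameters.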
GPRegressionOptimizationProblem::GPRegressionOptimizationProblem (
    KernelData *_kernelData,
    const NICE::Vector & _y,
    ParameterizedKernel *_kernel,
    bool verbose,
    const GPMSCLooLikelihoodRegression *_modelselcrit,
    const TraceApproximation *_traceApproximation )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData(_kernelData),
    kernel(_kernel),
    modelselcrit(_modelselcrit),
    traceApproximation(_traceApproximation)
{
  this->verbose = verbose;
  this->y.push_back ( _y );
  this->kernel->getParameters( parameters() );

  if ( verbose ) {
    cerr.precision(20);
    cerr << "GPRegressionOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}
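
// Multi-task variant: each entry of _y is a separate vector of regression
// targets; all tasks share the same kernel (and therefore the same
// hyperparameters), so their likelihood terms are simply summed up.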
GPRegressionOptimizationProblem::GPRegressionOptimizationProblem (
    KernelData *_kernelData,
    const VVector & _y,
    ParameterizedKernel *_kernel,
    bool verbose,
    const GPMSCLooLikelihoodRegression *_modelselcrit,
    const TraceApproximation *_traceApproximation )
  : OptimizationProblemFirst ( _kernel->getParameterSize() ),
    kernelData(_kernelData),
    y(_y),
    kernel(_kernel),
    modelselcrit(_modelselcrit),
    traceApproximation(_traceApproximation)
{
  this->verbose = verbose;
  this->kernel->getParameters( parameters() );

  if ( verbose ) {
    cerr.precision(20);
    cerr << "GPRegressionOptimizationProblem: initial parameters = " << parameters() << endl;
  }

  bestLooParameters.resize ( parameters().size() );
  bestAvgLooError = numeric_limits<double>::max();
}
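
// Writes the current values of parameters() into the kernel, recomputes the
// kernel matrix stored in kernelData, and refreshes its Cholesky
// factorization. The factorization may throw for non-positive-definite
// matrices; computeObjective() catches this case.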
void GPRegressionOptimizationProblem::update ()
{
  kernel->setParameters( parameters() );
  kernel->updateKernelData ( kernelData );
  kernelData->updateCholeskyFactorization();
}
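
// Negative log marginal likelihood of GP regression, summed over all tasks
// and with the hyperparameter-independent (n/2) log(2*pi) term dropped:
//
//   nlZ = 0.5 * \sum_i y_i^T K^{-1} y_i + 0.5 * T * log det K
//
// with T = y.size() tasks sharing the kernel matrix K
// (cf. Rasmussen & Williams, "Gaussian Processes for Machine Learning", ch. 5).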
double GPRegressionOptimizationProblem::computeObjective()
{
  if ( verbose ) {
    cerr << "GPRegressionOptimizationProblem: computeObjective: " << parameters() << endl;
    // cerr << "GPRegressionOptimizationProblem: matrix size: " << this->kernelData->getKernelMatrixSize() << endl;
  }

  try {
    update();
  } catch ( Exception ) {
    // inversion problems
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: kernel matrix is singular" << endl;
    return numeric_limits<double>::max();
  }

  double loglikelihood = 0.0;

  uint kernelsize = this->kernelData->getKernelMatrixSize();
  if ( kernelsize <= 0 )
    fthrow(Exception, "GPRegressionOptimizationProblem: kernel size is zero!");

  Vector invX ( kernelsize );
  for ( VVector::const_iterator i = y.begin(); i != y.end(); i++ )
  {
    this->kernelData->computeInverseKernelMultiply ( *i, invX );
    loglikelihood += 0.5 * i->scalarProduct ( invX );
  }
  loglikelihood += y.size() * 0.5 * this->kernelData->getLogDetKernelMatrix();

  if ( isnan(loglikelihood) )
  {
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: loglikelihood is undefined (logdet=" << this->kernelData->getLogDetKernelMatrix() << ")" << endl;
    loglikelihood = std::numeric_limits<double>::max();
  }

  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: negative log likelihood = " << loglikelihood << endl;

  return loglikelihood;
}
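
// Gradient of the negative log likelihood with respect to hyperparameter
// theta_k (standard GP identity, cf. Rasmussen & Williams, ch. 5):
//
//   d nlZ / d theta_k
//     = 0.5 * ( T * tr( K^{-1} dK/dtheta_k ) - \sum_i alpha_i^T (dK/dtheta_k) alpha_i ),
//   with alpha_i = K^{-1} y_i.
//
// Below, W accumulates -\sum_i alpha_i alpha_i^T, volumeDerivative is the
// data-fit part \sum_{r,s} (dK/dtheta_k)_{rs} W_{rs}, and traceterm is either
// the exact trace tr( K^{-1} dK/dtheta_k ) or an approximation of it when a
// TraceApproximation object was supplied. As a by-product, the leave-one-out
// (LOO) model selection criterion is evaluated and the best parameters seen
// so far are remembered (see useLooParameters()).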
void GPRegressionOptimizationProblem::computeGradient( NICE::Vector& newGradient )
{
  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: gradient computation : " << parameters() << endl;

  OptimKernelParameterGradInterface *optimKernel = NULL;
  optimKernel = dynamic_cast <OptimKernelParameterGradInterface *> ( kernel );

  if ( optimKernel == NULL )
  {
    newGradient.resize ( kernel->getParameterSize() );

    if ( traceApproximation == NULL )
    {
      kernelData->updateInverseKernelMatrix();
      if ( verbose ) {
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        cerr << "GPRegressionOptimizationProblem: max(K^{-1}) = " << invKernelMatrix.Max() << endl;
        cerr << "GPRegressionOptimizationProblem: min(K^{-1}) = " << invKernelMatrix.Min() << endl;
      }
    }

    Matrix W;

    // Compute some parts of the gradient in advance,
    // which are shared among all derivatives:
    // K^{-1} y_i = alpha_i
    // and \sum_i alpha_i * alpha_i^T
    uint k = 0;
    double modelSelCritVal = 0.0;
    for ( VVector::const_iterator i = y.begin(); i != y.end(); i++, k++ )
    {
      const Vector & ySingle = *i;
      Vector alpha ( ySingle.size() );
      this->kernelData->computeInverseKernelMultiply ( ySingle, alpha );

      if ( (W.rows() == 0) || (W.cols() == 0) )
      {
        W.resize ( alpha.size(), alpha.size() );
        W.set(0.0);
      }
      W.addTensorProduct ( - 1.0, alpha, alpha );

      // we are unable to compute loo estimates, if we did not compute
      // the inverse kernel matrix (this happens with trace approximation)
      if ( (traceApproximation == NULL) && (modelselcrit != NULL) )
      {
        // compute additional loo model selection criterion,
        // e.g. loo error (cf. page 116, section 5.4.2, Rasmussen & Williams)
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        Vector invKernelMatrixDiag ( invKernelMatrix.rows() );
        for ( uint z = 0 ; z < invKernelMatrixDiag.size() ; z++ )
          invKernelMatrixDiag[z] = invKernelMatrix(z,z);
        double modelSelCritValSingle =
          modelselcrit->computeObjective ( invKernelMatrixDiag, alpha, ySingle );
        if ( verbose )
          cerr << "GPRegressionOptimizationProblem: model selection criterion (task " << k << " ) = "
               << modelSelCritValSingle << endl;
        modelSelCritVal += modelSelCritValSingle;
      }
    }

    // update best loo parameters
    if ( (traceApproximation == NULL) && (modelselcrit != NULL) )
    {
      modelSelCritVal /= y.size();
      modelSelCritVal = - modelSelCritVal;
      if ( verbose )
        cerr << "GPRegressionOptimizationProblem: loo error = " << modelSelCritVal << endl;

      // we use <= instead of <, because for parameters with equal
      // additional model selection criteria values we have a higher
      // likelihood for the current parameters
      if ( modelSelCritVal <= bestAvgLooError )
      {
        bestAvgLooError = modelSelCritVal;
        bestLooParameters = parameters();
      }
    }

    // Now: compute gradients
    for ( unsigned int k = 0 ; k < kernel->getParameterSize() ; k++ )
    {
      Matrix kernelJacobi;
      kernel->getKernelJacobi ( k, parameters(), kernelData, kernelJacobi );

      double volumeDerivative = 0.0;
      for ( unsigned int i = 0 ; i < kernelJacobi.rows(); i++ )
        for ( unsigned int j = 0 ; j < kernelJacobi.cols(); j++ )
          volumeDerivative += kernelJacobi(i,j) * W(i,j);

      double traceterm = 0.0;
      if ( traceApproximation != NULL ) {
        traceterm = traceApproximation->getApproximateTraceTerm ( kernelData, kernelJacobi );
      } else {
        const Matrix & invKernelMatrix = this->kernelData->getInverseKernelMatrix();
        for ( unsigned int i = 0 ; i < kernelJacobi.rows(); i++ )
          for ( unsigned int j = 0 ; j < kernelJacobi.cols(); j++ )
            traceterm += kernelJacobi(i,j) * invKernelMatrix(i,j);
      }

      newGradient[k] = 0.5 * ( volumeDerivative + y.size() * traceterm );
    }
  } else {
    // This is for optimization purposes only: the kernel will compute
    // all gradients for the regression case
    if ( verbose )
      cerr << "GPRegressionOptimizationProblem: efficient gradient computation" << endl;
    optimKernel->calculateGPRegGradientOptimized ( parameters(), kernelData, y, newGradient );
  }

  if ( verbose )
    cerr << "GPRegressionOptimizationProblem: gradient = " << newGradient << endl;
}
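
// Overwrites the current parameters with the ones that achieved the best
// average LOO error observed during the gradient computations and refreshes
// the kernel data accordingly.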
void GPRegressionOptimizationProblem::useLooParameters ( )
{
  if ( traceApproximation != NULL )
    fthrow(Exception, "LOO parameter estimate is not available due to trace approximation and no inverse kernel matrix.");
  if ( modelselcrit == NULL )
    fthrow(Exception, "No additional model selection performed (specify one in the constructor of this class).");
  parameters() = bestLooParameters;
  update();
}
  211. }