TestLaplace.cpp

#ifdef NICE_USELIB_CPPUNIT

#include "TestLaplace.h"

#include <string>
#include <iostream>
#include <stdexcept>

#include <core/basics/cppunitex.h>
#include <core/basics/numerictools.h>

#include <vislearning/classifier/kernelclassifier/LikelihoodFunction.h>
#include <vislearning/classifier/kernelclassifier/GPLaplaceOptimizationProblem.h>
#include <vislearning/classifier/kernelclassifier/LaplaceApproximation.h>
#include <vislearning/classifier/kernelclassifier/LHCumulativeGauss.h>
#include <vislearning/math/kernels/KernelLinearCombination.h>

using namespace NICE;
using namespace OBJREC;
using namespace std;

CPPUNIT_TEST_SUITE_REGISTRATION( TestLaplace );

void TestLaplace::setUp() {
}

void TestLaplace::tearDown() {
}
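
/* testLikelihoodDiff: numerically checks the analytic derivatives of the
 * cumulative Gaussian likelihood. For each pair (logLike/gradient,
 * gradient/hessian, hessian/thirdgrad), a finite-difference approximation
 * of the first function is compared against the analytic second one on a
 * grid over [0,1).
 */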
void TestLaplace::testLikelihoodDiff()
{
  uint numSteps = 10000;
  double lengthScale = 1.3;
  double bias = 0.4;
  LHCumulativeGauss l ( lengthScale, bias );

  // degree 0: log-likelihood, degree 1: first derivative, degree 2: second derivative
  for ( uint degree = 0 ; degree < 3 ; degree++ )
  {
    double oldv = 0.0;
    double error = 0.0;
    for ( uint i = 0 ; i < numSteps ; i++ )
    {
      double f = i / (double)numSteps;
      double v;
      if ( degree == 2 )
        v = l.hessian ( 1.0, f );
      else if ( degree == 1 )
        v = l.gradient ( 1.0, f );
      else
        v = l.logLike ( 1.0, f );

      if ( i > 0 )
      {
        // finite difference: ( v(f) - v(f - df) ) / df with df = 1/numSteps
        double d = ( v - oldv ) * numSteps;
        // analytic derivative of the current function
        double diff = 0.0;
        if ( degree == 2 )
          diff = l.thirdgrad ( 1.0, f );
        else if ( degree == 1 )
          diff = l.hessian ( 1.0, f );
        else
          diff = l.gradient ( 1.0, f );
        error += fabs ( d - diff );
      }
      oldv = v;
    }
    // average absolute deviation over the grid
    error /= numSteps;
    CPPUNIT_ASSERT_DOUBLES_EQUAL ( 0.0, error, 1e-4 );
  }
}
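
/* testCumGaussPredict: compares a Monte Carlo estimate of the predictive
 * integral E_{f ~ N(mean, variance)}[ L(1, f) ] against the closed-form
 * value returned by predictAnalytically().
 */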
void TestLaplace::testCumGaussPredict()
{
  uint numSteps = 100000;
  double lengthScale = 1.3;
  double bias = 0.4;
  LHCumulativeGauss l ( lengthScale, bias );

  double mean = 0.0;
  double variance = 1.0;

  // Monte Carlo estimate: average the likelihood over samples f ~ N(mean, variance)
  double sum = 0.0;
  for ( uint i = 0 ; i < numSteps ; i++ )
  {
    double f = randGaussDouble ( sqrt(variance) ) + mean;
    sum += l.likelihood ( 1.0, f );
  }
  sum /= numSteps;

  double fActual = l.predictAnalytically ( mean, variance );
  CPPUNIT_ASSERT_DOUBLES_EQUAL ( sum, fActual, 1e-2 );
}
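
/* testHyperParameterOptGradients: gradient check for the hyperparameter
 * optimization problem. The closed-form gradient returned by
 * computeGradient() is compared against a finite difference of
 * computeObjective(), evaluated along the (log-parametrized) kernel scale.
 */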
void TestLaplace::testHyperParameterOptGradients()
{
  double step = 0.00001;
  double interval = 1.0;
  double a_max = 0.0;
  bool firstIteration = true;
  double oldObj = 0.0;

  Vector iparameters (1);
  iparameters[0] = log(1.0);

  // six 2D points; the first three are labeled -1, the remaining three +1
  Vector y (6, 1.0);
  for ( uint i = 0 ; i < 3 ; i++ )
    y[i] = -1.0;

  Matrix X (6, 2);
  X(0,0) = 1.0;  X(0,1) = 0.0;
  X(1,0) = 0.3;  X(1,1) = 0.4;
  X(2,0) = 0.8;  X(2,1) = 0.1;
  X(3,0) = -1.0; X(3,1) = 0.2;
  X(4,0) = -0.8; X(4,1) = 0.1;
  X(5,0) = -0.3; X(5,1) = 0.5;

  // kernel matrix: k(x_i, x_j) = exp( -||x_i - x_j|| )
  Matrix kernelMatrix (6,6);
  for ( uint i = 0 ; i < X.rows(); i++ )
    for ( uint j = 0 ; j < X.rows(); j++ )
      kernelMatrix(i,j) = exp( - (X.getRow(i) - X.getRow(j)).normL2() );

  KernelData kernelData;
  kernelData.setCachedMatrix ( 0, &kernelMatrix );
  KernelLinearCombination kernelFunction ( 1 );
  kernelFunction.updateKernelData ( &kernelData );

  LHCumulativeGauss likelihood;
  LaplaceApproximation laplaceApproximation;
  GPLaplaceOptimizationProblem gp_problem ( &kernelData, y, &kernelFunction, &likelihood, &laplaceApproximation, false );

  for ( double s = -interval ; s <= interval ; s += step )
  {
    Vector parameters ( iparameters );
    parameters[0] += s;
    gp_problem.setParameters ( parameters );

    double obj = gp_problem.computeObjective ();
    Vector gradient;
    CPPUNIT_ASSERT_NO_THROW ( gp_problem.computeGradient ( gradient ) );
    double deriv = gradient[0];

    if ( ! firstIteration )
    {
      // finite-difference approximation of the derivative along s
      double derivApprox = ( obj - oldObj ) / step;
      double a = fabs ( deriv - derivApprox );
      if ( a > a_max )
        a_max = a;
      // DEBUGGING:
      // cerr << "EVAL: " << parameters[0] << " objective " << obj << " D(closedform) " << deriv << " D(approximated) " << derivApprox << endl;
    }
    firstIteration = false;
    oldObj = obj;
  }
  CPPUNIT_ASSERT_DOUBLES_EQUAL ( 0.0, a_max, 1e-5 );
}
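
/* testModeFinding: verifies the mode found by the Laplace approximation,
 * first for an identity kernel matrix, where the mode is known
 * analytically, and then via the self-consistency condition
 * mode = K * gradLikelihood for a general kernel matrix.
 */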
void TestLaplace::testModeFinding()
{
  Config conf;
  //conf.sB("LaplaceApproximation", "laplace_verbose", true );
  LHCumulativeGauss likelihood;

  Vector y (6, 1.0);
  for ( uint i = 0 ; i < 3 ; i++ )
    y[i] = -1.0;

  /** Test 1: Check the special case of a diagonal kernel matrix */
  {
    Matrix K (6,6);
    K.setIdentity();
    KernelData kernelData ( &conf, K );

    /* With a diagonal kernel matrix, mode finding decouples into
     * independent one-dimensional problems.
     *
     * objective function (per component):
     *   log L(y_i, f_i) - 0.5 * f_i^2
     *   = log ( ( erf(y_i * f_i) + 1 ) / 2.0 ) - 0.5 * f_i^2
     * gnuplot:
     *   plot [-1:1] log ((erf(x)+1) / 2.0) - 0.5*x**2
     *
     * The mode f_i therefore obeys the stationarity condition
     *   (d/df_i) log L(y_i, f_i) - f_i = 0
     * with
     *   (d/df_i) log L(y_i, f_i) = y_i * exp(-f_i^2) / ( L(y_i, f_i) * sqrt(pi) )
     *   L(y_i, f_i) = ( erf(y_i * f_i) + 1 ) / 2.0
     *
     * For y_i = 1 this reduces to
     *   exp(-f_i^2) / sqrt(pi) = f_i * ( erf(f_i) + 1 ) / 2.0
     * gnuplot:
     *   plot [-1:1] exp(-x**2)/sqrt(pi), x * (erf(x)+1) / 2.0
     *
     * -> intersection at f_i = 0.541132
     */
    LaplaceApproximation laplaceApproximation ( &conf );
    laplaceApproximation.approximate ( &kernelData, y, &likelihood );
    const Vector & mode = laplaceApproximation.getMode();
    for ( uint k = 0 ; k < mode.size(); k++ )
    {
      double fActual = y[k] * mode[k];
      CPPUNIT_ASSERT_DOUBLES_EQUAL ( 0.541132, fActual, 1e-5 );
    }
  }

  /** Test 2: Check the self-consistent equation of the mode */
  {
    Matrix X (6, 2);
    X(0,0) = 1.0;  X(0,1) = 0.0;
    X(1,0) = 0.3;  X(1,1) = 0.4;
    X(2,0) = 0.8;  X(2,1) = 0.1;
    X(3,0) = -1.0; X(3,1) = 0.2;
    X(4,0) = -0.8; X(4,1) = 0.1;
    X(5,0) = -0.3; X(5,1) = 0.5;

    // kernel matrix: k(x_i, x_j) = exp( -||x_i - x_j|| )
    Matrix K (6,6);
    for ( uint i = 0 ; i < X.rows(); i++ )
      for ( uint j = 0 ; j < X.rows(); j++ )
        K(i,j) = exp( - (X.getRow(i) - X.getRow(j)).normL2() );

    KernelData kernelData ( &conf, K );
    LaplaceApproximation laplaceApproximation ( &conf );
    laplaceApproximation.approximate ( &kernelData, y, &likelihood );
    const Vector & mode = laplaceApproximation.getMode();

    Vector gradLikelihood ( mode.size() );
    for ( uint i = 0 ; i < mode.size(); i++ )
      gradLikelihood[i] = likelihood.gradient ( y[i], mode[i] );

    // the mode should obey the self-consistent equation mode = K * gradLikelihood
    // (cf. eq. 3.17 in Rasmussen and Williams)
    double fActual = ( mode - K * gradLikelihood ).normL2();
    CPPUNIT_ASSERT_DOUBLES_EQUAL ( 0.0, fActual, 1e-10 );
  }
}

#endif