GPMSCLooEstimates.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /**
  2. * @file GPMSCLooEstimates.cpp
  3. * @brief Model selection criterions for Gaussian Processes
  4. * @author Erik Rodner
  5. * @date 03/07/2010
  6. */
  7. #include <iostream>
  8. #include "GPMSCLooEstimates.h"
  9. #include "vislearning/cbaselib/ClassificationResults.h"
  10. #include "vislearning/classifier/kernelclassifier/LikelihoodFunction.h"
  11. #include "core/algebra/CholeskyRobustAuto.h"
  12. #include "core/optimization/limun/FirstOrderTrustRegion.h"
  13. #include "core/vector/Algorithms.h"
  14. using namespace std;
  15. using namespace NICE;
  16. using namespace OBJREC;
  17. GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb )
  18. : weightedLooProb( _weightedLooProb )
  19. {
  20. }
  21. GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb, const NICE::Vector & _selection ) :
  22. weightedLooProb( _weightedLooProb ), selection( _selection )
  23. {
  24. }
  25. double GPMSCLooLikelihoodRegression::computeObjective ( KernelData *kernelData,
  26. const NICE::Vector & labels ) const
  27. {
  28. kernelData->updateCholeskyFactorization();
  29. kernelData->updateInverseKernelMatrix();
  30. const Matrix & invKernelMatrix = kernelData->getInverseKernelMatrix();
  31. Vector invKernelMatrixDiag ( invKernelMatrix.rows() );
  32. for ( uint l = 0 ; l < invKernelMatrixDiag.size(); l++ )
  33. invKernelMatrixDiag[l] = invKernelMatrix(l,l);
  34. Vector alpha;
  35. kernelData->computeInverseKernelMultiply ( labels, alpha );
  36. return computeObjective ( invKernelMatrixDiag, alpha, labels );
  37. }
  38. double GPMSCLooLikelihoodRegression::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
  39. const NICE::Vector & labels ) const
  40. {
  41. uint count_positives = 0;
  42. uint count_negatives = 0;
  43. for ( uint l = 0 ; l < labels.size(); l++ )
  44. if ( (selection.size() == 0) || (selection[l]) )
  45. {
  46. count_positives += (labels[l] > 0);
  47. count_negatives += (labels[l] <= 0);
  48. }
  49. double looPredictiveLogProb = 0.0;
  50. for ( uint l = 0 ; l < labels.size(); l++ )
  51. {
  52. if ( (selection.size() > 0) && (!selection[l]) )
  53. {
  54. continue;
  55. }
  56. double sigma2 = 1.0 / invKernelMatrixDiag[l];
  57. double loo_est = labels[l] - alpha[l] * sigma2;
  58. double singleLooProb = -0.5 * log( sigma2 ) - pow(labels[l] - loo_est, 2)/(2*sigma2);
  59. double weight = 1.0;
  60. if ( weightedLooProb )
  61. weight = labels[l] < 0 ? 1.0 / (double) (count_negatives)
  62. : 1.0 / (double) (count_positives);
  63. looPredictiveLogProb += 0.5*weight*singleLooProb;
  64. }
  65. return looPredictiveLogProb;
  66. }
  67. /************************* GPMSCLooLikelihoodClassification ***********/
  68. GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ()
  69. {
  70. }
  71. GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ( const NICE::Vector & _selection ) :
  72. GPMSCLooLikelihoodRegression ( false, _selection )
  73. {
  74. }
  75. double GPMSCLooLikelihoodClassification::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
  76. const NICE::Vector & labels ) const
  77. {
  78. Vector looMean (labels.size(), 0.0);
  79. Vector looVariance (labels.size(), 0.0);
  80. for ( uint l = 0 ; l < labels.size(); l++ )
  81. {
  82. if ( (selection.size() > 0) && (!selection[l]) )
  83. continue;
  84. looVariance[l] = ( 1.0 / invKernelMatrixDiag[l] );
  85. looMean[l] = (labels[l] - alpha[l] / invKernelMatrixDiag[l]);
  86. }
  87. GPMSCLooLikelihoodClassificationOptProb opt ( labels, selection, looMean, looVariance, false );
  88. FirstOrderTrustRegion *optimizer = new FirstOrderTrustRegion(0.1 /*std value for typical gradient */, false);
  89. optimizer->setEpsilonG ( 0.01 );
  90. optimizer->setMaxIterations ( 500 );
  91. optimizer->optimizeFirst ( opt );
  92. delete optimizer;
  93. return - opt.computeObjective();
  94. }
  95. /********************* GPMSCLooLikelihoodClassificationOptProb *****************/
  96. GPMSCLooLikelihoodClassificationOptProb::GPMSCLooLikelihoodClassificationOptProb ( const NICE::Vector & _labels,
  97. const NICE::Vector & _selection, const NICE::Vector & _looMu, const NICE::Vector & _looVariance, bool _verbose )
  98. : OptimizationProblemFirst(2), labels(_labels), selection(_selection), looMu(_looMu), looVariance(_looVariance),
  99. verbose(_verbose)
  100. {
  101. parameters().resize(2);
  102. parameters()[0] = 1.0;
  103. parameters()[1] = 0.0;
  104. }
  105. double GPMSCLooLikelihoodClassificationOptProb::computeObjective ()
  106. {
  107. double alpha = parameters()[0];
  108. double beta = parameters()[1];
  109. double nloglike = 0.0;
  110. for ( uint l = 0 ; l < labels.size(); l++ )
  111. {
  112. if ( (selection.size() > 0) && (!selection[l]) )
  113. continue;
  114. double inner = labels[l] * ( alpha * looMu[l] + beta ) / sqrt( 1 + alpha*alpha*looVariance[l] );
  115. nloglike -= log ( (erf(inner/sqrt(2)) + 1)/2.0 );
  116. }
  117. if ( verbose )
  118. cerr << "GPMSCLooLikelihoodClassificationOptProb: " << parameters() << " " << nloglike << endl;
  119. return nloglike;
  120. }
/** Gradient of the negative probit log-likelihood with respect to the two
 *  parameters (scale alpha = parameters()[0], bias beta = parameters()[1]).
 *  @param newGradient output, resized to 2 and filled here */
void GPMSCLooLikelihoodClassificationOptProb::computeGradient ( NICE::Vector & newGradient )
{
  newGradient.resize(2);
  newGradient.set(0.0);
  double alpha = parameters()[0];
  double beta = parameters()[1];
  // Rasmussen & Williams: eq. (6.44)
  for ( uint i = 0 ; i < labels.size(); i++ )
  {
    // skip examples outside the selection (if a selection is given)
    if ( (selection.size() > 0) && (!selection[i]) )
      continue;
    // normalization term sqrt(1 + alpha^2 * sigma_i^2)
    double sqrtPart = sqrt(1 + alpha*alpha*looVariance[i]);
    // scaled decision value (without the label factor)
    double ri = ( alpha*looMu[i] + beta) / sqrtPart;
    // inner derivatives of the probit argument w.r.t. beta and alpha
    double betaGrad2ndPart = (labels[i]/sqrtPart);
    double alphaGrad2ndPart = betaGrad2ndPart * ( looMu[i] - beta*alpha*looVariance[i] ) / (sqrtPart*sqrtPart);
    // N(ri) / Phi(labels[i]*ri): Gaussian density over probit CDF,
    // with Phi expressed via erf as in computeObjective()
    // NOTE(review): exp(-ri*ri/2) presumably relies on labels being +-1
    // (so that (labels*ri)^2 == ri^2) — confirm label encoding with callers
    double firstPart = (exp(-ri*ri/2.0)/sqrt(2*M_PI)) * ( 2.0 / (erf(labels[i] * ri / sqrt(2)) + 1) );
    // accumulate the negative gradient (we minimize the negative log-likelihood)
    newGradient[0] -= alphaGrad2ndPart * firstPart;
    newGradient[1] -= betaGrad2ndPart * firstPart;
  }
  if ( verbose )
    cerr << "GPMSCLooLikelihoodClassificationOptProb: gradient=" << newGradient << endl;
}
  143. /************************** GPMSCLooLikelihoodClassificationFixed ********************/
  144. GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ()
  145. {
  146. }
  147. GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ( const NICE::Vector & _selection ) :
  148. GPMSCLooLikelihoodRegression ( false, _selection )
  149. {
  150. }
  151. double GPMSCLooLikelihoodClassificationFixed::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
  152. const NICE::Vector & labels ) const
  153. {
  154. uint count_positives = 0;
  155. uint count_negatives = 0;
  156. for ( uint l = 0 ; l < labels.size(); l++ )
  157. if ( (selection.size() == 0) || (selection[l]) )
  158. {
  159. count_positives += (labels[l] > 0);
  160. count_negatives += (labels[l] <= 0);
  161. }
  162. double objective = 0.0;
  163. for ( uint l = 0 ; l < labels.size(); l++ )
  164. {
  165. if ( (selection.size() > 0) && (!selection[l]) )
  166. continue;
  167. double looVariance = 1.0 / invKernelMatrixDiag[l];
  168. double looMean = labels[l] - alpha[l] * looVariance;
  169. // as proposed by Tommasi and Caputo
  170. double w = (labels[l] < 0.0) ? (count_negatives + count_positives) / (2*count_negatives)
  171. : (count_negatives + count_positives) / (2*count_positives);
  172. objective += 1.0 / ( 1.0 + exp(-3.0 * (looMean*labels[l])) ) * w;
  173. }
  174. return objective;
  175. }
  176. /************************** GPMSCLooLabor ********************/
  177. GPMSCLooLabor::GPMSCLooLabor ()
  178. {
  179. }
  180. GPMSCLooLabor::GPMSCLooLabor ( const NICE::Vector & _selection ) :
  181. GPMSCLooLikelihoodRegression ( false, _selection )
  182. {
  183. }
  184. double GPMSCLooLabor::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
  185. const NICE::Vector & labels ) const
  186. {
  187. Vector posResults;
  188. for ( uint l = 0 ; l < labels.size(); l++ )
  189. {
  190. if ( (selection.size() > 0) && (!selection[l]) )
  191. continue;
  192. double looVariance = 1.0 / invKernelMatrixDiag[l];
  193. double looMean = labels[l] - alpha[l] * looVariance;
  194. if ( labels[l] > 0.0 )
  195. posResults.append ( looMean );
  196. }
  197. return posResults.StdDev();
  198. }
  199. /*************************** GPMSCVarious ******************/
  200. GPMSCLooVarious::GPMSCLooVarious ( int _type ) :
  201. GPMSCLooLikelihoodRegression ( false ), type(_type)
  202. {
  203. }
  204. GPMSCLooVarious::GPMSCLooVarious ( int _type, const NICE::Vector & _selection ) :
  205. GPMSCLooLikelihoodRegression ( false, _selection ), type(_type)
  206. {
  207. }
  208. double GPMSCLooVarious::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
  209. const NICE::Vector & labels ) const
  210. {
  211. uint count_positives = 0;
  212. uint count_negatives = 0;
  213. for ( uint l = 0 ; l < labels.size(); l++ )
  214. if ( (selection.size() == 0) || (selection[l]) )
  215. {
  216. count_positives += (labels[l] > 0);
  217. count_negatives += (labels[l] <= 0);
  218. }
  219. Vector looMean ( labels.size(), 0.0 );
  220. Vector looVariance ( labels.size(), 0.0 );
  221. // selected only one single positive example
  222. // here is like doing one class classification with
  223. // the negative ones. with support data this is just
  224. // the probability of the example using the support
  225. // classifier
  226. ClassificationResults results;
  227. double errors = 0.0;
  228. int count = 0;
  229. for ( uint l = 0 ; l < labels.size(); l++ )
  230. {
  231. if ( (selection.size() > 0) && (!selection[l]) )
  232. continue;
  233. looVariance[l] = 1.0 / invKernelMatrixDiag[l];
  234. looMean[l] = labels[l] - alpha[l] * looVariance[l];
  235. FullVector scores (2);
  236. scores.set(0.0);
  237. scores[1] = looMean[l];
  238. ClassificationResult r ( looMean[l] < 0 ? 0 : 1, scores );
  239. r.classno_groundtruth = labels[l] < 0.0 ? 0 : 1;
  240. results.push_back ( r );
  241. double weight = labels[l] < 0.0 ? 0.5/count_negatives : 0.5/count_positives;
  242. if ( looMean[l] * labels[l] < 0.0 )
  243. errors += weight;
  244. count++;
  245. }
  246. if ( type == P_RECOGNITIONRATE )
  247. return 1.0 - errors;
  248. else if ( type == P_AUC )
  249. return results.getBinaryClassPerformance ( ClassificationResults::PERF_AUC );
  250. else
  251. return results.getBinaryClassPerformance ( ClassificationResults::PERF_AVG_PRECISION );
  252. }
  253. /**************** conditional likelihood **********************/
  254. GPMSCConditionalLikelihood::GPMSCConditionalLikelihood ( const NICE::Vector & _selection ) :
  255. selection( _selection )
  256. {
  257. }
/** Conditional GP likelihood p(y_selected | y_notselected); falls back to the
 *  standard GP marginal likelihood when no selection is given.
 *  @param kernelData kernel matrix wrapper (factorization updated if needed)
 *  @param labels     targets, same ordering as the kernel matrix rows
 *  @return log-likelihood (NOT the negative log-likelihood)
 *  @throws Exception if the selection size does not match the kernel matrix */
double GPMSCConditionalLikelihood::computeObjective ( KernelData *kernelData,
    const NICE::Vector & labels ) const
{
  const Matrix & kernelMatrix = kernelData->getKernelMatrix();
  /************************************************
  * If used for the parameters s of a chai kernel,
  * likelihood = cond_likelihood + likelihood_support
  * and likelihood_support does not depend on s
  * but conditional likelihood seems to be important
  * when comparing different likelihoods
  ***********************************************/
  if ( selection.size() == 0 )
  {
    // compute standard likelihood
    cerr << "GPMSCLooEstimates: Computing the standard likelihood !" << endl;
    // log-likelihood (up to constants): -log|K| - y^T K^{-1} y
    Vector tmp;
    kernelData->updateCholeskyFactorization();
    kernelData->computeInverseKernelMultiply ( labels, tmp );
    return - kernelData->getLogDetKernelMatrix() - labels.scalarProduct(tmp);
  } else {
    if ( selection.size() != kernelMatrix.rows() )
      fthrow(Exception, "Selection does not fit to the size of the kernel matrix" );

    // build compact index maps: original index -> position within the
    // selected (iSel) resp. non-selected (iNSel) sub-block
    map<uint, uint> indexMapSelection;
    map<uint, uint> indexMapNSelection;
    uint iSel = 0;
    uint iNSel = 0;
    for ( uint i = 0 ; i < selection.size() ; i++ )
      if ( selection[i] == 0 )
      {
        indexMapNSelection.insert ( pair<uint, uint> ( i, iNSel ) );
        iNSel++;
      } else {
        indexMapSelection.insert ( pair<uint, uint> ( i, iSel ) );
        iSel++;
      }

    // split the label vector according to the selection
    Vector labelsNSelection ( indexMapNSelection.size() );
    Vector labelsSelection ( indexMapSelection.size() );
    for ( uint i = 0 ; i < selection.size() ; i++ )
      if ( selection[i] == 0 )
      {
        labelsNSelection[ indexMapNSelection[i] ] = labels[i];
      } else {
        labelsSelection[ indexMapSelection[i] ] = labels[i];
      }

    // partition the kernel matrix into the selected block (KSelection),
    // the non-selected block (KNSelection) and the cross block (Ktilde)
    Matrix KSelection ( indexMapSelection.size(), indexMapSelection.size() );
    Matrix KNSelection ( indexMapNSelection.size(), indexMapNSelection.size() );
    Matrix Ktilde ( indexMapSelection.size(), indexMapNSelection.size() );
    for ( uint i = 0 ; i < selection.size() ; i++ )
    {
      for ( uint j = 0 ; j < selection.size() ; j++ )
      {
        if ( (selection[i] == 0) && (selection[j] != 0) )
        {
          uint k1 = indexMapNSelection[i];
          uint k2 = indexMapSelection[j];
          Ktilde(k2,k1) = kernelMatrix(i,j);
        } else if ( (selection[i] == 0) && (selection[j] == 0) ) {
          uint k1 = indexMapNSelection[i];
          uint k2 = indexMapNSelection[j];
          KNSelection(k1,k2) = kernelMatrix(i,j);
        } else if ( (selection[i] != 0) && (selection[j] != 0) ) {
          uint k1 = indexMapSelection[i];
          uint k2 = indexMapSelection[j];
          KSelection(k1,k2) = kernelMatrix(i,j);
        } // ignore fourth case due to symmetry
      }
    }
    // Okay, now we have our matrices
    // we try to calculate p(y^selected | y^notselected)
    //
    // cerr << "Computing the conditional likelihood !" << endl;
    // K_{notselected}^{-1}
    Matrix KNSelectionInv;
    CholeskyRobustAuto cra (false);
    cra.robustCholInv ( KNSelection, KNSelectionInv );
    // compute conditional mean and covariance
    // (standard Gaussian conditioning formulas)
    Vector conditionalMean = Ktilde * KNSelectionInv * labelsNSelection;
    Matrix conditionalCovariance = KSelection - Ktilde * KNSelectionInv * Ktilde.transpose();
    Vector diff = conditionalMean - labelsSelection;
    // evaluate the Gaussian log-density of diff under the conditional
    // covariance via a robust Cholesky factorization
    Matrix cholA;
    cra.robustChol ( conditionalCovariance, cholA );
    Vector tmp;
    choleskySolve ( cholA, diff, tmp );
    double likelihood = 0.0;
    likelihood -= tmp.scalarProduct ( diff );      // Mahalanobis term
    likelihood -= cra.getLastLogDet();             // log-determinant term
    // FIXME: this is just for debugging cerr << - cra.getLastLogDet() << " ";
    //
    // Important notice: we return the likelihood not the negative likelihood !!
    return likelihood;
  }
}
  349. }