/**
* @file GPMSCLooEstimates.cpp
* @brief Model selection criteria for Gaussian processes
* @author Erik Rodner
* @date 03/07/2010
*/
#include <iostream>
#include <cmath>
#include <map>

#include "GPMSCLooEstimates.h"

#include "vislearning/cbaselib/ClassificationResults.h"
#include "vislearning/classifier/kernelclassifier/LikelihoodFunction.h"

#include "core/algebra/CholeskyRobustAuto.h"
#include "core/optimization/limun/FirstOrderTrustRegion.h"
#include "core/vector/Algorithms.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;

GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb )
    : weightedLooProb( _weightedLooProb )
{
}

GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb,
    const NICE::Vector & _selection )
    : weightedLooProb( _weightedLooProb ), selection( _selection )
{
}

double GPMSCLooLikelihoodRegression::computeObjective ( KernelData *kernelData,
    const NICE::Vector & labels ) const
{
  kernelData->updateCholeskyFactorization();
  kernelData->updateInverseKernelMatrix();
  const Matrix & invKernelMatrix = kernelData->getInverseKernelMatrix();

  Vector invKernelMatrixDiag ( invKernelMatrix.rows() );
  for ( uint l = 0 ; l < invKernelMatrixDiag.size(); l++ )
    invKernelMatrixDiag[l] = invKernelMatrix(l,l);

  Vector alpha;
  kernelData->computeInverseKernelMultiply ( labels, alpha );

  return computeObjective ( invKernelMatrixDiag, alpha, labels );
}
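/* Background (Rasmussen & Williams, "Gaussian Processes for Machine Learning",
 * Sec. 5.4.2): with alpha = K^{-1} y, the leave-one-out (LOO) predictive
 * distribution of example i is Gaussian with
 *
 *    mu_i      = y_i - alpha_i / [K^{-1}]_{ii}
 *    sigma_i^2 = 1 / [K^{-1}]_{ii}                     (eq. 5.12)
 *
 * and the log predictive probability, up to the constant -0.5*log(2*pi), is
 *
 *    log p(y_i | X, y_{-i}) = -0.5*log(sigma_i^2)
 *                             - (y_i - mu_i)^2 / (2*sigma_i^2)   (eq. 5.10)
 *
 * The overload below sums exactly these terms, computed from the precomputed
 * diagonal of K^{-1} and the vector alpha, optionally re-weighted per class.
 */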
double GPMSCLooLikelihoodRegression::computeObjective ( const NICE::Vector & invKernelMatrixDiag,
    const NICE::Vector & alpha, const NICE::Vector & labels ) const
{
  uint count_positives = 0;
  uint count_negatives = 0;
  for ( uint l = 0 ; l < labels.size(); l++ )
    if ( (selection.size() == 0) || (selection[l]) )
    {
      count_positives += (labels[l] > 0);
      count_negatives += (labels[l] <= 0);
    }

  double looPredictiveLogProb = 0.0;
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;

    // LOO predictive variance, mean, and log probability of example l
    double sigma2 = 1.0 / invKernelMatrixDiag[l];
    double loo_est = labels[l] - alpha[l] * sigma2;
    double singleLooProb = - 0.5 * log( sigma2 ) - pow(labels[l] - loo_est, 2) / (2*sigma2);

    // optionally balance both classes by weighting with inverse class counts
    double weight = 1.0;
    if ( weightedLooProb )
      weight = labels[l] < 0 ? 1.0 / (double)count_negatives : 1.0 / (double)count_positives;
    looPredictiveLogProb += 0.5 * weight * singleLooProb;
  }

  return looPredictiveLogProb;
}

/************************* GPMSCLooLikelihoodClassification ***********/

GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ()
{
}

GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ( const NICE::Vector & _selection )
    : GPMSCLooLikelihoodRegression ( false, _selection )
{
}

double GPMSCLooLikelihoodClassification::computeObjective ( const NICE::Vector & invKernelMatrixDiag,
    const NICE::Vector & alpha, const NICE::Vector & labels ) const
{
  Vector looMean ( labels.size(), 0.0 );
  Vector looVariance ( labels.size(), 0.0 );
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;
    looVariance[l] = 1.0 / invKernelMatrixDiag[l];
    looMean[l] = labels[l] - alpha[l] / invKernelMatrixDiag[l];
  }

  GPMSCLooLikelihoodClassificationOptProb opt ( labels, selection, looMean, looVariance, false );
  FirstOrderTrustRegion *optimizer = new FirstOrderTrustRegion ( 0.1 /* standard value for a typical gradient */, false );
  optimizer->setEpsilonG ( 0.01 );
  optimizer->setMaxIterations ( 500 );
  optimizer->optimizeFirst ( opt );
  delete optimizer;

  return - opt.computeObjective();
}

/********************* GPMSCLooLikelihoodClassificationOptProb *****************/

GPMSCLooLikelihoodClassificationOptProb::GPMSCLooLikelihoodClassificationOptProb (
    const NICE::Vector & _labels, const NICE::Vector & _selection,
    const NICE::Vector & _looMu, const NICE::Vector & _looVariance, bool _verbose )
    : OptimizationProblemFirst(2), labels(_labels), selection(_selection),
      looMu(_looMu), looVariance(_looVariance), verbose(_verbose)
{
  parameters().resize(2);
  parameters()[0] = 1.0;
  parameters()[1] = 0.0;
}

double GPMSCLooLikelihoodClassificationOptProb::computeObjective ()
{
  double alpha = parameters()[0];
  double beta = parameters()[1];
  double nloglike = 0.0;
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;
    // negative log of the probit likelihood
    // Phi( y * (alpha*mu + beta) / sqrt(1 + alpha^2*sigma^2) ),
    // with the normal cdf Phi evaluated via the error function
    double inner = labels[l] * ( alpha * looMu[l] + beta ) / sqrt( 1 + alpha*alpha*looVariance[l] );
    nloglike -= log ( (erf(inner/sqrt(2)) + 1) / 2.0 );
  }

  if ( verbose )
    cerr << "GPMSCLooLikelihoodClassificationOptProb: " << parameters() << " " << nloglike << endl;

  return nloglike;
}
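/* Derivation sketch for the gradient below: with
 *
 *    r_i = (alpha*mu_i + beta) / sqrt(1 + alpha^2*sigma_i^2)
 *
 * the partial derivatives are
 *
 *    d r_i / d beta  = 1 / sqrt(1 + alpha^2*sigma_i^2)
 *    d r_i / d alpha = (mu_i - beta*alpha*sigma_i^2) / (1 + alpha^2*sigma_i^2)^{3/2}
 *
 * and the chain rule gives, with pdf phi and cdf Phi of the standard normal,
 *
 *    d/dtheta [ -log Phi(y_i*r_i) ] = -( phi(r_i) / Phi(y_i*r_i) ) * y_i * (d r_i / d theta),
 *
 * using phi(y_i*r_i) = phi(r_i) for labels y_i in {-1,+1}. These correspond
 * to the "firstPart" and "...2ndPart" factors in computeGradient.
 */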
void GPMSCLooLikelihoodClassificationOptProb::computeGradient ( NICE::Vector & newGradient )
{
  newGradient.resize(2);
  newGradient.set(0.0);
  double alpha = parameters()[0];
  double beta = parameters()[1];
  // Rasmussen & Williams: eq. (6.44)
  for ( uint i = 0 ; i < labels.size(); i++ )
  {
    if ( (selection.size() > 0) && (!selection[i]) )
      continue;
    double sqrtPart = sqrt(1 + alpha*alpha*looVariance[i]);
    double ri = ( alpha*looMu[i] + beta ) / sqrtPart;
    double betaGrad2ndPart = labels[i] / sqrtPart;
    double alphaGrad2ndPart = betaGrad2ndPart * ( looMu[i] - beta*alpha*looVariance[i] ) / (sqrtPart*sqrtPart);
    double firstPart = (exp(-ri*ri/2.0) / sqrt(2*M_PI)) * ( 2.0 / (erf(labels[i] * ri / sqrt(2)) + 1) );
    newGradient[0] -= alphaGrad2ndPart * firstPart;
    newGradient[1] -= betaGrad2ndPart * firstPart;
  }

  if ( verbose )
    cerr << "GPMSCLooLikelihoodClassificationOptProb: gradient=" << newGradient << endl;
}
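/* Usage sketch (hypothetical driver code; `kernelData` and `binaryLabels`
 * with entries in {-1,+1} are assumed to exist):
 *
 *   GPMSCLooLikelihoodClassification criterion;
 *   double score = criterion.computeObjective ( kernelData, binaryLabels );
 *
 * The KernelData overload inherited from GPMSCLooLikelihoodRegression
 * computes the diagonal of K^{-1} and alpha = K^{-1} y and then dispatches
 * to the Vector overload above; larger scores indicate a higher calibrated
 * LOO predictive log-likelihood.
 */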
/************************** GPMSCLooLikelihoodClassificationFixed ********************/

GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ()
{
}

GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ( const NICE::Vector & _selection )
    : GPMSCLooLikelihoodRegression ( false, _selection )
{
}

double GPMSCLooLikelihoodClassificationFixed::computeObjective ( const NICE::Vector & invKernelMatrixDiag,
    const NICE::Vector & alpha, const NICE::Vector & labels ) const
{
  uint count_positives = 0;
  uint count_negatives = 0;
  for ( uint l = 0 ; l < labels.size(); l++ )
    if ( (selection.size() == 0) || (selection[l]) )
    {
      count_positives += (labels[l] > 0);
      count_negatives += (labels[l] <= 0);
    }

  double objective = 0.0;
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;
    double looVariance = 1.0 / invKernelMatrixDiag[l];
    double looMean = labels[l] - alpha[l] * looVariance;
    // balanced class weighting as proposed by Tommasi and Caputo
    // (note the floating-point division to avoid integer truncation)
    double w = (labels[l] < 0.0) ?
      (count_negatives + count_positives) / (2.0 * count_negatives) :
      (count_negatives + count_positives) / (2.0 * count_positives);
    // smooth sigmoid surrogate of the 0/1 classification score
    objective += 1.0 / ( 1.0 + exp(-3.0 * (looMean*labels[l])) ) * w;
  }

  return objective;
}

/************************** GPMSCLooLabor ********************/

GPMSCLooLabor::GPMSCLooLabor ()
{
}

GPMSCLooLabor::GPMSCLooLabor ( const NICE::Vector & _selection )
    : GPMSCLooLikelihoodRegression ( false, _selection )
{
}

double GPMSCLooLabor::computeObjective ( const NICE::Vector & invKernelMatrixDiag,
    const NICE::Vector & alpha, const NICE::Vector & labels ) const
{
  Vector posResults;
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;
    double looVariance = 1.0 / invKernelMatrixDiag[l];
    double looMean = labels[l] - alpha[l] * looVariance;
    if ( labels[l] > 0.0 )
      posResults.append ( looMean );
  }
  // standard deviation of the LOO estimates of the positive examples
  return posResults.StdDev();
}

/*************************** GPMSCLooVarious ******************/

GPMSCLooVarious::GPMSCLooVarious ( int _type )
    : GPMSCLooLikelihoodRegression ( false ), type(_type)
{
}

GPMSCLooVarious::GPMSCLooVarious ( int _type, const NICE::Vector & _selection )
    : GPMSCLooLikelihoodRegression ( false, _selection ), type(_type)
{
}

double GPMSCLooVarious::computeObjective ( const NICE::Vector & invKernelMatrixDiag,
    const NICE::Vector & alpha, const NICE::Vector & labels ) const
{
  uint count_positives = 0;
  uint count_negatives = 0;
  for ( uint l = 0 ; l < labels.size(); l++ )
    if ( (selection.size() == 0) || (selection[l]) )
    {
      count_positives += (labels[l] > 0);
      count_negatives += (labels[l] <= 0);
    }

  Vector looMean ( labels.size(), 0.0 );
  Vector looVariance ( labels.size(), 0.0 );

  // If only a single positive example is selected, this amounts to
  // one-class classification against the negative examples; with support
  // data this is just the probability of the example under the support
  // classifier.
  ClassificationResults results;
  double errors = 0.0;
  int count = 0;
  for ( uint l = 0 ; l < labels.size(); l++ )
  {
    if ( (selection.size() > 0) && (!selection[l]) )
      continue;
    looVariance[l] = 1.0 / invKernelMatrixDiag[l];
    looMean[l] = labels[l] - alpha[l] * looVariance[l];

    FullVector scores (2);
    scores.set(0.0);
    scores[1] = looMean[l];
    ClassificationResult r ( looMean[l] < 0 ? 0 : 1, scores );
    r.classno_groundtruth = labels[l] < 0.0 ? 0 : 1;
    results.push_back ( r );

    // class-balanced error count: a misclassified example contributes
    // half the inverse size of its class
    double weight = labels[l] < 0.0 ? 0.5/count_negatives : 0.5/count_positives;
    if ( looMean[l] * labels[l] < 0.0 )
      errors += weight;
    count++;
  }

  if ( type == P_RECOGNITIONRATE )
    return 1.0 - errors;
  else if ( type == P_AUC )
    return results.getBinaryClassPerformance ( ClassificationResults::PERF_AUC );
  else
    return results.getBinaryClassPerformance ( ClassificationResults::PERF_AVG_PRECISION );
}
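/* Background for the conditional likelihood below: partition the kernel
 * matrix according to the selection into the block K_s of selected examples
 * (KSelection), the block K_n of non-selected examples (KNSelection), and
 * the cross block K_sn (Ktilde). For a joint Gaussian, the conditional
 * distribution of the selected labels given the remaining ones is again
 * Gaussian with
 *
 *    mean       m = K_sn * K_n^{-1} * y_n
 *    covariance C = K_s - K_sn * K_n^{-1} * K_sn^T
 *
 * so that, up to an additive constant,
 *
 *    log p(y_s | y_n) = -0.5*(y_s - m)^T C^{-1} (y_s - m) - 0.5*log det(C).
 *
 * The implementation drops the factor 0.5 and the constant, which does not
 * change the ranking of models.
 */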
/**************** conditional likelihood **********************/

GPMSCConditionalLikelihood::GPMSCConditionalLikelihood ( const NICE::Vector & _selection )
    : selection( _selection )
{
}

double GPMSCConditionalLikelihood::computeObjective ( KernelData *kernelData,
    const NICE::Vector & labels ) const
{
  const Matrix & kernelMatrix = kernelData->getKernelMatrix();

  /************************************************
   * If used for the parameters s of a Chai kernel,
   * likelihood = cond_likelihood + likelihood_support,
   * and likelihood_support does not depend on s;
   * the conditional likelihood is nevertheless important
   * when comparing different likelihoods.
   ***********************************************/
  if ( selection.size() == 0 ) {
    // compute the standard likelihood
    cerr << "GPMSCLooEstimates: Computing the standard likelihood!" << endl;
    Vector tmp;
    kernelData->updateCholeskyFactorization();
    kernelData->computeInverseKernelMultiply ( labels, tmp );
    return - kernelData->getLogDetKernelMatrix() - labels.scalarProduct(tmp);
  } else {
    if ( selection.size() != kernelMatrix.rows() )
      fthrow(Exception, "Selection does not fit the size of the kernel matrix");

    // map original indices to positions within the selected /
    // non-selected sub-matrices
    map<uint, uint> indexMapSelection;
    map<uint, uint> indexMapNSelection;
    uint iSel = 0;
    uint iNSel = 0;
    for ( uint i = 0 ; i < selection.size() ; i++ )
      if ( selection[i] == 0 ) {
        indexMapNSelection.insert ( pair<uint, uint> ( i, iNSel ) );
        iNSel++;
      } else {
        indexMapSelection.insert ( pair<uint, uint> ( i, iSel ) );
        iSel++;
      }

    Vector labelsNSelection ( indexMapNSelection.size() );
    Vector labelsSelection ( indexMapSelection.size() );
    for ( uint i = 0 ; i < selection.size() ; i++ )
      if ( selection[i] == 0 ) {
        labelsNSelection[ indexMapNSelection[i] ] = labels[i];
      } else {
        labelsSelection[ indexMapSelection[i] ] = labels[i];
      }

    Matrix KSelection ( indexMapSelection.size(), indexMapSelection.size() );
    Matrix KNSelection ( indexMapNSelection.size(), indexMapNSelection.size() );
    Matrix Ktilde ( indexMapSelection.size(), indexMapNSelection.size() );
    for ( uint i = 0 ; i < selection.size() ; i++ )
    {
      for ( uint j = 0 ; j < selection.size() ; j++ )
      {
        if ( (selection[i] == 0) && (selection[j] != 0) ) {
          uint k1 = indexMapNSelection[i];
          uint k2 = indexMapSelection[j];
          Ktilde(k2,k1) = kernelMatrix(i,j);
        } else if ( (selection[i] == 0) && (selection[j] == 0) ) {
          uint k1 = indexMapNSelection[i];
          uint k2 = indexMapNSelection[j];
          KNSelection(k1,k2) = kernelMatrix(i,j);
        } else if ( (selection[i] != 0) && (selection[j] != 0) ) {
          uint k1 = indexMapSelection[i];
          uint k2 = indexMapSelection[j];
          KSelection(k1,k2) = kernelMatrix(i,j);
        }
        // the fourth case is covered by symmetry
      }
    }

    // Okay, now we have our matrices;
    // we try to calculate p(y^selected | y^notselected)
    // cerr << "Computing the conditional likelihood !" << endl;

    // K_{notselected}^{-1}
    Matrix KNSelectionInv;
    CholeskyRobustAuto cra (false);
    cra.robustCholInv ( KNSelection, KNSelectionInv );

    // compute conditional mean and covariance
    Vector conditionalMean = Ktilde * KNSelectionInv * labelsNSelection;
    Matrix conditionalCovariance = KSelection - Ktilde * KNSelectionInv * Ktilde.transpose();

    Vector diff = conditionalMean - labelsSelection;
    Matrix cholA;
    cra.robustChol ( conditionalCovariance, cholA );
    Vector tmp;
    choleskySolve ( cholA, diff, tmp );

    double likelihood = 0.0;
    likelihood -= tmp.scalarProduct ( diff );
    likelihood -= cra.getLastLogDet();

    // FIXME: this is just for debugging
    cerr << - cra.getLastLogDet() << " ";

    // Important notice: we return the likelihood, not the negative likelihood!
    return likelihood;
  }
}
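/* Usage sketch (hypothetical; `kernelData` and `labels` are assumed to exist):
 *
 *   // selection[i] != 0 marks the examples whose labels are scored,
 *   // conditioned on all remaining (selection[i] == 0) examples
 *   NICE::Vector selection ( labels.size(), 0.0 );
 *   selection[0] = 1.0;
 *   GPMSCConditionalLikelihood condLikelihood ( selection );
 *   double ll = condLikelihood.computeObjective ( kernelData, labels );
 *   // ll is a (scaled) log-likelihood: larger values are better
 */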