123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- /**
- * @file GPMSCLooEstimates.cpp
- * @brief Model selection criterions for Gaussian Processes
- * @author Erik Rodner
- * @date 03/07/2010
- */
- #include <iostream>
- #include "GPMSCLooEstimates.h"
- #include "vislearning/cbaselib/ClassificationResults.h"
- #include "vislearning/classifier/kernelclassifier/LikelihoodFunction.h"
- #include "core/algebra/CholeskyRobustAuto.h"
- #include "core/optimization/limun/FirstOrderTrustRegion.h"
- #include "core/vector/Algorithms.h"
- using namespace std;
- using namespace NICE;
- using namespace OBJREC;
- GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb )
- : weightedLooProb( _weightedLooProb )
- {
- }
- GPMSCLooLikelihoodRegression::GPMSCLooLikelihoodRegression ( bool _weightedLooProb, const NICE::Vector & _selection ) :
- weightedLooProb( _weightedLooProb ), selection( _selection )
- {
- }
- double GPMSCLooLikelihoodRegression::computeObjective ( KernelData *kernelData,
- const NICE::Vector & labels ) const
- {
- kernelData->updateCholeskyFactorization();
- kernelData->updateInverseKernelMatrix();
- const Matrix & invKernelMatrix = kernelData->getInverseKernelMatrix();
- Vector invKernelMatrixDiag ( invKernelMatrix.rows() );
- for ( uint l = 0 ; l < invKernelMatrixDiag.size(); l++ )
- invKernelMatrixDiag[l] = invKernelMatrix(l,l);
- Vector alpha;
- kernelData->computeInverseKernelMultiply ( labels, alpha );
- return computeObjective ( invKernelMatrixDiag, alpha, labels );
- }
- double GPMSCLooLikelihoodRegression::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
- const NICE::Vector & labels ) const
- {
- uint count_positives = 0;
- uint count_negatives = 0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- if ( (selection.size() == 0) || (selection[l]) )
- {
- count_positives += (labels[l] > 0);
- count_negatives += (labels[l] <= 0);
- }
- double looPredictiveLogProb = 0.0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- {
- continue;
- }
- double sigma2 = 1.0 / invKernelMatrixDiag[l];
- double loo_est = labels[l] - alpha[l] * sigma2;
- double singleLooProb = -0.5 * log( sigma2 ) - pow(labels[l] - loo_est, 2)/(2*sigma2);
- double weight = 1.0;
- if ( weightedLooProb )
- weight = labels[l] < 0 ? 1.0 / (double) (count_negatives)
- : 1.0 / (double) (count_positives);
- looPredictiveLogProb += 0.5*weight*singleLooProb;
- }
- return looPredictiveLogProb;
- }
- /************************* GPMSCLooLikelihoodClassification ***********/
- GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ()
- {
- }
- GPMSCLooLikelihoodClassification::GPMSCLooLikelihoodClassification ( const NICE::Vector & _selection ) :
- GPMSCLooLikelihoodRegression ( false, _selection )
- {
- }
- double GPMSCLooLikelihoodClassification::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
- const NICE::Vector & labels ) const
- {
- Vector looMean (labels.size(), 0.0);
- Vector looVariance (labels.size(), 0.0);
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- continue;
- looVariance[l] = ( 1.0 / invKernelMatrixDiag[l] );
- looMean[l] = (labels[l] - alpha[l] / invKernelMatrixDiag[l]);
- }
- GPMSCLooLikelihoodClassificationOptProb opt ( labels, selection, looMean, looVariance, false );
- FirstOrderTrustRegion *optimizer = new FirstOrderTrustRegion(0.1 /*std value for typical gradient */, false);
- optimizer->setEpsilonG ( 0.01 );
- optimizer->setMaxIterations ( 500 );
- optimizer->optimizeFirst ( opt );
- delete optimizer;
- return - opt.computeObjective();
- }
- /********************* GPMSCLooLikelihoodClassificationOptProb *****************/
- GPMSCLooLikelihoodClassificationOptProb::GPMSCLooLikelihoodClassificationOptProb ( const NICE::Vector & _labels,
- const NICE::Vector & _selection, const NICE::Vector & _looMu, const NICE::Vector & _looVariance, bool _verbose )
- : OptimizationProblemFirst(2), labels(_labels), selection(_selection), looMu(_looMu), looVariance(_looVariance),
- verbose(_verbose)
- {
- parameters().resize(2);
- parameters()[0] = 1.0;
- parameters()[1] = 0.0;
- }
- double GPMSCLooLikelihoodClassificationOptProb::computeObjective ()
- {
- double alpha = parameters()[0];
- double beta = parameters()[1];
- double nloglike = 0.0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- continue;
- double inner = labels[l] * ( alpha * looMu[l] + beta ) / sqrt( 1 + alpha*alpha*looVariance[l] );
- nloglike -= log ( (erf(inner/sqrt(2)) + 1)/2.0 );
- }
- if ( verbose )
- cerr << "GPMSCLooLikelihoodClassificationOptProb: " << parameters() << " " << nloglike << endl;
- return nloglike;
- }
- void GPMSCLooLikelihoodClassificationOptProb::computeGradient ( NICE::Vector & newGradient )
- {
- newGradient.resize(2);
- newGradient.set(0.0);
- double alpha = parameters()[0];
- double beta = parameters()[1];
- // Rasmussen & Williams: eq. (6.44)
- for ( uint i = 0 ; i < labels.size(); i++ )
- {
- if ( (selection.size() > 0) && (!selection[i]) )
- continue;
- double sqrtPart = sqrt(1 + alpha*alpha*looVariance[i]);
- double ri = ( alpha*looMu[i] + beta) / sqrtPart;
- double betaGrad2ndPart = (labels[i]/sqrtPart);
- double alphaGrad2ndPart = betaGrad2ndPart * ( looMu[i] - beta*alpha*looVariance[i] ) / (sqrtPart*sqrtPart);
- double firstPart = (exp(-ri*ri/2.0)/sqrt(2*M_PI)) * ( 2.0 / (erf(labels[i] * ri / sqrt(2)) + 1) );
- newGradient[0] -= alphaGrad2ndPart * firstPart;
- newGradient[1] -= betaGrad2ndPart * firstPart;
- }
-
- if ( verbose )
- cerr << "GPMSCLooLikelihoodClassificationOptProb: gradient=" << newGradient << endl;
- }
- /************************** GPMSCLooLikelihoodClassificationFixed ********************/
- GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ()
- {
- }
- GPMSCLooLikelihoodClassificationFixed::GPMSCLooLikelihoodClassificationFixed ( const NICE::Vector & _selection ) :
- GPMSCLooLikelihoodRegression ( false, _selection )
- {
- }
- double GPMSCLooLikelihoodClassificationFixed::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
- const NICE::Vector & labels ) const
- {
- uint count_positives = 0;
- uint count_negatives = 0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- if ( (selection.size() == 0) || (selection[l]) )
- {
- count_positives += (labels[l] > 0);
- count_negatives += (labels[l] <= 0);
- }
- double objective = 0.0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- continue;
- double looVariance = 1.0 / invKernelMatrixDiag[l];
- double looMean = labels[l] - alpha[l] * looVariance;
- // as proposed by Tommasi and Caputo
- double w = (labels[l] < 0.0) ? (count_negatives + count_positives) / (2*count_negatives)
- : (count_negatives + count_positives) / (2*count_positives);
- objective += 1.0 / ( 1.0 + exp(-3.0 * (looMean*labels[l])) ) * w;
- }
- return objective;
- }
- /************************** GPMSCLooLabor ********************/
- GPMSCLooLabor::GPMSCLooLabor ()
- {
- }
- GPMSCLooLabor::GPMSCLooLabor ( const NICE::Vector & _selection ) :
- GPMSCLooLikelihoodRegression ( false, _selection )
- {
- }
- double GPMSCLooLabor::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
- const NICE::Vector & labels ) const
- {
- Vector posResults;
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- continue;
- double looVariance = 1.0 / invKernelMatrixDiag[l];
- double looMean = labels[l] - alpha[l] * looVariance;
- if ( labels[l] > 0.0 )
- posResults.append ( looMean );
- }
- return posResults.StdDev();
- }
- /*************************** GPMSCVarious ******************/
- GPMSCLooVarious::GPMSCLooVarious ( int _type ) :
- GPMSCLooLikelihoodRegression ( false ), type(_type)
- {
- }
- GPMSCLooVarious::GPMSCLooVarious ( int _type, const NICE::Vector & _selection ) :
- GPMSCLooLikelihoodRegression ( false, _selection ), type(_type)
- {
- }
- double GPMSCLooVarious::computeObjective ( const NICE::Vector & invKernelMatrixDiag, const NICE::Vector & alpha,
- const NICE::Vector & labels ) const
- {
- uint count_positives = 0;
- uint count_negatives = 0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- if ( (selection.size() == 0) || (selection[l]) )
- {
- count_positives += (labels[l] > 0);
- count_negatives += (labels[l] <= 0);
- }
- Vector looMean ( labels.size(), 0.0 );
- Vector looVariance ( labels.size(), 0.0 );
- // selected only one single positive example
- // here is like doing one class classification with
- // the negative ones. with support data this is just
- // the probability of the example using the support
- // classifier
- ClassificationResults results;
- double errors = 0.0;
- int count = 0;
- for ( uint l = 0 ; l < labels.size(); l++ )
- {
- if ( (selection.size() > 0) && (!selection[l]) )
- continue;
- looVariance[l] = 1.0 / invKernelMatrixDiag[l];
- looMean[l] = labels[l] - alpha[l] * looVariance[l];
- FullVector scores (2);
- scores.set(0.0);
- scores[1] = looMean[l];
- ClassificationResult r ( looMean[l] < 0 ? 0 : 1, scores );
- r.classno_groundtruth = labels[l] < 0.0 ? 0 : 1;
- results.push_back ( r );
- double weight = labels[l] < 0.0 ? 0.5/count_negatives : 0.5/count_positives;
- if ( looMean[l] * labels[l] < 0.0 )
- errors += weight;
- count++;
- }
- if ( type == P_RECOGNITIONRATE )
- return 1.0 - errors;
- else if ( type == P_AUC )
- return results.getBinaryClassPerformance ( ClassificationResults::PERF_AUC );
- else
- return results.getBinaryClassPerformance ( ClassificationResults::PERF_AVG_PRECISION );
- }
- /**************** conditional likelihood **********************/
- GPMSCConditionalLikelihood::GPMSCConditionalLikelihood ( const NICE::Vector & _selection ) :
- selection( _selection )
- {
- }
- double GPMSCConditionalLikelihood::computeObjective ( KernelData *kernelData,
- const NICE::Vector & labels ) const
- {
- const Matrix & kernelMatrix = kernelData->getKernelMatrix();
- /************************************************
- * If used for the parameters s of a chai kernel,
- * likelihood = cond_likelihood + likelihood_support
- * and likelihood_support does not depend on s
- * but conditional likelihood seems to be important
- * when comparing different likelihoods
- ***********************************************/
- if ( selection.size() == 0 )
- {
- // compute standard likelihood
- cerr << "GPMSCLooEstimates: Computing the standard likelihood !" << endl;
- Vector tmp;
- kernelData->updateCholeskyFactorization();
- kernelData->computeInverseKernelMultiply ( labels, tmp );
- return - kernelData->getLogDetKernelMatrix() - labels.scalarProduct(tmp);
- } else {
- if ( selection.size() != kernelMatrix.rows() )
- fthrow(Exception, "Selection does not fit to the size of the kernel matrix" );
- map<uint, uint> indexMapSelection;
- map<uint, uint> indexMapNSelection;
- uint iSel = 0;
- uint iNSel = 0;
- for ( uint i = 0 ; i < selection.size() ; i++ )
- if ( selection[i] == 0 )
- {
- indexMapNSelection.insert ( pair<uint, uint> ( i, iNSel ) );
- iNSel++;
- } else {
- indexMapSelection.insert ( pair<uint, uint> ( i, iSel ) );
- iSel++;
- }
- Vector labelsNSelection ( indexMapNSelection.size() );
- Vector labelsSelection ( indexMapSelection.size() );
- for ( uint i = 0 ; i < selection.size() ; i++ )
- if ( selection[i] == 0 )
- {
- labelsNSelection[ indexMapNSelection[i] ] = labels[i];
- } else {
- labelsSelection[ indexMapSelection[i] ] = labels[i];
- }
- Matrix KSelection ( indexMapSelection.size(), indexMapSelection.size() );
- Matrix KNSelection ( indexMapNSelection.size(), indexMapNSelection.size() );
- Matrix Ktilde ( indexMapSelection.size(), indexMapNSelection.size() );
- for ( uint i = 0 ; i < selection.size() ; i++ )
- {
- for ( uint j = 0 ; j < selection.size() ; j++ )
- {
- if ( (selection[i] == 0) && (selection[j] != 0) )
- {
- uint k1 = indexMapNSelection[i];
- uint k2 = indexMapSelection[j];
- Ktilde(k2,k1) = kernelMatrix(i,j);
- } else if ( (selection[i] == 0) && (selection[j] == 0) ) {
- uint k1 = indexMapNSelection[i];
- uint k2 = indexMapNSelection[j];
- KNSelection(k1,k2) = kernelMatrix(i,j);
- } else if ( (selection[i] != 0) && (selection[j] != 0) ) {
- uint k1 = indexMapSelection[i];
- uint k2 = indexMapSelection[j];
- KSelection(k1,k2) = kernelMatrix(i,j);
- } // ignore fourth case due to symmetry
- }
- }
- // Okay, now we have our matrices
- // we try to calculate p(y^selected | y^notselected)
- //
- // cerr << "Computing the conditional likelihood !" << endl;
- // K_{notselected}^{-1}
- Matrix KNSelectionInv;
- CholeskyRobustAuto cra (false);
- cra.robustCholInv ( KNSelection, KNSelectionInv );
- // compute conditional mean and covariance
- Vector conditionalMean = Ktilde * KNSelectionInv * labelsNSelection;
- Matrix conditionalCovariance = KSelection - Ktilde * KNSelectionInv * Ktilde.transpose();
- Vector diff = conditionalMean - labelsSelection;
- Matrix cholA;
- cra.robustChol ( conditionalCovariance, cholA );
- Vector tmp;
- choleskySolve ( cholA, diff, tmp );
- double likelihood = 0.0;
- likelihood -= tmp.scalarProduct ( diff );
- likelihood -= cra.getLastLogDet();
-
- // FIXME: this is just for debugging cerr << - cra.getLastLogDet() << " ";
- //
- // Important notice: we return the likelihood not the negative likelihood !!
- return likelihood;
- }
- }
|