123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776 |
- #ifdef NICE_USELIB_OPENMP
- #include <omp.h>
- #endif
- #include <stdio.h>
- #include "GMM.h"
- #include <core/vector/Algorithms.h>
- #include "vislearning/math/cluster/KMeans.h"
- #define DEBUGGMM
- using namespace OBJREC;
- using namespace NICE;
- using namespace std;
- GMM::GMM()
- {
- gaussians = 3;
- maxiter = 200;
- featsperclass = 0;
- tau = 10.0;
- srand ( time ( NULL ) );
- comp = false;
- }
- GMM::GMM ( int _gaussians ) : gaussians ( _gaussians )
- {
- maxiter = 200;
- featsperclass = 0;
- tau = 0.0;
- srand ( time ( NULL ) );
- pyramid = false;
- comp = false;
- }
- GMM::GMM ( const Config *conf, int _gaussians ) : gaussians ( _gaussians )
- {
- this->conf = conf;
- if ( gaussians < 2 )
- gaussians = conf->gI ( "GMM", "gaussians", 2 );
- maxiter = conf->gI ( "GMM", "maxiter", 200 );
- featsperclass = conf->gI ( "GMM", "featsperclass", 0 );
- tau = conf->gD ( "GMM", "tau", 100.0 );
- pyramid = conf->gB ( "GMM", "pyramid", false );
- comp = false;
- srand ( time ( NULL ) );
- }
- void GMM::computeMixture ( Examples examples )
- {
- int fsize = ( int ) examples.size();
- assert ( fsize >= gaussians );
- dim = examples[0].second.vec->size();
- int samples = fsize;
- if ( featsperclass > 0 )
- {
- samples = featsperclass * gaussians;
- samples = std::min ( samples, fsize );
- }
- // Copy data in Matrix
- VVector dataset;
- cout << "reduced training data for GMM from " << fsize << " features to " << samples << " features.";
- for ( int i = 0; i < samples; i++ )
- {
- int k = rand() % fsize;
- NICE::Vector *vec = examples[k].second.vec;
- dataset.push_back ( *vec );
- }
- computeMixture ( dataset );
- }
- void GMM::computeMixture ( const VVector &DataSet )
- {
- // Learn the GMM model
- assert ( DataSet.size() >= ( uint ) gaussians );
- //initEMkMeans(DataSet); // initialize the model
- srand ( time ( NULL ) );
- bool poweroftwo = false;
- int power = 1;
- while ( power <= gaussians )
- {
- if ( gaussians == power )
- poweroftwo = true;
- power *= 2;
- }
- if ( poweroftwo && pyramid )
- {
- initEM ( DataSet ); // initialize the model
- int g = 1;
- while ( g <= gaussians )
- {
- cout << "g = " << g << endl;
- doEM ( DataSet, g );
- if ( 2*g <= gaussians )
- {
- for ( int i = g; i < g*2; i++ )
- {
- mu[i] = mu[i-g];
- sparse_sigma[i] = sparse_sigma[i-g];
- sparse_inv_sigma[i] = sparse_inv_sigma[i-g];
- log_det_sigma[i] = log_det_sigma[i-g];
- priors[i] = priors[i-g];
- double interval = 1.0;
- for ( int k = 0; k < ( int ) mu[i].size(); k++ )
- {
- interval = mu[i][k];
- interval = std::max ( interval, 0.1 );
- double r = ( interval * ( ( double ) rand() / ( double ) RAND_MAX ) ) - interval / 2.0;
- mu[i][k] += r;
- }
- }
- }
- g *= 2;
- }
- }
- else
- {
- initEMkMeans ( DataSet ); // initialize the model
- doEM ( DataSet, gaussians );
- }
- // performs EM
- }
- inline double diagDeterminant ( const NICE::Vector &sparse_mat )
- {
- double det = 1.0;
- for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
- {
- det *= sparse_mat[i];
- }
- return det;
- }
- inline double logdiagDeterminant ( const NICE::Vector &sparse_mat )
- {
- double det = 0.0;
- for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
- {
- det += log ( sparse_mat[i] );
- }
- return det;
- }
- inline NICE::Vector diagInverse ( const NICE::Vector &sparse_mat )
- {
- NICE::Vector inv ( sparse_mat.size() );
- for ( int i = 0; i < ( int ) sparse_mat.size(); i++ )
- {
- inv[i] = 1.0 / sparse_mat[i];
- }
- return inv;
- }
- void GMM::initEMkMeans ( const VVector &DataSet )
- {
- /*init GMM with k-Means*/
- KMeans k ( gaussians );
- VVector means;
- vector<double> weights;
- vector<int> assignment;
- k.cluster ( DataSet, means, weights, assignment );
- int nData = DataSet.size();
- this->dim = DataSet[0].size();
- cdimval = dim * log ( 2 * 3.14159 );
- vector<int> pop ( gaussians, 0 );
- priors.resize ( gaussians );
- mu = VVector ( gaussians, dim );
- log_det_sigma.clear();
- vector<int> allk;
- NICE::Vector globmean ( dim );
- globmean.set ( 0.0 );
- for ( int n = 0; n < ( int ) DataSet.size(); n++ ) /* getting the max value for time */
- {
- globmean = globmean + DataSet[n];
- }
- globmean *= ( 1.0 / nData );
- NICE::Vector sparse_globsigma;
- sparse_globsigma.resize ( dim );
- sparse_globsigma.set ( 0.0 );
- for ( int i = 0; i < ( int ) DataSet.size(); i++ ) // Covariances updates
- {
- // nur diagonal Elemente berechnen
- for ( int d = 0; d < dim; d++ )
- {
- double diff = ( DataSet[i][d] - globmean[d] );
- sparse_globsigma[d] += diff * diff;
- }
- }
- sparse_globsigma *= ( 1.0 / DataSet.size() );
- for ( int i = 0; i < gaussians; i++ )
- {
- NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );
- sparse_sigma.push_back ( sparse_globsigma );
- sparse_inv_sigma.push_back ( _inv_sigma );
- log_det_sigma.push_back ( logdiagDeterminant ( sparse_globsigma ) );
- mu[i] = means[i];
- //priors[i]=1.0/(double)gaussians; // set equi-probables states
- priors[i] = weights[i]; // set kMeans weights
- }
- }
- void GMM::initEM ( const VVector &DataSet )
- {
- /* init the GaussianMixture by using randomized meanvectors */
- int nData = DataSet.size();
- this->dim = DataSet[0].size();
- cdimval = dim * log ( 2 * 3.14159 );
- vector<int> pop ( gaussians, 0 );
- priors.resize ( gaussians );
- mu = VVector ( gaussians, dim );
- log_det_sigma.clear();
- vector<int> allk;
- NICE::Vector globmean ( dim );
- globmean.set ( 0.0 );
- for ( int n = 0; n < ( int ) DataSet.size(); n++ ) /* getting the max value for time */
- {
- globmean = globmean + DataSet[n];
- }
- globmean *= ( 1.0 / nData );
- NICE::Vector sparse_globsigma;
- sparse_globsigma.resize ( dim );
- sparse_globsigma.set ( 0.0 );
- for ( int i = 0; i < ( int ) DataSet.size(); i++ ) // Covariances updates
- {
- // nur diagonal Elemente berechnen
- for ( int d = 0; d < dim; d++ )
- {
- double diff = ( DataSet[i][d] - globmean[d] );
- sparse_globsigma[d] += diff * diff;
- }
- }
- sparse_globsigma *= ( 1.0 / DataSet.size() );
- for ( int i = 0; i < gaussians; i++ )
- {
- priors[i] = 1.0 / ( double ) gaussians; // set equi-probables states
- NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );
- sparse_sigma.push_back ( sparse_globsigma );
- sparse_inv_sigma.push_back ( _inv_sigma );
- log_det_sigma.push_back ( logdiagDeterminant ( sparse_globsigma ) );
- bool newv = false;
- int k;
- while ( !newv )
- {
- newv = true;
- k = rand() % nData;
- for ( int nk = 0; nk < ( int ) allk.size(); nk++ )
- if ( allk[nk] == k )
- {
- newv = false;
- }
- if ( newv )
- allk.push_back ( k );
- }
- mu[i] = DataSet[k];
- }
- }
- inline void sumRow ( NICE::Matrix mat, NICE::Vector &res )
- {
- int row = mat.rows();
- int column = mat.cols();
- res = NICE::Vector ( column );
- res.set ( 1e-5f );
- //#pragma omp parallel for
- for ( int i = 0; i < column; i++ ) {
- for ( int j = 0; j < row; j++ ) {
- res[i] += mat ( j, i );
- }
- }
- }
- double GMM::logpdfState ( const NICE::Vector &Vin, int state )
- {
- /* get the probability density for a given state and a given vector */
- double p;
- NICE::Vector dif;
- dif = Vin;
- dif -= mu[state];
- p = 0.0;
- for ( int i = 0; i < ( int ) dif.size(); i++ )
- {
- p += dif[i] * dif[i] * sparse_inv_sigma[state][i];
- }
- p = -0.5 * ( p + cdimval + log_det_sigma[state] );
- return p;
- }
- int GMM::doEM ( const VVector &DataSet, int nbgaussians )
- {
- /* perform Expectation/Maximization on the given Dataset :
- Matrix DataSet(nSamples,Dimensions).
- The GaussianMixture Object must be initialised before
- (see initEM or initEMkMeans methods ) */
- #ifdef DEBUG
- cerr << "GMM::start EM" << endl;
- #endif
- int nData = DataSet.size();
- int iter = 0;
- double log_lik;
- double log_lik_threshold = 1e-6f;
- double log_lik_old = -1e10f;
- NICE::Matrix unity ( dim, dim, 0.0 );
- for ( int k = 0; k < dim; k++ )
- unity ( k, k ) = 1.0;
- //EM loop
- while ( true )
- {
- #ifdef DEBUGGMM
- cerr << "GMM::EM: iteration: " << iter << endl;
- #endif
- vector<double> sum_p;
- sum_p.resize ( nData );
- for ( int i = 0; i < nData; i++ )
- {
- sum_p[i] = 0.0;
- }
- NICE::Matrix pxi ( nData, gaussians );
- pxi.set ( 0.0 );
- NICE::Matrix pix ( nData, gaussians );
- pix.set ( 0.0 );
- NICE::Vector E;
- iter++;
- if ( iter > maxiter ) {
- cerr << "EM stops here. Max number of iterations (" << maxiter << ") has been reached." << endl;
- return iter;
- }
- double sum_log = 0.0;
- // computing expectation
- double maxp = -numeric_limits<double>::max();
- vector<double> maxptmp ( nData, -numeric_limits<double>::max() );
- #pragma omp parallel for
- for ( int i = 0; i < nData; i++ )
- {
- for ( int j = 0; j < nbgaussians; j++ )
- {
- double p = logpdfState ( DataSet[i], j ); // log(P(x|i))
- maxptmp[i] = std::max ( maxptmp[i], p );
- pxi ( i, j ) = p;
- }
- }
- for ( int i = 0; i < nData; i++ )
- {
- maxp = std::max ( maxp, maxptmp[i] );
- }
- #pragma omp parallel for
- for ( int i = 0; i < nData; i++ )
- {
- sum_p[i] = 0.0;
- for ( int j = 0; j < nbgaussians; j++ )
- {
- double p = pxi ( i, j ) - maxp; // log(P(x|i))
- p = exp ( p ); // P(x|i)
- if ( p < 1e-40 )
- p = 1e-40;
- pxi ( i, j ) = p;
- sum_p[i] += p * priors[j];
- }
- }
- for ( int i = 0; i < nData; i++ )
- {
- sum_log += log ( sum_p[i] );
- }
- #pragma omp parallel for
- for ( int j = 0; j < nbgaussians; j++ )
- {
- for ( int i = 0; i < nData; i++ )
- {
- pix ( i, j ) = ( pxi ( i, j ) * priors[j] ) / sum_p[i]; // then P(i|x)
- }
- }
- // here we compute the log likehood
- log_lik = sum_log / nData;
- #ifdef DEBUGGMM
- cout << "diff: " << fabs ( ( log_lik / log_lik_old ) - 1 ) << " thresh: " << log_lik_threshold << " sum: " << sum_log << endl;
- //cout << "logold: " << log_lik_old << " lognew: " << log_lik << endl;
- #endif
- if ( fabs ( ( log_lik / log_lik_old ) - 1 ) < log_lik_threshold )
- {
- //if log likehood hasn't move enough, the algorithm has converged, exiting the loop
- return iter;
- }
- log_lik_old = log_lik;
- // Update Step
- sumRow ( pix, E );
- #pragma omp parallel for
- for ( int j = 0; j < nbgaussians; j++ )
- {
- priors[j] = ( E[j] + tau ) / ( nData + tau * nbgaussians ); // new priors
- NICE::Vector tmu ( dim );
- tmu.set ( 0.0 );
- NICE::Vector sparse_tmsigma ( dim );
- sparse_tmsigma.set ( 0.0 );
- for ( int i = 0; i < nData; i++ ) // Means update loop
- {
- tmu = tmu + ( DataSet[i] * pix ( i, j ) );
- }
- NICE::Vector tmu2 = mu[j];
- mu[j] = tmu + tau * tmu2;
- mu[j] = mu[j] * ( 1.0 / ( E[j] + tau ) );
- for ( int i = 0; i < nData; i++ ) // Covariances updates
- {
- // nur diagonal Elemente berechnen
- for ( int d = 0; d < dim; d++ )
- {
- sparse_tmsigma[d] += DataSet[i][d] * DataSet[i][d] * pix ( i, j );
- }
- }
- NICE::Vector sparse_tmsigma2 ( dim );
- for ( int d = 0; d < dim; d++ )
- {
- sparse_tmsigma[d] += 1e-5f;
- sparse_tmsigma2[d] = sparse_sigma[j][d] + tmu2[d] * tmu2[d];
- sparse_sigma[j][d] = ( sparse_tmsigma[d] + tau * sparse_tmsigma2[d] ) / ( E[j] + tau ) - ( mu[j][d] * mu[j][d] );
- }
- sparse_inv_sigma[j] = diagInverse ( sparse_sigma[j] );
- log_det_sigma[j] = logdiagDeterminant ( sparse_sigma[j] );
- }
- if ( comp )
- {
- double dist = compare();
- cout << "dist for iteration " << iter << endl;
- cout << "d: " << dist << endl;
- }
- }
- #ifdef DEBUG
- cerr << "GMM::finished EM after " << iter << " iterations" << endl;
- #endif
- return iter;
- }
- int GMM::getBestClass ( const NICE::Vector &v, double *bprob )
- {
- int bestclass = 0;
- double maxprob = logpdfState ( v, 0 ) + log ( priors[0] ); // log(P(x|i))
- for ( int i = 1; i < gaussians; i++ )
- {
- double prob = logpdfState ( v, i ) + log ( priors[i] ); // log(P(x|i))
- if ( prob > maxprob )
- {
- maxprob = prob;
- bestclass = i;
- }
- }
- if ( bprob != NULL )
- *bprob = maxprob;
- return bestclass;
- }
- void GMM::getProbs ( const NICE::Vector &vin, SparseVector &outprobs )
- {
- outprobs.clear();
- outprobs.setDim ( gaussians );
- Vector probs;
- getProbs ( vin, probs );
- for ( int i = 0; i < gaussians; i++ )
- {
- if ( probs[i] > 1e-7f )
- outprobs[i] = probs[i];
- }
- }
- void GMM::getProbs ( const NICE::Vector &vin, Vector &outprobs )
- {
- Vector probs;
- probs.resize ( gaussians );
- probs.set ( 0.0 );
- double probsum = 0.0;
- double maxp = -numeric_limits<double>::max();
- for ( int i = 0; i < gaussians; i++ )
- {
- probs[i] = logpdfState ( vin, i ) + log ( priors[i] ); // log(P(x|i))
- maxp = std::max ( maxp, probs[i] );
- }
- for ( int i = 0; i < gaussians; i++ )
- {
- probs[i] -= maxp;
- probs[i] = exp ( probs[i] );
- probsum += probs[i];
- }
- // normalize probs
- #pragma omp parallel for
- for ( int i = 0; i < gaussians; i++ )
- {
- probs[i] /= probsum;
- }
- outprobs = probs;
- }
- void GMM::getFisher ( const NICE::Vector &vin, SparseVector &outprobs )
- {
- int size = gaussians * 2 * dim;
- outprobs.clear();
- outprobs.setDim ( size );
- int counter = 0;
- NICE::Vector classprobs;
- classprobs.resize ( gaussians );
- classprobs.set ( 0.0 );
- double maxp = -numeric_limits<double>::max();
- for ( int i = 0; i < gaussians; i++ )
- {
- classprobs[i] = logpdfState ( vin, i ) + log ( priors[i] ); // log(P(x|i))
- maxp = std::max ( maxp, classprobs[i] );
- }
- for ( int i = 0; i < gaussians; i++ )
- {
- double p = classprobs[i] - maxp;
- p = exp ( p );
- for ( int d = 0; d < dim; d++ )
- {
- double diff = vin[d] - mu[i][d];
- double sigma2 = sparse_sigma[i][d] * sparse_sigma[i][d];
- double sigma3 = sigma2 * sparse_sigma[i][d];
- double normmu = sqrt ( priors[i] / sigma2 );
- double normsig = sqrt ( ( 2.0 * priors[i] ) / sigma2 );
- double gradmu = ( p * ( diff / sigma2 ) ) / normmu;
- double gradsig = ( p * ( ( diff * diff ) / sigma3 - 1.0 / sparse_sigma[i][d] ) ) / normsig;
- if ( fabs ( gradmu ) > 1e-7f )
- outprobs[counter] = gradmu;
- counter++;
- if ( fabs ( gradsig ) > 1e-7f )
- outprobs[counter] = gradsig;
- counter++;
- }
- }
- }
- void GMM::cluster ( const VVector & features, VVector & prototypes, vector<double> & weights, vector<int> & assignment )
- {
- computeMixture ( features );
- prototypes.clear();
- weights.clear();
- assignment.clear();
- for ( int i = 0; i < ( int ) features.size(); i++ )
- {
- int c = getBestClass ( features[i] );
- assignment.push_back ( c );
- weights.push_back ( priors[c] );
- }
- for ( int c = 0; c < gaussians; c++ )
- prototypes.push_back ( mu[c] );
- cout << "tau: " << tau << endl;
- }
- void GMM::saveData ( const std::string filename )
- {
- ofstream fout ( filename.c_str() );
- fout << gaussians << " " << dim << endl;
- mu >> fout;
- fout << endl;
- for ( int n = 0; n < gaussians; n++ )
- {
- fout << sparse_sigma[n] << endl;
- }
- for ( int n = 0; n < gaussians; n++ )
- {
- fout << priors[n] << " ";
- }
- fout.close();
- }
- bool GMM::loadData ( const std::string filename )
- {
- cerr << "read GMM Data" << endl;
- ifstream fin ( filename.c_str() );
- if ( fin.fail() )
- {
- cerr << "GMM: Datei " << filename << " nicht gefunden!" << endl;
- return false;
- }
- fin >> gaussians;
- fin >> dim;
- cdimval = log ( pow ( 2 * 3.14159, dim ) );
- mu.clear();
- for ( int i = 0; i < gaussians; i++ )
- {
- NICE::Vector tmp;
- fin >> tmp;
- mu.push_back ( tmp );
- }
- log_det_sigma.clear();
- for ( int n = 0; n < gaussians; n++ )
- {
- NICE::Matrix _sigma;
- NICE::Vector _sparse_sigma;
- _sparse_sigma = NICE::Vector ( dim );
- fin >> _sparse_sigma;
- sparse_sigma.push_back ( _sparse_sigma );
- sparse_inv_sigma.push_back ( diagInverse ( sparse_sigma[n] ) );
- log_det_sigma.push_back ( logdiagDeterminant ( sparse_sigma[n] ) );
- }
- for ( int n = 0; n < gaussians; n++ )
- {
- double tmpd;
- fin >> tmpd;
- priors.push_back ( tmpd );
- }
- fin.close();
- cerr << "reading GMM Data finished" << endl;
- return true;
- }
- void GMM::getParams ( VVector &mean, VVector &sSigma, vector<double> &p )
- {
- mean = mu;
- sSigma.resize ( gaussians );
- p.clear();
- for ( int i = 0; i < gaussians; i++ )
- {
- sSigma[i] = sparse_sigma[i];
- p.push_back ( priors[i] );
- }
- }
- void GMM::setCompareGM ( VVector mean, VVector sSigma, vector<double> p )
- {
- mu2 = mean;
- sparse_sigma2 = sSigma;
- priors2 = vector<double> ( p );
- }
- double GMM::kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p )
- {
- double d = mu1.size();
- double det1 = 1.0;
- double det2 = 1.0;
- double det3 = 1.0;
- double eval = 0.0;
- for ( int i = 0; i < d; i++ )
- {
- det1 *= sigma1[i];
- det2 *= sigma2[i];
- double sigma = 1.0 / ( ( 1.0 / sigma1[i] + 1.0 / sigma2[i] ) * p );
- det3 *= sigma;
- double mu = p * ( mu1[i] * sigma1[i] + mu2[i] * sigma2[i] );
- eval += 0.5 * mu * sigma * mu - ( p * mu1[i] * mu1[i] ) / ( 2.0 * sigma1[i] ) - ( p * mu2[i] * mu2[i] ) / ( 2.0 * sigma2[i] );
- }
- return ( pow ( 2.0*M_PI, ( ( 1.0 - 2.0*p ) *d ) / 2.0 ) * pow ( det1, -p / 2.0 ) * pow ( det2, -p / 2.0 ) * pow ( det3, 0.5 ) * exp ( eval ) );
- }
- double GMM::compare()
- {
- double distkij = 0.0;
- double distkjj = 0.0;
- double distkii = 0.0;
- for ( int i = 0; i < gaussians; i++ )
- {
- for ( int j = 0; j < gaussians; j++ )
- {
- double kij = kPPK ( sparse_sigma[i], sparse_sigma2[j], mu[i], mu2[j], 0.5 );
- double kii = kPPK ( sparse_sigma[i], sparse_sigma[j], mu[i], mu[j], 0.5 );
- double kjj = kPPK ( sparse_sigma2[i], sparse_sigma2[j], mu2[i], mu2[j], 0.5 );
- kij = priors[i] * priors2[j] * kij;
- kii = priors[i] * priors[j] * kii;
- kjj = priors2[i] * priors2[j] * kjj;
- distkij += kij;
- distkii += kii;
- distkjj += kjj;
- }
- }
- return ( distkij / ( sqrt ( distkii ) *sqrt ( distkjj ) ) );
- }
- void GMM::comparing ( bool c )
- {
- comp = c;
- }
|