123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776 |
- #ifdef NICE_USELIB_OPENMP
- #include <omp.h>
- #endif
- #include <stdio.h>
- #include "GMM.h"
- #include <core/vector/Algorithms.h>
- #include "vislearning/math/cluster/KMeans.h"
- #define DEBUGGMM
- using namespace OBJREC;
- using namespace NICE;
- using namespace std;
- GMM::GMM()
- {
- gaussians = 3;
- maxiter = 200;
- featsperclass = 0;
- tau = 10.0;
- srand ( time (NULL) );
- comp = false;
- }
- GMM::GMM(int _gaussians):gaussians(_gaussians)
- {
- maxiter = 200;
- featsperclass = 0;
- tau = 0.0;
- srand ( time (NULL) );
- pyramid = false;
- comp = false;
- }
- GMM::GMM(const Config *conf, int _gaussians):gaussians(_gaussians)
- {
- this->conf = conf;
-
- if(gaussians < 2)
- gaussians = conf->gI("GMM", "gaussians", 2 );
-
- maxiter = conf->gI("GMM", "maxiter", 200 );
-
- featsperclass = conf->gI("GMM", "featsperclass", 0 );
-
- tau = conf->gD("GMM", "tau", 100.0);
-
- pyramid = conf->gB("GMM", "pyramid", false);
-
- comp = false;
-
- srand ( time (NULL) );
- }
- void GMM::computeMixture(Examples examples)
- {
- int fsize = (int)examples.size();
- assert(fsize >= gaussians);
-
- dim = examples[0].second.vec->size();
-
- int samples = fsize;
- if(featsperclass > 0)
- {
- samples = featsperclass*gaussians;
- samples = std::min(samples, fsize);
- }
-
-
- VVector dataset;
- cout << "reduced training data for GMM from " << fsize << " features to " << samples << " features.";
- for(int i = 0; i < samples; i++)
- {
- int k = rand() % fsize;
- NICE::Vector *vec = examples[k].second.vec;
- dataset.push_back(*vec);
- }
-
- computeMixture(dataset);
- }
- void GMM::computeMixture(const VVector &DataSet)
- {
-
- assert(DataSet.size() >= (uint)gaussians);
-
- srand ( time (NULL) );
-
- bool poweroftwo = false;
- int power = 1;
- while(power <= gaussians)
- {
- if(gaussians == power)
- poweroftwo = true;
- power *= 2;
- }
-
- if(poweroftwo && pyramid)
- {
- initEM(DataSet);
- int g = 1;
-
- while(g <= gaussians)
- {
- cout << "g = " << g << endl;
- doEM(DataSet, g);
-
- if(2*g <= gaussians)
- {
- for(int i = g; i < g*2; i++)
- {
- mu[i] = mu[i-g];
- sparse_sigma[i] = sparse_sigma[i-g];
- sparse_inv_sigma[i] = sparse_inv_sigma[i-g];
- log_det_sigma[i] = log_det_sigma[i-g];
- priors[i] = priors[i-g];
-
- double interval = 1.0;
- for(int k = 0; k < (int)mu[i].size(); k++)
- {
- interval = mu[i][k];
- interval = std::max(interval, 0.1);
- double r = (interval*((double)rand()/(double)RAND_MAX))-interval/2.0;
- mu[i][k] += r;
- }
- }
- }
- g *= 2;
- }
- }
- else
- {
- initEMkMeans(DataSet);
- doEM(DataSet, gaussians);
- }
-
- }
- inline double diagDeterminant(const NICE::Vector &sparse_mat)
- {
- double det = 1.0;
- for(int i = 0; i < (int)sparse_mat.size(); i++)
- {
- det *= sparse_mat[i];
- }
-
- return det;
- }
- inline double logdiagDeterminant(const NICE::Vector &sparse_mat)
- {
- double det = 0.0;
- for(int i = 0; i < (int)sparse_mat.size(); i++)
- {
- det += log(sparse_mat[i]);
- }
- return det;
- }
- inline NICE::Vector diagInverse(const NICE::Vector &sparse_mat)
- {
- NICE::Vector inv(sparse_mat.size());
- for(int i = 0; i < (int)sparse_mat.size(); i++)
- {
- inv[i] = 1.0/sparse_mat[i];
- }
- return inv;
- }
- void GMM::initEMkMeans(const VVector &DataSet)
- {
-
- KMeans k( gaussians );
- VVector means;
- vector<double> weights;
- vector<int> assignment;
- k.cluster ( DataSet, means, weights, assignment );
-
- int nData = DataSet.size();
- this->dim = DataSet[0].size();
- cdimval = dim*log(2*3.14159);
- vector<int> pop(gaussians, 0);
- priors.resize(gaussians);
- mu = VVector(gaussians, dim);
- log_det_sigma.clear();
- vector<int> allk;
-
- NICE::Vector globmean(dim);
- globmean.set(0.0);
-
- for(int n = 0; n < (int)DataSet.size(); n++)
- {
- globmean = globmean + DataSet[n];
- }
- globmean *= (1.0/nData);
-
- NICE::Vector sparse_globsigma;
- sparse_globsigma.resize(dim);
- sparse_globsigma.set(0.0);
-
- for(int i = 0; i < (int)DataSet.size(); i++)
- {
-
- for(int d = 0; d < dim; d++)
- {
- double diff = (DataSet[i][d] - globmean[d]);
- sparse_globsigma[d] += diff*diff;
- }
- }
-
- sparse_globsigma *= (1.0/DataSet.size());
-
- for(int i = 0; i < gaussians; i++)
- {
- NICE::Vector _inv_sigma = diagInverse(sparse_globsigma);
- sparse_sigma.push_back(sparse_globsigma);
- sparse_inv_sigma.push_back(_inv_sigma);
- log_det_sigma.push_back(logdiagDeterminant(sparse_globsigma));
- mu[i] = means[i];
-
- priors[i]=weights[i];
- }
- }
- void GMM::initEM(const VVector &DataSet)
- {
-
- int nData = DataSet.size();
- this->dim = DataSet[0].size();
- cdimval = dim*log(2*3.14159);
- vector<int> pop(gaussians, 0);
- priors.resize(gaussians);
- mu = VVector(gaussians, dim);
- log_det_sigma.clear();
- vector<int> allk;
-
- NICE::Vector globmean(dim);
- globmean.set(0.0);
-
- for(int n = 0; n < (int)DataSet.size(); n++)
- {
- globmean = globmean + DataSet[n];
- }
- globmean *= (1.0/nData);
-
- NICE::Vector sparse_globsigma;
- sparse_globsigma.resize(dim);
- sparse_globsigma.set(0.0);
-
- for(int i = 0; i < (int)DataSet.size(); i++)
- {
-
- for(int d = 0; d < dim; d++)
- {
- double diff = (DataSet[i][d] - globmean[d]);
- sparse_globsigma[d] += diff*diff;
- }
- }
-
- sparse_globsigma *= (1.0/DataSet.size());
-
- for(int i = 0; i < gaussians; i++)
- {
- priors[i]=1.0/(double)gaussians;
- NICE::Vector _inv_sigma = diagInverse(sparse_globsigma);
- sparse_sigma.push_back(sparse_globsigma);
- sparse_inv_sigma.push_back(_inv_sigma);
- log_det_sigma.push_back(logdiagDeterminant(sparse_globsigma));
- bool newv = false;
- int k;
- while(!newv)
- {
- newv = true;
- k = rand() % nData;
-
- for(int nk = 0; nk < (int)allk.size(); nk++)
- if(allk[nk] == k)
- {
- newv = false;
- }
- if(newv)
- allk.push_back(k);
- }
- mu[i] = DataSet[k];
- }
- }
- inline void sumRow(NICE::Matrix mat, NICE::Vector &res)
- {
- int row = mat.rows();
- int column = mat.cols();
- res =NICE::Vector(column);
- res.set(1e-5f);
- for(int i = 0; i < column; i++){
- for(int j = 0; j < row; j++){
- res[i] += mat(j, i);
- }
- }
- }
- double GMM::logpdfState(const NICE::Vector &Vin,int state)
- {
-
- double p;
- NICE::Vector dif;
-
- dif = Vin;
- dif -= mu[state];
-
- p = 0.0;
- for(int i = 0; i < (int)dif.size(); i++)
- {
- p += dif[i] * dif[i] * sparse_inv_sigma[state][i];
- }
-
- p = -0.5*(p + cdimval + log_det_sigma[state]);
- return p;
- }
- int GMM::doEM(const VVector &DataSet, int nbgaussians)
- {
-
-
- #ifdef DEBUG
- cerr << "GMM::start EM" << endl;
- #endif
-
- int nData = DataSet.size();
- int iter = 0;
- double log_lik;
- double log_lik_threshold = 1e-6f;
- double log_lik_old = -1e10f;
- NICE::Matrix unity(dim,dim,0.0);
- for(int k = 0; k < dim; k++)
- unity(k, k)=1.0;
-
-
- while(true)
- {
- #ifdef DEBUGGMM
- cerr << "GMM::EM: iteration: " << iter << endl;
- #endif
-
- vector<double> sum_p;
- sum_p.resize(nData);
- for(int i = 0; i < nData; i++)
- {
- sum_p[i] = 0.0;
- }
-
- NICE::Matrix pxi(nData,gaussians);
- pxi.set(0.0);
- NICE::Matrix pix(nData,gaussians);
- pix.set(0.0);
- NICE::Vector E;
-
- iter++;
- if (iter>maxiter){
- cerr << "EM stops here. Max number of iterations (" << maxiter << ") has been reached." << endl;
- return iter;
- }
- double sum_log = 0.0;
-
-
- double maxp = -numeric_limits<double>::max();
- vector<double> maxptmp(nData,-numeric_limits<double>::max());
- #pragma omp parallel for
- for(int i = 0; i < nData; i++)
- {
- for(int j = 0; j < nbgaussians; j++)
- {
- double p = logpdfState(DataSet[i],j);
- maxptmp[i] = std::max(maxptmp[i], p);
- pxi(i, j) = p;
- }
- }
-
- for(int i = 0; i < nData; i++)
- {
- maxp = std::max(maxp, maxptmp[i]);
- }
- #pragma omp parallel for
- for(int i = 0; i < nData; i++)
- {
- sum_p[i] = 0.0;
- for(int j = 0; j < nbgaussians; j++)
- {
- double p = pxi(i, j) - maxp;
- p = exp(p);
-
- if(p < 1e-40)
- p = 1e-40;
- pxi(i, j) = p;
- sum_p[i] += p*priors[j];
- }
- }
-
- for(int i = 0; i < nData; i++)
- {
- sum_log += log(sum_p[i]);
- }
-
- #pragma omp parallel for
- for(int j = 0; j < nbgaussians; j++)
- {
- for(int i = 0; i < nData; i++)
- {
- pix(i, j) = (pxi(i, j)*priors[j])/sum_p[i];
- }
- }
-
- log_lik = sum_log/nData;
- #ifdef DEBUGGMM
- cout << "diff: " << fabs((log_lik/log_lik_old)-1) << " thresh: " << log_lik_threshold << " sum: " << sum_log << endl;
-
- #endif
-
- if(fabs((log_lik/log_lik_old)-1) < log_lik_threshold )
- {
-
- return iter;
- }
- log_lik_old = log_lik;
-
- sumRow(pix,E);
-
- #pragma omp parallel for
- for(int j = 0; j < nbgaussians; j++)
- {
- priors[j] = (E[j]+tau)/(nData+tau*nbgaussians);
- NICE::Vector tmu(dim);
- tmu.set(0.0);
- NICE::Vector sparse_tmsigma(dim);
- sparse_tmsigma.set(0.0);
- for(int i = 0; i < nData; i++)
- {
- tmu = tmu + (DataSet[i]*pix(i,j));
- }
-
- NICE::Vector tmu2 = mu[j];
- mu[j] = tmu + tau*tmu2;
- mu[j] = mu[j]*(1.0/(E[j]+tau));
-
- for(int i = 0; i < nData; i++)
- {
-
- for(int d = 0; d < dim; d++)
- {
- sparse_tmsigma[d] += DataSet[i][d]*DataSet[i][d]*pix(i, j);
- }
- }
-
- NICE::Vector sparse_tmsigma2(dim);
- for(int d = 0; d < dim; d++)
- {
- sparse_tmsigma[d] += 1e-5f;
- sparse_tmsigma2[d] = sparse_sigma[j][d]+tmu2[d]*tmu2[d];
- sparse_sigma[j][d] = (sparse_tmsigma[d]+tau*sparse_tmsigma2[d])/(E[j]+tau)-(mu[j][d]*mu[j][d]);
- }
- sparse_inv_sigma[j] = diagInverse(sparse_sigma[j]);
-
- log_det_sigma[j] = logdiagDeterminant(sparse_sigma[j]);
- }
- if(comp)
- {
- double dist = compare();
- cout << "dist for iteration " << iter << endl;
- cout << "d: " << dist << endl;
- }
- }
- #ifdef DEBUG
- cerr << "GMM::finished EM after " << iter << " iterations" << endl;
- #endif
- return iter;
- }
- int GMM::getBestClass(const NICE::Vector &v, double *bprob)
- {
- int bestclass = 0;
- double maxprob = logpdfState(v,0)+log(priors[0]);
-
- for(int i = 1; i < gaussians; i++)
- {
- double prob = logpdfState(v,i)+log(priors[i]);
-
- if(prob > maxprob)
- {
- maxprob = prob;
- bestclass = i;
- }
- }
-
- if(bprob != NULL)
- *bprob = maxprob;
-
- return bestclass;
- }
-
- void GMM::getProbs(const NICE::Vector &vin, SparseVector &outprobs)
- {
- outprobs.clear();
- outprobs.setDim(gaussians);
- Vector probs;
- getProbs(vin, probs);
- for(int i = 0; i < gaussians; i++)
- {
- if(probs[i] > 1e-7f )
- outprobs[i] = probs[i];
- }
- }
-
- void GMM::getProbs(const NICE::Vector &vin, Vector &outprobs)
- {
- Vector probs;
- probs.resize(gaussians);
- probs.set(0.0);
-
- double probsum = 0.0;
- double maxp = -numeric_limits<double>::max();
- for(int i = 0; i < gaussians; i++)
- {
- probs[i] = logpdfState(vin, i) + log(priors[i]);
- maxp = std::max(maxp, probs[i]);
- }
-
- for(int i = 0; i < gaussians; i++)
- {
- probs[i] -= maxp;
- probs[i] = exp(probs[i]);
- probsum += probs[i];
- }
-
- #pragma omp parallel for
- for(int i = 0; i < gaussians; i++)
- {
- probs[i] /= probsum;
- }
- outprobs = probs;
- }
- void GMM::getFisher(const NICE::Vector &vin, SparseVector &outprobs)
- {
- int size = gaussians*2*dim;
- outprobs.clear();
- outprobs.setDim(size);
-
- int counter = 0;
-
- NICE::Vector classprobs;
- classprobs.resize(gaussians);
- classprobs.set(0.0);
- double maxp = -numeric_limits<double>::max();
- for(int i = 0; i < gaussians; i++)
- {
- classprobs[i] = logpdfState(vin, i) + log(priors[i]);
- maxp = std::max(maxp, classprobs[i]);
- }
- for(int i = 0; i < gaussians; i++)
- {
- double p = classprobs[i] - maxp;
-
- p = exp(p);
-
- for(int d = 0; d < dim; d++)
- {
- double diff = vin[d]-mu[i][d];
-
- double sigma2 = sparse_sigma[i][d]*sparse_sigma[i][d];
- double sigma3 = sigma2*sparse_sigma[i][d];
- double normmu = sqrt(priors[i]/sigma2);
- double normsig = sqrt((2.0*priors[i])/sigma2);
-
- double gradmu = (p * (diff/sigma2))/normmu;
- double gradsig = (p * ((diff*diff)/sigma3 - 1.0/sparse_sigma[i][d])) /normsig;
- if(fabs(gradmu) > 1e-7f)
- outprobs[counter] = gradmu;
- counter++;
-
- if(fabs(gradsig) > 1e-7f)
- outprobs[counter] = gradsig;
- counter++;
- }
- }
- }
- void GMM::cluster ( const VVector & features, VVector & prototypes, vector<double> & weights, vector<int> & assignment )
- {
- computeMixture(features);
- prototypes.clear();
- weights.clear();
- assignment.clear();
- for(int i = 0; i < (int)features.size(); i++)
- {
- int c = getBestClass(features[i]);
- assignment.push_back(c);
-
- weights.push_back(priors[c]);
- }
- for(int c = 0; c < gaussians; c++)
- prototypes.push_back(mu[c]);
-
- cout << "tau: " << tau << endl;
- }
- void GMM::saveData(const std::string filename)
- {
- ofstream fout( filename.c_str() );
- fout << gaussians << " " << dim << endl;
-
- mu >> fout;
- fout << endl;
-
- for(int n = 0; n < gaussians; n++)
- {
- fout << sparse_sigma[n] << endl;
- }
-
- for(int n = 0; n < gaussians; n++)
- {
- fout << priors[n] << " ";
- }
- fout.close();
- }
- bool GMM::loadData(const std::string filename)
- {
- cerr << "read GMM Data" << endl;
- ifstream fin( filename.c_str() );
- if(fin.fail())
- {
- cerr << "GMM: Datei " << filename << " nicht gefunden!" << endl;
- return false;
- }
- fin >> gaussians;
- fin >> dim;
- cdimval = log(pow(2*3.14159,dim));
- mu.clear();
- for(int i = 0; i < gaussians; i++)
- {
- NICE::Vector tmp;
- fin >> tmp;
- mu.push_back(tmp);
- }
-
- log_det_sigma.clear();
- for(int n = 0; n < gaussians; n++)
- {
- NICE::Matrix _sigma;
- NICE::Vector _sparse_sigma;
-
- _sparse_sigma =NICE::Vector(dim);
- fin >> _sparse_sigma;
- sparse_sigma.push_back(_sparse_sigma);
-
- sparse_inv_sigma.push_back(diagInverse(sparse_sigma[n]));
- log_det_sigma.push_back(logdiagDeterminant(sparse_sigma[n]));
- }
- for(int n = 0; n < gaussians; n++)
- {
- double tmpd;
- fin >> tmpd;
- priors.push_back(tmpd);
- }
- fin.close();
- cerr << "reading GMM Data finished" << endl;
- return true;
- }
- void GMM::getParams(VVector &mean, VVector &sSigma, vector<double> &p)
- {
- mean = mu;
- sSigma.resize(gaussians);
- p.clear();
- for(int i = 0; i < gaussians; i++)
- {
- sSigma[i] = sparse_sigma[i];
- p.push_back(priors[i]);
- }
- }
- void GMM::setCompareGM(VVector mean, VVector sSigma, vector<double> p)
- {
- mu2 = mean;
- sparse_sigma2 = sSigma;
- priors2 = vector<double>(p);
- }
-
- double GMM::kPPK(NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p)
- {
- double d = mu1.size();
-
- double det1 = 1.0;
- double det2 = 1.0;
- double det3 = 1.0;
- double eval = 0.0;
- for(int i = 0; i < d; i++)
- {
- det1 *= sigma1[i];
- det2 *= sigma2[i];
- double sigma = 1.0/((1.0/sigma1[i]+1.0/sigma2[i])*p);
- det3 *= sigma;
- double mu = p*(mu1[i]*sigma1[i]+mu2[i]*sigma2[i]);
- eval += 0.5*mu*sigma*mu - (p*mu1[i]*mu1[i])/(2.0*sigma1[i]) - (p*mu2[i]*mu2[i]) / (2.0*sigma2[i]);
- }
-
- return (pow(2.0*M_PI, ((1.0-2.0*p)*d)/2.0) * pow(det1, -p/2.0) * pow(det2, -p/2.0) * pow(det3, 0.5) * exp(eval));
- }
- double GMM::compare()
- {
- double distkij = 0.0;
- double distkjj = 0.0;
- double distkii = 0.0;
- for(int i = 0; i < gaussians; i++)
- {
- for(int j = 0; j < gaussians; j++)
- {
- double kij = kPPK(sparse_sigma[i],sparse_sigma2[j], mu[i], mu2[j], 0.5);
- double kii = kPPK(sparse_sigma[i],sparse_sigma[j], mu[i], mu[j], 0.5);
- double kjj = kPPK(sparse_sigma2[i],sparse_sigma2[j], mu2[i], mu2[j], 0.5);
- kij = priors[i]*priors2[j]*kij;
- kii = priors[i]*priors[j]*kii;
- kjj = priors2[i]*priors2[j]*kjj;
- distkij += kij;
- distkii += kii;
- distkjj += kjj;
- }
- }
- return (distkij / (sqrt(distkii)*sqrt(distkjj)));
- }
- void GMM::comparing(bool c)
- {
- comp = c;
- }
|