Forráskód Böngészése

making GMM persistent, slightly refactoring GMM layout, first (incomplete) GMM Unit Test

Alexander Freytag 11 éve
szülő
commit
a711eff255

+ 478 - 84
math/cluster/GMM.cpp

@@ -15,57 +15,115 @@ using namespace OBJREC;
 using namespace NICE;
 using namespace std;
 
-GMM::GMM()
+///////////////////// ///////////////////// /////////////////////
+//                   CONSTRUCTORS / DESTRUCTORS
+///////////////////// ///////////////////// /////////////////////
+
+GMM::GMM() : ClusterAlgorithm()
 {
-  gaussians = 3;
-  maxiter = 200;
-  featsperclass = 0;
-  tau = 10.0;
+  this->i_numOfGaussians = 3;
+  this->dim = -1;
+  
+  this->mu.clear();
+  this->sparse_sigma.clear();
+  this->priors.clear();  
+  this->sparse_inv_sigma.clear();
+  this->log_det_sigma.clear();
+
+  
+  this->mu2.clear();
+  this->sparse_sigma2.clear();
+  this->priors2.clear();  
+  this->b_compareTo2ndGMM = false;
+  
+  this->maxiter = 200;
+  this->featsperclass = 0;
+  this->cdimval = -1; //TODO
+  this->tau = 10.0;
+  this->pyramid = false;
+
   srand ( time ( NULL ) );
-  comp = false;
 }
 
-GMM::GMM ( int _gaussians ) : gaussians ( _gaussians )
+GMM::GMM ( int _numOfGaussians ) : i_numOfGaussians ( _numOfGaussians )
 {
-  maxiter = 200;
-  featsperclass = 0;
-  tau = 0.0;
+  
+  this->dim = -1;
+  
+  this->mu.clear();
+  this->sparse_sigma.clear();
+  this->priors.clear();  
+  this->sparse_inv_sigma.clear();
+  this->log_det_sigma.clear();  
+  
+  this->mu2.clear();
+  this->sparse_sigma2.clear();
+  this->priors2.clear();  
+  this->b_compareTo2ndGMM = false;
+  
+  this->maxiter = 200;
+  this->featsperclass = 0;
+  this->tau = 0.0;
+  this->pyramid = false;
+
   srand ( time ( NULL ) );
-  pyramid = false;
-  comp = false;
 }
 
-GMM::GMM ( const Config *conf, int _gaussians ) : gaussians ( _gaussians )
+GMM::GMM ( const Config * _conf, int _numOfGaussians ) : i_numOfGaussians ( _numOfGaussians )
 {
-  this->conf = conf;
-
-  if ( gaussians < 2 )
-    gaussians = conf->gI ( "GMM", "gaussians", 2 );
-
-  maxiter = conf->gI ( "GMM", "maxiter", 200 );
-
-  featsperclass = conf->gI ( "GMM", "featsperclass", 0 );
+  this->initFromConfig( _conf );
+}
 
-  tau = conf->gD ( "GMM", "tau", 100.0 );
+GMM::GMM ( const Config * _conf, const std::string& _confSection )
+{
+  this->initFromConfig( _conf, _confSection );
+}
 
-  pyramid = conf->gB ( "GMM", "pyramid", false );
+GMM::~GMM()
+{
+}
 
-  comp = false;
+void GMM::initFromConfig( const NICE::Config* _conf, const std::string& _confSection )
+{
+  if ( this->i_numOfGaussians < 2 )
+    this->i_numOfGaussians = _conf->gI ( _confSection, "i_numOfGaussians", 2 );
+  
+  this->dim = -1;
+  
+  this->mu.clear();
+  this->sparse_sigma.clear();
+  this->priors.clear();  
+  this->sparse_inv_sigma.clear();
+  this->log_det_sigma.clear();  
+  
+  this->mu2.clear();
+  this->sparse_sigma2.clear();
+  this->priors2.clear();  
+  
+  this->b_compareTo2ndGMM = false;
+  this->maxiter = _conf->gI ( _confSection, "maxiter", 200 );
+  this->featsperclass = _conf->gI ( _confSection, "featsperclass", 0 );
+  this->tau = _conf->gD ( _confSection, "tau", 100.0 );
+  this->pyramid = _conf->gB ( _confSection, "pyramid", false );  
 
   srand ( time ( NULL ) );
 }
 
+///////////////////// ///////////////////// /////////////////////
+//                      CLUSTERING STUFF
+///////////////////// ///////////////////// ////////////////// 
+
 void GMM::computeMixture ( Examples examples )
 {
   int fsize = ( int ) examples.size();
-  assert ( fsize >= gaussians );
+  assert ( fsize >= i_numOfGaussians );
 
   dim = examples[0].second.vec->size();
 
   int samples = fsize;
   if ( featsperclass > 0 )
   {
-    samples = featsperclass * gaussians;
+    samples = featsperclass * i_numOfGaussians;
     samples = std::min ( samples, fsize );
   }
 
@@ -86,15 +144,15 @@ void GMM::computeMixture ( Examples examples )
 void GMM::computeMixture ( const VVector &DataSet )
 {
   // Learn the GMM model
-  assert ( DataSet.size() >= ( uint ) gaussians );
+  assert ( DataSet.size() >= ( uint ) i_numOfGaussians );
   //initEMkMeans(DataSet); // initialize the model
   srand ( time ( NULL ) );
 
   bool poweroftwo = false;
   int power = 1;
-  while ( power <= gaussians )
+  while ( power <= i_numOfGaussians )
   {
-    if ( gaussians == power )
+    if ( i_numOfGaussians == power )
       poweroftwo = true;
     power *= 2;
   }
@@ -104,12 +162,12 @@ void GMM::computeMixture ( const VVector &DataSet )
     initEM ( DataSet ); // initialize the model
     int g = 1;
 
-    while ( g <= gaussians )
+    while ( g <= i_numOfGaussians )
     {
       cout << "g = " << g << endl;
       doEM ( DataSet, g );
 
-      if ( 2*g <= gaussians )
+      if ( 2*g <= i_numOfGaussians )
       {
         for ( int i = g; i < g*2; i++ )
         {
@@ -135,7 +193,7 @@ void GMM::computeMixture ( const VVector &DataSet )
   else
   {
     initEMkMeans ( DataSet ); // initialize the model
-    doEM ( DataSet, gaussians );
+    doEM ( DataSet, i_numOfGaussians );
   }
   // performs EM
 }
@@ -175,7 +233,7 @@ inline NICE::Vector diagInverse ( const NICE::Vector &sparse_mat )
 void GMM::initEMkMeans ( const VVector &DataSet )
 {
   /*init GMM with k-Means*/
-  OBJREC::KMeans k ( gaussians );
+  OBJREC::KMeans k ( i_numOfGaussians );
   NICE::VVector means;
   std::vector<double> weights;
   std::vector<int> assignment;
@@ -184,9 +242,9 @@ void GMM::initEMkMeans ( const VVector &DataSet )
   int nData = DataSet.size();
   this->dim = DataSet[0].size();
   cdimval = dim * log ( 2 * 3.14159 );
-  std::vector<int> pop ( gaussians, 0 );
-  priors.resize ( gaussians );
-  mu = VVector ( gaussians, dim );
+  std::vector<int> pop ( i_numOfGaussians, 0 );
+  priors.resize ( i_numOfGaussians );
+  mu = VVector ( i_numOfGaussians, dim );
   log_det_sigma.clear();
   vector<int> allk;
 
@@ -217,7 +275,7 @@ void GMM::initEMkMeans ( const VVector &DataSet )
 
   sparse_globsigma *= ( 1.0 / DataSet.size() );
 
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );
 
@@ -225,7 +283,7 @@ void GMM::initEMkMeans ( const VVector &DataSet )
     sparse_inv_sigma.push_back ( _inv_sigma );
     log_det_sigma.push_back ( logdiagDeterminant ( sparse_globsigma ) );
     mu[i] = means[i];
-    //priors[i]=1.0/(double)gaussians; // set equi-probables states
+    //priors[i]=1.0/(double)i_numOfGaussians; // set equi-probables states
     priors[i] = weights[i]; // set kMeans weights
   }
 }
@@ -237,9 +295,9 @@ void GMM::initEM ( const VVector &DataSet )
   int nData = DataSet.size();
   this->dim = DataSet[0].size();
   cdimval = dim * log ( 2 * 3.14159 );
-  vector<int> pop ( gaussians, 0 );
-  priors.resize ( gaussians );
-  mu = VVector ( gaussians, dim );
+  vector<int> pop ( i_numOfGaussians, 0 );
+  priors.resize ( i_numOfGaussians );
+  mu = VVector ( i_numOfGaussians, dim );
   log_det_sigma.clear();
   vector<int> allk;
 
@@ -270,9 +328,9 @@ void GMM::initEM ( const VVector &DataSet )
 
   sparse_globsigma *= ( 1.0 / DataSet.size() );
 
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
-    priors[i] = 1.0 / ( double ) gaussians; // set equi-probables states
+    priors[i] = 1.0 / ( double ) i_numOfGaussians; // set equi-probables states
     NICE::Vector _inv_sigma = diagInverse ( sparse_globsigma );
 
     sparse_sigma.push_back ( sparse_globsigma );
@@ -366,9 +424,9 @@ int GMM::doEM ( const VVector &DataSet, int nbgaussians )
       sum_p[i] = 0.0;
     }
 
-    NICE::Matrix pxi ( nData, gaussians );
+    NICE::Matrix pxi ( nData, i_numOfGaussians );
     pxi.set ( 0.0 );
-    NICE::Matrix pix ( nData, gaussians );
+    NICE::Matrix pix ( nData, i_numOfGaussians );
     pix.set ( 0.0 );
     NICE::Vector E;
 
@@ -488,11 +546,11 @@ int GMM::doEM ( const VVector &DataSet, int nbgaussians )
 
       log_det_sigma[j] = logdiagDeterminant ( sparse_sigma[j] );
     }
-    if ( comp )
+    if ( b_compareTo2ndGMM )
     {
-      double dist = compare();
-      cout << "dist for iteration " << iter << endl;
-      cout << "d: " << dist << endl;
+      double dist = this->compareTo2ndGMM();
+      std::cout << "dist for iteration " << iter << std::endl;
+      std::cout << "d: " << dist << std::endl;
     }
   }
 #ifdef DEBUG
@@ -506,7 +564,7 @@ int GMM::getBestClass ( const NICE::Vector &v, double *bprob )
   int bestclass = 0;
   double maxprob = logpdfState ( v, 0 ) + log ( priors[0] );  // log(P(x|i))
 
-  for ( int i = 1; i < gaussians; i++ )
+  for ( int i = 1; i < i_numOfGaussians; i++ )
   {
     double prob = logpdfState ( v, i ) + log ( priors[i] );  // log(P(x|i))
 
@@ -526,10 +584,10 @@ int GMM::getBestClass ( const NICE::Vector &v, double *bprob )
 void GMM::getProbs ( const NICE::Vector &vin, SparseVector &outprobs )
 {
   outprobs.clear();
-  outprobs.setDim ( gaussians );
+  outprobs.setDim ( i_numOfGaussians );
   Vector probs;
   getProbs ( vin, probs );
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     if ( probs[i] > 1e-7f )
       outprobs[i] = probs[i];
@@ -539,18 +597,18 @@ void GMM::getProbs ( const NICE::Vector &vin, SparseVector &outprobs )
 void GMM::getProbs ( const NICE::Vector &vin, Vector &outprobs )
 {
   Vector probs;
-  probs.resize ( gaussians );
+  probs.resize ( i_numOfGaussians );
   probs.set ( 0.0 );
 
   double probsum = 0.0;
   double maxp = -numeric_limits<double>::max();
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     probs[i] = logpdfState ( vin, i ) + log ( priors[i] );  // log(P(x|i))
     maxp = std::max ( maxp, probs[i] );
   }
 
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     probs[i] -= maxp;
     probs[i] = exp ( probs[i] );
@@ -559,7 +617,7 @@ void GMM::getProbs ( const NICE::Vector &vin, Vector &outprobs )
 
   // normalize probs
 #pragma omp parallel for
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     probs[i] /= probsum;
   }
@@ -568,25 +626,25 @@ void GMM::getProbs ( const NICE::Vector &vin, Vector &outprobs )
 
 void GMM::getFisher ( const NICE::Vector &vin, SparseVector &outprobs )
 {
-  int size = gaussians * 2 * dim;
+  int size = i_numOfGaussians * 2 * dim;
   outprobs.clear();
   outprobs.setDim ( size );
 
   int counter = 0;
 
   NICE::Vector classprobs;
-  classprobs.resize ( gaussians );
+  classprobs.resize ( i_numOfGaussians );
   classprobs.set ( 0.0 );
 
   double maxp = -numeric_limits<double>::max();
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     classprobs[i] = logpdfState ( vin, i ) + log ( priors[i] );  // log(P(x|i))
 
     maxp = std::max ( maxp, classprobs[i] );
   }
 
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     double p = classprobs[i] - maxp;
 
@@ -629,7 +687,7 @@ void GMM::cluster ( const VVector & features, VVector & prototypes, vector<doubl
 
     weights.push_back ( priors[c] );
   }
-  for ( int c = 0; c < gaussians; c++ )
+  for ( int c = 0; c < i_numOfGaussians; c++ )
     prototypes.push_back ( mu[c] );
 
   cout << "tau: " << tau << endl;
@@ -639,17 +697,17 @@ void GMM::saveData ( const std::string filename )
 {
   ofstream fout ( filename.c_str() );
 
-  fout << gaussians << " " << dim << endl;
+  fout << i_numOfGaussians << " " << dim << endl;
 
   mu >> fout;
   fout << endl;
 
-  for ( int n = 0; n < gaussians; n++ )
+  for ( int n = 0; n < i_numOfGaussians; n++ )
   {
     fout << sparse_sigma[n] << endl;
   }
 
-  for ( int n = 0; n < gaussians; n++ )
+  for ( int n = 0; n < i_numOfGaussians; n++ )
   {
     fout << priors[n] << " ";
   }
@@ -668,12 +726,12 @@ bool GMM::loadData ( const std::string filename )
     return false;
   }
 
-  fin >> gaussians;
+  fin >> i_numOfGaussians;
   fin >> dim;
   cdimval = log ( pow ( 2 * 3.14159, dim ) );
   mu.clear();
 
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
     NICE::Vector tmp;
     fin >> tmp;
@@ -682,7 +740,7 @@ bool GMM::loadData ( const std::string filename )
 
   log_det_sigma.clear();
 
-  for ( int n = 0; n < gaussians; n++ )
+  for ( int n = 0; n < i_numOfGaussians; n++ )
   {
     NICE::Matrix _sigma;
     NICE::Vector _sparse_sigma;
@@ -695,7 +753,7 @@ bool GMM::loadData ( const std::string filename )
     log_det_sigma.push_back ( logdiagDeterminant ( sparse_sigma[n] ) );
   }
 
-  for ( int n = 0; n < gaussians; n++ )
+  for ( int n = 0; n < i_numOfGaussians; n++ )
   {
     double tmpd;
     fin >> tmpd;
@@ -707,26 +765,26 @@ bool GMM::loadData ( const std::string filename )
   return true;
 }
 
-void GMM::getParams ( VVector &mean, VVector &sSigma, vector<double> &p )
+void GMM::getParams ( VVector &mean, VVector &sSigma, vector<double> &p ) const
 {
-  mean = mu;
-  sSigma.resize ( gaussians );
+  mean = this->mu;
+  sSigma.resize ( this->i_numOfGaussians );
   p.clear();
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < this->i_numOfGaussians; i++ )
   {
-    sSigma[i] = sparse_sigma[i];
-    p.push_back ( priors[i] );
+    sSigma[i] = this->sparse_sigma[i];
+    p.push_back ( this->priors[i] );
   }
 }
 
-void GMM::setCompareGM ( VVector mean, VVector sSigma, vector<double> p )
+void GMM::setGMMtoCompareWith ( NICE::VVector mean, NICE::VVector sSigma, std::vector<double> p )
 {
-  mu2 = mean;
-  sparse_sigma2 = sSigma;
-  priors2 = vector<double> ( p );
+  this->mu2           = mean;
+  this->sparse_sigma2 = sSigma;
+  this->priors2       = std::vector<double> ( p );
 }
 
-double GMM::kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p )
+double GMM::kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p ) const
 {
   double d = mu1.size();
 
@@ -747,14 +805,14 @@ double GMM::kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, N
   return ( pow ( 2.0*M_PI, ( ( 1.0 - 2.0*p ) *d ) / 2.0 ) * pow ( det1, -p / 2.0 ) * pow ( det2, -p / 2.0 ) * pow ( det3, 0.5 ) * exp ( eval ) );
 }
 
-double GMM::compare()
+double GMM::compareTo2ndGMM() const
 {
   double distkij = 0.0;
   double distkjj = 0.0;
   double distkii = 0.0;
-  for ( int i = 0; i < gaussians; i++ )
+  for ( int i = 0; i < i_numOfGaussians; i++ )
   {
-    for ( int j = 0; j < gaussians; j++ )
+    for ( int j = 0; j < i_numOfGaussians; j++ )
     {
       double kij = kPPK ( sparse_sigma[i], sparse_sigma2[j], mu[i], mu2[j], 0.5 );
       double kii = kPPK ( sparse_sigma[i], sparse_sigma[j], mu[i], mu[j], 0.5 );
@@ -770,7 +828,343 @@ double GMM::compare()
   return ( distkij / ( sqrt ( distkii ) *sqrt ( distkjj ) ) );
 }
 
-void GMM::comparing ( bool c )
+void GMM::setCompareTo2ndGMM ( const bool & _compareTo2ndGMM )
+{
+  this->b_compareTo2ndGMM = _compareTo2ndGMM;
+}
+
+int GMM::getNumberOfGaussians() const
+{
+  return this->i_numOfGaussians;
+}
+
+
+
+///////////////////// INTERFACE PERSISTENT /////////////////////
+// interface specific methods for store and restore
+///////////////////// INTERFACE PERSISTENT ///////////////////// 
+
+void GMM::restore ( std::istream & is, int format )
 {
-  comp = c;
+  //delete everything we knew so far...
+  this->clear();
+  
+  
+  if ( is.good() )
+  {
+    
+    std::string tmp;
+    is >> tmp; //class name 
+    
+    if ( ! this->isStartTag( tmp, "GMM" ) )
+    {
+      std::cerr << " WARNING - attempt to restore GMM, but start flag " << tmp << " does not match! Aborting... " << std::endl;
+      throw;
+    }   
+    
+    bool b_endOfBlock ( false ) ;
+    
+    while ( !b_endOfBlock )
+    {
+      is >> tmp; // start of block 
+      
+      if ( this->isEndTag( tmp, "GMM" ) )
+      {
+        b_endOfBlock = true;
+        continue;
+      }      
+      
+      tmp = this->removeStartTag ( tmp );  
+   
+      
+      if ( tmp.compare("i_numOfGaussians") == 0 )
+      {
+        is >> this->i_numOfGaussians;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }
+      else if ( tmp.compare("dim") == 0 )
+      {
+        is >> this->dim;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }
+      else if ( tmp.compare("mu") == 0 )
+      {
+        this->mu.clear();          
+        this->mu.setIoUntilEndOfFile ( false );
+        this->mu.restore ( is, format );
+      
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }       
+      else if ( tmp.compare("sparse_sigma") == 0 )
+      {
+        this->sparse_sigma.clear();   
+        this->sparse_sigma.setIoUntilEndOfFile ( false );
+        this->sparse_sigma.restore ( is, format );
+        
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("priors") == 0 )
+      {
+        int sizeOfPriors;
+        is >> sizeOfPriors;
+        
+        this->priors.resize ( sizeOfPriors );
+        for ( std::vector< double >::iterator itPriors = this->priors.begin();
+             itPriors != this->priors.end(); 
+             itPriors++
+            )
+        {
+          is >> *itPriors;
+        }
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("sparse_inv_sigma") == 0 )
+      {
+        this->sparse_inv_sigma.clear();          
+        this->sparse_inv_sigma.setIoUntilEndOfFile ( false );
+        this->sparse_inv_sigma.restore ( is, format );
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }         
+      else if ( tmp.compare("log_det_sigma") == 0 )
+      {
+        int sizeOfLogDetSigma;
+        is >> sizeOfLogDetSigma;
+        
+        this->log_det_sigma.resize ( sizeOfLogDetSigma );
+        for ( std::vector< double >::iterator itLogDetSigma = this->log_det_sigma.begin(); 
+              itLogDetSigma != this->log_det_sigma.end();
+              itLogDetSigma++
+            )
+        {
+          is >> *itLogDetSigma;
+        }
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("mu2") == 0 )
+      {
+        this->mu2.clear();          
+        this->mu2.setIoUntilEndOfFile ( false );
+        this->mu2.restore ( is, format );
+
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("sparse_sigma2") == 0 )
+      {
+        this->sparse_sigma2.clear();   
+        this->sparse_sigma2.setIoUntilEndOfFile ( false );
+        this->sparse_sigma2.restore ( is, format );
+        
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }       
+      else if ( tmp.compare("priors2") == 0 )
+      {
+        int sizeOfPriors2;
+        is >> sizeOfPriors2;
+        
+        this->priors2.resize ( sizeOfPriors2 );
+        for ( std::vector< double >::iterator itPriors2 = this->priors2.begin();
+             itPriors2 != this->priors2.end(); 
+             itPriors2++
+            )
+        {
+          is >> *itPriors2;
+        }
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("b_compareTo2ndGMM") == 0 )
+      {
+        is >> this->b_compareTo2ndGMM;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }           
+      else if ( tmp.compare("maxiter") == 0 )
+      {
+        is >> this->maxiter;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }          
+      else if ( tmp.compare("featsperclass") == 0 )
+      {
+        is >> this->featsperclass;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }       
+      else if ( tmp.compare("cdimval") == 0 )
+      {
+        is >> this->cdimval;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }        
+      else if ( tmp.compare("tau") == 0 )
+      {
+        is >> this->tau;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }      
+      else if ( tmp.compare("pyramid") == 0 )
+      {
+        is >> this->pyramid;
+        is >> tmp; // end of block 
+        tmp = this->removeEndTag ( tmp );
+      }
+      else
+      {
+      std::cerr << "WARNING -- unexpected GMM object -- " << tmp << " -- for restoration... aborting" << std::endl;
+      throw;
+      }
+    }
+  }
+  else
+  {
+    std::cerr << "GMM::restore -- InStream not initialized - restoring not possible!" << std::endl;
+    throw;
+  }
 }
+
+void GMM::store ( std::ostream & os, int format ) const
+{ 
+  if (os.good())
+  {
+    // show starting point
+    os << this->createStartTag( "GMM" ) << std::endl;
+    
+    os << this->createStartTag( "i_numOfGaussians" ) << std::endl;
+    os << this->i_numOfGaussians << std::endl;
+    os << this->createEndTag( "i_numOfGaussians" ) << std::endl;  
+    
+    if ( this->dim != -1 )
+    {
+      os << this->createStartTag( "dim" ) << std::endl;
+      os << this->dim << std::endl;
+      os << this->createEndTag( "dim" ) << std::endl;
+    }
+    
+    if ( this->mu.size() > 0 )
+    {
+      os << this->createStartTag( "mu" ) << std::endl;
+      this->mu.store ( os, format );
+      os << this->createEndTag( "mu" ) << std::endl;
+    }
+    
+    if ( this->sparse_sigma.size() > 0 )
+    {
+      os << this->createStartTag( "sparse_sigma" ) << std::endl;
+      this->sparse_sigma.store ( os, format );
+      os << this->createEndTag( "sparse_sigma" ) << std::endl;
+    }    
+    
+    if ( this->priors.size() > 0 )
+    {
+      os << this->createStartTag( "priors" ) << std::endl;
+      os << this->priors.size () << std::endl;
+      for ( std::vector< double >::const_iterator itPriors = this->priors.begin();
+            itPriors != this->priors.end(); 
+            itPriors++
+          )
+      {
+        os << *itPriors;
+      }
+      os << std::endl;
+      os << this->createEndTag( "priors" ) << std::endl;
+    }
+    
+    if ( this->sparse_inv_sigma.size() > 0 )
+    {
+      os << this->createStartTag( "sparse_inv_sigma" ) << std::endl;
+      this->sparse_inv_sigma.store ( os, format );
+      os << this->createEndTag( "sparse_inv_sigma" ) << std::endl;
+    }   
+    
+    if ( this->log_det_sigma.size() > 0 )
+    {
+      os << this->createStartTag( "log_det_sigma" ) << std::endl;
+      os << this->log_det_sigma.size ();
+      for ( std::vector< double >::const_iterator itLogDetSigma = this->log_det_sigma.begin(); 
+            itLogDetSigma != this->log_det_sigma.end();
+            itLogDetSigma++
+          )
+      {
+        os << *itLogDetSigma;
+      }
+      os << std::endl;
+      os << this->createEndTag( "log_det_sigma" ) << std::endl;
+    }    
+    
+    if ( this->mu2.size() > 0 )
+    {
+      os << this->createStartTag( "mu2" ) << std::endl;
+      this->mu2.store ( os, format );
+      os << this->createEndTag( "mu2" ) << std::endl;
+    }    
+    
+    if ( this->sparse_sigma2.size() > 0 )
+    {
+      os << this->createStartTag( "sparse_sigma2" ) << std::endl;
+      this->sparse_sigma2.store ( os, format );
+      os << this->createEndTag( "sparse_sigma2" ) << std::endl;
+    }  
+    
+    if ( this->priors2.size() > 0 )
+    {
+      os << this->createStartTag( "priors2" ) << std::endl;
+      os << this->priors2.size () << std::endl;
+      for ( std::vector< double >::const_iterator itPriors2 = this->priors2.begin();
+            itPriors2 != this->priors2.end(); 
+            itPriors2++
+          )
+      {
+        os << *itPriors2;
+      }
+      os << std::endl;
+      os << this->createEndTag( "priors2" ) << std::endl;
+    }     
+      
+    os << this->createStartTag( "b_compareTo2ndGMM" ) << std::endl;
+    os << this->b_compareTo2ndGMM << std::endl;
+    os << this->createEndTag( "b_compareTo2ndGMM" ) << std::endl;
+    
+    os << this->createStartTag( "maxiter" ) << std::endl;
+    os << this->maxiter << std::endl;
+    os << this->createEndTag( "maxiter" ) << std::endl;   
+   
+    os << this->createStartTag( "featsperclass" ) << std::endl;
+    os << this->featsperclass << std::endl;
+    os << this->createEndTag( "featsperclass" ) << std::endl;     
+  
+    if ( cdimval != -1 ) 
+    {
+      os << this->createStartTag( "cdimval" ) << std::endl;
+      os << this->cdimval << std::endl;
+      os << this->createEndTag( "cdimval" ) << std::endl;  
+    }
+    
+    os << this->createStartTag( "tau" ) << std::endl;
+    os << this->tau << std::endl;
+    os << this->createEndTag( "tau" ) << std::endl;     
+    
+    os << this->createStartTag( "pyramid" ) << std::endl;
+    os << this->pyramid << std::endl;
+    os << this->createEndTag( "pyramid" ) << std::endl;  
+    
+    // done
+    os << this->createEndTag( "GMM" ) << std::endl;    
+  }
+  else
+  {
+    std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
+  }
+}
+
+void GMM::clear ()
+{ 
+}

+ 72 - 24
math/cluster/GMM.h

@@ -13,7 +13,7 @@
   number="2",
   pages="286--298"
 }
- * @author Björn Fröhlich
+ * @author Björn Fröhlich, Alexander Freytag
  * @date 05/14/2009
 
  */
@@ -38,7 +38,7 @@ class GMM : public ClusterAlgorithm
   protected:
 
     //! number of gaussians
-    int gaussians;
+    int i_numOfGaussians;
 
     //! dimension of each feature
     int dim;
@@ -48,6 +48,9 @@ class GMM : public ClusterAlgorithm
 
     //! sparse sigma vectors
     NICE::VVector sparse_sigma;
+    
+    //! the weight for each gaussian
+    std::vector<double> priors;    
 
     //! sparse inverse sigma vectors (if usediag)
     NICE::VVector sparse_inv_sigma;
@@ -55,17 +58,13 @@ class GMM : public ClusterAlgorithm
     //! save det_sigma for fast computing
     std::vector<double> log_det_sigma;
 
-    //! the configfile
-    const NICE::Config *conf;
 
-    //! the weight for each gaussian
-    std::vector<double> priors;
 
     //! parameters for other GM to compare
     NICE::VVector mu2;
     NICE::VVector sparse_sigma2;
     std::vector<double> priors2;
-    bool comp;
+    bool b_compareTo2ndGMM;
 
     //! maximum number of iterations for EM
     int maxiter;
@@ -81,7 +80,12 @@ class GMM : public ClusterAlgorithm
 
     //! whether to use pyramid initialisation or not
     bool pyramid;
+    
   public:
+    
+    ///////////////////// ///////////////////// /////////////////////
+    //                   CONSTRUCTORS / DESTRUCTORS
+    ///////////////////// ///////////////////// /////////////////////      
     /**
       * simplest constructor
      */
@@ -89,22 +93,41 @@ class GMM : public ClusterAlgorithm
 
     /**
      * simple constructor
-     * @param _no_classes
+     * @param _numOfGaussians
      */
-    GMM ( int _no_classes );
+    GMM ( int _numOfGaussians );
 
     /**
      * standard constructor
      * @param conf a Configfile
-     * @param _no_classes number of gaussian
+     * @param _numOfGaussians number of gaussian
+     */
+    GMM ( const NICE::Config *conf, int _numOfGaussians );
+    
+    /**
+     * @brief recommended constructor
+     * @author Alexander Freytag
+     * @date 14-02-2014 ( dd-mm-yyyy )
+     * @param _conf a Configfile
+     * @param _confSection tag specifying the part in the config file
      */
-    GMM ( const NICE::Config *conf, int _no_classes = -1 );
+    GMM ( const NICE::Config * _conf, const std::string & _confSection = "GMM" );    
 
     /**
      * standard destructor
      */
-    ~GMM() {
-    };
+    ~GMM();
+    
+    /** 
+     * @brief Jobs previously performed in the config-version of the constructor, read settings etc.
+     * @author Alexander Freytag
+     * @date 13-02-2014 ( dd-mm-yyyy )
+     */    
+    void initFromConfig ( const NICE::Config * _conf, const std::string & _confSection = "GMM");         
+        
+    ///////////////////// ///////////////////// /////////////////////
+    //                      CLUSTERING STUFF
+    ///////////////////// ///////////////////// //////////////////        
 
     /**
      * computes the mixture
@@ -187,6 +210,7 @@ class GMM : public ClusterAlgorithm
     /**
      * save GMM data
      * @param filename filename
+     * @NOTE deprecated, use methods inherited from persistent instead!
      */
     void saveData ( const std::string filename );
 
@@ -194,16 +218,17 @@ class GMM : public ClusterAlgorithm
     * load GMM data
     * @param filename filename
     * @return true if everything works fine
+    * @NOTE deprecated, use methods inherited from persistent instead!
     */
     bool loadData ( const std::string filename );
 
     /**
-     * return the parameter of the mixture
+     * return the parameter of the current mixture model
      * @param mu
      * @param sSigma
      * @param p
      */
-    void getParams ( NICE::VVector &mean, NICE::VVector &sSigma, std::vector<double> &p );
+    void getParams ( NICE::VVector &mean, NICE::VVector &sSigma, std::vector<double> &p ) const;
 
     /**
      * Set the parameters of an other mixture for comparing with this one
@@ -211,7 +236,7 @@ class GMM : public ClusterAlgorithm
      * @param sSigma diagonal covariance Matrixs
      * @param p weights
      */
-    void setCompareGM ( NICE::VVector mean, NICE::VVector sSigma, std::vector<double> p );
+    void setGMMtoCompareWith ( NICE::VVector mean, NICE::VVector sSigma, std::vector<double> p );
 
     /**
      * probability product kernel
@@ -222,22 +247,45 @@ class GMM : public ClusterAlgorithm
      * @param p
      * @return
      */
-    double kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p );
+    double kPPK ( NICE::Vector sigma1, NICE::Vector sigma2, NICE::Vector mu1, NICE::Vector mu2, double p ) const;
 
     /**
-     * starts a comparison between this Mixture and a other one seted bei "setComparGM"
+     * starts a comparison between this Mixture and another one set bei "setGMMtoCompareWith"
      */
-    double compare();
+    double compareTo2ndGMM()  const;
 
     /**
      * whether to compare or not
-     * @param c
+     * @param _compareTo2ndGMM
      */
-    void comparing ( bool c = true );
+    void setCompareTo2ndGMM ( const bool & _compareTo2ndGMM = true );
+
+    int getNumberOfGaussians() const; 
+    
+    ///////////////////// INTERFACE PERSISTENT /////////////////////
+    // interface specific methods for store and restore
+    ///////////////////// INTERFACE PERSISTENT /////////////////////      
+    
+    /**
+    * @brief Load object from external file (stream)
+    * @author Alexander Freytag
+    * @date 13-02-2014 ( dd-mm-yyyy )
+    */
+    void restore ( std::istream & is, int format = 0 );
 
-    int getSize() {
-      return gaussians;
-    }
+    /**
+    * @brief Save object to external file (stream)
+    * @author Alexander Freytag
+    * @date 13-02-2014 ( dd-mm-yyyy )
+    */
+    void store ( std::ostream & os, int format = 0 ) const;
+
+    /**
+    * @brief Clear object
+    * @author Alexander Freytag
+    * @date 13-02-2014 ( dd-mm-yyyy )
+    */
+    void clear ();        
 };
 
 

+ 3 - 3
math/cluster/progs/compareGMM.cpp

@@ -110,11 +110,11 @@ int main (int argc, char **argv)
 	GMM clusteralg( &conf, gaussians);
 #if 1
 	//clusteralg->computeMixture ( x );
-	clusteralg.setCompareGM(mean1, sigma1, prior1);
-	clusteralg.comparing(true);
+	clusteralg.setGMMtoCompareWith(mean1, sigma1, prior1);
+	clusteralg.setCompareTo2ndGMM(true);
 	clusteralg.cluster ( x, prototypes, weights, assignments );
 	
-	double dist = clusteralg.compare();
+	double dist = clusteralg.compareTo2ndGMM();
 	
 	cout << "dist1: " << endl << dist << endl;
 	

+ 150 - 0
math/cluster/tests/TestGMM.cpp

@@ -0,0 +1,150 @@
+#ifdef NICE_USELIB_CPPUNIT
+
+#include <string>
+#include <exception>
+
+#include "TestGMM.h"
+
+#include <core/basics/Config.h>
+#include "vislearning/math/distances/genericDistance.h"
+
+
+const bool verboseStartEnd = true;
+const bool verbose = false;
+const std::string distanceType = "euclidean";
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestGMM );
+
+void TestGMM::setUp() {
+}
+
+void TestGMM::tearDown() {
+}
+
+void TestGMM::testGMMClustering() 
+{
+  if (verboseStartEnd)
+    std::cerr << "================== TestGMM::testGMMClustering ===================== " << std::endl;
+  
+  Config * conf = new Config;
+  std::string confSection ( "GMM" );
+  
+  conf->sI( confSection, "i_numOfGaussians", 2 );
+  conf->sI( confSection, "maxIterations", 200 );
+   
+  OBJREC::GMM gmm ( conf, confSection );
+  
+  //create some artificial data
+  NICE::VVector features;
+  NICE::Vector x1 (2); x1[0] = 1;  x1[1] = 1; features.push_back(x1);
+  NICE::Vector x2 (2); x2[0] = 4;  x2[1] = 1; features.push_back(x2);
+  NICE::Vector x3 (2); x3[0] = 2;  x3[1] = 4; features.push_back(x3);
+  NICE::Vector x4 (2); x4[0] = 10; x4[1] = 3; features.push_back(x4);
+  NICE::Vector x5 (2); x5[0] = 8;  x5[1] = 3; features.push_back(x5);
+  NICE::Vector x6 (2); x6[0] = 4;  x6[1] = 3; features.push_back(x6);
+  NICE::Vector x7 (2); x7[0] = 3;  x7[1] = 2; features.push_back(x7);
+  NICE::Vector x8 (2); x8[0] = 1;  x8[1] = 3; features.push_back(x8);
+  NICE::Vector x9 (2); x9[0] = 9;  x9[1] = 2; features.push_back(x9);
+  
+  //cluster data
+  NICE::VVector prototypes;
+  std::vector<double> weights;
+  std::vector<int> assignment;
+  
+  gmm.cluster ( features, prototypes, weights, assignment );  
+
+  //check whether the results fits the ground truth  
+  //NOTE
+  // adapted from TestKMeadian:
+  // If no random initialization is activated, we initially grab x2 and x8.
+  // After 3 iterations, we should have converged and obtain x5 and x7.
+
+  //
+  //TODO define proper test example for a GMM!
+  //
+  
+//   NICE::VectorDistance<double> * distancefunction = GenericDistanceSelection::selectDistance(distanceType);
+// 
+//   if ( verbose )
+//   {
+    std::cerr << " x9: " << x9 << " cl1: " << prototypes[0] << std::endl;
+    std::cerr << " x7: " << x7 << " cl2: " << prototypes[1] << std::endl;
+//   }
+//   
+//   double distX9Cl1 ( distancefunction->calculate( x9, prototypes[0] ) );
+//   double distX7Cl2 ( distancefunction->calculate( x7, prototypes[1] ) );
+//   
+//   CPPUNIT_ASSERT_DOUBLES_EQUAL( distX9Cl1, 0.0, 1e-8);
+//   CPPUNIT_ASSERT_DOUBLES_EQUAL( distX7Cl2, 0.0, 1e-8); 
+//   
+//   std::cerr << "                               successfull              " << std::endl;
+        
+  //don't waste memory
+  delete conf;
+  
+  if (verboseStartEnd)
+    std::cerr << "================== TestGMM::testGMMClustering done ===================== " << std::endl;
+}
+
+
+void TestGMM::testGMMPersistent() 
+{
+  if (verboseStartEnd)
+    std::cerr << "================== TestGMM::testGMMPersistent ===================== " << std::endl;
+  
+  Config * conf = new Config;
+  std::string confSection ( "GMM" );
+  
+  conf->sI( confSection, "i_numOfGaussians", 2 );
+  conf->sI( confSection, "maxIterations", 200 );
+   
+  OBJREC::GMM gmm;
+  gmm.initFromConfig( conf, confSection );
+  
+  // TEST STORING ABILITIES
+  if ( verbose )
+    std::cerr << " TEST STORING ABILITIES FOR KMEDIAN" << std::endl;
+  
+  std::string s_destination_save ( "myGMM.txt" );
+  
+  std::filebuf fbOut;
+  fbOut.open ( s_destination_save.c_str(), ios::out );
+  std::ostream os (&fbOut);
+  //
+  gmm.store( os );
+  //   
+  fbOut.close();
+  
+  // TEST RESTORING ABILITIES
+  if ( verbose )
+    std::cerr << " TEST RESTORING ABILITIES FOR KMEDIAN" << std::endl;
+    
+  OBJREC::GMM gmmRestore;  
+      
+  std::string s_destination_load ( "myGMM.txt" );
+  
+  std::filebuf fbIn;
+  fbIn.open ( s_destination_load.c_str(), ios::in );
+  std::istream is (&fbIn);
+  //
+  gmmRestore.restore( is );
+  //   
+  fbIn.close(); 
+  
+  // currently, we have no possibility to actually verify that the restore-operation was successfull, i.e.,
+  // it returned an object identical to the stored one.
+  // However, if we reached this point, at least something went right, so we should be happy...  
+  
+  //don't waste memory
+  delete conf;
+  
+  if (verboseStartEnd)
+    std::cerr << "================== TestGMM::testGMMPersistent done ===================== " << std::endl;
+}
+
+
+#endif

+ 36 - 0
math/cluster/tests/TestGMM.h

@@ -0,0 +1,36 @@
+#ifndef _TESTGMM_H
+#define _TESTGMM_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include "vislearning/math/cluster/GMM.h"
+
+/**
+ * CppUnit-Testcase. 
+ * @brief CppUnit-Testcase to verify that the GMM-clustering works correctly
+ */
+class TestGMM : public CppUnit::TestFixture {
+
+    CPPUNIT_TEST_SUITE( TestGMM );
+    
+    CPPUNIT_TEST(testGMMClustering);
+    
+    CPPUNIT_TEST(testGMMPersistent);
+    
+    CPPUNIT_TEST_SUITE_END();
+  
+ private:
+ 
+ public:
+    void setUp();
+    void tearDown();
+
+    /**
+    * Constructor / Destructor testing 
+    */  
+    void testGMMClustering();
+    
+    void testGMMPersistent();
+
+};
+
+#endif // _TESTKMEDIAN_H