Эх сурвалжийг харах

Clustering -- added random clustering

Alexander Freytag 12 жил өмнө
parent
commit
786bf5da57

+ 157 - 0
math/cluster/RandomClustering.cpp

@@ -0,0 +1,157 @@
+/**
+ * @file RandomClustering.cpp
+* @brief Clustering by randomly picking some samples from the set of features as representatives
+* @author Alexander Freytag
+* @date 03-06-2013 (dd-mm-yyyy)
+ */
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <map>
+
+#include "vislearning/math/distances/genericDistance.h"
+
+#include "vislearning/math/cluster/RandomClustering.h"
+
+#include <set>
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+
+
+/**
+ * @brief Simple constructor taking the settings directly.
+ * @param _noClusters number of cluster representatives to pick
+ * @param _distanceType name of the distance used when computing assignments;
+ *        it is passed to GenericDistanceSelection::selectDistance
+ */
+RandomClustering::RandomClustering(const int & _noClusters, const std::string & _distanceType) :
+  noClusters(_noClusters), distanceType(_distanceType),
+  // compute_assignments dereferences distancefunction, so it has to be set up here as well,
+  // in the same way the config-based constructor does it
+  distancefunction( GenericDistanceSelection::selectDistance( distanceType ) )
+{
+}
+
+/**
+ * @brief Constructor reading all settings from a config object.
+ * @param conf config object queried for the settings (dereferenced without a null check)
+ * @param _section name of the config section that holds the settings
+ *
+ * Settings read: "noClusters" (fallback 20) and "distanceType" (fallback "euclidean").
+ * The distance object is then selected from the distance type just read.
+ */
+RandomClustering::RandomClustering( const NICE::Config *conf, const std::string & _section)
+  : noClusters( conf->gI( _section, "noClusters", 20 ) ),
+    distanceType( conf->gS( _section, "distanceType", "euclidean" ) ),
+    distancefunction( GenericDistanceSelection::selectDistance( distanceType ) )
+{
+}
+
+/**
+ * @brief Destructor; the body is empty, so nothing is released explicitly.
+ *
+ * NOTE(review): distancefunction is a raw pointer obtained from
+ * GenericDistanceSelection::selectDistance and is not freed here -- confirm who owns it.
+ */
+RandomClustering::~RandomClustering()
+{
+}
+
+/**
+ * @brief Pick distinct features at random and copy them into _prototypes.
+ *
+ * At most noClusters features are chosen. If fewer features than requested clusters
+ * are available, every feature becomes a prototype and a warning is written to std::cerr.
+ * The prototypes are stored in descending order of their feature index.
+ *
+ * @param _features   samples to draw the prototypes from
+ * @param _prototypes output: the chosen samples
+ * @param _weights    not used by this method
+ * @param _assignment not used by this method
+ * @return 0 on success, -1 if no prototype could be chosen (no features given or a
+ *         non-positive number of clusters requested); _prototypes is emptied in that case
+ */
+int RandomClustering::compute_prototypes(const NICE::VVector & _features, NICE::VVector & _prototypes,
+    std::vector<double> & _weights, const std::vector<int> & _assignment)
+{
+  // single typedef for the index container so that the iterator type used below always matches it
+  typedef std::set<int, std::greater<int> > IndexSet;
+
+  const int noFeatures ( static_cast<int>( _features.size() ) );
+
+  // never request more distinct indices than there are features,
+  // otherwise the rejection sampling below could not terminate
+  const int noPrototypes ( ( this->noClusters < noFeatures ) ? this->noClusters : noFeatures );
+
+  if ( noPrototypes <= 0 )
+  {
+    // nothing to draw from or nothing requested; this also avoids evaluating rand() % 0
+    _prototypes.resize( 0 );
+    return -1;
+  }
+
+  if ( noPrototypes < this->noClusters )
+  {
+    std::cerr << "RandomClustering::compute_prototypes -- " << this->noClusters
+              << " clusters requested, but only " << noFeatures
+              << " features given. Using every feature as prototype." << std::endl;
+  }
+
+  //pick distinct cluster representatives randomly: the set silently drops
+  //indices that were drawn before, so we simply draw until it is large enough
+  IndexSet chosenIdx;
+  while ( static_cast<int>( chosenIdx.size() ) < noPrototypes )
+  {
+    chosenIdx.insert ( rand() % noFeatures );
+  }
+
+  _prototypes.resize( noPrototypes );
+
+  int clusterCnt ( 0 );
+  for ( IndexSet::const_iterator idxIt = chosenIdx.begin(); idxIt != chosenIdx.end(); ++idxIt, ++clusterCnt )
+  {
+     _prototypes[clusterCnt] = _features[ *idxIt ];
+  }
+
+  return 0;
+}
+
+/**
+ * @brief Assign every feature to the prototype with the smallest distance.
+ *
+ * Distances are evaluated with distancefunction. On ties the prototype with the
+ * lowest index wins, because only a strictly smaller distance replaces the current best.
+ * If _prototypes is empty, every feature is assigned to index 0.
+ *
+ * @param _features   samples to be assigned
+ * @param _prototypes cluster representatives
+ * @param _assignment output: resized to the number of features; entry i holds the
+ *                    index of the prototype chosen for feature i
+ * @return always 0.0
+ */
+double RandomClustering::compute_assignments(const NICE::VVector & _features,
+                                    const NICE::VVector & _prototypes,
+                                    std::vector<int> & _assignment)
+{
+  const size_t noFeatures = _features.size();
+  const size_t noPrototypes = _prototypes.size();
+
+  _assignment.resize( noFeatures );
+
+  for ( size_t featIdx = 0; featIdx < noFeatures; ++featIdx )
+  {
+    const NICE::Vector & sample = _features[featIdx];
+
+    // best candidate found so far for this sample
+    int nearest = 0;
+    double nearestDist = std::numeric_limits<double>::max();
+
+    for ( size_t protoIdx = 0; protoIdx < noPrototypes; ++protoIdx )
+    {
+      const double dist = this->distancefunction->calculate( _prototypes[protoIdx], sample );
+
+      // keep the current candidate unless this prototype is strictly closer
+      if ( !( dist < nearestDist ) )
+        continue;
+
+      nearestDist = dist;
+      nearest = static_cast<int>( protoIdx );
+    }
+
+    _assignment[featIdx] = nearest;
+  }
+
+  return 0.0;
+}
+
+/**
+ * @brief Compute for every cluster the fraction of features assigned to it.
+ *
+ * @param _features   the clustered samples; only their number is used
+ * @param _weights    output: resized to noClusters; entry k is the number of features
+ *                    assigned to cluster k divided by the total number of features
+ *                    (all zeros if no features are given)
+ * @param _assignment cluster index of every feature; entries outside [0, noClusters)
+ *                    are ignored
+ * @return always 0.0
+ */
+double RandomClustering::compute_weights(const NICE::VVector & _features,
+                                std::vector<double> & _weights,
+                                std::vector<int> & _assignment)
+{
+  // a negative cluster count would otherwise be converted into a huge size request
+  const int noWeights ( ( this->noClusters > 0 ) ? this->noClusters : 0 );
+
+  //initialization
+  _weights.assign( static_cast<size_t>( noWeights ), 0.0 );
+
+  const size_t noFeatures = _features.size();
+
+  // without features there is nothing to count, and normalizing would compute 0/0
+  if ( noFeatures == 0 )
+    return 0.0;
+
+  // only visit features for which an assignment is actually available
+  const size_t noAssigned = ( _assignment.size() < noFeatures ) ? _assignment.size() : noFeatures;
+
+  //increase weight for every assignment
+  for ( size_t j = 0; j < noAssigned; ++j )
+  {
+    const int k = _assignment[j];
+
+    // skip labels that do not refer to a valid cluster instead of writing out of bounds
+    if ( ( k < 0 ) || ( k >= noWeights ) )
+      continue;
+
+    _weights[k] += 1.0;
+  }
+
+  //normalize weights
+  for ( int k = 0; k < noWeights; ++k )
+    _weights[k] /= static_cast<double>( noFeatures );
+
+  return 0.0;
+}
+
+/**
+ * @brief Run the complete clustering: pick prototypes, assign features, compute weights.
+ *
+ * @param _features   samples to be clustered
+ * @param _prototypes output: the chosen cluster representatives
+ * @param _weights    output: per-cluster weights as computed by compute_weights
+ * @param _assignment output: per-feature prototype index as computed by compute_assignments
+ *
+ * A non-zero return value of compute_prototypes is treated as failure; in that case
+ * _weights and _assignment are emptied and the remaining steps are skipped.
+ */
+void RandomClustering::cluster(const NICE::VVector & _features,
+                      NICE::VVector & _prototypes,
+                      std::vector<double> & _weights,
+                      std::vector<int> & _assignment)
+{
+  //randomly pick cluster centers
+  const int status = this->compute_prototypes( _features, _prototypes, _weights, _assignment );
+
+  if ( status != 0 )
+  {
+    // without prototypes neither assignments nor weights are meaningful
+    _weights.clear();
+    _assignment.clear();
+    return;
+  }
+
+  //compute the nearest prototype for every feature
+  this->compute_assignments( _features, _prototypes, _assignment );
+
+  //compute corresponding weights
+  this->compute_weights( _features, _weights, _assignment );
+}

+ 112 - 0
math/cluster/RandomClustering.h

@@ -0,0 +1,112 @@
+/** 
+* @file RandomClustering.h
+* @brief Clustering by randomly picking some samples from the set of features as representatives
+* @author Alexander Freytag
+* @date 03-06-2013 (dd-mm-yyyy)
+*/
+#ifndef RANDOMCLUSTERERNINCLUDE
+#define RANDOMCLUSTERERNINCLUDE
+
+#include <core/basics/Config.h>
+#include <core/vector/Distance.h>
+#include <core/vector/MatrixT.h>
+#include <core/vector/VectorT.h>
+  
+#include "ClusterAlgorithm.h"
+
+namespace OBJREC {
+
+  /**
+   * @class RandomClustering
+   * @brief Clustering by randomly picking some samples from the set of features as representatives
+   *
+   * The public entry point is cluster(); the protected methods are the individual steps it runs.
+   *
+   * @author Alexander Freytag
+   * @date 03-06-2013 (dd-mm-yyyy)
+  */
+  class RandomClustering : public ClusterAlgorithm
+  {
+
+      protected:
+
+      /************************
+       *
+       *   protected variables
+       *
+       **************************/
+
+        //! desired number of clusters
+        int noClusters;
+
+        //! name of the distance to use for calculating assignments
+        std::string distanceType;
+
+        //! the actual distance object used for calculating assignments
+        //! NOTE(review): raw pointer without visible ownership handling in this class -- confirm who releases it
+        NICE::VectorDistance<double> *distancefunction;
+
+
+       /************************
+       *
+       *   protected methods
+       *
+       **************************/
+
+
+        //! compute assignments of all given features with respect to the given prototypes (nearest prototype wins)
+        double compute_assignments ( const NICE::VVector & features,
+                  const NICE::VVector & prototypes,
+                  std::vector<int> & assignment );
+
+        //! compute the relative number of assignments for every cluster
+        double compute_weights ( const NICE::VVector & features,
+              std::vector<double> & weights,
+              std::vector<int>    & assignment );
+
+
+        //! randomly pick distinct features as prototypes; weights and assignment are part of the signature but not used
+        int compute_prototypes ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                const std::vector<int>    & assignment );
+
+      public:
+
+        /**
+        * @brief simple constructor
+        * @param _noClasses the number of clusters to be computed
+        * @param _distanceMode a string specifying the distance function to be used (default: euclidean)
+        * @date 03-06-2013 (dd-mm-yyyy)
+        */
+        RandomClustering( const int & _noClasses , const std::string & _distanceMode="euclidean" );
+
+        /**
+        * @brief standard constructor
+        * @param conf config file specifying all relevant variable settings
+        * @param _section name of the section within the configfile where the settings can be found (default: RandomClustering)
+        * @date 03-06-2013 (dd-mm-yyyy)
+        */
+        RandomClustering( const NICE::Config *conf, const std::string & _section = "RandomClustering");
+
+
+
+        /** simple destructor */
+        virtual ~RandomClustering();
+
+        /**
+        * @brief this is the actual method that performs the clustering for a given set of features
+        * @author Alexander Freytag
+        * @date 03-06-2013 (dd-mm-yyyy)
+        * @param   features input features to be clustered
+        * @param   prototypes computed prototypes (randomly chosen) for the given samples
+        * @param   weights relative number of assignments for every cluster
+        * @param   assignment explicit assignments of features to the chosen prototypes
+        */
+        void cluster ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                std::vector<int>    & assignment );
+
+  };
+
+
+} // namespace
+
+#endif