浏览代码

added KMedian

Alexander Freytag 12 年之前
父节点
当前提交
26357ad8e6
共有 5 个文件被更改,包括 770 次插入0 次删除
  1. 432 0
      math/cluster/KMedian.cpp
  2. 128 0
      math/cluster/KMedian.h
  3. 89 0
      math/cluster/tests/Makefile.inc
  4. 89 0
      math/cluster/tests/TestKMedian.cpp
  5. 32 0
      math/cluster/tests/TestKMedian.h

+ 432 - 0
math/cluster/KMedian.cpp

@@ -0,0 +1,432 @@
+/**
+ * @file KMedian.cpp
+ * @brief KMedian (aka K-medoid)
+* @author Alexander Freytag
+* @date 23-04-2013 (dd-mm-yyyy)
+
+ */
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <map>
+#include <algorithm> //to easily find the smallest value in a map
+
+#include "vislearning/math/cluster/KMedian.h"
+#include "vislearning/math/distances/genericDistance.h"
+
+#include <set>
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+
+/** Pair of (integer key, floating point value). */
+typedef std::pair<int, double> MyPairType;
+
+/**
+ * @brief Comparison functor ordering MyPairType objects by their second component only
+ *        (the first component is ignored).
+ */
+struct CompareSecond
+{
+    bool operator()(const MyPairType& lhs, const MyPairType& rhs) const
+    {
+        const double lhsValue = lhs.second;
+        const double rhsValue = rhs.second;
+        return lhsValue < rhsValue;
+    }
+};
+
+#undef DEBUG_KMEDIAN_ASSIGNMENTS
+// #define DEBUG_KMEDIAN_ASSIGNMENTS
+
+#undef DEBUG_KMEDIAN_PROTOCOMP
+// #define DEBUG_KMEDIAN_PROTOCOMP
+
+
+
+/**
+ * @brief Simple constructor: the number of clusters and the distance are given explicitly.
+ *
+ * The termination criteria are set to fixed values (minimal delta 1e-5, at most 200 iterations).
+ * The random number generator is NOT seeded here.
+ *
+ * @param _noClasses    number of clusters to compute
+ * @param _distanceType name of the distance, handed to GenericDistanceSelection::selectDistance
+ */
+KMedian::KMedian(const int & _noClasses, const std::string & _distanceType)
+{
+  //settings handed over by the caller
+  noClasses    = _noClasses;
+  distanceType = _distanceType;
+
+  //the distance object is selected by its name
+  distancefunction = GenericDistanceSelection::selectDistance( distanceType );
+
+  //fixed termination criteria
+  d_minDelta      = 1e-5;
+  i_maxIterations = 200;
+}
+
+/**
+ * @brief Standard constructor: all settings are read from a config object.
+ *
+ * Keys read from the given section (with their fallback values):
+ * "distanceType" ("euclidean"), "minDelta" (1e-5), "maxIterations" (200), "noClasses" (20).
+ *
+ * @param conf     config object to read from (dereferenced without a check)
+ * @param _section name of the section containing the settings
+ */
+KMedian::KMedian( const NICE::Config *conf, const std::string & _section)
+{
+  //which distance shall be used?
+  distanceType     = conf->gS( _section, "distanceType", "euclidean" );
+  distancefunction = GenericDistanceSelection::selectDistance( distanceType );
+
+  //termination criteria
+  d_minDelta      = conf->gD( _section, "minDelta", 1e-5 );
+  i_maxIterations = conf->gI( _section, "maxIterations", 200 );
+
+  //desired number of clusters
+  noClasses = conf->gI( _section, "noClasses", 20 );
+}
+
+/**
+ * @brief Destructor, releases nothing explicitly.
+ *
+ * NOTE(review): distancefunction is a raw pointer obtained from
+ * GenericDistanceSelection::selectDistance and is not freed here -- confirm who owns it.
+ */
+KMedian::~KMedian() {}
+
+/**
+ * @brief Initial guess: every prototype becomes a copy of a randomly picked feature,
+ *        no feature index is picked twice.
+ *
+ * Indices are drawn with rand(); this method does not seed the generator.
+ * If more prototypes are requested than features are available, no set of distinct
+ * indices exists -- an error is reported on stderr and prototypes is left untouched
+ * (instead of drawing forever or dividing by zero).
+ *
+ * @param features   the features to pick from
+ * @param prototypes already sized by the caller, every entry is overwritten
+ */
+void KMedian::initial_guess(const VVector & features, VVector & prototypes)
+{
+  if ( prototypes.size() > features.size() )
+  {
+    std::cerr << "KMedian::initial_guess: ERROR: cannot pick " << prototypes.size() << " distinct prototypes out of " << features.size() << " features" << std::endl;
+    return;
+  }
+
+  //indices of all features that already serve as prototype
+  std::set<int> usedIndices;
+
+  for (VVector::iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); ++protoIt)
+  {
+    int k;
+
+    //draw until we hit an index that was not used so far
+    //(insert reports via .second whether the index was new)
+    do
+    {
+      k = rand() % features.size();
+    } while ( !usedIndices.insert(k).second );
+
+    *protoIt = features[k];
+  }
+}
+
+/**
+ * @brief Update the prototype of every cluster given the current assignments (~ M-step).
+ *
+ * For every cluster, the distances between all pairs of its members are summed up per member.
+ * The member with the smallest sum (the first one in feature order in case of ties)
+ * becomes the new prototype.
+ * A cluster without any member keeps its previous prototype; the original code
+ * dereferenced the end of an empty container in that case.
+ *
+ * @param features   all features
+ * @param prototypes at least noClasses entries; entries of non-empty clusters are overwritten
+ * @param weights    at least noClasses entries; they are only reset to zero here
+ * @param assignment cluster index (0 ... noClasses-1) for every feature
+ * @return 0 on success, -1 if the sizes of the arguments are inconsistent or an
+ *         assignment is out of range (nothing but weights is modified then)
+ */
+int KMedian::compute_prototypes(const VVector & features, VVector & prototypes,
+    std::vector<double> & weights, const std::vector<int> & assignment)
+{
+  const std::size_t noClusters ( noClasses > 0 ? static_cast<std::size_t>( noClasses ) : 0 );
+
+  //all indexing below relies on these sizes
+  if ( (assignment.size() < features.size()) || (prototypes.size() < noClusters) || (weights.size() < noClusters) )
+  {
+    std::cerr << "KMedian::compute_prototypes: ERROR: inconsistent sizes -- features: " << features.size() << " assignment: " << assignment.size() << " prototypes: " << prototypes.size() << " weights: " << weights.size() << " noClasses: " << noClasses << std::endl;
+    return -1;
+  }
+
+  //initialization
+  for (std::size_t k = 0; k < noClusters; k++)
+  {
+    weights[k] = 0;
+  }
+
+  //which examples are assigned to which cluster? (indices in ascending order)
+  std::vector< std::vector<std::size_t> > members ( noClusters );
+  for (std::size_t exCnt = 0; exCnt < features.size(); exCnt++)
+  {
+    const int k ( assignment[exCnt] );
+    if ( (k < 0) || (static_cast<std::size_t>( k ) >= noClusters) )
+    {
+      std::cerr << "KMedian::compute_prototypes: ERROR: assignment " << k << " of example " << exCnt << " is not within [0," << noClasses << ")" << std::endl;
+      return -1;
+    }
+    members[k].push_back( exCnt );
+  }
+
+  #ifdef DEBUG_KMEDIAN_PROTOCOMP
+    std::cerr << "k-median -- current assignmens:";
+    for (std::size_t k = 0; k < noClusters; k++)
+    {
+      std::cerr << " " << members[k].size();
+    }
+    std::cerr << std::endl << "noClasses: " << noClasses << std::endl;
+  #endif
+
+  //feature index of the median of every cluster (unused for empty clusters)
+  std::vector<std::size_t> medoidIdx ( noClusters, 0 );
+
+  //compute the median for every cluster
+  //every iteration only writes its own entry of medoidIdx, so no critical section is needed
+  #pragma omp parallel for
+  for (int clusterCnt = 0; clusterCnt < noClasses; clusterCnt++)
+  {
+    const std::vector<std::size_t> & idx = members[clusterCnt];
+
+    //nothing to compute for an empty cluster
+    if ( idx.empty() )
+      continue;
+
+    //overall distance of every member to all members of the same cluster
+    std::vector<double> overallDistances ( idx.size(), 0.0 );
+
+    for (std::size_t a = 0; a < idx.size(); a++)
+    {
+      //start at b == a: every pair is evaluated only once and credited to both members
+      for (std::size_t b = a; b < idx.size(); b++)
+      {
+        const double dist ( distancefunction->calculate( features[ idx[a] ], features[ idx[b] ] ) );
+
+        overallDistances[a] += dist;
+        if ( a != b )
+          overallDistances[b] += dist;
+      }
+    }
+
+    #ifdef DEBUG_KMEDIAN_PROTOCOMP
+      #pragma omp critical
+      {
+        std::cerr << "distances for cluster " << clusterCnt << " ";
+        for (std::size_t a = 0; a < idx.size(); a++)
+        {
+          std::cerr << idx[a] << " " << overallDistances[a] << " ";
+        }
+        std::cerr << std::endl;
+      }
+    #endif
+
+    //now compute the index of example with min overall distance
+    const std::size_t posOfMin ( std::min_element( overallDistances.begin(), overallDistances.end() ) - overallDistances.begin() );
+    medoidIdx[clusterCnt] = idx[posOfMin];
+
+    //finished computations for cluster k
+  }
+
+  //copy the medians; empty clusters keep their previous prototype
+  for (std::size_t k = 0; k < noClusters; k++)
+  {
+    if ( !members[k].empty() )
+      prototypes[k] = features[ medoidIdx[k] ];
+  }
+
+  #ifdef DEBUG_KMEDIAN_PROTOCOMP
+    std::cerr << " ----   prototypes after current iteration:  --- " << std::endl;
+    for (NICE::VVector::const_iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); protoIt++)
+    {
+      std::cerr << *protoIt << " ";
+    }
+
+    std::cerr << std::endl;
+  #endif
+
+  return 0;
+}
+
+/**
+ * @brief Difference between the prototypes of the previous iteration and the current ones.
+ *
+ * @param oldprototypes prototypes of the previous iteration
+ * @param prototypes    current prototypes
+ * @return sum over k of the distance between oldprototypes[k] and prototypes[k];
+ *         if the sets differ in size, only the common leading entries are compared
+ */
+double KMedian::compute_delta(const VVector & oldprototypes,
+    const VVector & prototypes)
+{
+  //never index beyond the smaller of the two sets
+  const std::size_t noPairs ( std::min<std::size_t>( oldprototypes.size(), prototypes.size() ) );
+
+  double distance = 0;
+
+  for (std::size_t k = 0; k < noPairs; k++)
+  {
+    const double distOfPair ( distancefunction->calculate(oldprototypes[k], prototypes[k]) );
+    distance += distOfPair;
+
+    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+      fprintf(stderr, "KMedian::compute_delta: Distance: %f\n", distOfPair);
+    #endif
+  }
+  return distance;
+}
+
+/**
+ * @brief Assign every feature to its nearest prototype (~ E-step).
+ *
+ * Ties are resolved in favour of the prototype with the smallest index. If prototypes
+ * is empty, every feature is assigned to cluster 0.
+ *
+ * @param features   the features to be assigned
+ * @param prototypes current prototypes
+ * @param assignment receives the index of the nearest prototype for every feature;
+ *                   enlarged if it has fewer entries than there are features
+ * @return always 0.0
+ */
+double KMedian::compute_assignments(const VVector & features,
+                                    const VVector & prototypes,
+                                    std::vector<int> & assignment)
+{
+  //make sure that every feature has a slot we can write to
+  if ( assignment.size() < features.size() )
+    assignment.resize( features.size(), 0 );
+
+  int index = 0;
+  for (VVector::const_iterator featIt = features.begin(); featIt != features.end(); ++featIt, index++)
+  {
+    const NICE::Vector & x = *featIt;
+    double mindist = std::numeric_limits<double>::max();
+    int minclass = 0;
+
+    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+      fprintf(stderr, "computing nearest prototype for std::vector %d\n", index);
+    #endif
+
+    int c = 0;
+    for (VVector::const_iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); ++protoIt, c++)
+    {
+      const NICE::Vector & p = *protoIt;
+      const double distance = distancefunction->calculate(p, x);
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+        cerr << p << endl;
+        cerr << x << endl;
+        fprintf(stderr, "KMedian::compute_assignments: distance to prototype %d is %f\n", c, distance);
+      #endif
+
+      //strict comparison: the first of several equally close prototypes wins
+      if (distance < mindist)
+      {
+        minclass = c;
+        mindist = distance;
+      }
+    }
+
+    assignment[index] = minclass;
+  }
+
+  return 0.0;
+}
+
+/**
+ * @brief Compute for every cluster the fraction of features assigned to it.
+ *
+ * @param features   the clustered features (only their number is used)
+ * @param weights    receives for every cluster: number of assigned features / number of features.
+ *                   All weights are zero if there are no features (instead of NaN from 0/0).
+ *                   Only the first min(noClasses, weights.size()) entries are written.
+ * @param assignment cluster index for every feature; entries outside of the valid range are
+ *                   not counted
+ * @return always 0.0
+ */
+double KMedian::compute_weights(const VVector & features,
+                                std::vector<double> & weights,
+                                std::vector<int> & assignment)
+{
+  //never write beyond the end of weights
+  const std::size_t noClusters ( noClasses > 0 ? std::min<std::size_t>( noClasses, weights.size() ) : 0 );
+
+  for (std::size_t k = 0; k < noClusters; k++)
+    weights[k] = 0;
+
+  const std::size_t noFeatures ( features.size() );
+
+  //without features there is nothing to count, and nothing to divide by
+  if ( noFeatures == 0 )
+    return 0.0;
+
+  //count the assignments per cluster
+  for (std::size_t j = 0; (j < noFeatures) && (j < assignment.size()); j++)
+  {
+    const int k ( assignment[j] );
+    if ( (k >= 0) && (static_cast<std::size_t>( k ) < noClusters) )
+      weights[k]++;
+  }
+
+  //turn counts into fractions
+  for (std::size_t k = 0; k < noClusters; k++)
+    weights[k] = weights[k] / noFeatures;
+
+  return 0.0;
+}
+
+/**
+ * @brief Perform the actual clustering of the given features.
+ *
+ * Starting from a random initial guess, assignments and prototypes are updated alternately
+ * until the prototypes change by at most d_minDelta (see compute_delta) or i_maxIterations
+ * iterations are done. If compute_prototypes reports a failure (negative return value), the
+ * whole procedure is restarted with a new initial guess.
+ * A summary line is always written to stderr at the end.
+ *
+ * The program is terminated via exit(-1) if noClasses is not positive or if fewer
+ * than noClasses features are given.
+ *
+ * @param features   input features to be clustered
+ * @param prototypes receives the noClasses computed prototypes (previous content is discarded)
+ * @param weights    receives for every cluster the fraction of features assigned to it
+ * @param assignment receives for every feature the index of its cluster
+ */
+void KMedian::cluster(const NICE::VVector & features,
+                      NICE::VVector & prototypes,
+                      std::vector<double> & weights,
+                      std::vector<int> & assignment)
+{
+  NICE::VVector oldprototypes;
+
+  prototypes.clear();
+  weights.clear();
+  assignment.clear();
+
+  //check the settings first -- everything below relies on 0 < noClasses <= #features
+  if ( noClasses <= 0 )
+  {
+    fprintf(stderr,
+        "FATAL ERROR: KMedian needs a positive number of clusters\n");
+    exit(-1);
+  }
+
+  if ( (int) features.size() < noClasses )
+  {
+    fprintf(stderr,
+        "FATAL ERROR: Not enough feature vectors provided for KMedian\n");
+    exit(-1);
+  }
+
+  weights.resize(noClasses, 0);
+  assignment.resize(features.size(), 0);
+
+  //features is not empty here, see the checks above
+  const int dimension ( features[0].size() );
+
+  for (int k = 0; k < noClasses; k++)
+  {
+    prototypes.push_back( NICE::Vector(dimension) );
+    prototypes[k].set(0);
+  }
+
+  bool successKMedian ( false );
+  int iterations ( 0 );
+  double delta ( std::numeric_limits<double>::max() );
+
+  while ( !successKMedian )
+  {
+    //we assume that this run will be successful
+    successKMedian = true;
+
+    this->initial_guess(features, prototypes);
+
+    iterations = 0;
+    delta =  std::numeric_limits<double>::max();
+
+    //until-loop over iterations
+    do
+    {
+      iterations++;
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+        std::cerr << "k-median iteration " << iterations << std::endl;
+      #endif
+
+      this->compute_assignments( features, prototypes, assignment );
+
+      //in the first iteration there is nothing to compare against yet
+      if (iterations > 1)
+        oldprototypes = prototypes;
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          fprintf(stderr, "KMedian::cluster compute_prototypes\n");
+      #endif
+
+      if ( this->compute_prototypes( features, prototypes, weights, assignment ) < 0 )
+      {
+        fprintf(stderr, "KMedian::cluster restart\n");
+        successKMedian = false;
+        break;
+      }
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          fprintf(stderr, "KMedian::cluster compute_delta\n");
+      #endif
+
+      if (iterations > 1)
+        delta = this->compute_delta( oldprototypes, prototypes );
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          this->print_iteration( iterations, prototypes, delta );
+      #endif
+
+    } while ((delta > d_minDelta) && (iterations < i_maxIterations));
+
+  }
+
+  std::cerr << "ended optimization  -- delta: " << delta  << " of d_minDelta: " << d_minDelta << " --- and iterations: " << iterations << " of i_maxIterations: " << i_maxIterations << std::endl;
+
+  #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+    fprintf(stderr, "KMedian::cluster: iterations = %d, delta = %f\n",
+        iterations, delta);
+  #endif
+
+  this->compute_weights( features, weights, assignment );
+}
+
+/**
+ * @brief Write information about the current iteration to stderr.
+ *
+ * Prints the iteration number (and, from the second iteration on, the delta),
+ * followed by the index and the content of every prototype.
+ *
+ * @param iterations number of the current iteration
+ * @param prototypes current prototypes
+ * @param delta      current change of the prototypes, only printed if iterations > 1
+ */
+void KMedian::print_iteration( int iterations, VVector & prototypes, double delta )
+{
+  const bool deltaIsAvailable = ( iterations > 1 );
+
+  if ( !deltaIsAvailable )
+  {
+    fprintf(stderr, "KMedian::cluster: iteration=%d\n", iterations);
+  }
+  else
+  {
+    fprintf(stderr, "KMedian::cluster: iteration=%d delta=%f\n", iterations,
+        delta);
+  }
+
+  int classIdx = 0;
+  VVector::const_iterator protoIt = prototypes.begin();
+
+  while ( protoIt != prototypes.end() )
+  {
+    fprintf(stderr, "class (%d)\n", classIdx);
+    cerr << "prototype = " << (*protoIt) << endl;
+
+    ++protoIt;
+    ++classIdx;
+  }
+}

+ 128 - 0
math/cluster/KMedian.h

@@ -0,0 +1,128 @@
+/** 
+* @file KMedian.h
+ * @brief KMedian (aka K-medoid)
+* @author Alexander Freytag
+* @date 23-04-2013 (dd-mm-yyyy)
+
+*/
+#ifndef KMEDIANINCLUDE
+#define KMEDIANINCLUDE
+
+#include <core/basics/Config.h>
+#include <core/vector/Distance.h>
+#include <core/vector/MatrixT.h>
+#include <core/vector/VectorT.h>
+  
+#include "ClusterAlgorithm.h"
+
+namespace OBJREC {
+
+  /**
+   * @class KMedian
+   * @brief KMedian (aka K-medoid): clustering where every prototype is one of the given features
+   * @author Alexander Freytag
+   * @date 23-04-2013 (dd-mm-yyyy)
+  */    
+  class KMedian : public ClusterAlgorithm
+  {
+
+      protected:
+        
+      /************************
+       * 
+       *   protected variables
+       * 
+       **************************/ 
+      
+        //! desired number of clusters
+        int noClasses;
+        
+        //! specify which distance to use for calculating assignments
+        std::string distanceType;
+        
+        //! the actual distance metric (raw pointer; NOTE(review): never freed by this class -- confirm ownership)
+        NICE::VectorDistance<double> *distancefunction;
+        
+        double d_minDelta; //!< iterating stops as soon as the prototypes change by at most this value
+        int i_maxIterations; //!< upper bound for the number of iterations of a single run
+        
+  
+       /************************
+       * 
+       *   protected methods
+       * 
+       **************************/  
+      
+        //! compute the distance between two features using the specified distance metric (NOTE(review): no definition is visible in KMedian.cpp -- confirm whether this is still needed)
+        double vectorDistance(const NICE::Vector &vector1, const NICE::Vector &vector2, uint distancetype);
+        
+        //! compute assignments of all given features wrt the currently known prototypes (cluster medoids) == ~ E-step
+        double compute_assignments ( const NICE::VVector & features,
+                  const NICE::VVector & prototypes,
+                  std::vector<int> & assignment );
+
+        //! compute for every currently found cluster the fraction of features assigned to it
+        double compute_weights ( const NICE::VVector & features,
+              std::vector<double> & weights,
+              std::vector<int>    & assignment );
+
+        //! compute the difference between prototypes of previous iteration and those currently found
+        double compute_delta ( const NICE::VVector & oldprototypes,
+                    const NICE::VVector & prototypes );
+
+        //! compute (update) prototypes given the current assignments == ~ M-step; a negative return value makes cluster() restart
+        int compute_prototypes ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                const std::vector<int>    & assignment );
+
+        //! have an initial guess, i.e., randomly pick some features as initial cluster prototypes
+        void initial_guess ( const NICE::VVector & features,
+                NICE::VVector & prototypes );
+        
+        //! give additional information for the current iteration (written to stderr)
+        void print_iteration ( int iterations, 
+                  NICE::VVector & prototypes,
+                  double delta );
+
+      public:
+    
+        /**
+        * @brief simple constructor
+        * @param _noClasses the number of clusters to be computed
+        * @param _distanceMode a string specifying the distance function to be used (default: euclidean)
+        */
+        KMedian( const int & _noClasses , const std::string & _distanceMode="euclidean");
+        
+        /**
+        * @brief standard constructor
+        * @param conf config file specifying all relevant variable settings
+        *        (keys: distanceType, minDelta, maxIterations, noClasses)
+        * @param _section name of the section within the configfile where the settings can be found (default: KMedian)
+        */
+        KMedian( const NICE::Config *conf, const std::string & _section = "KMedian");
+
+        
+            
+        /** simple destructor */
+        virtual ~KMedian();
+          
+        /**
+        *@brief this is the actual method that performs the clustering for a given set of features
+        *@author Alexander Freytag
+        *@date 25-04-2013 (dd-mm-yyyy)
+        *@param   features input features to be clustered
+        *@param   prototypes computed prototypes (cluster medoids) for the given samples
+        *@param   weights fraction of the features assigned to every cluster
+        *@param   assignment explicit assignments of features to computed cluster medoids
+        */        
+        void cluster ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                std::vector<int>    & assignment );
+
+  };
+
+
+} // namespace
+
+#endif

+ 89 - 0
math/cluster/tests/Makefile.inc

@@ -0,0 +1,89 @@
+# BINARY-DIRECTORY-MAKEFILE
+# conventions:
+# - there are no subdirectories, they are ignored!
+# - all ".C", ".cpp" and ".c" files in the current directory are considered
+#   independent binaries, and linked as such.
+# - the binaries depend on the library of the parent directory
+# - the binary names are created with $(BINNAME), i.e. it will be more or less
+#   the name of the .o file
+# - all binaries will be added to the default build list ALL_BINARIES (NOTE(review): this file only fills CHECKS/ALL_CHECKS below -- confirm)
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+#SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+#include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# include the libdepend.inc file, which gives additional dependencies for the
+# libraries and binaries. additionally, an automatic dependency from the library
+# of the parent directory is added (NOTE(review): the PKG_DEPEND_INT call below is active, not commented out).
+
+-include $(SUBDIR)libdepend.inc
+
+PARENTDIR:=$(patsubst %/,%,$(dir $(patsubst %/,%,$(SUBDIR))))
+$(call PKG_DEPEND_INT,$(PARENTDIR))
+$(call PKG_DEPEND_EXT,CPPUNIT)
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we cannot include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+      $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+CHECKS:=$(BINDIR)$(call LIBNAME,$(SUBDIR))
+ALL_CHECKS+=$(CHECKS)
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. each binary depends on the corresponding .o file and
+# on the libraries specified by the INTLIBS/EXTLIBS. these dependencies can be
+# specified manually or they are automatically stored in a .bd file.
+
+$(foreach head,$(wildcard $(SUBDIR)*.h),$(eval $(shell grep -q Q_OBJECT $(head) && echo $(head) | sed -e's@^@/@;s@.*/\(.*\)\.h$$@$(BINDIR)\1:$(OBJDIR)moc_\1.o@')))
+$(eval $(foreach c,$(CHECKS),$(c):$(BUILDDIR)$(CPPUNIT_MAIN_OBJ) $(OBJS) $(call PRINT_INTLIB_DEPS,$(c),.a)))
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 89 - 0
math/cluster/tests/TestKMedian.cpp

@@ -0,0 +1,89 @@
+#ifdef NICE_USELIB_CPPUNIT
+
+#include <string>
+#include <exception>
+
+#include "TestKMedian.h"
+
+#include <core/basics/Config.h>
+#include "vislearning/math/distances/genericDistance.h"
+
+
+const bool verboseStartEnd = true; // print a banner at the start and at the end of the test
+const bool verbose = false; // additionally print the expected features next to the computed prototypes
+const std::string distanceType = "euclidean"; // distance used by the test itself to compare prototypes with the expected features
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestKMedian );
+
+/** Fixture set-up, nothing to prepare. */
+void TestKMedian::setUp()
+{
+  //intentionally empty
+}
+
+/** Fixture tear-down, nothing to clean up. */
+void TestKMedian::tearDown()
+{
+  //intentionally empty
+}
+
+/**
+ * Cluster nine 2D points into two clusters and check that the computed prototypes
+ * coincide with the expected features (x9 for the first, x7 for the second cluster).
+ */
+void TestKMedian::testKMedianClustering() 
+{
+  if (verboseStartEnd)
+    std::cerr << "================== TestKMedian::testKMedianClustering ===================== " << std::endl;
+  
+  //the config lives on the stack, so it is released even if an assertion below throws
+  Config conf;
+  std::string section ( "KMedian" );
+  conf.sS( section, "distanceType", "euclidean" );
+  conf.sI( section, "maxIterations", 200 );
+  conf.sI( section, "noClasses", 2 );
+   
+  OBJREC::KMedian kMedian ( &conf, section );
+  
+  //create some artificial data
+  NICE::VVector features;
+  NICE::Vector x1 (2); x1[0] = 1;  x1[1] = 1; features.push_back(x1);
+  NICE::Vector x2 (2); x2[0] = 4;  x2[1] = 1; features.push_back(x2);
+  NICE::Vector x3 (2); x3[0] = 2;  x3[1] = 4; features.push_back(x3);
+  NICE::Vector x4 (2); x4[0] = 10; x4[1] = 3; features.push_back(x4);
+  NICE::Vector x5 (2); x5[0] = 8;  x5[1] = 3; features.push_back(x5);
+  NICE::Vector x6 (2); x6[0] = 4;  x6[1] = 3; features.push_back(x6);
+  NICE::Vector x7 (2); x7[0] = 3;  x7[1] = 2; features.push_back(x7);
+  NICE::Vector x8 (2); x8[0] = 1;  x8[1] = 3; features.push_back(x8);
+  NICE::Vector x9 (2); x9[0] = 9;  x9[1] = 2; features.push_back(x9);
+  
+  //cluster data
+  NICE::VVector prototypes;
+  std::vector<double> weights;
+  std::vector<int> assignment;
+  
+  kMedian.cluster ( features, prototypes, weights, assignment );  
+
+  //check whether the results fits the ground truth  
+  //NOTE
+  // If no random initialization is activated, we initially grab x2 and x8.
+  // After 3 iterations, we should have converged and obtain x9 and x7.
+  // NOTE(review): the order of the prototypes depends on the sequence produced by rand() -- confirm on new platforms
+
+  //make sure that the indices used below are valid
+  CPPUNIT_ASSERT( prototypes.size() == 2 );
+
+  //NOTE(review): selectDistance returns a raw pointer which is not released here -- confirm ownership
+  NICE::VectorDistance<double> * distancefunction = GenericDistanceSelection::selectDistance(distanceType);
+
+  if ( verbose )
+  {
+    std::cerr << " x9: " << x9 << " cl1: " << prototypes[0] << std::endl;
+    std::cerr << " x7: " << x7 << " cl2: " << prototypes[1] << std::endl;
+  }
+  
+  const double distX9Cl1 ( distancefunction->calculate( x9, prototypes[0] ) );
+  const double distX7Cl2 ( distancefunction->calculate( x7, prototypes[1] ) );
+  
+  //argument order of the macro: expected, actual, tolerance
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, distX9Cl1, 1e-8);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, distX7Cl2, 1e-8); 
+  
+  std::cerr << "                               successfull              " << std::endl;
+  
+  if (verboseStartEnd)
+    std::cerr << "================== TestKMedian::testKMedianClustering done ===================== " << std::endl;
+}
+
+
+#endif

+ 32 - 0
math/cluster/tests/TestKMedian.h

@@ -0,0 +1,32 @@
+#ifndef _TESTKMEDIAN_H
+#define _TESTKMEDIAN_H
+
+#include <cppunit/extensions/HelperMacros.h>
+#include "vislearning/math/cluster/KMedian.h"
+
+/**
+ * CppUnit-Testcase. 
+ * @brief CppUnit-Testcase to verify that the KMedian-clustering works correctly
+ */
+class TestKMedian : public CppUnit::TestFixture {
+
+    CPPUNIT_TEST_SUITE( TestKMedian );
+    
+    CPPUNIT_TEST(testKMedianClustering);
+    
+    CPPUNIT_TEST_SUITE_END();
+  
+ private:
+ 
+ public:
+    void setUp();
+    void tearDown();
+
+    /**
+    * Clusters a small artificial data set with KMedian and compares the
+    * computed prototypes with the expected ones
+    */  
+    void testKMedianClustering();
+
+};
+
+#endif // _TESTKMEDIAN_H