Prechádzať zdrojové kódy

KMedian added and testfile included, featureLearning updated

Alexander Freytag 12 rokov pred
rodič
commit
9a96c90a04

+ 2 - 0
featureLearning/FeatureLearningGeneric.h

@@ -109,6 +109,8 @@ namespace OBJREC
       virtual NICE::FloatImage evaluateCurrentCodebookByDistance ( const std::string & _filename , const bool & beforeComputingNewFeatures = true) = 0;
       
       virtual NICE::ImageT<int> evaluateCurrentCodebookByAssignments ( const std::string & _filename , const bool & beforeComputingNewFeatures = true, const bool & _binaryShowLatestPrototype = false) = 0;      
+      
+      virtual void evaluateCurrentCodebookByConfusionMatrix( NICE::Matrix & _confusionMat ) = 0; 
   
 
   };

+ 131 - 30
featureLearning/FeatureLearningPrototypes.cpp

@@ -8,6 +8,7 @@
 #include <core/image/FilterT.h>
 #include <core/image/CircleT.h>
 #include <core/image/Convert.h>
+#include <core/imagedisplay/ImageDisplay.h>
 #include <core/vector/VectorT.h>
 
 //vislearning
@@ -18,6 +19,7 @@
 #include <vislearning/features/localfeatures/LFWriteCache.h>
 // 
 #include <vislearning/math/cluster/KMeans.h>
+#include <vislearning/math/cluster/KMedian.h>
 #include <vislearning/math/cluster/GMM.h>
 
 using namespace std;
@@ -40,6 +42,10 @@ void FeatureLearningPrototypes::setClusterAlgo( const std::string & _clusterAlgo
   {
     this->clusterAlgo = new OBJREC::KMeans(this->initialNumberOfClusters);
   }
+  else if (_clusterAlgoString.compare("kmedian") == 0)
+  {
+    this->clusterAlgo = new OBJREC::KMedian(this->initialNumberOfClusters);
+  }  
   else if (_clusterAlgoString.compare("GMM") == 0) 
   {
     this->clusterAlgo = new OBJREC::GMM(this->conf, this->initialNumberOfClusters);
@@ -433,7 +439,6 @@ NICE::FloatImage FeatureLearningPrototypes::evaluateCurrentCodebookByDistance (
     
     //convert float to RGB
     NICE::ColorImage noveltyImageRGB ( xsize, ysize  );
-//     ICETools::convertToRGB ( noveltyImageGaussFiltered, noveltyImageRGB );
     if ( beforeComputingNewFeatures )
     {
       imageToPseudoColorWithRangeSpecification( noveltyImageGaussFiltered, noveltyImageRGB, 0 /* min */, maxValForVisualization /* maxFiltered*/ /* max */ );
@@ -461,36 +466,96 @@ NICE::FloatImage FeatureLearningPrototypes::evaluateCurrentCodebookByDistance (
       noveltyImageRGB.writePPM( destination );
     }
     
+    ImageDisplay imgDisp;
     
     // now look where the closest features for the current cluster indices are
     int tmpProtCnt ( 0 );
     for (NICE::VVector::const_iterator protIt = prototypes.begin(); protIt != prototypes.end(); protIt++, tmpProtCnt++)
     {
-      double distToNewCluster ( std::numeric_limits<double>::max() );
+      double distToCurrentCluster ( std::numeric_limits<double>::max() );
       int indexOfMostSimFeat( 0 );
       double tmpDist;
-      int tmpCnt ( 0 );
+      int featureCnt ( 0 );
       
       for ( NICE::VVector::iterator i = features.begin();
             i != features.end();
-            i++, tmpCnt++)
+            i++, featureCnt++)
       {
         tmpDist = this->distFunction->calculate( *i, *protIt );
-        if ( tmpDist < distToNewCluster )
+        if ( tmpDist < distToCurrentCluster )
         {
-          distToNewCluster = tmpDist;
-          indexOfMostSimFeat = tmpCnt;
+          distToCurrentCluster = tmpDist;
+          indexOfMostSimFeat = featureCnt;
         }
       }
       
       int posX ( ( positions[indexOfMostSimFeat] ) [0]  );
       int posY ( ( positions[indexOfMostSimFeat] ) [1]  );
-      NICE::Circle circ ( Coord( posX, posY), 2*(tmpProtCnt+1) /* radius*/, Color(200,0,255 ) );
+      
+      //position (for OpponentSIFT of van de Sande): x y scale orientation cornerness
+      
+      /*What is the interpretation of scale?
+
+      The scale parameter was implemented to correspond with the Gaussian filter sigma at which points were detected. Therefore, the
+      scale is not directly interpretable as the size of the region described in terms of number of pixels. However, it is linearly
+      related the radius of the circular area described. To capture the area of the Gaussian originally used, we have a 3x 
+      magnification factor. But, remember that SIFT has 4x4 cells, and this is a measure for a single cell. So, because we are 
+      working with a radius, multiply by 2. Due to the square shape of the region, we need to extend the outer radius even further 
+      by sqrt(2), otherwise the corners of the outer cells are cut off by our circle. So, the largest outer radius is 
+      Round(scale * 3 * 2 * sqrt(2)). The area from which the SIFT descriptor is computed is a square which fits within a circle 
+      of this radius. Also, due to the Gaussian weighting applied within SIFT, the area that really matters is much, much smaller: 
+      the outer parts get a low weight. 
+
+      For the default scale of 1.2, we get a outer circle radius of 10. The potential sampling area then becomes -10..10, e.g. a 
+      21x21patch. However, the square area which fits inside this circle is smaller: about 15x15. The corners of this 15x15square 
+      touch the outer circle. */
+      
+      /*Why is the direction (angle) field always 0?
+
+
+      Estimating the dominant orientation of a descriptor is useful in matching scenarios. However, in an object/scene categorization
+      setting, the additional invariance reduces accuracy. Being able to discriminate between dominant directions of up and right 
+      is very useful here, and rotated down images are quite rare in an object categorization setting. Therefore, orientation 
+      estimation is disabled in the color descriptor software. The subsequent rotation of the descriptor to achieve 
+      rotation-invariance is still possible by supplying your own regions and angles for an image (through --loadRegions). However, 
+      by default, no such rotation is performed, since the default angle is 0. */      
+      
+      
+      //adapt the pseudo color transformation as done in Convert.cpp
+        size_t seg = ( size_t ) ( tmpProtCnt/(float)prototypes.size() * 6.0 );
+        double y   = ( 6 * tmpProtCnt/(float)prototypes.size() - seg );
+
+        Color circleColor;
+        switch ( seg ) {
+          case 0:
+            circleColor = Color( 0,0,(int)round(y*255) );
+            break;
+          case 1:
+            circleColor = Color( 0,(int)round(y*255),255 );
+            break;
+          case 2:
+            circleColor = Color( 0,255,(int)round((1-y)*255) );
+            break;
+          case 3:
+            circleColor = Color( (int)round(y*255),255,0 );
+            break;
+          case 4:
+            circleColor = Color( 255,(int)round((1-y)*255),0 );
+            break;
+          case 5:
+            circleColor = Color( 255,(int)round(y*255),(int)round(y*255) );
+            break;
+          default:
+            circleColor = Color( 255,255,255 );
+            break;
+        }      
+      
+      NICE::Circle circ ( Coord( posX, posY), (int) round(2*3*sqrt(2)*( positions[indexOfMostSimFeat] )[2]) /* radius*/, circleColor );
       img.draw(circ);  
     }
-    
+        
    if ( b_showResults )
-    showImage(img, "Current image and most similar features for current cluster"); 
+      showImage(img, "Current image and most similar features for current prototypes"); 
    else
    {
       std::vector< std::string > list2;
@@ -532,34 +597,46 @@ NICE::ImageT< int > FeatureLearningPrototypes::evaluateCurrentCodebookByAssignme
     i->normalizeL1();
   }
   
-  std::cerr << "normalization done - now look for nearest clusters for every extracted feature" << std::endl;
-
+  //this is the image we will return finally
   NICE::ImageT< int > clusterImage ( xsize, ysize );
   clusterImage.set ( 0 );
-   
+  
+  // after iterating over all features from the new image, this vector contains
+  // distances to the most similar feature for every prototype
+  NICE::Vector minDistances ( this->prototypes.size() );
+  
   NICE::VVector::const_iterator posIt = positions.begin();
   for ( NICE::VVector::const_iterator i = features.begin();
         i != features.end();
-        i++, posIt++)
+        i++, posIt++ )
   {              
     
     //loop over codebook representatives
     double minDist ( std::numeric_limits<double>::max() );
-    int indexOfNearestCluster ( 0 );
-    int clusterCounter ( 0 );
-    for (NICE::VVector::const_iterator it =  this->prototypes.begin(); it != this->prototypes.end(); it++, clusterCounter++)
+    int indexOfNearestPrototype ( 0 );
+    int prototypeCounter ( 0 );
+    for (NICE::VVector::const_iterator it =  this->prototypes.begin(); it != this->prototypes.end(); it++, prototypeCounter++)
     {
       //compute distance
       double tmpDist ( this->distFunction->calculate(*i,*it) );
+      //check what the closest prototype is
       if (tmpDist < minDist)
       {
         minDist = tmpDist;
-        indexOfNearestCluster = clusterCounter;
+        indexOfNearestPrototype = prototypeCounter;
       }
+      
+      //check whether we found a feature for the current prototype which is more similar then the previous best one
+      if ( minDistances[ prototypeCounter ] > tmpDist )
+        minDistances[ prototypeCounter ] = tmpDist;      
     }
+    
 
+    
+    
     //take minimum distance and store in in a float image    
-    //TODO hard coded!!!
+    // for nice visualization, we plot the cluster index into a square of size 3 x 3
+    //TODO currently hard coded!!!
     int noProtoTypes ( this->prototypes.size() -1 );
     
     for ( int tmpY = (*posIt)[1]  - 1; tmpY < (*posIt)[1]  + 1; tmpY++)
@@ -569,35 +646,59 @@ NICE::ImageT< int > FeatureLearningPrototypes::evaluateCurrentCodebookByAssignme
         if ( _binaryShowLatestPrototype )
         {
           //just a binary image - 1 if newest prototype is nearest - 0 if not
-          if  ( indexOfNearestCluster == noProtoTypes)
+          if  ( indexOfNearestPrototype == noProtoTypes)
             clusterImage ( tmpX, tmpY ) = 1;
           else
             clusterImage ( tmpX, tmpY ) = 0;
         }
         else
           //as many different values as current prototypes available
-          clusterImage ( tmpX, tmpY ) = indexOfNearestCluster;  
+          clusterImage ( tmpX, tmpY ) = indexOfNearestPrototype;  
       }
     }
-//     clusterImage ( (*posIt)[0], (*posIt)[1] ) = indexOfNearestCluster;
   } 
   
+  std::cerr << "Codebook evaluation by assignments... min distances in image for every prototype: " << std::endl << "    " << minDistances << std::endl;
+  
   //show how many clusters we have
   if ( !_binaryShowLatestPrototype )  
   {
     int tmpCnt ( 0 );
     for (NICE::VVector::const_iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); protoIt++, tmpCnt++)
     {
-    for ( int tmpY = 1 + 2 - 2; tmpY < (2  + 2); tmpY++)
-      {
-        for ( int tmpX = 1 + 5*tmpCnt  - 2; tmpX < (1 + 5*tmpCnt  + 2); tmpX++)
+      for ( int tmpY = 1 + 2 - 2; tmpY < (2  + 2); tmpY++)
         {
-          //Take care, this might go "over" the image
-          clusterImage ( tmpX, tmpY ) = (Ipp8u) tmpCnt;
-        }
-      }   
+          for ( int tmpX = 1 + 4*tmpCnt ; tmpX < (1 + 4*tmpCnt  + 3); tmpX++)
+          {
+            //Take care, this might go "over" the image
+            clusterImage ( tmpX, tmpY ) = (Ipp8u) tmpCnt;
+          }
+        }   
     }
   }
-  std::cerr << " evaluateCurrentCodebookByAssignments done" << std::endl;
+  
   return clusterImage;
 }
+
+/**
+ * @brief Compute the pairwise distances between all current prototypes.
+ *
+ * Resizes _confusionMat to (#prototypes x #prototypes), sets every entry to 0.0 and
+ * then stores the distance between prototype j and prototype i in entry ( i, j )
+ * for all i >= j. Only these entries (including the diagonal) are written; the
+ * mirrored entries ( j, i ) with i > j keep the value 0.0.
+ *
+ * @param _confusionMat output matrix, completely overwritten by this call
+ */
+void FeatureLearningPrototypes::evaluateCurrentCodebookByConfusionMatrix( NICE::Matrix & _confusionMat )
+{
+  _confusionMat.resize ( this->prototypes.size(), this->prototypes.size() );
+  _confusionMat.set( 0.0 );
+  
+  // evaluate the size once and keep it signed, so the loop conditions below
+  // do not mix signed and unsigned operands
+  const int numberOfPrototypes ( static_cast<int>( this->prototypes.size() ) );
+  
+  NICE::VVector::const_iterator protoItJ = this->prototypes.begin();
+  for ( int j = 0; j < numberOfPrototypes; j++, protoItJ++ )
+  {
+    // start at the prototype itself: pairs with a smaller first index were handled in earlier rounds
+    NICE::VVector::const_iterator protoItI = protoItJ; 
+    for ( int i = j; i < numberOfPrototypes; i++, protoItI++ )
+    {
+      // every unordered pair is evaluated exactly once and written to ( i, j ) only
+      _confusionMat ( i, j ) = this->distFunction->calculate( *protoItJ, *protoItI );
+    }
+  }
+}
+

+ 7 - 0
featureLearning/FeatureLearningPrototypes.h

@@ -63,7 +63,12 @@ namespace OBJREC
       
       //TODO stupid!!!
       double maxValForVisualization;
+      
+      //! just for temporary debugging
+      int i_posXOfNewPrototype;
+      int i_posYOfNewPrototype;      
             
+      
       /************************
        * 
        *   protected methods
@@ -106,6 +111,8 @@ namespace OBJREC
       
       virtual NICE::ImageT<int> evaluateCurrentCodebookByAssignments ( const std::string & _filename , const bool & beforeComputingNewFeatures = true, const bool & _binaryShowLatestPrototype = false);            
       
+      virtual void evaluateCurrentCodebookByConfusionMatrix( NICE::Matrix & _confusionMat ); 
+      
 
   };
 

+ 38 - 6
featureLearning/FeatureLearningRegionBased.cpp

@@ -184,7 +184,7 @@ void FeatureLearningRegionBased::learnNewFeatures ( const std::string & _filenam
   
   NICE::Vector representative ( newFeatures.begin()->size(), 0.0 );
   
-  //first guess: average feature vectors of the "best" region
+//   //first try: average feature vectors of the "best" region
 //   NICE::VVector::const_iterator posIt = positions.begin();
 //   for ( NICE::VVector::const_iterator featIt = newFeatures.begin();
 //         featIt != newFeatures.end();
@@ -203,7 +203,30 @@ void FeatureLearningRegionBased::learnNewFeatures ( const std::string & _filenam
 //   //normalization
 //   representative.normalizeL1();
   
-  //next try: simply take the first feature vector of the "best" region (although this one should lay on the border, and so on...)
+//   //second try: simply take the first feature vector of the "best" region (although this one should lay on the border, and so on...)
+//   NICE::VVector::const_iterator posIt = positions.begin();
+//   for ( NICE::VVector::const_iterator featIt = newFeatures.begin();
+//         featIt != newFeatures.end();
+//         featIt++, posIt++)
+//   {              
+//     
+//     //only considere features that actually belong to the best region
+//     if ( mask( (*posIt)[0], (*posIt)[1] ) != indexOfBestRegion )
+//       continue;
+//     
+//     representative = *featIt;
+//     i_posXOfNewPrototype = (*posIt)[0];
+//     i_posYOfNewPrototype = (*posIt)[1];
+//     //break after the first positive feature
+//     break;
+//   }   
+  
+  //third try: simply take the feature vector of the "best" region with largest novelty score within this region
+  // ... (hopefully, this is no outlier wrt to this region...)
+  
+  double maxNovelty ( 0.0 );
+  NICE::VVector::const_iterator mostNovelFeature = newFeatures.begin() ;
+  
   NICE::VVector::const_iterator posIt = positions.begin();
   for ( NICE::VVector::const_iterator featIt = newFeatures.begin();
         featIt != newFeatures.end();
@@ -214,10 +237,16 @@ void FeatureLearningRegionBased::learnNewFeatures ( const std::string & _filenam
     if ( mask( (*posIt)[0], (*posIt)[1] ) != indexOfBestRegion )
       continue;
     
-    representative = *featIt;
-    //break after the first positive feature
-    break;
-  }   
+    //did we found a feature of the "best"region with larger novelty score then the current most novel one?
+    if ( noveltyImageGaussFiltered( (*posIt)[0], (*posIt)[1] ) > maxNovelty )
+    {
+      maxNovelty = noveltyImageGaussFiltered( (*posIt)[0], (*posIt)[1] );
+      mostNovelFeature = featIt;
+      i_posXOfNewPrototype = (*posIt)[0];
+      i_posYOfNewPrototype = (*posIt)[1];
+    }
+  }
+  representative = *mostNovelFeature;
   
   std::cerr << " New representative: " << std::endl << representative << std::endl;
   
@@ -250,6 +279,9 @@ void FeatureLearningRegionBased::learnNewFeatures ( const std::string & _filenam
     
     int posX ( ( positions[indexOfMostSimFeat] ) [0]  );
     int posY ( ( positions[indexOfMostSimFeat] ) [1]  );
+    
+    std::cerr << "position of most similar feature --- posX: " << posX << " - posY " << posY << std::endl;
+    std::cerr << "position of new prototype        --- posX: " << i_posXOfNewPrototype << " - posY " << i_posYOfNewPrototype << std::endl;
     NICE::Circle circ ( Coord( posX, posY), 10 /* radius*/, Color(200,0,255) );
     imgTmp.draw(circ); 
     

+ 1 - 1
featureLearning/FeatureLearningRegionBased.h

@@ -41,7 +41,7 @@ namespace OBJREC
       
       //! determine a grid on which we extract local features
       int i_gridSize;
-      
+
       /************************
        * 
        *   protected methods

+ 16 - 4
featureLearning/progs/testFeatureLearning.cpp

@@ -83,10 +83,7 @@ int main( int argc, char **argv )
     std::cerr << "Unknown feature learning algorithm selected, use cluster based instead" << std::endl;
     featureLearning = new OBJREC::FeatureLearningClusterBased( conf, &md );
   }    
-  
-  //print computed cluster centers  -- this is NOT recommended :)
-//   prototypes.store(std::cerr);
-  
+    
   //evaluate how well the training images are covered with our initial codebook
   //that is, compute these nice "novelty maps" per feature
   
@@ -101,10 +98,25 @@ int main( int argc, char **argv )
       NICE::ImageT< int > imgClusterAssignments;
       imgClusterAssignments = featureLearning->evaluateCurrentCodebookByAssignments(filename , false /* beforeComputingNewFeatures */, false /* _binaryShowLatestPrototype*/ );
       
+      std::cerr << "now do image To pseude color and show the initial cluster assignments" << std::endl;
       NICE::ColorImage imgClusterAssignmentsRGB (imgClusterAssignments.width(), imgClusterAssignments.height() );
       imageToPseudoColor( imgClusterAssignments, imgClusterAssignmentsRGB );      
       showImage(imgClusterAssignmentsRGB, "cluster Assignments" ) ;
   }
+  
+  //**********************************************
+  //
+  //        EVALUATE INITIAL CLUSTER
+  //
+  //       COMPUTE A NICE CONFUSION MATRIX
+  //          FOR OUR INITIAL CODEBOOK
+  //
+  //**********************************************  
+  NICE::Matrix confusionMatInitial;
+  featureLearning->evaluateCurrentCodebookByConfusionMatrix( confusionMatInitial );
+  std::cerr << "initial Confusion matrix: " << std::endl << confusionMatInitial << std::endl;
+  
+  
 
   //**********************************************
   //

+ 1 - 0
features/localfeatures/LFColorSande.cpp

@@ -205,6 +205,7 @@ int LFColorSande::extractFeatures ( const NICE::ColorImage & img, VVector & feat
   while ( ! feof ( f ) )
   {
     // <CIRCLE 119 307 1.26134 0 0.00014763>; 0 0 6 2 0 6 25 7 9 4 4 0 0 4 20 36 78 4 5 0 0
+    //<CIRCLE x y scale orientation cornerness>
     if ( fgets ( buffer, buffersize, f ) == NULL )
       break;
 

+ 421 - 0
math/cluster/KMedian.cpp

@@ -0,0 +1,421 @@
+/**
+ * @file KMedian.cpp
+ * @brief KMedian (aka K-medoid)
+* @author Alexander Freytag
+* @date 23-04-2013 (dd-mm-yyyy)
+
+ */
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+#include <map>
+#include <algorithm> //to easily find the smallest value in a map
+
+#include "vislearning/math/cluster/KMedian.h"
+#include "vislearning/math/distances/genericDistance.h"
+
+#include <set>
+
+using namespace OBJREC;
+
+using namespace std;
+
+using namespace NICE;
+
+
+//! an int key together with a double value
+typedef std::pair<int, double> MyPairType;
+
+/**
+ * @brief Strict ordering of MyPairType objects that looks at the double value only.
+ */
+struct CompareSecond
+{
+    //! @return true iff the value stored in lhs is smaller than the one stored in rhs
+    bool operator()(const MyPairType& lhs, const MyPairType& rhs) const
+    {
+        return rhs.second > lhs.second;
+    }
+};
+
+#undef DEBUG_KMEDIAN_ASSIGNMENTS
+// #define DEBUG_KMEDIAN_ASSIGNMENTS
+
+#undef DEBUG_KMEDIAN_PROTOCOMP
+// #define DEBUG_KMEDIAN_PROTOCOMP
+
+
+
+/**
+ * @brief Set up a k-median clusterer.
+ *
+ * The distance metric is requested from GenericDistanceSelection::selectDistance
+ * using the given name. Convergence threshold (1e-5) and iteration limit (200)
+ * are fixed here.
+ *
+ * @param _noClasses desired number of clusters
+ * @param _distanceType name of the distance handed to the distance selection
+ */
+KMedian::KMedian(int _noClasses, string _distanceType)
+  : noClasses(_noClasses),
+    distanceType(_distanceType),
+    distancefunction(GenericDistanceSelection::selectDistance(distanceType)),
+    d_minDelta(1e-5),
+    i_maxIterations(200)
+{
+  // the random number generator is not seeded here
+  //srand(time(NULL));
+}
+
+KMedian::~KMedian() // destructor with an empty body
+{ // NOTE(review): distancefunction obtained in the constructor is not released here -- confirm who owns that object
+}
+
+/**
+ * @brief Initialise every prototype with a randomly drawn example.
+ *
+ * Example indices are drawn with rand() and re-drawn until an index is found that
+ * was not used for another prototype, so no example index is picked twice.
+ * If fewer examples than prototypes are given, distinct indices cannot be drawn;
+ * in that case an error is printed and the prototypes are left unchanged.
+ *
+ * @param features   examples to draw from
+ * @param prototypes already sized container; every entry is overwritten with a copy of an example
+ */
+void KMedian::initial_guess(const VVector & features, VVector & prototypes)
+{
+  // guards both the modulo by zero for empty input and the endless re-drawing
+  // that would happen once all example indices are used up
+  if ( features.size() < prototypes.size() )
+  {
+    std::cerr << "KMedian::initial_guess: only " << features.size() << " features given for " << prototypes.size() << " prototypes -- prototypes are left unchanged" << std::endl;
+    return;
+  }
+
+  // indices of examples that already serve as prototype
+  std::set<int> usedIndices;
+
+  for (VVector::iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); protoIt++)
+  {
+    int k;
+
+    do
+    {
+      k = rand() % features.size();
+    } while ( usedIndices.find(k) != usedIndices.end() );
+
+    usedIndices.insert(k);
+
+    *protoIt = features[k];
+  }
+}
+
+/**
+ * @brief Replace every prototype by the medoid of the examples currently assigned to it.
+ *
+ * For every cluster, the distances between all pairs of its examples are summed up
+ * per example; the example with the smallest sum becomes the new prototype. On ties
+ * the example with the smallest index wins.
+ *
+ * @param features   all examples
+ * @param prototypes one entry per cluster, overwritten with the new medoids
+ * @param weights    one entry per cluster, reset to 0 here
+ * @param assignment cluster index of every example (same order as features)
+ *
+ * @return 0 on success, -1 if at least one cluster has no example assigned to it.
+ *         KMedian::cluster restarts with a new initial guess on a negative return value.
+ */
+int KMedian::compute_prototypes(const VVector & features, VVector & prototypes,
+    std::vector<double> & weights, const std::vector<int> & assignment)
+{
+  
+  #ifdef DEBUG_KMEDIAN_PROTOCOMP  
+    std::cerr << "initial assignments: ";
+    for (std::vector<int>::const_iterator assignIt = assignment.begin(); assignIt != assignment.end(); assignIt++)
+    { 
+      std::cerr << " " << *assignIt;
+    } 
+    std::cerr << std::endl;
+  #endif
+  
+  //initialization
+  for (int k = 0; k < noClasses; k++)
+  {
+    prototypes[k].set(0);
+    weights[k] = 0;
+  }
+  
+  // collect, for every cluster, the indices of its examples in ascending order
+  std::vector< std::vector<int> > membersOfCluster ( noClasses );
+  for ( int exCnt = 0; exCnt < (int) features.size(); exCnt++ )
+  {
+    const int k ( assignment[exCnt] );
+    // ignore labels that do not refer to one of our clusters instead of writing out of bounds
+    if ( ( k < 0 ) || ( k >= noClasses ) )
+      continue;
+    membersOfCluster[ k ].push_back( exCnt );
+  }
+  
+  // set as soon as a cluster without any example is encountered
+  bool emptyClusterFound ( false );
+  
+  //compute the median for every cluster
+  #pragma omp parallel for
+  for (int clusterCnt = 0; clusterCnt < noClasses; clusterCnt++)
+  {    
+    const std::vector<int> & members = membersOfCluster[ clusterCnt ];
+    
+    // without examples there is no medoid -- report it instead of dereferencing an end iterator
+    if ( members.empty() )
+    {
+      #pragma omp critical
+      emptyClusterFound = true;
+      continue;
+    }
+    
+    // summedDistances[a] = sum of distances between member a and all members of this cluster
+    // every cluster uses its own buffer to allow parallelization
+    std::vector<double> summedDistances ( members.size(), 0.0 );
+    for ( size_t a = 0; a < members.size(); a++ )
+    {
+      // start at a: every pair is evaluated only once and credited to both examples
+      for ( size_t b = a; b < members.size(); b++ )
+      {
+        const double dist ( distancefunction->calculate( features[ members[a] ], features[ members[b] ] ) );
+        summedDistances[ a ] += dist;
+        if ( a != b )
+          summedDistances[ b ] += dist;
+      }
+    }
+       
+    #ifdef DEBUG_KMEDIAN_PROTOCOMP
+      std::cerr << "distances for cluster " << clusterCnt << " ";
+      for ( size_t a = 0; a < members.size(); a++ )
+      {
+        std::cerr << members[a] << " " << summedDistances[a] << " ";
+      }
+      std::cerr << std::endl;
+    #endif
+      
+    //now compute the index of example with min overall distance
+    const size_t posOfMedian ( std::min_element( summedDistances.begin(), summedDistances.end() ) - summedDistances.begin() );
+        
+    #pragma omp critical
+    prototypes[clusterCnt] = features[ members[posOfMedian] ]; 
+ 
+    //finished computations for cluster clusterCnt
+  }
+  
+  #ifdef DEBUG_KMEDIAN_PROTOCOMP
+    std::cerr << " ----   prototypes after current iteration:  --- " << std::endl;
+    for (NICE::VVector::const_iterator protoIt = prototypes.begin(); protoIt != prototypes.end(); protoIt++)
+    {
+      std::cerr << *protoIt << " ";
+    }
+    
+    std::cerr << std::endl;
+  #endif
+
+  if ( emptyClusterFound )
+    return -1;
+
+  return 0;
+}
+
+/**
+ * @brief Measure how far the prototypes moved between two iterations.
+ *
+ * @param oldprototypes prototypes of the previous iteration
+ * @param prototypes    prototypes of the current iteration; accessed at every index valid for oldprototypes
+ *
+ * @return sum over all clusters of the distance between old and current prototype
+ */
+double KMedian::compute_delta(const VVector & oldprototypes,
+    const VVector & prototypes)
+{
+  double distance = 0;
+
+  for (uint k = 0; k < oldprototypes.size(); k++)
+  {
+    // evaluate the metric once and reuse the value for the debug output
+    const double shift ( distancefunction->calculate(oldprototypes[k], prototypes[k]) );
+    distance += shift;
+    
+    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+      fprintf(stderr, "KMedian::compute_delta: Distance: %f\n", shift);
+    #endif
+  }
+  return distance;
+}
+
+/**
+ * @brief Assign every example to its closest prototype.
+ *
+ * Ties are resolved in favour of the prototype with the smaller index; if no
+ * prototype is closer than the largest representable double, index 0 is used.
+ *
+ * @param features   examples to assign
+ * @param prototypes current cluster representatives
+ * @param assignment must already provide one entry per example; entry i receives
+ *                   the index of the prototype closest to example i
+ *
+ * @return always 0.0
+ */
+double KMedian::compute_assignments(const VVector & features,
+                                    const VVector & prototypes,
+                                    std::vector<int> & assignment)
+{
+  int index = 0;
+  for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
+  {
+
+    const NICE::Vector & x = *i;
+    double mindist = std::numeric_limits<double>::max();
+    int minclass = 0;
+
+    #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+        fprintf(stderr, "computing nearest prototype for std::vector %d\n",
+            index);
+    #endif
+        
+    int c = 0;
+    for (VVector::const_iterator j = prototypes.begin(); j
+        != prototypes.end(); j++, c++)
+    {
+
+      const NICE::Vector & p = *j;
+      // evaluated once; the debug output below reuses the value
+      const double distance = distancefunction->calculate(p, x);
+      
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+            cerr << p << endl;
+            cerr << x << endl;
+            fprintf(stderr, "KMedian::compute_assignments: distance to prototype %d is %f\n", c, distance);
+      #endif
+
+      if (distance < mindist)
+      {
+        minclass = c;
+        mindist = distance;
+      }
+    }
+
+    assignment[index] = minclass;
+  }
+
+  return 0.0;
+}
+
+/**
+ * @brief Compute for every cluster the fraction of examples assigned to it.
+ *
+ * @param features   all examples (only their number is used)
+ * @param weights    one entry per cluster; receives (#assigned examples) / (#examples)
+ * @param assignment cluster index of every example
+ *
+ * @return always 0.0
+ */
+double KMedian::compute_weights(const VVector & features,
+                                std::vector<double> & weights,
+                                std::vector<int> & assignment)
+{
+  // start counting from scratch
+  for (int clusterIdx = 0; clusterIdx < noClasses; ++clusterIdx)
+  {
+    weights[clusterIdx] = 0;
+  }
+
+  // one vote per example for the cluster it belongs to
+  const size_t numberOfExamples = features.size();
+  for (size_t exampleIdx = 0; exampleIdx < numberOfExamples; ++exampleIdx)
+  {
+    const int clusterIdx = assignment[exampleIdx];
+    weights[clusterIdx]++;
+  }
+
+  // turn the counts into relative frequencies
+  for (int clusterIdx = 0; clusterIdx < noClasses; ++clusterIdx)
+  {
+    weights[clusterIdx] = weights[clusterIdx] / numberOfExamples;
+  }
+
+  return 0.0;
+}
+
+/**
+ * @brief Run k-median clustering on the given examples.
+ *
+ * Starting from a random initial guess, assignments and prototypes are updated
+ * alternately until the prototypes move by at most d_minDelta or i_maxIterations
+ * is reached. If the prototype update reports a failure (negative return value),
+ * the optimisation is restarted with a new initial guess.
+ * The process is terminated via exit(-1) if no cluster is requested or fewer
+ * examples than clusters are given.
+ *
+ * @param features   examples to cluster
+ * @param prototypes output: one representative per cluster (previous content is discarded)
+ * @param weights    output: fraction of examples assigned to every cluster
+ * @param assignment output: cluster index of every example
+ */
+void KMedian::cluster(const NICE::VVector & features,
+                      NICE::VVector & prototypes,
+                      std::vector<double> & weights,
+                      std::vector<int> & assignment)
+{
+  NICE::VVector oldprototypes;
+
+  prototypes.clear();
+  weights.clear();
+  assignment.clear();
+
+  // without a single cluster, the weight computation at the end would write out of bounds
+  if ( noClasses <= 0 )
+  {
+    fprintf(stderr,
+        "FATAL ERROR: kMedian needs at least one cluster, but %d were requested\n", noClasses);
+    exit(-1);
+  }
+
+  if ( (int) features.size() < noClasses )
+  {
+    fprintf(stderr,
+        "FATAL ERROR: Not enough feature vectors provided for kMedian\n");
+    exit(-1);
+  }
+
+  weights.resize(noClasses, 0);
+  assignment.resize(features.size(), 0);
+
+  // safe: the checks above guarantee at least one example
+  const int dimension ( features[0].size() );
+
+  for (int k = 0; k < noClasses; k++)
+  {
+    prototypes.push_back( NICE::Vector(dimension) );
+    prototypes[k].set(0);
+  }
+ 
+  bool successKMedian ( false );
+  int iterations ( 0 );
+  double delta ( std::numeric_limits<double>::max() );
+  
+  while ( !successKMedian )
+  {
+    //we assume that this run will be successful
+    successKMedian = true;
+    
+    this->initial_guess(features, prototypes);
+
+    iterations = 0;
+    delta =  std::numeric_limits<double>::max();
+
+    //until-loop over iterations
+    do
+    {
+      iterations++;
+      
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+        std::cerr << "k-median iteration " << iterations << std::endl;
+      #endif
+        
+      this->compute_assignments( features, prototypes, assignment );
+
+      // remember the prototypes the assignments are based on; in the first
+      // iteration there is nothing to compare against yet
+      if (iterations > 1)
+        oldprototypes = prototypes;
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          fprintf(stderr, "KMedian::cluster compute_prototypes\n");
+      #endif
+      
+      if ( this->compute_prototypes( features, prototypes, weights, assignment ) < 0 )
+      {
+        fprintf(stderr, "KMedian::cluster restart\n");
+        successKMedian = false;
+        break;
+      }
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          fprintf(stderr, "KMedian::cluster compute_delta\n");
+      #endif
+      
+      if (iterations > 1)
+        delta = this->compute_delta( oldprototypes, prototypes );
+
+      #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+          this->print_iteration( iterations, prototypes, delta );
+      #endif
+
+    } while ((delta > d_minDelta) && (iterations < i_maxIterations));
+    
+  }
+
+  std::cerr << "ended optimization  -- delta: " << delta  << " of d_minDelta: " << d_minDelta << " --- and iterations: " << iterations << " of i_maxIterations: " << i_maxIterations << std::endl;
+    
+  #ifdef DEBUG_KMEDIAN_ASSIGNMENTS
+    fprintf(stderr, "KMedian::cluster: iterations = %d, delta = %f\n",
+        iterations, delta);
+  #endif
+
+  this->compute_weights( features, weights, assignment );
+}
+
+/**
+ * @brief Write the state of the current iteration to stderr.
+ *
+ * @param iterations number of the current iteration
+ * @param prototypes current prototypes, printed one per cluster
+ * @param delta      prototype movement; only printed from the second iteration on
+ */
+void KMedian::print_iteration( int iterations, VVector & prototypes, double delta )
+{
+  // the first iteration has no meaningful delta yet
+  if (iterations <= 1)
+    fprintf(stderr, "KMedian::cluster: iteration=%d\n", iterations);
+  else
+    fprintf(stderr, "KMedian::cluster: iteration=%d delta=%f\n", iterations,
+        delta);
+
+  for (size_t classIdx = 0; classIdx < prototypes.size(); ++classIdx)
+  {
+    fprintf(stderr, "class (%d)\n", static_cast<int>(classIdx));
+    cerr << "prototype = " << prototypes[classIdx] << endl;
+  }
+}

+ 118 - 0
math/cluster/KMedian.h

@@ -0,0 +1,118 @@
+/** 
+* @file KMedian.h
+ * @brief KMedian (aka K-medoid)
+* @author Alexander Freytag
+* @date 23-04-2013 (dd-mm-yyyy)
+
+*/
+#ifndef KMEDIANINCLUDE
+#define KMEDIANINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/MatrixT.h"
+  
+#include "ClusterAlgorithm.h"
+#include <core/vector/Distance.h>
+
+namespace OBJREC {
+
+  /**
+   * @class KMedian
+   * @brief KMedian (aka K-medoid)
+   * @author Alexander Freytag
+   * @date 23-04-2013 (dd-mm-yyyy)
+   *
+   * Clustering algorithm derived from ClusterAlgorithm. The public entry point is
+   * cluster(); the protected helpers below implement the individual steps of the
+   * iterative optimization.
+  */    
+  class KMedian : public ClusterAlgorithm
+  {
+
+      protected:
+        
+      /************************
+       * 
+       *   protected variables
+       * 
+       **************************/ 
+      
+        //! desired number of clusters
+        int noClasses;
+        
+        //! specify which distance to use for calculating assignments
+        std::string distanceType;
+        
+        //! the actual distance metric
+        //! NOTE(review): raw pointer -- who allocates and releases it is not visible in this header; confirm in KMedian.cpp
+        NICE::VectorDistance<double> *distancefunction;
+        
+        //! convergence threshold: cluster() keeps iterating only while the change of the prototypes (delta) is larger than this value
+        double d_minDelta;
+        //! upper bound for the number of iterations performed by cluster()
+        int i_maxIterations;
+        
+  
+       /************************
+       * 
+       *   protected methods
+       * 
+       **************************/  
+      
+        //! compute the distance between two features using the specified distance metric
+        //! NOTE(review): the meaning of distancetype is not visible here; 'uint' is not a standard C++ type name -- confirm where it is declared
+        double vectorDistance(const NICE::Vector &vector1, const NICE::Vector &vector2, uint distancetype);
+        
+        //! compute assignments of all given features with respect to the currently known prototypes (cluster medoids) == ~ E-step
+        double compute_assignments ( const NICE::VVector & features,
+                  const NICE::VVector & prototypes,
+                  std::vector<int> & assignment );
+
+        //! compute number of assignments for every currently found cluster
+        double compute_weights ( const NICE::VVector & features,
+              std::vector<double> & weights,
+              std::vector<int>    & assignment );
+
+        //! compute the difference between prototypes of previous iteration and those currently found
+        //! (cluster() evaluates it from the second iteration on and compares the result against d_minDelta)
+        double compute_delta ( const NICE::VVector & oldprototypes,
+                    const NICE::VVector & prototypes );
+
+        //! compute (update) prototypes given the current assignments == ~ M-step
+        //! (cluster() treats a negative return value as failure and aborts the current optimization run)
+        int compute_prototypes ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                const std::vector<int>    & assignment );
+
+        //! have an initial guess, i.e., randomly pick some features as initial cluster centroids
+        void initial_guess ( const NICE::VVector & features,
+                NICE::VVector & prototypes );
+        
+        //! give additional information for the current iteration
+        //! (prints the iteration number, delta for iterations > 1, and all prototypes to stderr)
+        void print_iteration ( int iterations, 
+                  NICE::VVector & prototypes,
+                  double delta );
+
+      public:
+    
+        /**
+        * @brief standard constructor
+        * @param noClasses the number of clusters to be computed
+        * @param distanceMode a string specifying the distance function to be used (default: euclidean)
+        */
+        KMedian( int noClasses , std::string distanceMode="euclidean");
+            
+        /** simple destructor */
+        virtual ~KMedian();
+          
+        /**
+        *@brief this is the actual method that performs the clustering for a given set of features
+        *@author Alexander Freytag
+        *@date 25-04-2013 (dd-mm-yyyy)
+        *@param   features input features to be clustered
+        *@param   prototypes computed prototypes (cluster medoids) for the given samples
+        *@param   weights number of assignments for every cluster
+        *@param   assignment explicit assignments of features to computed cluster medoids
+        */        
+        void cluster ( const NICE::VVector & features,
+                NICE::VVector & prototypes,
+                std::vector<double> & weights,
+                std::vector<int>    & assignment );
+
+  };
+
+
+} // namespace
+
+#endif

+ 89 - 0
math/cluster/tests/Makefile.inc

@@ -0,0 +1,89 @@
+# BINARY-DIRECTORY-MAKEFILE
+# conventions:
+# - there are no subdirectories, they are ignored!
+# - all ".C", ".cpp" and ".c" files in the current directory are compiled and
+#   linked together into a single CppUnit check binary (see CHECKS below).
+# - the check binary depends on the library of the parent directory
+# - the name of the check binary is created with $(call LIBNAME,$(SUBDIR))
+# - the check binary is added to the list of checks ALL_CHECKS
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+#SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+#include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# include the libdepend.inc file, which gives additional dependencies for the
+# libraries and binaries. additionally, a dependency on the library of the
+# parent directory and on the external CPPUNIT package is declared below.
+
+-include $(SUBDIR)libdepend.inc
+
+PARENTDIR:=$(patsubst %/,%,$(dir $(patsubst %/,%,$(SUBDIR))))
+$(call PKG_DEPEND_INT,$(PARENTDIR))
+$(call PKG_DEPEND_EXT,CPPUNIT)
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+      $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+# in this test directory, the check binary is registered in $(ALL_CHECKS).
+
+CHECKS:=$(BINDIR)$(call LIBNAME,$(SUBDIR))
+ALL_CHECKS+=$(CHECKS)
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. each binary depends on the corresponding .o file and
+# on the libraries specified by the INTLIBS/EXTLIBS. these dependencies can be
+# specified manually or they are automatically stored in a .bd file.
+
+$(foreach head,$(wildcard $(SUBDIR)*.h),$(eval $(shell grep -q Q_OBJECT $(head) && echo $(head) | sed -e's@^@/@;s@.*/\(.*\)\.h$$@$(BINDIR)\1:$(OBJDIR)moc_\1.o@')))
+$(eval $(foreach c,$(CHECKS),$(c):$(BUILDDIR)$(CPPUNIT_MAIN_OBJ) $(OBJS) $(call PRINT_INTLIB_DEPS,$(c),.a)))
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 79 - 0
math/cluster/tests/TestKMedian.cpp

@@ -0,0 +1,79 @@
+#ifdef NICE_USELIB_CPPUNIT
+
+#include <string>
+#include <exception>
+
+#include "TestKMedian.h"
+
+#include "vislearning/math/distances/genericDistance.h"
+
+
+const bool verboseStartEnd = true;
+const bool verbose = false;
+const std::string distanceType = "euclidean";
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestKMedian );
+
+// Fixture set-up hook of the CppUnit test fixture; intentionally empty because
+// the test case creates all data it needs locally.
+void TestKMedian::setUp() {
+}
+
+// Fixture tear-down hook of the CppUnit test fixture; intentionally empty because
+// setUp() does not acquire anything that would have to be released.
+void TestKMedian::tearDown() {
+}
+
+/**
+ * Clusters nine hand-picked 2D samples into two groups and verifies that the
+ * two prototypes returned by KMedian::cluster coincide with the samples x9 and x7.
+ *
+ * NOTE: the checks rely on a fixed cluster order (prototypes[0] == x9,
+ * prototypes[1] == x7), i.e., on a non-random initialization inside KMedian.
+ */
+void TestKMedian::testKMedianClustering() 
+{
+  if (verboseStartEnd)
+    std::cerr << "================== TestKMedian::testKMedianClustering ===================== " << std::endl;
+  
+  // cluster with the same metric (distanceType) that is used for the verification below,
+  // so that both cannot silently drift apart
+  OBJREC::KMedian kMedian ( 2 /* noClasses */, distanceType /*distanceMode*/ );
+  
+  //create some artificial data
+  NICE::VVector features;
+  NICE::Vector x1 (2); x1[0] = 1;  x1[1] = 1; features.push_back(x1);
+  NICE::Vector x2 (2); x2[0] = 4;  x2[1] = 1; features.push_back(x2);
+  NICE::Vector x3 (2); x3[0] = 2;  x3[1] = 4; features.push_back(x3);
+  NICE::Vector x4 (2); x4[0] = 10; x4[1] = 3; features.push_back(x4);
+  NICE::Vector x5 (2); x5[0] = 8;  x5[1] = 3; features.push_back(x5);
+  NICE::Vector x6 (2); x6[0] = 4;  x6[1] = 3; features.push_back(x6);
+  NICE::Vector x7 (2); x7[0] = 3;  x7[1] = 2; features.push_back(x7);
+  NICE::Vector x8 (2); x8[0] = 1;  x8[1] = 3; features.push_back(x8);
+  NICE::Vector x9 (2); x9[0] = 9;  x9[1] = 2; features.push_back(x9);
+  
+  //cluster data
+  NICE::VVector prototypes;
+  std::vector<double> weights;
+  std::vector<int> assignment;
+  
+  kMedian.cluster ( features, prototypes, weights, assignment );  
+
+  // two clusters were requested -- fail with a proper test failure instead of
+  // indexing out of bounds below if the clustering did not deliver both prototypes
+  CPPUNIT_ASSERT( prototypes.size() == 2 );
+
+  //check whether the results fits the ground truth  
+  //NOTE
+  // If no random initialization is activated, we initially grab x2 and x8.
+  // After 3 iterations, we should have converged and obtain x9 and x7.
+
+  // NOTE(review): the metric obtained here is never released in this test --
+  // confirm in genericDistance.h whether the caller owns the returned object.
+  NICE::VectorDistance<double> * distancefunction = GenericDistanceSelection::selectDistance(distanceType);
+
+  if ( verbose )
+  {
+    std::cerr << " x9: " << x9 << " cl1: " << prototypes[0] << std::endl;
+    std::cerr << " x7: " << x7 << " cl2: " << prototypes[1] << std::endl;
+  }
+  
+  // a distance of zero means that the prototype is identical to the expected sample
+  double distX9Cl1 ( distancefunction->calculate( x9, prototypes[0] ) );
+  double distX7Cl2 ( distancefunction->calculate( x7, prototypes[1] ) );
+  
+  // argument order of the CppUnit macro is (expected, actual, delta)
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, distX9Cl1, 1e-8);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, distX7Cl2, 1e-8); 
+  
+  std::cerr << "                               successfull              " << std::endl;
+        
+  if (verboseStartEnd)
+    std::cerr << "================== TestKMedian::testKMedianClustering done ===================== " << std::endl;
+}
+
+
+#endif

+ 32 - 0
math/cluster/tests/TestKMedian.h

@@ -0,0 +1,32 @@
+// Include guard: the name deliberately avoids a leading underscore followed by an
+// uppercase letter (such identifiers are reserved for the implementation) and
+// follows the guard style of KMedian.h.
+#ifndef TESTKMEDIANINCLUDE
+#define TESTKMEDIANINCLUDE
+
+#include <cppunit/extensions/HelperMacros.h>
+#include "vislearning/math/cluster/KMedian.h"
+
+/**
+ * CppUnit-Testcase. 
+ * @brief CppUnit-Testcase to verify that the KMedian clustering works correctly
+ */
+class TestKMedian : public CppUnit::TestFixture {
+
+    // register the test methods of this fixture with CppUnit
+    CPPUNIT_TEST_SUITE( TestKMedian );
+    
+    CPPUNIT_TEST(testKMedianClustering);
+    
+    CPPUNIT_TEST_SUITE_END();
+  
+ private:
+ 
+ public:
+    /** fixture hook, run by CppUnit before each test method */
+    void setUp();
+    /** fixture hook, run by CppUnit after each test method */
+    void tearDown();
+
+    /**
+    * Runs the KMedian clustering on a small artificial data set and
+    * checks the resulting prototypes.
+    */  
+    void testKMedianClustering();
+
+};
+
+#endif // TESTKMEDIANINCLUDE

+ 1 - 1
math/pdf/tests/TestPDF.cpp

@@ -25,7 +25,7 @@
 #include "core/vector/VVector.h"
 #include "vislearning/math/pdf/PDFGaussian.h"
 
-#include "objrec/nice_nonvis.h"
+// #include "objrec/nice_nonvis.h"
 
 using namespace std;
 using namespace NICE;