|
@@ -1,9 +1,8 @@
|
|
|
/**
|
|
|
* @file KMeans.cpp
|
|
|
* @brief K-Means
|
|
|
- * @author Erik Rodner
|
|
|
- * @date 10/29/2007
|
|
|
-
|
|
|
+ * @author Erik Rodner, Alexander Freytag
|
|
|
+ * @date 29-10-2007 (dd-mm-yyyy)
|
|
|
*/
|
|
|
|
|
|
#include <iostream>
|
|
@@ -21,277 +20,293 @@ using namespace NICE;
|
|
|
|
|
|
#undef DEBUG_KMEANS
|
|
|
|
|
|
-KMeans::KMeans(int _noClasses, string _distanceType) :
|
|
|
- noClasses(_noClasses), distanceType(_distanceType)
|
|
|
+KMeans::KMeans(const int & _noClasses, const std::string & _distanceType) :
|
|
|
+ noClasses(_noClasses), distanceType(_distanceType)
|
|
|
{
|
|
|
- //srand(time(NULL));
|
|
|
- distancefunction = GenericDistanceSelection::selectDistance(distanceType);
|
|
|
+ //srand(time(NULL));
|
|
|
+ this->distancefunction = GenericDistanceSelection::selectDistance(distanceType);
|
|
|
+}
|
|
|
+
|
|
|
+KMeans::KMeans( const NICE::Config *conf, const std::string & _section)
|
|
|
+{
|
|
|
+ this->distanceType = conf->gS( _section, "distanceType", "euclidean" );
|
|
|
+ this->distancefunction = GenericDistanceSelection::selectDistance(distanceType);
|
|
|
+
|
|
|
+ this->d_minDelta = conf->gD( _section, "minDelta", 1e-5 );
|
|
|
+ this->i_maxIterations = conf->gI( _section, "maxIterations", 200);
|
|
|
+
|
|
|
+ this->noClasses = conf->gI( _section, "noClasses", 20);
|
|
|
}
|
|
|
|
|
|
KMeans::~KMeans()
|
|
|
{
|
|
|
}
|
|
|
|
|
|
-void KMeans::initial_guess(const VVector & features, VVector & prototypes)
|
|
|
+void KMeans::initial_guess(const NICE::VVector & features, NICE::VVector & prototypes)
|
|
|
{
|
|
|
- int j = 0;
|
|
|
- std::set<int, std::greater<int> > mark;
|
|
|
+ int j = 0;
|
|
|
+ std::set<int, std::greater<int> > mark;
|
|
|
|
|
|
- for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++)
|
|
|
- {
|
|
|
- int k;
|
|
|
+ for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++)
|
|
|
+ {
|
|
|
+ int k;
|
|
|
|
|
|
- do
|
|
|
- {
|
|
|
- k = rand() % features.size();
|
|
|
- } while (mark.find(k) != mark.end());
|
|
|
+ do
|
|
|
+ {
|
|
|
+ k = rand() % features.size();
|
|
|
+ } while (mark.find(k) != mark.end());
|
|
|
|
|
|
- mark.insert(mark.begin(), k);
|
|
|
+ mark.insert(mark.begin(), k);
|
|
|
|
|
|
- *i = features[k];
|
|
|
- }
|
|
|
+ *i = features[k];
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
int KMeans::compute_prototypes(const VVector & features, VVector & prototypes,
|
|
|
- std::vector<double> & weights, const std::vector<int> & assignment)
|
|
|
+ std::vector<double> & weights, const std::vector<int> & assignment)
|
|
|
{
|
|
|
- int j = 0;
|
|
|
- // fprintf (stderr, "KMeans::compute_prototypes: init noClasses=%d\n", noClasses);
|
|
|
- for (int k = 0; k < noClasses; k++)
|
|
|
- {
|
|
|
- prototypes[k].set(0);
|
|
|
- weights[k] = 0;
|
|
|
- }
|
|
|
-
|
|
|
- // fprintf (stderr, "KMeans::compute_prototypes: compute means\n");
|
|
|
- for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
|
|
|
- {
|
|
|
- int k = assignment[j];
|
|
|
-
|
|
|
- NICE::Vector & p = prototypes[k];
|
|
|
-
|
|
|
- const NICE::Vector & x = *i;
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
-
|
|
|
- fprintf(
|
|
|
- stderr,
|
|
|
- "KMeans::compute_prototypes: std::vector %d has assignment %d\n",
|
|
|
- j, k);
|
|
|
-#endif
|
|
|
-
|
|
|
- p += x;
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- cerr << "vector was : " << x << endl;
|
|
|
- cerr << "prototype for this class is now : " << p << endl;
|
|
|
-#endif
|
|
|
- weights[k]++;
|
|
|
- }
|
|
|
-
|
|
|
- // fprintf (stderr, "KMeans::compute_prototypes: scaling\n");
|
|
|
- for (int k = 0; k < noClasses; k++)
|
|
|
- {
|
|
|
-
|
|
|
- NICE::Vector & p = prototypes[k];
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- cerr << "prototype for this class before scaling : " << p << endl;
|
|
|
-#endif
|
|
|
-
|
|
|
- if (weights[k] <= 0)
|
|
|
- {
|
|
|
- return -1;
|
|
|
- }
|
|
|
-
|
|
|
- p *= (1.0 / weights[k]);
|
|
|
-
|
|
|
- weights[k] = weights[k] / features.size();
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- cerr << "prototype for this class after scaling with " << weights[k]
|
|
|
- << " : " << p << endl;
|
|
|
-#endif
|
|
|
- }
|
|
|
-
|
|
|
- return 0;
|
|
|
+ int j = 0;
|
|
|
+ // fprintf (stderr, "KMeans::compute_prototypes: init noClasses=%d\n", noClasses);
|
|
|
+ for (int k = 0; k < this->noClasses; k++)
|
|
|
+ {
|
|
|
+ prototypes[k].set(0);
|
|
|
+ weights[k] = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // fprintf (stderr, "KMeans::compute_prototypes: compute means\n");
|
|
|
+ for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
|
|
|
+ {
|
|
|
+ int k = assignment[j];
|
|
|
+
|
|
|
+ NICE::Vector & p = prototypes[k];
|
|
|
+
|
|
|
+ const NICE::Vector & x = *i;
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(
|
|
|
+ stderr,
|
|
|
+ "KMeans::compute_prototypes: std::vector %d has assignment %d\n",
|
|
|
+ j, k);
|
|
|
+ #endif
|
|
|
+
|
|
|
+ p += x;
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ std::cerr << "vector was : " << x << std::endl;
|
|
|
+ std::cerr << "prototype for this class is now : " << p << std::endl;
|
|
|
+ #endif
|
|
|
+
|
|
|
+ weights[k]++;
|
|
|
+ }
|
|
|
+
|
|
|
+ // fprintf (stderr, "KMeans::compute_prototypes: scaling\n");
|
|
|
+ for (int k = 0; k < this->noClasses; k++)
|
|
|
+ {
|
|
|
+
|
|
|
+ NICE::Vector & p = prototypes[k];
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ std::cerr << "prototype for this class before scaling : " << p << std::endl;
|
|
|
+ #endif
|
|
|
+
|
|
|
+ if (weights[k] <= 0)
|
|
|
+ {
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+ p *= (1.0 / weights[k]);
|
|
|
+
|
|
|
+ weights[k] = weights[k] / features.size();
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ std::cerr << "prototype for this class after scaling with " << weights[k]
|
|
|
+ << " : " << p << std::endl;
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
-double KMeans::compute_delta(const VVector & oldprototypes,
|
|
|
- const VVector & prototypes)
|
|
|
+double KMeans::compute_delta(const NICE::VVector & oldprototypes,
|
|
|
+ const NICE::VVector & prototypes)
|
|
|
{
|
|
|
- double distance = 0;
|
|
|
-
|
|
|
- for (uint k = 0; k < oldprototypes.size(); k++)
|
|
|
- {
|
|
|
- distance
|
|
|
- += distancefunction->calculate(oldprototypes[k], prototypes[k]);
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- fprintf(stderr, "KMeans::compute_delta: Distance:",
|
|
|
- distancefunction->calculate(oldprototypes[k], prototypes[k]));
|
|
|
-#endif
|
|
|
- }
|
|
|
- return distance;
|
|
|
+ double distance = 0;
|
|
|
+
|
|
|
+ for (uint k = 0; k < oldprototypes.size(); k++)
|
|
|
+ {
|
|
|
+ distance += this->distancefunction->calculate(oldprototypes[k], prototypes[k]);
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(stderr, "KMeans::compute_delta: Distance: %f",
|
|
|
+ distancefunction->calculate(oldprototypes[k], prototypes[k]));
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ return distance;
|
|
|
}
|
|
|
|
|
|
-double KMeans::compute_assignments(const VVector & features,
|
|
|
- const VVector & prototypes, std::vector<int> & assignment)
|
|
|
+double KMeans::compute_assignments(const NICE::VVector & features,
|
|
|
+ const NICE::VVector & prototypes, std::vector<int> & assignment)
|
|
|
{
|
|
|
- int index = 0;
|
|
|
- for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
|
|
|
- {
|
|
|
-
|
|
|
- const NICE::Vector & x = *i;
|
|
|
- double mindist = std::numeric_limits<double>::max();
|
|
|
- int minclass = 0;
|
|
|
-
|
|
|
- int c = 0;
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
-
|
|
|
- fprintf(stderr, "computing nearest prototype for std::vector %d\n",
|
|
|
- index);
|
|
|
-#endif
|
|
|
- for (VVector::const_iterator j = prototypes.begin(); j
|
|
|
- != prototypes.end(); j++, c++)
|
|
|
- {
|
|
|
-
|
|
|
- const NICE::Vector & p = *j;
|
|
|
- double distance = distancefunction->calculate(p, x);
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- fprintf(stderr, "KMeans::compute_delta: Distance:",
|
|
|
- distancefunction->calculate(p, x));
|
|
|
-#endif
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- cerr << p << endl;
|
|
|
- cerr << x << endl;
|
|
|
- fprintf(stderr, "distance to prototype %d is %f\n", c, distance);
|
|
|
-#endif
|
|
|
-
|
|
|
- if (distance < mindist)
|
|
|
- {
|
|
|
- minclass = c;
|
|
|
- mindist = distance;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- assignment[index] = minclass;
|
|
|
- }
|
|
|
-
|
|
|
- return 0.0;
|
|
|
+ int index = 0;
|
|
|
+ for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
|
|
|
+ {
|
|
|
+
|
|
|
+ const NICE::Vector & x = *i;
|
|
|
+ double mindist = std::numeric_limits<double>::max();
|
|
|
+ int minclass = 0;
|
|
|
+
|
|
|
+ int c = 0;
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+
|
|
|
+ fprintf(stderr, "computing nearest prototype for std::vector %d\n",
|
|
|
+ index);
|
|
|
+ #endif
|
|
|
+ for (VVector::const_iterator j = prototypes.begin(); j
|
|
|
+ != prototypes.end(); j++, c++)
|
|
|
+ {
|
|
|
+
|
|
|
+ const NICE::Vector & p = *j;
|
|
|
+ double distance = this->distancefunction->calculate(p, x);
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(stderr, "KMeans::compute_delta: Distance: %f",
|
|
|
+ this->distancefunction->calculate(p, x));
|
|
|
+ #endif
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ std::cerr << p << std::endl;
|
|
|
+ std::cerr << x << std::endl;
|
|
|
+ fprintf(stderr, "distance to prototype %d is %f\n", c, distance);
|
|
|
+ #endif
|
|
|
+
|
|
|
+ if (distance < mindist)
|
|
|
+ {
|
|
|
+ minclass = c;
|
|
|
+ mindist = distance;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ assignment[index] = minclass;
|
|
|
+ }
|
|
|
+
|
|
|
+ return 0.0;
|
|
|
}
|
|
|
|
|
|
-double KMeans::compute_weights(const VVector & features,
|
|
|
- std::vector<double> & weights, std::vector<int> & assignment)
|
|
|
+double KMeans::compute_weights(const NICE::VVector & features,
|
|
|
+ std::vector<double> & weights, std::vector<int> & assignment)
|
|
|
{
|
|
|
- for (int k = 0; k < noClasses; k++)
|
|
|
- weights[k] = 0;
|
|
|
+ for (int k = 0; k < this->noClasses; k++)
|
|
|
+ weights[k] = 0;
|
|
|
|
|
|
- int j = 0;
|
|
|
+ int j = 0;
|
|
|
|
|
|
- for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
|
|
|
- {
|
|
|
- int k = assignment[j];
|
|
|
- weights[k]++;
|
|
|
- }
|
|
|
+ for (NICE::VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
|
|
|
+ {
|
|
|
+ int k = assignment[j];
|
|
|
+ weights[k]++;
|
|
|
+ }
|
|
|
|
|
|
- for (int k = 0; k < noClasses; k++)
|
|
|
- weights[k] = weights[k] / features.size();
|
|
|
+ for (int k = 0; k < this->noClasses; k++)
|
|
|
+ weights[k] = weights[k] / features.size();
|
|
|
|
|
|
- return 0.0;
|
|
|
+ return 0.0;
|
|
|
}
|
|
|
|
|
|
-void KMeans::cluster(const VVector & features, VVector & prototypes,
|
|
|
- std::vector<double> & weights, std::vector<int> & assignment)
|
|
|
+void KMeans::cluster(const NICE::VVector & features, NICE::VVector & prototypes,
|
|
|
+ std::vector<double> & weights, std::vector<int> & assignment)
|
|
|
{
|
|
|
- VVector oldprototypes;
|
|
|
-
|
|
|
- prototypes.clear();
|
|
|
- weights.clear();
|
|
|
- assignment.clear();
|
|
|
- weights.resize(noClasses, 0);
|
|
|
- assignment.resize(features.size(), 0);
|
|
|
-
|
|
|
- int dimension;
|
|
|
-
|
|
|
- if ((int) features.size() >= noClasses)
|
|
|
- dimension = features[0].size();
|
|
|
- else
|
|
|
- {
|
|
|
- fprintf(stderr,
|
|
|
- "FATAL ERROR: Not enough feature vectors provided for kMeans\n");
|
|
|
- exit(-1);
|
|
|
- }
|
|
|
-
|
|
|
- for (int k = 0; k < noClasses; k++)
|
|
|
- {
|
|
|
- prototypes.push_back(Vector(dimension));
|
|
|
- prototypes[k].set(0);
|
|
|
- }
|
|
|
-
|
|
|
- KMeans_Restart:
|
|
|
-
|
|
|
- initial_guess(features, prototypes);
|
|
|
-
|
|
|
- int iterations = 0;
|
|
|
- double delta = std::numeric_limits<double>::max();
|
|
|
- const double minDelta = 1e-5;
|
|
|
- const int maxIterations = 200;
|
|
|
-
|
|
|
- do
|
|
|
- {
|
|
|
- iterations++;
|
|
|
- compute_assignments(features, prototypes, assignment);
|
|
|
-
|
|
|
- if (iterations > 1)
|
|
|
- oldprototypes = prototypes;
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- fprintf(stderr, "KMeans::cluster compute_prototypes\n");
|
|
|
-
|
|
|
-#endif
|
|
|
- if (compute_prototypes(features, prototypes, weights, assignment) < 0)
|
|
|
- {
|
|
|
- fprintf(stderr, "KMeans::cluster restart\n");
|
|
|
- goto KMeans_Restart;
|
|
|
- }
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- fprintf(stderr, "KMeans::cluster compute_delta\n");
|
|
|
-
|
|
|
-#endif
|
|
|
- if (iterations > 1)
|
|
|
- delta = compute_delta(oldprototypes, prototypes);
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- print_iteration(iterations, prototypes, delta);
|
|
|
-
|
|
|
-#endif
|
|
|
-
|
|
|
- } while ((delta > minDelta) && (iterations < maxIterations));
|
|
|
-
|
|
|
-#ifdef DEBUG_KMEANS
|
|
|
- fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n",
|
|
|
- iterations, delta);
|
|
|
-
|
|
|
-#endif
|
|
|
-
|
|
|
- compute_weights(features, weights, assignment);
|
|
|
+ NICE::VVector oldprototypes;
|
|
|
+
|
|
|
+ prototypes.clear();
|
|
|
+ weights.clear();
|
|
|
+ assignment.clear();
|
|
|
+ weights.resize(noClasses, 0);
|
|
|
+ assignment.resize(features.size(), 0);
|
|
|
+
|
|
|
+ int dimension;
|
|
|
+
|
|
|
+ if ((int) features.size() >= this->noClasses)
|
|
|
+ dimension = features[0].size();
|
|
|
+ else
|
|
|
+ {
|
|
|
+ fprintf(stderr,
|
|
|
+ "FATAL ERROR: Not enough feature vectors provided for kMeans\n");
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int k = 0; k < this->noClasses; k++)
|
|
|
+ {
|
|
|
+ prototypes.push_back(Vector(dimension));
|
|
|
+ prototypes[k].set(0);
|
|
|
+ }
|
|
|
+
|
|
|
+ bool successKMeans ( false );
|
|
|
+ int iterations ( 0 );
|
|
|
+ double delta ( std::numeric_limits<double>::max() );
|
|
|
+
|
|
|
+ while ( !successKMeans )
|
|
|
+ {
|
|
|
+ //we assume that this run will be successful
|
|
|
+ successKMeans = true;
|
|
|
+
|
|
|
+ this->initial_guess(features, prototypes);
|
|
|
+
|
|
|
+ iterations = 0;
|
|
|
+ delta = std::numeric_limits<double>::max();
|
|
|
+
|
|
|
+ do
|
|
|
+ {
|
|
|
+ iterations++;
|
|
|
+ this->compute_assignments(features, prototypes, assignment);
|
|
|
+
|
|
|
+ if (iterations > 1)
|
|
|
+ oldprototypes = prototypes;
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(stderr, "KMeans::cluster compute_prototypes\n");
|
|
|
+ #endif
|
|
|
+
|
|
|
+ if ( this->compute_prototypes(features, prototypes, weights, assignment) < 0 )
|
|
|
+ {
|
|
|
+ fprintf(stderr, "KMeans::cluster restart\n");
|
|
|
+ successKMeans = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(stderr, "KMeans::cluster compute_delta\n");
|
|
|
+ #endif
|
|
|
+
|
|
|
+ if (iterations > 1)
|
|
|
+ delta = this->compute_delta(oldprototypes, prototypes);
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ print_iteration(iterations, prototypes, delta);
|
|
|
+ #endif
|
|
|
+
|
|
|
+ } while ((delta > d_minDelta) && (iterations < i_maxIterations));
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef DEBUG_KMEANS
|
|
|
+ fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n", iterations, delta);
|
|
|
+ #endif
|
|
|
+
|
|
|
+ this->compute_weights(features, weights, assignment);
|
|
|
}
|
|
|
|
|
|
-void KMeans::print_iteration(int iterations, VVector & prototypes, double delta)
|
|
|
+void KMeans::print_iteration(int iterations, NICE::VVector & prototypes, double delta)
|
|
|
{
|
|
|
- if (iterations > 1)
|
|
|
- fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations,
|
|
|
- delta);
|
|
|
- else
|
|
|
- fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations);
|
|
|
-
|
|
|
- int k = 0;
|
|
|
-
|
|
|
- for (VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++)
|
|
|
- {
|
|
|
- fprintf(stderr, "class (%d)\n", k);
|
|
|
- cerr << "prototype = " << (*i) << endl;
|
|
|
- }
|
|
|
+ if (iterations > 1)
|
|
|
+ fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations,
|
|
|
+ delta);
|
|
|
+ else
|
|
|
+ fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations);
|
|
|
+
|
|
|
+ int k = 0;
|
|
|
+
|
|
|
+ for (NICE::VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++)
|
|
|
+ {
|
|
|
+ fprintf(stderr, "class (%d)\n", k);
|
|
|
+ std::cerr << "prototype = " << (*i) << std::endl;
|
|
|
+ }
|
|
|
}
|