/** * @file KMeans.cpp * @brief K-Means * @author Erik Rodner * @date 10/29/2007 */ #include #include "vislearning/math/cluster/KMeans.h" #include "vislearning/math/distances/genericDistance.h" #include using namespace OBJREC; using namespace std; using namespace NICE; #undef DEBUG_KMEANS KMeans::KMeans(int _noClasses, string _distanceType) : noClasses(_noClasses), distanceType(_distanceType) { //srand(time(NULL)); distancefunction = GenericDistanceSelection::selectDistance(distanceType); } KMeans::~KMeans() { } void KMeans::initial_guess(const VVector & features, VVector & prototypes) { int j = 0; std::set > mark; for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++) { int k; do { k = rand() % features.size(); } while (mark.find(k) != mark.end()); mark.insert(mark.begin(), k); *i = features[k]; } } int KMeans::compute_prototypes(const VVector & features, VVector & prototypes, std::vector & weights, const std::vector & assignment) { int j = 0; // fprintf (stderr, "KMeans::compute_prototypes: init noClasses=%d\n", noClasses); for (int k = 0; k < noClasses; k++) { prototypes[k].set(0); weights[k] = 0; } // fprintf (stderr, "KMeans::compute_prototypes: compute means\n"); for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++) { int k = assignment[j]; NICE::Vector & p = prototypes[k]; const NICE::Vector & x = *i; #ifdef DEBUG_KMEANS fprintf( stderr, "KMeans::compute_prototypes: std::vector %d has assignment %d\n", j, k); #endif p += x; #ifdef DEBUG_KMEANS cerr << "vector was : " << x << endl; cerr << "prototype for this class is now : " << p << endl; #endif weights[k]++; } // fprintf (stderr, "KMeans::compute_prototypes: scaling\n"); for (int k = 0; k < noClasses; k++) { NICE::Vector & p = prototypes[k]; #ifdef DEBUG_KMEANS cerr << "prototype for this class before scaling : " << p << endl; #endif if (weights[k] <= 0) { return -1; } p *= (1.0 / weights[k]); weights[k] = weights[k] / features.size(); #ifdef DEBUG_KMEANS cerr << "prototype for this class after scaling with " << weights[k] << " : " << p << endl; #endif } return 0; } double KMeans::compute_delta(const VVector & oldprototypes, const VVector & prototypes) { double distance = 0; for (uint k = 0; k < oldprototypes.size(); k++) { distance += distancefunction->calculate(oldprototypes[k], prototypes[k]); #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::compute_delta: Distance:", distancefunction->calculate(oldprototypes[k], prototypes[k])); #endif } return distance; } double KMeans::compute_assignments(const VVector & features, const VVector & prototypes, std::vector & assignment) { int index = 0; for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++) { const NICE::Vector & x = *i; double mindist = std::numeric_limits::max(); int minclass = 0; int c = 0; #ifdef DEBUG_KMEANS fprintf(stderr, "computing nearest prototype for std::vector %d\n", index); #endif for (VVector::const_iterator j = prototypes.begin(); j != prototypes.end(); j++, c++) { const NICE::Vector & p = *j; double distance = distancefunction->calculate(p, x); #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::compute_delta: Distance:", distancefunction->calculate(p, x)); #endif #ifdef DEBUG_KMEANS cerr << p << endl; cerr << x << endl; fprintf(stderr, "distance to prototype %d is %f\n", c, distance); #endif if (distance < mindist) { minclass = c; mindist = distance; } } assignment[index] = minclass; } return 0.0; } double KMeans::compute_weights(const VVector & features, std::vector & weights, std::vector & assignment) { for (int k = 0; k < noClasses; k++) weights[k] = 0; int j = 0; for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++) { int k = assignment[j]; weights[k]++; } for (int k = 0; k < noClasses; k++) weights[k] = weights[k] / features.size(); return 0.0; } void KMeans::cluster(const VVector & features, VVector & prototypes, std::vector & weights, std::vector & assignment) { VVector oldprototypes; prototypes.clear(); weights.clear(); assignment.clear(); weights.resize(noClasses, 0); assignment.resize(features.size(), 0); int dimension; if ((int) features.size() >= noClasses) dimension = features[0].size(); else { fprintf(stderr, "FATAL ERROR: Not enough feature vectors provided for kMeans\n"); exit(-1); } for (int k = 0; k < noClasses; k++) { prototypes.push_back(Vector(dimension)); prototypes[k].set(0); } KMeans_Restart: initial_guess(features, prototypes); int iterations = 0; double delta = std::numeric_limits::max(); const double minDelta = 1e-5; const int maxIterations = 200; do { iterations++; compute_assignments(features, prototypes, assignment); if (iterations > 1) oldprototypes = prototypes; #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster compute_prototypes\n"); #endif if (compute_prototypes(features, prototypes, weights, assignment) < 0) { fprintf(stderr, "KMeans::cluster restart\n"); goto KMeans_Restart; } #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster compute_delta\n"); #endif if (iterations > 1) delta = compute_delta(oldprototypes, prototypes); #ifdef DEBUG_KMEANS print_iteration(iterations, prototypes, delta); #endif } while ((delta > minDelta) && (iterations < maxIterations)); #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n", iterations, delta); #endif compute_weights(features, weights, assignment); } void KMeans::print_iteration(int iterations, VVector & prototypes, double delta) { if (iterations > 1) fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations, delta); else fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations); int k = 0; for (VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++) { fprintf(stderr, "class (%d)\n", k); cerr << "prototype = " << (*i) << endl; } }