/** * @file KMeans.cpp * @brief K-Means * @author Erik Rodner, Alexander Freytag * @date 29-10-2007 (dd-mm-yyyy) */ #include #include "vislearning/math/cluster/KMeans.h" #include "vislearning/math/distances/genericDistance.h" #include using namespace OBJREC; using namespace std; using namespace NICE; #undef DEBUG_KMEANS KMeans::KMeans(const int & _noClasses, const std::string & _distanceType) : noClasses(_noClasses), distanceType(_distanceType) { //srand(time(NULL)); this->distancefunction = GenericDistanceSelection::selectDistance(distanceType); } KMeans::KMeans( const NICE::Config *conf, const std::string & _section) { this->distanceType = conf->gS( _section, "distanceType", "euclidean" ); this->distancefunction = GenericDistanceSelection::selectDistance(distanceType); this->d_minDelta = conf->gD( _section, "minDelta", 1e-5 ); this->i_maxIterations = conf->gI( _section, "maxIterations", 200); this->noClasses = conf->gI( _section, "noClasses", 20); } KMeans::~KMeans() { } void KMeans::initial_guess(const NICE::VVector & features, NICE::VVector & prototypes) { int j = 0; std::set > mark; for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++) { int k; do { k = rand() % features.size(); } while (mark.find(k) != mark.end()); mark.insert(mark.begin(), k); *i = features[k]; } } int KMeans::compute_prototypes(const VVector & features, VVector & prototypes, std::vector & weights, const std::vector & assignment) { int j = 0; // fprintf (stderr, "KMeans::compute_prototypes: init noClasses=%d\n", noClasses); for (int k = 0; k < this->noClasses; k++) { prototypes[k].set(0); weights[k] = 0; } // fprintf (stderr, "KMeans::compute_prototypes: compute means\n"); for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++) { int k = assignment[j]; NICE::Vector & p = prototypes[k]; const NICE::Vector & x = *i; #ifdef DEBUG_KMEANS fprintf( stderr, "KMeans::compute_prototypes: std::vector %d has assignment %d\n", j, k); #endif p += x; #ifdef DEBUG_KMEANS std::cerr << "vector was : " << x << std::endl; std::cerr << "prototype for this class is now : " << p << std::endl; #endif weights[k]++; } // fprintf (stderr, "KMeans::compute_prototypes: scaling\n"); for (int k = 0; k < this->noClasses; k++) { NICE::Vector & p = prototypes[k]; #ifdef DEBUG_KMEANS std::cerr << "prototype for this class before scaling : " << p << std::endl; #endif if (weights[k] <= 0) { return -1; } p *= (1.0 / weights[k]); weights[k] = weights[k] / features.size(); #ifdef DEBUG_KMEANS std::cerr << "prototype for this class after scaling with " << weights[k] << " : " << p << std::endl; #endif } return 0; } double KMeans::compute_delta(const NICE::VVector & oldprototypes, const NICE::VVector & prototypes) { double distance = 0; for (uint k = 0; k < oldprototypes.size(); k++) { distance += this->distancefunction->calculate(oldprototypes[k], prototypes[k]); #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::compute_delta: Distance: %f", distancefunction->calculate(oldprototypes[k], prototypes[k])); #endif } return distance; } double KMeans::compute_assignments(const NICE::VVector & features, const NICE::VVector & prototypes, std::vector & assignment) { int index = 0; for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++) { const NICE::Vector & x = *i; double mindist = std::numeric_limits::max(); int minclass = 0; int c = 0; #ifdef DEBUG_KMEANS fprintf(stderr, "computing nearest prototype for std::vector %d\n", index); #endif for (VVector::const_iterator j = prototypes.begin(); j != prototypes.end(); j++, c++) { const NICE::Vector & p = *j; double distance = this->distancefunction->calculate(p, x); #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::compute_delta: Distance: %f", this->distancefunction->calculate(p, x)); #endif #ifdef DEBUG_KMEANS std::cerr << p << std::endl; std::cerr << x << std::endl; fprintf(stderr, "distance to prototype %d is %f\n", c, distance); #endif if (distance < mindist) { minclass = c; mindist = distance; } } assignment[index] = minclass; } return 0.0; } double KMeans::compute_weights(const NICE::VVector & features, std::vector & weights, std::vector & assignment) { for (int k = 0; k < this->noClasses; k++) weights[k] = 0; int j = 0; for (NICE::VVector::const_iterator i = features.begin(); i != features.end(); i++, j++) { int k = assignment[j]; weights[k]++; } for (int k = 0; k < this->noClasses; k++) weights[k] = weights[k] / features.size(); return 0.0; } void KMeans::cluster(const NICE::VVector & features, NICE::VVector & prototypes, std::vector & weights, std::vector & assignment) { NICE::VVector oldprototypes; prototypes.clear(); weights.clear(); assignment.clear(); weights.resize(noClasses, 0); assignment.resize(features.size(), 0); int dimension; if ((int) features.size() >= this->noClasses) dimension = features[0].size(); else { fprintf(stderr, "FATAL ERROR: Not enough feature vectors provided for kMeans\n"); exit(-1); } for (int k = 0; k < this->noClasses; k++) { prototypes.push_back(Vector(dimension)); prototypes[k].set(0); } bool successKMeans ( false ); int iterations ( 0 ); double delta ( std::numeric_limits::max() ); while ( !successKMeans ) { //we assume that this run will be successful successKMeans = true; this->initial_guess(features, prototypes); iterations = 0; delta = std::numeric_limits::max(); do { iterations++; this->compute_assignments(features, prototypes, assignment); if (iterations > 1) oldprototypes = prototypes; #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster compute_prototypes\n"); #endif if ( this->compute_prototypes(features, prototypes, weights, assignment) < 0 ) { fprintf(stderr, "KMeans::cluster restart\n"); successKMeans = false; break; } #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster compute_delta\n"); #endif if (iterations > 1) delta = this->compute_delta(oldprototypes, prototypes); #ifdef DEBUG_KMEANS print_iteration(iterations, prototypes, delta); #endif } while ((delta > d_minDelta) && (iterations < i_maxIterations)); } #ifdef DEBUG_KMEANS fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n", iterations, delta); #endif this->compute_weights(features, weights, assignment); } void KMeans::print_iteration(int iterations, NICE::VVector & prototypes, double delta) { if (iterations > 1) fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations, delta); else fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations); int k = 0; for (NICE::VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++) { fprintf(stderr, "class (%d)\n", k); std::cerr << "prototype = " << (*i) << std::endl; } }