/** * @file KMeansOnline.cpp * @brief online kmeans clustering * @author Erik Rodner * @date 02/13/2008 */ #include #include #include "vislearning/math/cluster/KMeansOnline.h" using namespace OBJREC; using namespace std; // refactor-nice.pl: check this substitution // old: using namespace ice; using namespace NICE; KMeansOnline::KMeansOnline(size_t _numClusters, double _gamma) { gamma = _gamma; numClusters = _numClusters; init(); } KMeansOnline::~KMeansOnline() { } void KMeansOnline::init() { ClusterOnline::init(); weights.clear(); weights.resize(numClusters, 0.0); } int KMeansOnline::updateClusters(const NICE::Vector & x) { if (clusters.size() != numClusters) { clusters.push_back(x); fprintf(stderr, "clusters.size() %d weights.size() %d\n", (int)clusters.size(), (int)weights.size()); weights[ clusters.size() - 1 ]++; return clusters.size() - 1; } double mindist = std::numeric_limits::max(); int k = 0; int my_cluster = 0; const double *xp = x.getDataPointer(); int len = x.size(); for (VVector::const_iterator i = clusters.begin(); i != clusters.end(); i++, k++) { const NICE::Vector & cluster = *i; //double distance = (x - cluster).Length(); double distance = 0.0; const double *clusterp = cluster.getDataPointer(); for (int i = 0 ; i < len ; i++) { double h = clusterp[i] - xp[i]; distance += h * h; } if (distance < mindist) { my_cluster = k; mindist = distance; } } weights[my_cluster]++; // refactor-nice.pl: check this substitution // old: Vector & cluster = clusters[my_cluster]; NICE::Vector & cluster = clusters[my_cluster]; double f = gamma / weights[my_cluster]; for (size_t i = 0 ; i < cluster.size() ; i++) cluster[i] += f * (x[i] - cluster[i]); return my_cluster; } void KMeansOnline::cluster(const Examples &ex) { for (int i = 0; i < (int)ex.size(); i++) { updateClusters(*(ex[i].second.vec)); } } void KMeansOnline::getDist(const NICE::Vector &vin, SparseVector &dist, int size, bool hard) { int len = vin.size(); list alldist; int k = 0; double mindist = std::numeric_limits::max(); // get for each cluster the distance to the given feature for (vector::const_iterator i = clusters.begin(); i != clusters.end(); i++, k++) { const NICE::Vector & cluster = *i; double distance = 0.0; const double *clusterp = cluster.getDataPointer(); for (int i = 0 ; i < len ; i++) { double h = clusterp[i] - vin[i]; distance += h * h; } mindist = std::min(mindist, distance); alldist.push_back(distance); } list sortedalldist = alldist; sortedalldist.sort(); dist.clear(); dist.setDim((int)numClusters); size = std::min((int)numClusters, size); for (k = 0; k < size; k++) { sortedalldist.pop_front(); } double maxval = *(sortedalldist.begin()); list::const_iterator i = alldist.begin(); for (k = 0; i != alldist.end(); i++, k++) { double val = mindist / (*i); if ((*i) < maxval && val > 1e-07) { if (hard) { dist[k] = 1.0; } else { dist[k] = val; } } } }