123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- /**
- * @file KMeansOnline.cpp
- * @brief online kmeans clustering
- * @author Erik Rodner
- * @date 02/13/2008
- */
- #include <iostream>
- #include <list>
- #include "vislearning/math/cluster/KMeansOnline.h"
- using namespace OBJREC;
- using namespace std;
- // refactor-nice.pl: check this substitution
- // old: using namespace ice;
- using namespace NICE;
- KMeansOnline::KMeansOnline(size_t _numClusters, double _gamma)
- {
- gamma = _gamma;
- numClusters = _numClusters;
- init();
- }
- KMeansOnline::~KMeansOnline()
- {
- }
- void KMeansOnline::init()
- {
- ClusterOnline::init();
- weights.clear();
- weights.resize(numClusters, 0.0);
- }
- int KMeansOnline::updateClusters(const NICE::Vector & x)
- {
- if (clusters.size() != numClusters)
- {
- clusters.push_back(x);
- fprintf(stderr, "clusters.size() %d weights.size() %d\n",
- (int)clusters.size(), (int)weights.size());
- weights[ clusters.size() - 1 ]++;
- return clusters.size() - 1;
- }
- double mindist = std::numeric_limits<double>::max();
- int k = 0;
- int my_cluster = 0;
- const double *xp = x.getDataPointer();
- int len = x.size();
- for (VVector::const_iterator i = clusters.begin();
- i != clusters.end();
- i++, k++)
- {
- const NICE::Vector & cluster = *i;
- //double distance = (x - cluster).Length();
- double distance = 0.0;
- const double *clusterp = cluster.getDataPointer();
- for (int i = 0 ; i < len ; i++)
- {
- double h = clusterp[i] - xp[i];
- distance += h * h;
- }
- if (distance < mindist)
- {
- my_cluster = k;
- mindist = distance;
- }
- }
- weights[my_cluster]++;
- // refactor-nice.pl: check this substitution
- // old: Vector & cluster = clusters[my_cluster];
- NICE::Vector & cluster = clusters[my_cluster];
- double f = gamma / weights[my_cluster];
- for (size_t i = 0 ; i < cluster.size() ; i++)
- cluster[i] += f * (x[i] - cluster[i]);
- return my_cluster;
- }
- void KMeansOnline::cluster(const Examples &ex)
- {
- for (int i = 0; i < (int)ex.size(); i++)
- {
- updateClusters(*(ex[i].second.vec));
- }
- }
- void KMeansOnline::getDist(const NICE::Vector &vin, SparseVector &dist, int size, bool hard)
- {
- int len = vin.size();
- list<double> alldist;
- int k = 0;
- double mindist = std::numeric_limits<double>::max();
- // get for each cluster the distance to the given feature
- for (vector<Vector>::const_iterator i = clusters.begin();
- i != clusters.end();
- i++, k++)
- {
- const NICE::Vector & cluster = *i;
- double distance = 0.0;
- const double *clusterp = cluster.getDataPointer();
- for (int i = 0 ; i < len ; i++)
- {
- double h = clusterp[i] - vin[i];
- distance += h * h;
- }
- mindist = std::min(mindist, distance);
- alldist.push_back(distance);
- }
- list<double> sortedalldist = alldist;
- sortedalldist.sort();
- dist.clear();
- dist.setDim((int)numClusters);
- size = std::min((int)numClusters, size);
- for (k = 0; k < size; k++)
- {
- sortedalldist.pop_front();
- }
- double maxval = *(sortedalldist.begin());
- list<double>::const_iterator i = alldist.begin();
- for (k = 0; i != alldist.end(); i++, k++)
- {
- double val = mindist / (*i);
- if ((*i) < maxval && val > 1e-07)
- {
- if (hard)
- {
- dist[k] = 1.0;
- }
- else
- {
- dist[k] = val;
- }
- }
- }
- }
|