KMeansOnline.cpp 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /**
  2. * @file KMeansOnline.cpp
  3. * @brief online kmeans clustering
  4. * @author Erik Rodner
  5. * @date 02/13/2008
  6. */
  7. #include <iostream>
  8. #include <list>
  9. #include "vislearning/math/cluster/KMeansOnline.h"
  10. using namespace OBJREC;
  11. using namespace std;
  12. // refactor-nice.pl: check this substitution
  13. // old: using namespace ice;
  14. using namespace NICE;
  15. KMeansOnline::KMeansOnline(size_t _numClusters, double _gamma)
  16. {
  17. gamma = _gamma;
  18. numClusters = _numClusters;
  19. init();
  20. }
  21. KMeansOnline::~KMeansOnline()
  22. {
  23. }
  24. void KMeansOnline::init()
  25. {
  26. ClusterOnline::init();
  27. weights.clear();
  28. weights.resize(numClusters, 0.0);
  29. }
  30. int KMeansOnline::updateClusters(const NICE::Vector & x)
  31. {
  32. if (clusters.size() != numClusters)
  33. {
  34. clusters.push_back(x);
  35. fprintf(stderr, "clusters.size() %d weights.size() %d\n",
  36. (int)clusters.size(), (int)weights.size());
  37. weights[ clusters.size() - 1 ]++;
  38. return clusters.size() - 1;
  39. }
  40. double mindist = std::numeric_limits<double>::max();
  41. int k = 0;
  42. int my_cluster = 0;
  43. const double *xp = x.getDataPointer();
  44. int len = x.size();
  45. for (VVector::const_iterator i = clusters.begin();
  46. i != clusters.end();
  47. i++, k++)
  48. {
  49. const NICE::Vector & cluster = *i;
  50. //double distance = (x - cluster).Length();
  51. double distance = 0.0;
  52. const double *clusterp = cluster.getDataPointer();
  53. for (int i = 0 ; i < len ; i++)
  54. {
  55. double h = clusterp[i] - xp[i];
  56. distance += h * h;
  57. }
  58. if (distance < mindist)
  59. {
  60. my_cluster = k;
  61. mindist = distance;
  62. }
  63. }
  64. weights[my_cluster]++;
  65. // refactor-nice.pl: check this substitution
  66. // old: Vector & cluster = clusters[my_cluster];
  67. NICE::Vector & cluster = clusters[my_cluster];
  68. double f = gamma / weights[my_cluster];
  69. for (size_t i = 0 ; i < cluster.size() ; i++)
  70. cluster[i] += f * (x[i] - cluster[i]);
  71. return my_cluster;
  72. }
  73. void KMeansOnline::cluster(const Examples &ex)
  74. {
  75. for (int i = 0; i < (int)ex.size(); i++)
  76. {
  77. updateClusters(*(ex[i].second.vec));
  78. }
  79. }
  80. void KMeansOnline::getDist(const NICE::Vector &vin, SparseVector &dist, int size, bool hard)
  81. {
  82. int len = vin.size();
  83. list<double> alldist;
  84. int k = 0;
  85. double mindist = std::numeric_limits<double>::max();
  86. // get for each cluster the distance to the given feature
  87. for (vector<Vector>::const_iterator i = clusters.begin();
  88. i != clusters.end();
  89. i++, k++)
  90. {
  91. const NICE::Vector & cluster = *i;
  92. double distance = 0.0;
  93. const double *clusterp = cluster.getDataPointer();
  94. for (int i = 0 ; i < len ; i++)
  95. {
  96. double h = clusterp[i] - vin[i];
  97. distance += h * h;
  98. }
  99. mindist = std::min(mindist, distance);
  100. alldist.push_back(distance);
  101. }
  102. list<double> sortedalldist = alldist;
  103. sortedalldist.sort();
  104. dist.clear();
  105. dist.setDim((int)numClusters);
  106. size = std::min((int)numClusters, size);
  107. for (k = 0; k < size; k++)
  108. {
  109. sortedalldist.pop_front();
  110. }
  111. double maxval = *(sortedalldist.begin());
  112. list<double>::const_iterator i = alldist.begin();
  113. for (k = 0; i != alldist.end(); i++, k++)
  114. {
  115. double val = mindist / (*i);
  116. if ((*i) < maxval && val > 1e-07)
  117. {
  118. if (hard)
  119. {
  120. dist[k] = 1.0;
  121. }
  122. else
  123. {
  124. dist[k] = val;
  125. }
  126. }
  127. }
  128. }