KMeansOnline.cpp 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /**
  2. * @file KMeansOnline.cpp
  3. * @brief online kmeans clustering
  4. * @author Erik Rodner
  5. * @date 02/13/2008
  6. */
  7. #ifdef NOVISUAL
  8. #include <vislearning/nice_nonvis.h>
  9. #else
  10. #include <vislearning/nice.h>
  11. #endif
  12. #include <iostream>
  13. #include <list>
  14. #include "vislearning/math/cluster/KMeansOnline.h"
  15. using namespace OBJREC;
  16. using namespace std;
  17. // refactor-nice.pl: check this substitution
  18. // old: using namespace ice;
  19. using namespace NICE;
  20. KMeansOnline::KMeansOnline(size_t _numClusters, double _gamma)
  21. {
  22. gamma = _gamma;
  23. numClusters = _numClusters;
  24. init();
  25. }
  26. KMeansOnline::~KMeansOnline()
  27. {
  28. }
  29. void KMeansOnline::init()
  30. {
  31. ClusterOnline::init();
  32. weights.clear();
  33. weights.resize(numClusters, 0.0);
  34. }
  35. int KMeansOnline::updateClusters(const NICE::Vector & x)
  36. {
  37. if (clusters.size() != numClusters)
  38. {
  39. clusters.push_back(x);
  40. fprintf(stderr, "clusters.size() %d weights.size() %d\n",
  41. (int)clusters.size(), (int)weights.size());
  42. weights[ clusters.size() - 1 ]++;
  43. return clusters.size() - 1;
  44. }
  45. double mindist = std::numeric_limits<double>::max();
  46. int k = 0;
  47. int my_cluster = 0;
  48. const double *xp = x.getDataPointer();
  49. int len = x.size();
  50. for (VVector::const_iterator i = clusters.begin();
  51. i != clusters.end();
  52. i++, k++)
  53. {
  54. const NICE::Vector & cluster = *i;
  55. //double distance = (x - cluster).Length();
  56. double distance = 0.0;
  57. const double *clusterp = cluster.getDataPointer();
  58. for (int i = 0 ; i < len ; i++)
  59. {
  60. double h = clusterp[i] - xp[i];
  61. distance += h * h;
  62. }
  63. if (distance < mindist)
  64. {
  65. my_cluster = k;
  66. mindist = distance;
  67. }
  68. }
  69. weights[my_cluster]++;
  70. // refactor-nice.pl: check this substitution
  71. // old: Vector & cluster = clusters[my_cluster];
  72. NICE::Vector & cluster = clusters[my_cluster];
  73. double f = gamma / weights[my_cluster];
  74. for (size_t i = 0 ; i < cluster.size() ; i++)
  75. cluster[i] += f * (x[i] - cluster[i]);
  76. return my_cluster;
  77. }
  78. void KMeansOnline::cluster(const Examples &ex)
  79. {
  80. for (int i = 0; i < (int)ex.size(); i++)
  81. {
  82. updateClusters(*(ex[i].second.vec));
  83. }
  84. }
  85. void KMeansOnline::getDist(const NICE::Vector &vin, SparseVector &dist, int size, bool hard)
  86. {
  87. int len = vin.size();
  88. list<double> alldist;
  89. int k = 0;
  90. double mindist = std::numeric_limits<double>::max();
  91. // get for each cluster the distance to the given feature
  92. for (vector<Vector>::const_iterator i = clusters.begin();
  93. i != clusters.end();
  94. i++, k++)
  95. {
  96. const NICE::Vector & cluster = *i;
  97. double distance = 0.0;
  98. const double *clusterp = cluster.getDataPointer();
  99. for (int i = 0 ; i < len ; i++)
  100. {
  101. double h = clusterp[i] - vin[i];
  102. distance += h * h;
  103. }
  104. mindist = std::min(mindist, distance);
  105. alldist.push_back(distance);
  106. }
  107. list<double> sortedalldist = alldist;
  108. sortedalldist.sort();
  109. dist.clear();
  110. dist.setDim((int)numClusters);
  111. size = std::min((int)numClusters, size);
  112. for (k = 0; k < size; k++)
  113. {
  114. sortedalldist.pop_front();
  115. }
  116. double maxval = *(sortedalldist.begin());
  117. list<double>::const_iterator i = alldist.begin();
  118. for (k = 0; i != alldist.end(); i++, k++)
  119. {
  120. double val = mindist / (*i);
  121. if ((*i) < maxval && val > 1e-07)
  122. {
  123. if (hard)
  124. {
  125. dist[k] = 1.0;
  126. }
  127. else
  128. {
  129. dist[k] = val;
  130. }
  131. }
  132. }
  133. }