KMeans.cpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. /**
  2. * @file KMeans.cpp
  3. * @brief K-Means
  4. * @author Erik Rodner
  5. * @date 10/29/2007
  6. */
  7. #include <iostream>
  8. #include "vislearning/math/cluster/KMeans.h"
  9. #include "vislearning/math/distances/genericDistance.h"
  10. #include <set>
  11. using namespace OBJREC;
  12. using namespace std;
  13. using namespace NICE;
  14. #undef DEBUG_KMEANS
  15. KMeans::KMeans(int _noClasses, string _distanceType) :
  16. noClasses(_noClasses), distanceType(_distanceType)
  17. {
  18. //srand(time(NULL));
  19. distancefunction = GenericDistanceSelection::selectDistance(distanceType);
  20. }
  21. KMeans::~KMeans()
  22. {
  23. }
  24. void KMeans::initial_guess(const VVector & features, VVector & prototypes)
  25. {
  26. int j = 0;
  27. std::set<int, std::greater<int> > mark;
  28. for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++)
  29. {
  30. int k;
  31. do
  32. {
  33. k = rand() % features.size();
  34. } while (mark.find(k) != mark.end());
  35. mark.insert(mark.begin(), k);
  36. *i = features[k];
  37. }
  38. }
  39. int KMeans::compute_prototypes(const VVector & features, VVector & prototypes,
  40. std::vector<double> & weights, const std::vector<int> & assignment)
  41. {
  42. int j = 0;
  43. // fprintf (stderr, "KMeans::compute_prototypes: init noClasses=%d\n", noClasses);
  44. for (int k = 0; k < noClasses; k++)
  45. {
  46. prototypes[k].set(0);
  47. weights[k] = 0;
  48. }
  49. // fprintf (stderr, "KMeans::compute_prototypes: compute means\n");
  50. for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  51. {
  52. int k = assignment[j];
  53. NICE::Vector & p = prototypes[k];
  54. const NICE::Vector & x = *i;
  55. #ifdef DEBUG_KMEANS
  56. fprintf(
  57. stderr,
  58. "KMeans::compute_prototypes: std::vector %d has assignment %d\n",
  59. j, k);
  60. #endif
  61. p += x;
  62. #ifdef DEBUG_KMEANS
  63. cerr << "vector was : " << x << endl;
  64. cerr << "prototype for this class is now : " << p << endl;
  65. #endif
  66. weights[k]++;
  67. }
  68. // fprintf (stderr, "KMeans::compute_prototypes: scaling\n");
  69. for (int k = 0; k < noClasses; k++)
  70. {
  71. NICE::Vector & p = prototypes[k];
  72. #ifdef DEBUG_KMEANS
  73. cerr << "prototype for this class before scaling : " << p << endl;
  74. #endif
  75. if (weights[k] <= 0)
  76. {
  77. return -1;
  78. }
  79. p *= (1.0 / weights[k]);
  80. weights[k] = weights[k] / features.size();
  81. #ifdef DEBUG_KMEANS
  82. cerr << "prototype for this class after scaling with " << weights[k]
  83. << " : " << p << endl;
  84. #endif
  85. }
  86. return 0;
  87. }
  88. double KMeans::compute_delta(const VVector & oldprototypes,
  89. const VVector & prototypes)
  90. {
  91. double distance = 0;
  92. for (uint k = 0; k < oldprototypes.size(); k++)
  93. {
  94. distance
  95. += distancefunction->calculate(oldprototypes[k], prototypes[k]);
  96. #ifdef DEBUG_KMEANS
  97. fprintf(stderr, "KMeans::compute_delta: Distance:",
  98. distancefunction->calculate(oldprototypes[k], prototypes[k]));
  99. #endif
  100. }
  101. return distance;
  102. }
  103. double KMeans::compute_assignments(const VVector & features,
  104. const VVector & prototypes, std::vector<int> & assignment)
  105. {
  106. int index = 0;
  107. for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
  108. {
  109. const NICE::Vector & x = *i;
  110. double mindist = std::numeric_limits<double>::max();
  111. int minclass = 0;
  112. int c = 0;
  113. #ifdef DEBUG_KMEANS
  114. fprintf(stderr, "computing nearest prototype for std::vector %d\n",
  115. index);
  116. #endif
  117. for (VVector::const_iterator j = prototypes.begin(); j
  118. != prototypes.end(); j++, c++)
  119. {
  120. const NICE::Vector & p = *j;
  121. double distance = distancefunction->calculate(p, x);
  122. #ifdef DEBUG_KMEANS
  123. fprintf(stderr, "KMeans::compute_delta: Distance:",
  124. distancefunction->calculate(p, x));
  125. #endif
  126. #ifdef DEBUG_KMEANS
  127. cerr << p << endl;
  128. cerr << x << endl;
  129. fprintf(stderr, "distance to prototype %d is %f\n", c, distance);
  130. #endif
  131. if (distance < mindist)
  132. {
  133. minclass = c;
  134. mindist = distance;
  135. }
  136. }
  137. assignment[index] = minclass;
  138. }
  139. return 0.0;
  140. }
  141. double KMeans::compute_weights(const VVector & features,
  142. std::vector<double> & weights, std::vector<int> & assignment)
  143. {
  144. for (int k = 0; k < noClasses; k++)
  145. weights[k] = 0;
  146. int j = 0;
  147. for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  148. {
  149. int k = assignment[j];
  150. weights[k]++;
  151. }
  152. for (int k = 0; k < noClasses; k++)
  153. weights[k] = weights[k] / features.size();
  154. return 0.0;
  155. }
  156. void KMeans::cluster(const VVector & features, VVector & prototypes,
  157. std::vector<double> & weights, std::vector<int> & assignment)
  158. {
  159. VVector oldprototypes;
  160. prototypes.clear();
  161. weights.clear();
  162. assignment.clear();
  163. weights.resize(noClasses, 0);
  164. assignment.resize(features.size(), 0);
  165. int dimension;
  166. if ((int) features.size() >= noClasses)
  167. dimension = features[0].size();
  168. else
  169. {
  170. fprintf(stderr,
  171. "FATAL ERROR: Not enough feature vectors provided for kMeans\n");
  172. exit(-1);
  173. }
  174. for (int k = 0; k < noClasses; k++)
  175. {
  176. prototypes.push_back(Vector(dimension));
  177. prototypes[k].set(0);
  178. }
  179. KMeans_Restart:
  180. initial_guess(features, prototypes);
  181. int iterations = 0;
  182. double delta = std::numeric_limits<double>::max();
  183. const double minDelta = 1e-5;
  184. const int maxIterations = 200;
  185. do
  186. {
  187. iterations++;
  188. compute_assignments(features, prototypes, assignment);
  189. if (iterations > 1)
  190. oldprototypes = prototypes;
  191. #ifdef DEBUG_KMEANS
  192. fprintf(stderr, "KMeans::cluster compute_prototypes\n");
  193. #endif
  194. if (compute_prototypes(features, prototypes, weights, assignment) < 0)
  195. {
  196. fprintf(stderr, "KMeans::cluster restart\n");
  197. goto KMeans_Restart;
  198. }
  199. #ifdef DEBUG_KMEANS
  200. fprintf(stderr, "KMeans::cluster compute_delta\n");
  201. #endif
  202. if (iterations > 1)
  203. delta = compute_delta(oldprototypes, prototypes);
  204. #ifdef DEBUG_KMEANS
  205. print_iteration(iterations, prototypes, delta);
  206. #endif
  207. } while ((delta > minDelta) && (iterations < maxIterations));
  208. #ifdef DEBUG_KMEANS
  209. fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n",
  210. iterations, delta);
  211. #endif
  212. compute_weights(features, weights, assignment);
  213. }
  214. void KMeans::print_iteration(int iterations, VVector & prototypes, double delta)
  215. {
  216. if (iterations > 1)
  217. fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations,
  218. delta);
  219. else
  220. fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations);
  221. int k = 0;
  222. for (VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++)
  223. {
  224. fprintf(stderr, "class (%d)\n", k);
  225. cerr << "prototype = " << (*i) << endl;
  226. }
  227. }