KMeans.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. /**
  2. * @file KMeans.cpp
  3. * @brief K-Means
  4. * @author Erik Rodner, Alexander Freytag
  5. * @date 29-10-2007 (dd-mm-yyyy)
  6. */
  7. #ifdef NICE_USELIB_OPENMP
  8. #include <omp.h>
  9. #endif
  10. #include <set>
  11. #include <iostream>
  12. #include <string>
  13. #include "vislearning/math/cluster/KMeans.h"
  14. #include "vislearning/math/distances/genericDistance.h"
  15. using namespace OBJREC;
  16. using namespace std;
  17. using namespace NICE;
  18. #undef DEBUG_KMEANS
  19. ///////////////////// ///////////////////// /////////////////////
  20. // CONSTRUCTORS / DESTRUCTORS
  21. ///////////////////// ///////////////////// /////////////////////
  22. KMeans::KMeans() : ClusterAlgorithm()
  23. {
  24. this->noClusters = 20;
  25. this->distanceType = "euclidean";
  26. this->distancefunction = NULL;
  27. this->d_minDelta = 1e-5;
  28. this->i_maxIterations = 200;
  29. }
  30. KMeans::KMeans(const int & _noClusters, const std::string & _distanceType) :
  31. noClusters(_noClusters), distanceType(_distanceType)
  32. {
  33. //srand(time(NULL));
  34. this->distancefunction = GenericDistanceSelection::selectDistance(distanceType);
  35. this->d_minDelta = 1e-5;
  36. this->i_maxIterations = 200;
  37. }
  38. KMeans::KMeans( const NICE::Config * _conf, const std::string & _confSection)
  39. {
  40. this->initFromConfig( _conf, _confSection );
  41. }
  42. KMeans::~KMeans()
  43. {
  44. if ( this->distancefunction != NULL )
  45. {
  46. delete this->distancefunction;
  47. this->distancefunction = NULL ;
  48. }
  49. }
  50. void KMeans::initFromConfig( const NICE::Config* _conf, const std::string& _confSection )
  51. {
  52. this->noClusters = _conf->gI( _confSection, "noClusters", 20);
  53. this->distanceType = _conf->gS( _confSection, "distanceType", "euclidean" );
  54. this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType );
  55. this->d_minDelta = _conf->gD( _confSection, "minDelta", 1e-5 );
  56. this->i_maxIterations = _conf->gI( _confSection, "maxIterations", 200);
  57. }
  58. ///////////////////// ///////////////////// /////////////////////
  59. // CLUSTERING STUFF
  60. ///////////////////// ///////////////////// //////////////////
  61. void KMeans::initial_guess(const NICE::VVector & features, NICE::VVector & prototypes)
  62. {
  63. int j = 0;
  64. std::set<int, std::greater<int> > mark;
  65. for (VVector::iterator i = prototypes.begin(); i != prototypes.end(); i++, j++)
  66. {
  67. int k;
  68. do
  69. {
  70. k = rand() % features.size();
  71. } while (mark.find(k) != mark.end());
  72. mark.insert(mark.begin(), k);
  73. *i = features[k];
  74. }
  75. }
  76. int KMeans::compute_prototypes(const VVector & features, VVector & prototypes,
  77. std::vector<double> & weights, const std::vector<int> & assignment)
  78. {
  79. int j = 0;
  80. // fprintf (stderr, "KMeans::compute_prototypes: init noClusters=%d\n", noClusters);
  81. for (int k = 0; k < this->noClusters; k++)
  82. {
  83. prototypes[k].set(0);
  84. weights[k] = 0;
  85. }
  86. // fprintf (stderr, "KMeans::compute_prototypes: compute means\n");
  87. for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  88. {
  89. int k = assignment[j];
  90. NICE::Vector & p = prototypes[k];
  91. const NICE::Vector & x = *i;
  92. #ifdef DEBUG_KMEANS
  93. fprintf(
  94. stderr,
  95. "KMeans::compute_prototypes: std::vector %d has assignment %d\n",
  96. j, k);
  97. #endif
  98. p += x;
  99. #ifdef DEBUG_KMEANS
  100. std::cerr << "vector was : " << x << std::endl;
  101. std::cerr << "prototype for this class is now : " << p << std::endl;
  102. #endif
  103. weights[k]++;
  104. }
  105. // fprintf (stderr, "KMeans::compute_prototypes: scaling\n");
  106. #pragma omp parallel for
  107. for (int k = 0; k < this->noClusters; k++)
  108. {
  109. NICE::Vector & p = prototypes[k];
  110. #ifdef DEBUG_KMEANS
  111. std::cerr << "prototype for this class before scaling : " << p << std::endl;
  112. #endif
  113. // if (weights[k] <= 0)
  114. // {
  115. // return -1;
  116. // }
  117. if (weights[k] > 0)
  118. {
  119. p *= (1.0 / weights[k]);
  120. weights[k] = weights[k] / features.size();
  121. #ifdef DEBUG_KMEANS
  122. std::cerr << "prototype for this class after scaling with " << weights[k]
  123. << " : " << p << std::endl;
  124. #endif
  125. }
  126. }
  127. return 0;
  128. }
  129. double KMeans::compute_delta(const NICE::VVector & oldprototypes,
  130. const NICE::VVector & prototypes)
  131. {
  132. double distance = 0;
  133. for (uint k = 0; k < oldprototypes.size(); k++)
  134. {
  135. distance += this->distancefunction->calculate(oldprototypes[k], prototypes[k]);
  136. #ifdef DEBUG_KMEANS
  137. fprintf(stderr, "KMeans::compute_delta: Distance: %f",
  138. distancefunction->calculate(oldprototypes[k], prototypes[k]));
  139. #endif
  140. }
  141. return distance;
  142. }
  143. double KMeans::compute_assignments(const NICE::VVector & features,
  144. const NICE::VVector & prototypes, std::vector<int> & assignment)
  145. {
  146. // int index = 0;
  147. // for (VVector::const_iterator i = features.begin(); i != features.end(); i++, index++)
  148. uint noFeatures ( features.size() );
  149. #pragma omp parallel for
  150. for (int index = 0; index < noFeatures; index++)
  151. {
  152. // const NICE::Vector & x = *i;
  153. const NICE::Vector & x = features[index];
  154. double mindist = std::numeric_limits<double>::max();
  155. int minclass = 0;
  156. int c = 0;
  157. #ifdef DEBUG_KMEANS
  158. fprintf(stderr, "computing nearest prototype for std::vector %d\n",
  159. index);
  160. #endif
  161. for (VVector::const_iterator j = prototypes.begin(); j
  162. != prototypes.end(); j++, c++)
  163. {
  164. const NICE::Vector & p = *j;
  165. double distance = this->distancefunction->calculate(p, x);
  166. #ifdef DEBUG_KMEANS
  167. fprintf(stderr, "KMeans::compute_delta: Distance: %f",
  168. this->distancefunction->calculate(p, x));
  169. #endif
  170. #ifdef DEBUG_KMEANS
  171. std::cerr << p << std::endl;
  172. std::cerr << x << std::endl;
  173. fprintf(stderr, "distance to prototype %d is %f\n", c, distance);
  174. #endif
  175. if (distance < mindist)
  176. {
  177. minclass = c;
  178. mindist = distance;
  179. }
  180. }
  181. assignment[index] = minclass;
  182. }
  183. return 0.0;
  184. }
  185. double KMeans::compute_weights(const NICE::VVector & features,
  186. std::vector<double> & weights, std::vector<int> & assignment)
  187. {
  188. for (int k = 0; k < this->noClusters; k++)
  189. weights[k] = 0;
  190. int j = 0;
  191. for (NICE::VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  192. {
  193. int k = assignment[j];
  194. weights[k]++;
  195. }
  196. #pragma omp parallel for
  197. for (int k = 0; k < this->noClusters; k++)
  198. weights[k] = weights[k] / features.size();
  199. return 0.0;
  200. }
  201. void KMeans::cluster(const NICE::VVector & features, NICE::VVector & prototypes,
  202. std::vector<double> & weights, std::vector<int> & assignment)
  203. {
  204. NICE::VVector oldprototypes;
  205. prototypes.clear();
  206. weights.clear();
  207. assignment.clear();
  208. weights.resize(noClusters, 0);
  209. assignment.resize(features.size(), 0);
  210. int dimension;
  211. if ((int) features.size() >= this->noClusters)
  212. dimension = features[0].size();
  213. else
  214. {
  215. std::ostringstream stringStream;
  216. stringStream << "FATAL ERROR: Not enough feature vectors provided for kMeans \n k: " << this->noClusters << " numberOfFeatures: " << features.size() << "\n";
  217. std::string errormessage ( stringStream.str() );
  218. throw NICE::Exception( errormessage );
  219. }
  220. for (int k = 0; k < this->noClusters; k++)
  221. {
  222. prototypes.push_back(Vector(dimension));
  223. prototypes[k].set(0);
  224. }
  225. bool successKMeans ( false );
  226. int iterations ( 0 );
  227. double delta ( std::numeric_limits<double>::max() );
  228. while ( !successKMeans )
  229. {
  230. //we assume that this run will be successful
  231. successKMeans = true;
  232. this->initial_guess(features, prototypes);
  233. iterations = 0;
  234. delta = std::numeric_limits<double>::max();
  235. do
  236. {
  237. iterations++;
  238. this->compute_assignments(features, prototypes, assignment);
  239. if (iterations > 1)
  240. oldprototypes = prototypes;
  241. #ifdef DEBUG_KMEANS
  242. fprintf(stderr, "KMeans::cluster compute_prototypes\n");
  243. #endif
  244. if ( this->compute_prototypes(features, prototypes, weights, assignment) < 0 )
  245. {
  246. fprintf(stderr, "KMeans::cluster restart\n");
  247. successKMeans = false;
  248. break;
  249. }
  250. #ifdef DEBUG_KMEANS
  251. fprintf(stderr, "KMeans::cluster compute_delta\n");
  252. #endif
  253. if (iterations > 1)
  254. delta = this->compute_delta(oldprototypes, prototypes);
  255. #ifdef DEBUG_KMEANS
  256. print_iteration(iterations, prototypes, delta);
  257. #endif
  258. } while ((delta > d_minDelta) && (iterations < i_maxIterations));
  259. }
  260. #ifdef DEBUG_KMEANS
  261. fprintf(stderr, "KMeans::cluster: iterations = %d, delta = %f\n", iterations, delta);
  262. #endif
  263. this->compute_weights(features, weights, assignment);
  264. }
  265. void KMeans::print_iteration(int iterations, NICE::VVector & prototypes, double delta)
  266. {
  267. if (iterations > 1)
  268. fprintf(stderr, "KMeans::cluster: iteration=%d delta=%f\n", iterations,
  269. delta);
  270. else
  271. fprintf(stderr, "KMeans::cluster: iteration=%d\n", iterations);
  272. int k = 0;
  273. for (NICE::VVector::const_iterator i = prototypes.begin(); i != prototypes.end(); i++, k++)
  274. {
  275. fprintf(stderr, "class (%d)\n", k);
  276. std::cerr << "prototype = " << (*i) << std::endl;
  277. }
  278. }
  279. ///////////////////// INTERFACE PERSISTENT /////////////////////
  280. // interface specific methods for store and restore
  281. ///////////////////// INTERFACE PERSISTENT /////////////////////
  282. void KMeans::restore ( std::istream & is, int format )
  283. {
  284. //delete everything we knew so far...
  285. this->clear();
  286. if ( is.good() )
  287. {
  288. std::string tmp;
  289. is >> tmp; //class name
  290. if ( ! this->isStartTag( tmp, "KMeans" ) )
  291. {
  292. std::cerr << " WARNING - attempt to restore KMeans, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  293. throw;
  294. }
  295. bool b_endOfBlock ( false ) ;
  296. while ( !b_endOfBlock )
  297. {
  298. is >> tmp; // start of block
  299. if ( this->isEndTag( tmp, "KMeans" ) )
  300. {
  301. b_endOfBlock = true;
  302. continue;
  303. }
  304. tmp = this->removeStartTag ( tmp );
  305. if ( tmp.compare("noClusters") == 0 )
  306. {
  307. is >> this->noClusters;
  308. is >> tmp; // end of block
  309. tmp = this->removeEndTag ( tmp );
  310. }
  311. else if ( tmp.compare("distanceType") == 0 )
  312. {
  313. is >> this->distanceType;
  314. //TODO fixme
  315. this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType );
  316. is >> tmp; // end of block
  317. tmp = this->removeEndTag ( tmp );
  318. }
  319. else if ( tmp.compare("distancefunction") == 0 )
  320. {
  321. //TODO is >> this->distancefunction;
  322. is >> tmp; // end of block
  323. tmp = this->removeEndTag ( tmp );
  324. }
  325. else if ( tmp.compare("d_minDelta") == 0 )
  326. {
  327. is >> this->d_minDelta;
  328. is >> tmp; // end of block
  329. tmp = this->removeEndTag ( tmp );
  330. }
  331. else if ( tmp.compare("i_maxIterations") == 0 )
  332. {
  333. is >> this->i_maxIterations;
  334. is >> tmp; // end of block
  335. tmp = this->removeEndTag ( tmp );
  336. }
  337. else
  338. {
  339. std::cerr << "WARNING -- unexpected KMeans object -- " << tmp << " -- for restoration... aborting" << std::endl;
  340. throw;
  341. }
  342. }
  343. }
  344. else
  345. {
  346. std::cerr << "KMeans::restore -- InStream not initialized - restoring not possible!" << std::endl;
  347. throw;
  348. }
  349. }
  350. void KMeans::store ( std::ostream & os, int format ) const
  351. {
  352. if (os.good())
  353. {
  354. // show starting point
  355. os << this->createStartTag( "KMeans" ) << std::endl;
  356. os << this->createStartTag( "noClusters" ) << std::endl;
  357. os << this->noClusters << std::endl;
  358. os << this->createEndTag( "noClusters" ) << std::endl;
  359. os << this->createStartTag( "distanceType" ) << std::endl;
  360. os << this->distanceType << std::endl;
  361. os << this->createEndTag( "distanceType" ) << std::endl;
  362. os << this->createStartTag( "distancefunction" ) << std::endl;
  363. //TODO os << this->distancefunction << std::endl;
  364. os << this->createEndTag( "distancefunction" ) << std::endl;
  365. os << this->createStartTag( "d_minDelta" ) << std::endl;
  366. os << this->d_minDelta << std::endl;
  367. os << this->createEndTag( "d_minDelta" ) << std::endl;
  368. os << this->createStartTag( "i_maxIterations" ) << std::endl;
  369. os << this->i_maxIterations << std::endl;
  370. os << this->createEndTag( "i_maxIterations" ) << std::endl;
  371. // done
  372. os << this->createEndTag( "KMeans" ) << std::endl;
  373. }
  374. else
  375. {
  376. std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  377. }
  378. }
  379. void KMeans::clear ()
  380. {
  381. if ( this->distancefunction != NULL )
  382. {
  383. delete this->distancefunction;
  384. this->distancefunction = NULL ;
  385. }
  386. }