RandomClustering.cpp 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /**
  2. * @file RandomClustering.cpp
  3. * @brief Clustering by randomly picking some samples from the set of features as representatives
  4. * @author Alexander Freytag
  5. * @date 03-06-2013 (dd-mm-yyyy)
  6. */
  7. #ifdef NICE_USELIB_OPENMP
  8. #include <omp.h>
  9. #endif
  10. #include <iostream>
  11. #include <map>
  12. #include "vislearning/math/distances/genericDistance.h"
  13. #include "vislearning/math/cluster/RandomClustering.h"
  14. #include <set>
  15. using namespace OBJREC;
  16. using namespace std;
  17. using namespace NICE;
  18. ///////////////////// ///////////////////// /////////////////////
  19. // CONSTRUCTORS / DESTRUCTORS
  20. ///////////////////// ///////////////////// /////////////////////
  21. RandomClustering::RandomClustering() : ClusterAlgorithm()
  22. {
  23. this->noClusters = 20;
  24. this->distanceType = "euclidean";
  25. this->distancefunction = NULL;
  26. }
  27. RandomClustering::RandomClustering(const int & _noClusters, const std::string & _distanceType) :
  28. noClusters(_noClusters), distanceType(_distanceType)
  29. {
  30. }
  31. RandomClustering::RandomClustering( const NICE::Config * _conf, const std::string & _confSection)
  32. {
  33. this->initFromConfig ( _conf, _confSection );
  34. }
  35. RandomClustering::~RandomClustering()
  36. {
  37. }
  38. void RandomClustering::initFromConfig( const NICE::Config* _conf, const std::string& _confSection )
  39. {
  40. this->noClusters = _conf->gI( _confSection, "noClusters", 20);
  41. this->distanceType = _conf->gS( _confSection, "distanceType", "euclidean" );
  42. this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType );
  43. }
  44. ///////////////////// ///////////////////// /////////////////////
  45. // CLUSTERING STUFF
  46. ///////////////////// ///////////////////// //////////////////
  47. int RandomClustering::compute_prototypes(const NICE::VVector & _features, NICE::VVector & _prototypes,
  48. std::vector<double> & _weights, const std::vector<int> & _assignment)
  49. {
  50. int noFeatures ( _features.size() );
  51. std::set<int, std::greater<int> > chosenIdx;
  52. //empty init
  53. chosenIdx.clear();
  54. //pick k distinct cluster representatives randomly
  55. for (int cnt = 0; cnt < this->noClusters; cnt++)
  56. {
  57. int idx;
  58. do
  59. {
  60. idx = rand() % noFeatures;
  61. }
  62. while ( chosenIdx.find ( idx ) != chosenIdx.end() );
  63. //if a new (distinct) idx was computed, insert it into the set of randomly picked indicees
  64. chosenIdx.insert ( idx );
  65. }
  66. _prototypes.resize( this->noClusters );
  67. int clusterCnt ( 0 );
  68. for ( std::set<int>::const_iterator idxIt = chosenIdx.begin(); idxIt != chosenIdx.end(); idxIt++, clusterCnt++ )
  69. {
  70. _prototypes[clusterCnt] = _features[ *idxIt ];
  71. }
  72. return 0;
  73. }
  74. double RandomClustering::compute_assignments(const NICE::VVector & _features,
  75. const NICE::VVector & _prototypes,
  76. std::vector<int> & _assignment)
  77. {
  78. _assignment.resize( _features.size() );
  79. int index = 0;
  80. for (NICE::VVector::const_iterator i = _features.begin(); i != _features.end(); i++, index++)
  81. {
  82. const NICE::Vector & x = *i;
  83. double mindist = std::numeric_limits<double>::max();
  84. int minclass = 0;
  85. int c = 0;
  86. for (NICE::VVector::const_iterator j = _prototypes.begin(); j
  87. != _prototypes.end(); j++, c++)
  88. {
  89. const NICE::Vector & p = *j;
  90. double distance = this->distancefunction->calculate(p, x);
  91. if (distance < mindist)
  92. {
  93. minclass = c;
  94. mindist = distance;
  95. }
  96. }
  97. _assignment[index] = minclass;
  98. }
  99. return 0.0;
  100. }
  101. double RandomClustering::compute_weights(const NICE::VVector & _features,
  102. std::vector<double> & _weights,
  103. std::vector<int> & _assignment)
  104. {
  105. _weights.resize( this->noClusters );
  106. //initalization
  107. for (int k = 0; k < noClusters; k++)
  108. _weights[k] = 0;
  109. int j = 0;
  110. //increase weight for every assignment
  111. for (NICE::VVector::const_iterator i = _features.begin(); i != _features.end(); i++, j++)
  112. {
  113. int k = _assignment[j];
  114. _weights[k]++;
  115. }
  116. //normalize weights
  117. for (int k = 0; k < noClusters; k++)
  118. _weights[k] = _weights[k] / _features.size();
  119. return 0.0;
  120. }
  121. void RandomClustering::cluster(const NICE::VVector & _features,
  122. NICE::VVector & _prototypes,
  123. std::vector<double> & _weights,
  124. std::vector<int> & _assignment)
  125. {
  126. //randomly pick cluster centers
  127. this->compute_prototypes( _features, _prototypes, _weights, _assignment );
  128. //compute assignments for every cluster
  129. this->compute_assignments( _features, _prototypes, _assignment );
  130. //compute corresponding weights
  131. this->compute_weights( _features, _weights, _assignment );
  132. }
  133. ///////////////////// INTERFACE PERSISTENT /////////////////////
  134. // interface specific methods for store and restore
  135. ///////////////////// INTERFACE PERSISTENT /////////////////////
  136. void RandomClustering::restore ( std::istream & is, int format )
  137. {
  138. //delete everything we knew so far...
  139. this->clear();
  140. if ( is.good() )
  141. {
  142. std::string tmp;
  143. is >> tmp; //class name
  144. if ( ! this->isStartTag( tmp, "RandomClustering" ) )
  145. {
  146. std::cerr << " WARNING - attempt to restore RandomClustering, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  147. throw;
  148. }
  149. bool b_endOfBlock ( false ) ;
  150. while ( !b_endOfBlock )
  151. {
  152. is >> tmp; // start of block
  153. if ( this->isEndTag( tmp, "RandomClustering" ) )
  154. {
  155. b_endOfBlock = true;
  156. continue;
  157. }
  158. tmp = this->removeStartTag ( tmp );
  159. if ( tmp.compare("noClusters") == 0 )
  160. {
  161. is >> this->noClusters;
  162. is >> tmp; // end of block
  163. tmp = this->removeEndTag ( tmp );
  164. }
  165. else if ( tmp.compare("distanceType") == 0 )
  166. {
  167. is >> this->distanceType;
  168. //TODO fixme
  169. this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType );
  170. is >> tmp; // end of block
  171. tmp = this->removeEndTag ( tmp );
  172. }
  173. else if ( tmp.compare("distancefunction") == 0 )
  174. {
  175. //TODO is >> this->distancefunction;
  176. is >> tmp; // end of block
  177. tmp = this->removeEndTag ( tmp );
  178. }
  179. else
  180. {
  181. std::cerr << "WARNING -- unexpected RandomClustering object -- " << tmp << " -- for restoration... aborting" << std::endl;
  182. throw;
  183. }
  184. }
  185. }
  186. else
  187. {
  188. std::cerr << "RandomClustering::restore -- InStream not initialized - restoring not possible!" << std::endl;
  189. throw;
  190. }
  191. }
  192. void RandomClustering::store ( std::ostream & os, int format ) const
  193. {
  194. if (os.good())
  195. {
  196. // show starting point
  197. os << this->createStartTag( "RandomClustering" ) << std::endl;
  198. os << this->createStartTag( "noClusters" ) << std::endl;
  199. os << this->noClusters << std::endl;
  200. os << this->createEndTag( "noClusters" ) << std::endl;
  201. os << this->createStartTag( "distanceType" ) << std::endl;
  202. os << this->distanceType << std::endl;
  203. os << this->createEndTag( "distanceType" ) << std::endl;
  204. os << this->createStartTag( "distancefunction" ) << std::endl;
  205. //TODO os << this->distancefunction << std::endl;
  206. os << this->createEndTag( "distancefunction" ) << std::endl;
  207. // done
  208. os << this->createEndTag( "RandomClustering" ) << std::endl;
  209. }
  210. else
  211. {
  212. std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  213. }
  214. }
  215. void RandomClustering::clear ()
  216. {
  217. if ( this->distancefunction != NULL )
  218. {
  219. delete this->distancefunction;
  220. this->distancefunction = NULL ;
  221. }
  222. }