HierarchicalCluster.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. /**
  2. * @file HierarchicalCluster.cpp
  3. * @brief Hierarchical Clustering wrapper
  4. * @author Michael Koch
  5. * @date 10/25/2010
  6. */
  7. #include <iostream>
  8. #include <core/vector/Distance.h>
  9. #include "vislearning/math/cluster/HierarchicalCluster.h"
  10. #include "vislearning/math/cluster/cluster.h"
  11. #include <set>
  12. #undef max
  13. #undef min
  14. #include <limits>
  15. using namespace OBJREC;
  16. using namespace std;
  17. using namespace NICE;
  18. #undef DEBUG_HierarchicalCluster
  19. HierarchicalCluster::HierarchicalCluster(int _noClasses, char _method,
  20. int _distanceType) :
  21. noClasses(_noClasses), method(_method), distanceType(_distanceType)
  22. {
  23. srand(time(NULL));
  24. }
  25. HierarchicalCluster::~HierarchicalCluster()
  26. {
  27. }
  28. void HierarchicalCluster::cluster(const VVector & features,
  29. VVector & prototypes, std::vector<double> & weights,
  30. std::vector<int> & assignment)
  31. {
  32. double double_max = std::numeric_limits<double>::max();
  33. if (features.size() <= 1)
  34. {
  35. prototypes=features;
  36. }
  37. else
  38. {
  39. int featureamount = features.size();
  40. int dimension = features[0].size();
  41. double **data;
  42. int **mask;
  43. VVectorToDoubleArray(features, data, mask);
  44. double * weightpointer = new double[dimension];
  45. for (uint i = 0; i < dimension; i++)
  46. {
  47. if (i < weights.size())
  48. {
  49. weightpointer[i] = weights[i];
  50. }
  51. else
  52. {
  53. weightpointer[i] = 1.0;
  54. }
  55. }
  56. int transpose = 0;
  57. double ** distmatrix = NULL;
  58. Node * nodes = treecluster(featureamount, dimension, data, mask,
  59. weightpointer, transpose, distanceFunction(distanceType),
  60. method, distmatrix);
  61. std::vector<double> distances;
  62. for (int i = 0; i < featureamount; i++)
  63. {
  64. Node currentnode = nodes[i];
  65. if (currentnode.distance < double_max)
  66. {
  67. distances.push_back(currentnode.distance);
  68. }
  69. }
  70. //TODO cluster estimation refactor without cluster.h in HierarchicalCluster.h
  71. if (noClasses <= 0)
  72. {
  73. std::sort(distances.begin(), distances.end());
  74. std::vector<double> gradient;
  75. std::vector<int> cuts;
  76. size_t count = 0;
  77. double last_element;
  78. for (size_t i = 0; i < distances.size(); i++)
  79. {
  80. if (count > 0)
  81. {
  82. gradient.push_back(distances[i] - last_element);
  83. }
  84. last_element = distances[i];
  85. count++;
  86. }
  87. size_t pos_max = 0;
  88. double max;
  89. if (gradient.size() > 0)
  90. {
  91. max = gradient[0];
  92. for (size_t i = 1; i < gradient.size(); i++)
  93. {
  94. if (gradient[i] > max)
  95. {
  96. max = gradient[i];
  97. pos_max = i;
  98. }
  99. }
  100. }
  101. size_t pos_reject = pos_max + 1;
  102. double distance_reject = distances[pos_reject];
  103. for (size_t i = 0; i < featureamount; i++)
  104. {
  105. if (nodes[i].distance < double_max && nodes[i].distance
  106. >= distance_reject)
  107. {
  108. cuts.push_back(2);
  109. }
  110. else
  111. {
  112. cuts.push_back(0);
  113. }
  114. }
  115. bool change = true;
  116. while (change)
  117. {
  118. change = false;
  119. for (size_t i = 0; i < featureamount; i++)
  120. {
  121. if (cuts[i] > 0 && nodes[i].distance < double_max
  122. && nodes[i].distance >= distance_reject)
  123. {
  124. if (nodes[i].left < 0)
  125. {
  126. int pos_left = -nodes[i].left - 1;
  127. if (cuts[pos_left] > 0)
  128. {
  129. if (cuts[i] > 0)
  130. {
  131. cuts[i]--;
  132. change = true;
  133. }
  134. }
  135. }
  136. if (nodes[i].right < 0)
  137. {
  138. int pos_right = -nodes[i].right - 1;
  139. if (cuts[pos_right] > 0)
  140. {
  141. if (cuts[i] > 0)
  142. {
  143. cuts[i]--;
  144. change = true;
  145. }
  146. }
  147. }
  148. }
  149. }
  150. }
  151. noClasses = 0;
  152. for (int i = 0; i < featureamount; i++)
  153. {
  154. noClasses += cuts[i];
  155. }
  156. }
  157. int clusterassignment[featureamount];
  158. cuttree(featureamount, nodes, noClasses, clusterassignment);
  159. // assignment.clear();
  160. for (int i = 0; i < featureamount; i++)
  161. {
  162. if (clusterassignment[i] >= 0)
  163. {
  164. assignment.push_back(clusterassignment[i]);
  165. }
  166. }
  167. prototypes.clear();
  168. for (int k = 0; k < noClasses; k++)
  169. {
  170. prototypes.push_back(Vector(dimension));
  171. prototypes[k].set(0);
  172. weights.push_back(0);
  173. }
  174. compute_prototypes(features, prototypes, weights, assignment);
  175. }
  176. }
  177. void HierarchicalCluster::VVectorToDoubleArray(const VVector &data,
  178. double ** &doublearray, int ** &mask)
  179. {
  180. if (data.size() > 0)
  181. {
  182. int featureamount = data.size();
  183. int dimension = data[0].size();
  184. doublearray = new double*[featureamount];
  185. mask = new int*[featureamount];
  186. for (int i = 0; i < featureamount; ++i)
  187. {
  188. doublearray[i] = new double[dimension];
  189. mask[i] = new int[dimension];
  190. }
  191. for (int row = 0; row < featureamount; row++)
  192. {
  193. for (int col = 0; col < dimension; col++)
  194. {
  195. doublearray[row][col] = data[row][col];
  196. if (isNaN(data[row][col]))
  197. {
  198. mask[row][col] = 0;
  199. }
  200. else
  201. {
  202. mask[row][col] = 1;
  203. }
  204. }
  205. }
  206. }
  207. }
  208. //TODO find better space for that one and refactor with enum
  209. char HierarchicalCluster::distanceFunction(int distancetype)
  210. {
  211. // dist (input) char
  212. // Defines which distance measure is used, as given by the table:
  213. // dist=='e': Euclidean distance
  214. // dist=='b': City-block distance
  215. // dist=='c': correlation
  216. // dist=='a': absolute value of the correlation
  217. // dist=='u': uncentered correlation
  218. // dist=='x': absolute uncentered correlation
  219. // dist=='s': Spearman's rank correlation
  220. // dist=='k': Kendall's tau
  221. // For other values of dist, the default (Euclidean distance) is used.
  222. char type = 'e';
  223. switch (distancetype)
  224. {
  225. case 0:
  226. {
  227. type = 'e';
  228. }
  229. break;
  230. case 1:
  231. {
  232. type = 'b';
  233. }
  234. break;
  235. case 2:
  236. {
  237. type = 'c';
  238. }
  239. break;
  240. case 3:
  241. {
  242. type = 'a';
  243. }
  244. break;
  245. case 4:
  246. {
  247. type = 'u';
  248. }
  249. break;
  250. case 5:
  251. {
  252. type = 'x';
  253. }
  254. break;
  255. case 6:
  256. {
  257. type = 's';
  258. }
  259. break;
  260. case 7:
  261. {
  262. type = 'k';
  263. }
  264. break;
  265. default:
  266. type = 'e';
  267. break;
  268. }
  269. return type;
  270. }
  271. int HierarchicalCluster::compute_prototypes(const VVector & features,
  272. VVector & prototypes, std::vector<double> & weights, const std::vector<
  273. int> & assignment)
  274. {
  275. int j = 0;
  276. for (int k = 0; k < noClasses; k++)
  277. {
  278. prototypes[k].set(0);
  279. weights[k] = 0;
  280. }
  281. for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
  282. {
  283. int k = assignment[j];
  284. NICE::Vector & p = prototypes[k];
  285. const NICE::Vector & x = *i;
  286. p += x;
  287. weights[k]++;
  288. }
  289. for (int k = 0; k < noClasses; k++)
  290. {
  291. NICE::Vector & p = prototypes[k];
  292. if (weights[k] <= 0)
  293. {
  294. return -1;
  295. }
  296. p *= (1.0 / weights[k]);
  297. weights[k] = weights[k] / features.size();
  298. }
  299. return 0;
  300. }