123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324 |
- /**
- * @file HierarchicalCluster.cpp
- * @brief Hierarchical Clustering wrapper
- * @author Michael Koch
- * @date 10/25/2010
- */
- #include <iostream>
- #include <core/vector/Distance.h>
- #include "vislearning/math/cluster/HierarchicalCluster.h"
- #include "vislearning/math/cluster/cluster.h"
- #include <set>
- #undef max
- #undef min
- #include <limits>
- using namespace OBJREC;
- using namespace std;
- using namespace NICE;
- #undef DEBUG_HierarchicalCluster
- HierarchicalCluster::HierarchicalCluster(int _noClasses, char _method,
- int _distanceType) :
- noClasses(_noClasses), method(_method), distanceType(_distanceType)
- {
- srand(time(NULL));
- }
- HierarchicalCluster::~HierarchicalCluster()
- {
- }
- void HierarchicalCluster::cluster(const VVector & features,
- VVector & prototypes, std::vector<double> & weights,
- std::vector<int> & assignment)
- {
- double double_max = std::numeric_limits<double>::max();
- if (features.size() <= 1)
- {
- prototypes=features;
- }
- else
- {
- int featureamount = features.size();
- int dimension = features[0].size();
- double **data;
- int **mask;
- VVectorToDoubleArray(features, data, mask);
- double * weightpointer = new double[dimension];
- for (uint i = 0; i < dimension; i++)
- {
- if (i < weights.size())
- {
- weightpointer[i] = weights[i];
- }
- else
- {
- weightpointer[i] = 1.0;
- }
- }
- int transpose = 0;
- double ** distmatrix = NULL;
- Node * nodes = treecluster(featureamount, dimension, data, mask,
- weightpointer, transpose, distanceFunction(distanceType),
- method, distmatrix);
- std::vector<double> distances;
- for (int i = 0; i < featureamount; i++)
- {
- Node currentnode = nodes[i];
- if (currentnode.distance < double_max)
- {
- distances.push_back(currentnode.distance);
- }
- }
- //TODO cluster estimation refactor without cluster.h in HierarchicalCluster.h
- if (noClasses <= 0)
- {
- std::sort(distances.begin(), distances.end());
- std::vector<double> gradient;
- std::vector<int> cuts;
- size_t count = 0;
- double last_element;
- for (size_t i = 0; i < distances.size(); i++)
- {
- if (count > 0)
- {
- gradient.push_back(distances[i] - last_element);
- }
- last_element = distances[i];
- count++;
- }
- size_t pos_max = 0;
- double max;
- if (gradient.size() > 0)
- {
- max = gradient[0];
- for (size_t i = 1; i < gradient.size(); i++)
- {
- if (gradient[i] > max)
- {
- max = gradient[i];
- pos_max = i;
- }
- }
- }
- size_t pos_reject = pos_max + 1;
- double distance_reject = distances[pos_reject];
- for (size_t i = 0; i < featureamount; i++)
- {
- if (nodes[i].distance < double_max && nodes[i].distance
- >= distance_reject)
- {
- cuts.push_back(2);
- }
- else
- {
- cuts.push_back(0);
- }
- }
- bool change = true;
- while (change)
- {
- change = false;
- for (size_t i = 0; i < featureamount; i++)
- {
- if (cuts[i] > 0 && nodes[i].distance < double_max
- && nodes[i].distance >= distance_reject)
- {
- if (nodes[i].left < 0)
- {
- int pos_left = -nodes[i].left - 1;
- if (cuts[pos_left] > 0)
- {
- if (cuts[i] > 0)
- {
- cuts[i]--;
- change = true;
- }
- }
- }
- if (nodes[i].right < 0)
- {
- int pos_right = -nodes[i].right - 1;
- if (cuts[pos_right] > 0)
- {
- if (cuts[i] > 0)
- {
- cuts[i]--;
- change = true;
- }
- }
- }
- }
- }
- }
- noClasses = 0;
- for (int i = 0; i < featureamount; i++)
- {
- noClasses += cuts[i];
- }
- }
- int clusterassignment[featureamount];
- cuttree(featureamount, nodes, noClasses, clusterassignment);
- // assignment.clear();
- for (int i = 0; i < featureamount; i++)
- {
- if (clusterassignment[i] >= 0)
- {
- assignment.push_back(clusterassignment[i]);
- }
- }
- prototypes.clear();
- for (int k = 0; k < noClasses; k++)
- {
- prototypes.push_back(Vector(dimension));
- prototypes[k].set(0);
- weights.push_back(0);
- }
- compute_prototypes(features, prototypes, weights, assignment);
- }
- }
- void HierarchicalCluster::VVectorToDoubleArray(const VVector &data,
- double ** &doublearray, int ** &mask)
- {
- if (data.size() > 0)
- {
- int featureamount = data.size();
- int dimension = data[0].size();
- doublearray = new double*[featureamount];
- mask = new int*[featureamount];
- for (int i = 0; i < featureamount; ++i)
- {
- doublearray[i] = new double[dimension];
- mask[i] = new int[dimension];
- }
- for (int row = 0; row < featureamount; row++)
- {
- for (int col = 0; col < dimension; col++)
- {
- doublearray[row][col] = data[row][col];
- if (isNaN(data[row][col]))
- {
- mask[row][col] = 0;
- }
- else
- {
- mask[row][col] = 1;
- }
- }
- }
- }
- }
- //TODO find better space for that one and refactor with enum
- char HierarchicalCluster::distanceFunction(int distancetype)
- {
- // dist (input) char
- // Defines which distance measure is used, as given by the table:
- // dist=='e': Euclidean distance
- // dist=='b': City-block distance
- // dist=='c': correlation
- // dist=='a': absolute value of the correlation
- // dist=='u': uncentered correlation
- // dist=='x': absolute uncentered correlation
- // dist=='s': Spearman's rank correlation
- // dist=='k': Kendall's tau
- // For other values of dist, the default (Euclidean distance) is used.
- char type = 'e';
- switch (distancetype)
- {
- case 0:
- {
- type = 'e';
- }
- break;
- case 1:
- {
- type = 'b';
- }
- break;
- case 2:
- {
- type = 'c';
- }
- break;
- case 3:
- {
- type = 'a';
- }
- break;
- case 4:
- {
- type = 'u';
- }
- break;
- case 5:
- {
- type = 'x';
- }
- break;
- case 6:
- {
- type = 's';
- }
- break;
- case 7:
- {
- type = 'k';
- }
- break;
- default:
- type = 'e';
- break;
- }
- return type;
- }
- int HierarchicalCluster::compute_prototypes(const VVector & features,
- VVector & prototypes, std::vector<double> & weights, const std::vector<
- int> & assignment)
- {
- int j = 0;
- for (int k = 0; k < noClasses; k++)
- {
- prototypes[k].set(0);
- weights[k] = 0;
- }
- for (VVector::const_iterator i = features.begin(); i != features.end(); i++, j++)
- {
- int k = assignment[j];
- NICE::Vector & p = prototypes[k];
- const NICE::Vector & x = *i;
- p += x;
- weights[k]++;
- }
- for (int k = 0; k < noClasses; k++)
- {
- NICE::Vector & p = prototypes[k];
- if (weights[k] <= 0)
- {
- return -1;
- }
- p *= (1.0 / weights[k]);
- weights[k] = weights[k] / features.size();
- }
- return 0;
- }
|