/** * @file RandomClustering.cpp * @brief Clustering by randomly picking some samples from the set of features as representatives * @author Alexander Freytag * @date 03-06-2013 (dd-mm-yyyy) */ #ifdef NICE_USELIB_OPENMP #include #endif #include #include #include "vislearning/math/distances/genericDistance.h" #include "vislearning/math/cluster/RandomClustering.h" #include using namespace OBJREC; using namespace std; using namespace NICE; ///////////////////// ///////////////////// ///////////////////// // CONSTRUCTORS / DESTRUCTORS ///////////////////// ///////////////////// ///////////////////// RandomClustering::RandomClustering() : ClusterAlgorithm() { this->noClusters = 20; this->distanceType = "euclidean"; this->distancefunction = NULL; } RandomClustering::RandomClustering(const int & _noClusters, const std::string & _distanceType) : noClusters(_noClusters), distanceType(_distanceType) { } RandomClustering::RandomClustering( const NICE::Config * _conf, const std::string & _confSection) { this->initFromConfig ( _conf, _confSection ); } RandomClustering::~RandomClustering() { } void RandomClustering::initFromConfig( const NICE::Config* _conf, const std::string& _confSection ) { this->noClusters = _conf->gI( _confSection, "noClusters", 20); this->distanceType = _conf->gS( _confSection, "distanceType", "euclidean" ); this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType ); } ///////////////////// ///////////////////// ///////////////////// // CLUSTERING STUFF ///////////////////// ///////////////////// ////////////////// int RandomClustering::compute_prototypes(const NICE::VVector & _features, NICE::VVector & _prototypes, std::vector & _weights, const std::vector & _assignment) { int noFeatures ( _features.size() ); std::set > chosenIdx; //empty init chosenIdx.clear(); //pick k distinct cluster representatives randomly for (int cnt = 0; cnt < this->noClusters; cnt++) { int idx; do { idx = rand() % noFeatures; } while ( chosenIdx.find ( idx ) != chosenIdx.end() ); //if a new (distinct) idx was computed, insert it into the set of randomly picked indicees chosenIdx.insert ( idx ); } _prototypes.resize( this->noClusters ); int clusterCnt ( 0 ); for ( std::set::const_iterator idxIt = chosenIdx.begin(); idxIt != chosenIdx.end(); idxIt++, clusterCnt++ ) { _prototypes[clusterCnt] = _features[ *idxIt ]; } return 0; } double RandomClustering::compute_assignments(const NICE::VVector & _features, const NICE::VVector & _prototypes, std::vector & _assignment) { _assignment.resize( _features.size() ); int index = 0; for (NICE::VVector::const_iterator i = _features.begin(); i != _features.end(); i++, index++) { const NICE::Vector & x = *i; double mindist = std::numeric_limits::max(); int minclass = 0; int c = 0; for (NICE::VVector::const_iterator j = _prototypes.begin(); j != _prototypes.end(); j++, c++) { const NICE::Vector & p = *j; double distance = this->distancefunction->calculate(p, x); if (distance < mindist) { minclass = c; mindist = distance; } } _assignment[index] = minclass; } return 0.0; } double RandomClustering::compute_weights(const NICE::VVector & _features, std::vector & _weights, std::vector & _assignment) { _weights.resize( this->noClusters ); //initalization for (int k = 0; k < noClusters; k++) _weights[k] = 0; int j = 0; //increase weight for every assignment for (NICE::VVector::const_iterator i = _features.begin(); i != _features.end(); i++, j++) { int k = _assignment[j]; _weights[k]++; } //normalize weights for (int k = 0; k < noClusters; k++) _weights[k] = _weights[k] / _features.size(); return 0.0; } void RandomClustering::cluster(const NICE::VVector & _features, NICE::VVector & _prototypes, std::vector & _weights, std::vector & _assignment) { //randomly pick cluster centers this->compute_prototypes( _features, _prototypes, _weights, _assignment ); //compute assignments for every cluster this->compute_assignments( _features, _prototypes, _assignment ); //compute corresponding weights this->compute_weights( _features, _weights, _assignment ); } ///////////////////// INTERFACE PERSISTENT ///////////////////// // interface specific methods for store and restore ///////////////////// INTERFACE PERSISTENT ///////////////////// void RandomClustering::restore ( std::istream & is, int format ) { //delete everything we knew so far... this->clear(); if ( is.good() ) { std::string tmp; is >> tmp; //class name if ( ! this->isStartTag( tmp, "RandomClustering" ) ) { std::cerr << " WARNING - attempt to restore RandomClustering, but start flag " << tmp << " does not match! Aborting... " << std::endl; throw; } bool b_endOfBlock ( false ) ; while ( !b_endOfBlock ) { is >> tmp; // start of block if ( this->isEndTag( tmp, "RandomClustering" ) ) { b_endOfBlock = true; continue; } tmp = this->removeStartTag ( tmp ); if ( tmp.compare("noClusters") == 0 ) { is >> this->noClusters; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("distanceType") == 0 ) { is >> this->distanceType; //TODO fixme this->distancefunction = OBJREC::GenericDistanceSelection::selectDistance( this->distanceType ); is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("distancefunction") == 0 ) { //TODO is >> this->distancefunction; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else { std::cerr << "WARNING -- unexpected RandomClustering object -- " << tmp << " -- for restoration... aborting" << std::endl; throw; } } } else { std::cerr << "RandomClustering::restore -- InStream not initialized - restoring not possible!" << std::endl; throw; } } void RandomClustering::store ( std::ostream & os, int format ) const { if (os.good()) { // show starting point os << this->createStartTag( "RandomClustering" ) << std::endl; os << this->createStartTag( "noClusters" ) << std::endl; os << this->noClusters << std::endl; os << this->createEndTag( "noClusters" ) << std::endl; os << this->createStartTag( "distanceType" ) << std::endl; os << this->distanceType << std::endl; os << this->createEndTag( "distanceType" ) << std::endl; os << this->createStartTag( "distancefunction" ) << std::endl; //TODO os << this->distancefunction << std::endl; os << this->createEndTag( "distancefunction" ) << std::endl; // done os << this->createEndTag( "RandomClustering" ) << std::endl; } else { std::cerr << "OutStream not initialized - storing not possible!" << std::endl; } } void RandomClustering::clear () { if ( this->distancefunction != NULL ) { delete this->distancefunction; this->distancefunction = NULL ; } }