/** * @file SpectralCluster.cpp * @brief spectral clustering by kmeans-clustering of eigenvectors * @author Erik Rodner, Alexander Freytag * @date 11/13/2007 */ #include #include #include #include #include "vislearning/math/cluster/SpectralCluster.h" using namespace OBJREC; using namespace std; using namespace NICE; ///////////////////// ///////////////////// ///////////////////// // CONSTRUCTORS / DESTRUCTORS ///////////////////// ///////////////////// ///////////////////// SpectralCluster::SpectralCluster() : ClusterAlgorithm() , kmeans() { this->noClusters = 20; this->alpha = 1.0; } SpectralCluster::SpectralCluster ( int _noClusters, double alpha ) : noClusters(_noClusters), kmeans(_noClusters) { this->alpha = alpha; } SpectralCluster::SpectralCluster( const NICE::Config * _conf, const std::string & _confSection) { this->initFromConfig( _conf, _confSection ); } SpectralCluster::~SpectralCluster() { } void SpectralCluster::initFromConfig( const NICE::Config* _conf, const std::string& _confSection ) { this->noClusters = _conf->gI( _confSection, "noClusters", 20); this->alpha = _conf->gD( _confSection, "alpha", 1.0); this->kmeans.initFromConfig( _conf ); } ///////////////////// ///////////////////// ///////////////////// // CLUSTERING STUFF ///////////////////// ///////////////////// ////////////////// void SpectralCluster::getSimilarityMatrix ( const VVector & features, NICE::Matrix & laplacian, double alpha ) { NICE::Matrix distances ( laplacian ); double mindist = numeric_limits::max(); double maxdist = - numeric_limits::max(); long int count = 0; double mean = 0.0; double stddev = 0.0; NICE::EuclidianDistance distance; for ( int i = 0 ; i < (int)features.size() ; i++ ) { const NICE::Vector & xi = features[i]; for ( int j = i ; j < (int)features.size() ; j++ ) { const NICE::Vector & xj = features[j]; // double sim = xi * xj; double dist = distance.calculate ( xi, xj ); distances(i, j) = dist; if ( dist < mindist ) mindist = dist; if ( dist > maxdist ) maxdist = dist; count++; mean += dist; } } mean /= count; for ( int i = 0 ; i < (int)features.size() ; i++ ) { for ( int j = i ; j < (int)features.size() ; j++ ) { double d = ( mean - distances(i, j) ); stddev += d*d; } } stddev /= count; double norm = alpha / (2.0 * stddev); for ( int i = 0 ; i < (int)features.size() ; i++ ) { for ( int j = i ; j < (int)features.size() ; j++ ) { double sim = exp(-distances(i, j) * norm ); laplacian(i, j) = - sim; laplacian(j, i) = - sim; } } } void SpectralCluster::computeLaplacian ( const NICE::VVector & features, NICE::Matrix & laplacian, int method, double alpha ) { laplacian.set(0.0); getSimilarityMatrix(features, laplacian, alpha); NICE::Vector d ( laplacian.rows() ); d.set(0.0); for ( int i = 0 ; i < (int)laplacian.rows(); i++ ) { for ( int j = 0 ; j < (int)laplacian.cols(); j++ ) { d[i] -=laplacian(i, j); } } // Now we got the negative weight matrix laplacian : -W // and the degree matrix : D // calculate normalization // D^-1 * L if ( method == L_RW_NORMALIZED ) { // L = D^-1 L_unnormalized = I - D^-1*W for ( int i = 0 ; i < (int)laplacian.rows() ; i++ ) { for ( int j = 0 ; j < (int)laplacian.cols() ; j++ ) { laplacian(i, j) *= (1.0/d[i]); } } for ( int i = 0 ; i < (int)laplacian.rows() ; i++ ) { laplacian(i, i) += 1.0; } } else if ( method == L_UNNORMALIZED ) { // unnormalized version // L = D - W for ( int i = 0 ; i < (int)laplacian.rows() ; i++ ) { laplacian(i, i) += d[i]; } } } void SpectralCluster::cluster ( const NICE::VVector & features, NICE::VVector & prototypes, std::vector & weights, std::vector & assignment ) { if ( features.size() <= 0 ) { fprintf (stderr, "FATAL ERROR: not enough features vectors provided\n"); exit(-1); } const NICE::Vector & x = features[0]; int dimension = x.size(); NICE::Matrix laplacian ( features.size(), features.size() ); computeLaplacian ( features, laplacian, L_RW_NORMALIZED, alpha ); //computeLaplacian ( features, laplacian, L_UNNORMALIZED, alpha ); NICE::Matrix eigvect; NICE::Vector eigvals; try { NICE::eigenvectorvalues ( laplacian, eigvect, eigvals ); } catch (...) { std::cerr << "You should adjust the alpha parameter, or the features are somehow weird" << std::endl; std::cerr << "Laplacian = " << laplacian(0, 0, min((int)laplacian.rows()-1, 5), min((int)laplacian.cols()-1, 5)) << std::endl; throw Exception ("Laplacian matrix is singular or not well-conditioned!"); } std::map eigvals_sorted; for ( int i = 0 ; i < (int)eigvals.size(); i++ ) { eigvals_sorted.insert ( make_pair( eigvals[i], i ) ); } NICE::VVector spectral_features; for ( int i = 0 ; i < (int)eigvect.rows() ; i++ ) { NICE::Vector eigvec_k ( noClusters ); map::const_iterator k = eigvals_sorted.begin(); for ( int j = 0 ; j < noClusters ; j++ ) { int eigval_index = k->second; eigvec_k[j] = eigvect(i, eigval_index ) ; k++; } spectral_features.push_back ( eigvec_k ); } this->kmeans.cluster ( spectral_features, prototypes, weights, assignment ); // recompute prototypes for ( int k = 0 ; k < noClusters ; k++ ) { prototypes[k].resize( dimension ); prototypes[k].set(0); weights[k] = 0; } int j = 0; for ( VVector::const_iterator i = features.begin(); i != features.end(); i++, j++ ) { int k = assignment[j]; NICE::Vector & p = prototypes[k]; const NICE::Vector & x = *i; p += x; weights[k]++; } for ( int k = 0 ; k < noClusters ; k++ ) { NICE::Vector & p = prototypes[k]; if ( weights[k] <= 0 ) { fprintf (stderr, "FATAL ERROR: spectral clustering produced empty cluster !\n"); exit(-1); } p *= ( 1.0 / weights[k] ); weights[k] = weights[k] / features.size(); } } ///////////////////// INTERFACE PERSISTENT ///////////////////// // interface specific methods for store and restore ///////////////////// INTERFACE PERSISTENT ///////////////////// void SpectralCluster::restore ( std::istream & is, int format ) { //delete everything we knew so far... this->clear(); if ( is.good() ) { std::string tmp; is >> tmp; //class name if ( ! this->isStartTag( tmp, "SpectralCluster" ) ) { std::cerr << " WARNING - attempt to restore SpectralCluster, but start flag " << tmp << " does not match! Aborting... " << std::endl; throw; } bool b_endOfBlock ( false ) ; while ( !b_endOfBlock ) { is >> tmp; // start of block if ( this->isEndTag( tmp, "SpectralCluster" ) ) { b_endOfBlock = true; continue; } tmp = this->removeStartTag ( tmp ); if ( tmp.compare("noClusters") == 0 ) { is >> this->noClusters; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("alpha") == 0 ) { is >> this->alpha; is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else if ( tmp.compare("kmeans") == 0 ) { this->kmeans.restore ( is ); is >> tmp; // end of block tmp = this->removeEndTag ( tmp ); } else { std::cerr << "WARNING -- unexpected SpectralCluster object -- " << tmp << " -- for restoration... aborting" << std::endl; throw; } } } else { std::cerr << "SpectralCluster::restore -- InStream not initialized - restoring not possible!" << std::endl; throw; } } void SpectralCluster::store ( std::ostream & os, int format ) const { if (os.good()) { // show starting point os << this->createStartTag( "SpectralCluster" ) << std::endl; os << this->createStartTag( "noClusters" ) << std::endl; os << this->noClusters << std::endl; os << this->createEndTag( "noClusters" ) << std::endl; os << this->createStartTag( "alpha" ) << std::endl; os << this->alpha << std::endl; os << this->createEndTag( "alpha" ) << std::endl; os << this->createStartTag( "kmeans" ) << std::endl; this->kmeans.store ( os ); os << this->createEndTag( "kmeans" ) << std::endl; // done os << this->createEndTag( "SpectralCluster" ) << std::endl; } else { std::cerr << "OutStream not initialized - storing not possible!" << std::endl; } } void SpectralCluster::clear () { }