/**
 * @file RegKNN.cpp
 * @brief Implementation of k-Nearest-Neighbor algorithm for regression purposes
 * @author Frank Prüfer
 * @date 08/29/2013
 */
#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <iostream>
#include <cmath>

#include "vislearning/regression/npregression/RegKNN.h"
#include "vislearning/math/mathbase/FullVector.h"

using namespace OBJREC;
using namespace NICE;

RegKNN::RegKNN ( const Config *_conf, NICE::VectorDistance *_distancefunc ) : distancefunc ( _distancefunc )
{
  K = _conf->gI ( "RegKNN", "K", 1 );
  if ( _distancefunc == NULL )
    distancefunc = new EuclidianDistance();
}

RegKNN::RegKNN ( const RegKNN & src ) : RegressionAlgorithm ( src )
{
  dataSet = src.dataSet;
  labelSet = src.labelSet;
  distancefunc = src.distancefunc;
  K = src.K;
}

RegKNN::~RegKNN ()
{
}

RegKNN* RegKNN::clone ( void ) const
{
  return new RegKNN ( *this );
}

void RegKNN::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet )
{
  fprintf ( stderr, "teach using all examples!\n" );
  // NOTE: this copy is crucial if _dataSet is cleared afterwards!
  // Therefore, take care NOT to call _dataSet.clear() anywhere outside this method.
  this->dataSet = _dataSet;
  this->labelSet = _labelSet.std_vector();

  std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;
}

void RegKNN::teach ( const NICE::Vector & x, const double & y )
{
  std::cerr << "RegKNN::teach one new example" << std::endl;

  // reject vectors containing NaN entries
  for ( size_t i = 0 ; i < x.size() ; i++ )
    if ( std::isnan ( x[i] ) )
    {
      fprintf ( stderr, "There is a NaN value within this vector: x[%d] = %f\n", (int)i, x[i] );
      std::cerr << x << std::endl;
      exit ( -1 );
    }

  dataSet.push_back ( x );
  labelSet.push_back ( y );

  std::cerr << "number of known training samples: " << dataSet.size() << std::endl;
}

double RegKNN::predict ( const NICE::Vector & x )
{
  if ( dataSet.size() <= 0 )
  {
    fprintf ( stderr, "RegKNN: please use the teach method first\n" );
    exit ( -1 );
  }

  FullVector distances ( dataSet.size() );

#pragma omp parallel for
  for ( uint i = 0 ; i < dataSet.size() ; i++ )
  {
    double distance = distancefunc->calculate ( x, dataSet[i] );

    if ( std::isnan ( distance ) )
    {
      fprintf ( stderr, "RegKNN::predict: NaN value found!\n" );
      std::cerr << x << std::endl;
      std::cerr << dataSet[i] << std::endl;
    }
    // every thread writes to a distinct index, so no critical section is needed
    distances[i] = distance;
  }

  std::vector<int> ind;
  distances.getSortedIndices ( ind );

  double response = 0.0;

  if ( dataSet.size() < (uint)K )
  {
    std::cerr << "RegKNN: Not enough data points! Reducing K from " << K
              << " to " << dataSet.size() << std::endl;
    K = dataSet.size();
  }

  if ( distances[ind[0]] == 0.0 )
  {
    std::cerr << "RegKNN: Warning: data point was already seen during training... using its label as prediction." << std::endl;
    return labelSet[ind[0]];
  }

  // normalize distances by the maximum element to keep the weights numerically stable
  double maxElement = distances.max();
  distances.multiply ( 1.0 / maxElement );

  // inverse-distance-weighted mean of the K nearest labels
  double weightSum = 0.0;
  for ( uint i = 0 ; i < (uint)K ; i++ )
  {
    response += 1.0 / distances[ind[i]] * labelSet[ind[i]];
    weightSum += 1.0 / distances[ind[i]];
  }

  return ( response / weightSum );
}
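
/*
 * ---------------------------------------------------------------------------
 * Usage sketch (illustration only, not part of the library code above): how
 * the regressor might be driven via teach() and predict(). The config file
 * name, the toy data, and the header path for Config are assumptions based on
 * the usual NICE/vislearning layout, not taken from this file.
 *
 *   #include <core/basics/Config.h>
 *   #include "vislearning/regression/npregression/RegKNN.h"
 *
 *   int main ()
 *   {
 *     NICE::Config conf ( "regknn.conf" );   // hypothetical config providing [RegKNN] K
 *     OBJREC::RegKNN knn ( &conf, NULL );    // NULL distance -> EuclidianDistance is used
 *
 *     // three toy 2-d training points with scalar targets
 *     NICE::VVector trainData;
 *     NICE::Vector trainLabels ( 3 );
 *     trainData.push_back ( NICE::Vector ( 2, 0.0 ) ); trainLabels[0] = 1.0;
 *     trainData.push_back ( NICE::Vector ( 2, 1.0 ) ); trainLabels[1] = 2.0;
 *     trainData.push_back ( NICE::Vector ( 2, 2.0 ) ); trainLabels[2] = 3.0;
 *
 *     knn.teach ( trainData, trainLabels );
 *     std::cerr << "prediction: " << knn.predict ( NICE::Vector ( 2, 0.5 ) ) << std::endl;
 *     return 0;
 *   }
 * ---------------------------------------------------------------------------
 */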