/**
 * @file RegKNN.cpp
 * @brief Implementation of the k-Nearest-Neighbor algorithm for regression purposes
 * @author Frank Prüfer
 * @date 08/29/2013
 */
#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <iostream>
#include <cmath>

#include "vislearning/regression/npregression/RegKNN.h"
#include "vislearning/math/mathbase/FullVector.h"

using namespace OBJREC;
using namespace std;
using namespace NICE;
RegKNN::RegKNN ( const Config *_conf, NICE::VectorDistance<double> *_distancefunc ) : distancefunc ( _distancefunc )
{
  K = _conf->gI ( "RegKNN", "K", 1 );

  // fall back to the Euclidian distance if no distance functor was given
  if ( _distancefunc == NULL )
    distancefunc = new EuclidianDistance<double>();
}
RegKNN::RegKNN ( const RegKNN & src ) : RegressionAlgorithm ( src )
{
  dataSet  = src.dataSet;
  labelSet = src.labelSet;
  // note: the distance functor pointer is shared between the copies, not deep-copied
  distancefunc = src.distancefunc;
  K = src.K;
}

RegKNN::~RegKNN ()
{
}

RegKNN* RegKNN::clone ( void ) const
{
  return new RegKNN ( *this );
}
void RegKNN::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet )
{
  fprintf ( stderr, "teach using all examples!\n" );

  // NOTE: copying the data here is crucial if _dataSet is cleared afterwards!
  // Therefore, take care NOT to call _dataSet.clear() anywhere outside of this method.
  this->dataSet  = _dataSet;
  this->labelSet = _labelSet.std_vector();

  std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;
}
void RegKNN::teach ( const NICE::Vector & x, const double & y )
{
  std::cerr << "RegKNN::teach one new example" << std::endl;

  for ( size_t i = 0 ; i < x.size() ; i++ )
    if ( std::isnan ( x[i] ) )
    {
      fprintf ( stderr, "There is a NaN value within this vector: x[%d] = %f\n", (int)i, x[i] );
      cerr << x << endl;
      exit ( -1 );
    }

  dataSet.push_back ( x );
  labelSet.push_back ( y );

  std::cerr << "number of known training samples: " << dataSet.size() << std::endl;
}
double RegKNN::predict ( const NICE::Vector & x )
{
  if ( dataSet.size() == 0 )
  {
    fprintf ( stderr, "RegKNN: please use the teach method first\n" );
    exit ( -1 );
  }

  FullVector distances ( dataSet.size() );
  // compute the distance of x to every stored training sample (in parallel if OpenMP is available)
  #pragma omp parallel for
  for ( uint i = 0; i < dataSet.size(); i++ )
  {
    double distance = distancefunc->calculate ( x, dataSet[i] );

    if ( std::isnan ( distance ) )
    {
      fprintf ( stderr, "RegKNN::predict: NaN value found!\n" );
      cerr << x << endl;
      cerr << dataSet[i] << endl;
    }

    // each thread writes to a distinct index, so no critical section is needed here
    distances[i] = distance;
  }
  std::vector<int> ind;
  distances.getSortedIndices ( ind );

  double response = 0.0;

  if ( dataSet.size() < (uint)K )
  {
    cerr << "RegKNN: Not enough datapoints for K=" << K << "! Setting K to: " << dataSet.size() << endl;
    K = dataSet.size();
  }
  if ( distances[ind[0]] == 0.0 )
  {
    cerr << "RegKNN: Warning: datapoint was already seen during training... using its label as prediction." << endl;
    return labelSet[ind[0]];
  }

  // normalize distances by their maximum (the weighted average below is invariant to this scaling)
  double maxElement = distances.max();
  distances.multiply ( 1.0 / maxElement );
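  /* The prediction is the inverse-distance weighted average of the K nearest labels:
   *
   *   response = ( sum_{i=1..K} (1/d_i) * y_i ) / ( sum_{i=1..K} (1/d_i) )
   *
   * where d_i is the (normalized) distance to the i-th nearest training sample
   * and y_i its target value.
   */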
  double weightSum = 0.0;

  for ( uint i = 0; i < (uint)K; i++ )
  {
    response  += 1.0 / distances[ind[i]] * labelSet[ind[i]];
    weightSum += 1.0 / distances[ind[i]];
  }

  return ( response / weightSum );
}
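
/* Usage sketch (illustrative only): the exact Config setup and data handling are
 * assumptions based on the interfaces used above, not part of this file.
 *
 *   NICE::Config conf ( "regknn.conf" );        // hypothetical config providing "RegKNN::K"
 *   RegKNN regressor ( &conf, NULL );           // NULL -> Euclidian distance is used
 *
 *   NICE::VVector trainData;                    // filled with training feature vectors
 *   NICE::Vector  trainLabels;                  // corresponding target values
 *   regressor.teach ( trainData, trainLabels );
 *
 *   NICE::Vector query;                         // a new feature vector
 *   double prediction = regressor.predict ( query );
 */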