@@ -24,100 +24,112 @@ using namespace NICE;

RegKNN::RegKNN ( const Config *_conf, NICE::VectorDistance<double> *_distancefunc ) : distancefunc (_distancefunc)
{
-  K = _conf->gI("RegKNN", "K", 1 );
-  if ( _distancefunc == NULL )
-    distancefunc = new EuclidianDistance<double>();
+  K = _conf->gI("RegKNN", "K", 1 );
+  if ( _distancefunc == NULL )
+    distancefunc = new EuclidianDistance<double>();
}

RegKNN::RegKNN ( const RegKNN & src ) : RegressionAlgorithm ( src )
{
-  dataSet = src.dataSet;
-  labelSet = src.labelSet;
-  distancefunc = src.distancefunc;
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  distancefunc = src.distancefunc;
+  K = src.K;
}

-RegKNN::~RegKNN()
+RegKNN::~RegKNN ()
{
}

+RegKNN* RegKNN::clone ( void ) const
+{
+  return new RegKNN(*this);
+}
+
+
void RegKNN::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet)
{
-  fprintf (stderr, "teach using all !\n");
-  //NOTE this is crucial if we clear _teachSet afterwards!
-  //therefore, take care NOT to call _techSet.clear() somewhere out of this method
-  this->dataSet = _dataSet;
-  this->labelSet = _labelSet.std_vector();
-
-  std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;
+  fprintf (stderr, "teach using all !\n");
+  //NOTE this is crucial if we clear _teachSet afterwards!
+  //therefore, take care NOT to call _teachSet.clear() somewhere out of this method
+  this->dataSet = _dataSet;
+  this->labelSet = _labelSet.std_vector();
+
+  std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;

}

void RegKNN::teach ( const NICE::Vector & x, const double & y )
{
-  std::cerr << "RegKNN::teach one new example" << std::endl;
-
-  for ( size_t i = 0 ; i < x.size() ; i++ )
-    if ( isnan(x[i]) )
-    {
-      fprintf (stderr, "There is a NAN value in within this vector: x[%d] = %f\n", (int)i, x[i]);
-      cerr << x << endl;
-      exit(-1);
-    }
-
-  dataSet.push_back ( x );
-
-  labelSet.push_back ( y );
-
-  std::cerr << "number of known training samples: " << dataSet.size()<< std::endl;
+  std::cerr << "RegKNN::teach one new example" << std::endl;
+
+  for ( size_t i = 0 ; i < x.size() ; i++ )
+    if ( isnan(x[i]) )
+    {
+      fprintf (stderr, "There is a NAN value within this vector: x[%d] = %f\n", (int)i, x[i]);
+      cerr << x << endl;
+      exit(-1);
+    }
+
+  dataSet.push_back ( x );
+
+  labelSet.push_back ( y );
+
+  std::cerr << "number of known training samples: " << dataSet.size()<< std::endl;
}

double RegKNN::predict ( const NICE::Vector & x )
{
-  FullVector distances(dataSet.size());
+  FullVector distances(dataSet.size());

-  if ( dataSet.size() <= 0 ) {
-    fprintf (stderr, "RegKNN: please use the train method first\n");
-    exit(-1);
-  }
+  if ( dataSet.size() <= 0 )
+  {
+    fprintf (stderr, "RegKNN: please use the teach method first\n");
+    exit(-1);
+  }

#pragma omp parallel for
-  for(uint i = 0; i < dataSet.size(); i++){
-
-    double distance = distancefunc->calculate (x,dataSet[i]);
-
-    if ( isnan(distance) ){
-      fprintf (stderr, "RegKNN::predict: NAN value found !!\n");
-      cerr << x << endl;
-      cerr << dataSet[i] << endl;
-    }
-// #pragma omp critical
-    distances[i] = distance;
-  }
-
-  std::vector<int> ind;
-  distances.getSortedIndices(ind);
-
-  double response = 0.0;
-
-  if ( dataSet.size() < K ){
-    K = dataSet.size();
-    cerr<<"RegKNN: Not enough datapoints! Setting K to: "<< K <<endl;
-  }
-
-  if ( distances[ind[0]] == 0.0 ) {
-    cerr<<"RegKNN: Warning: datapoint was already seen during training... using its label as prediction."<<endl;
-    return labelSet[ind[0]];
+  for(uint i = 0; i < dataSet.size(); i++)
+  {
+    double distance = distancefunc->calculate (x,dataSet[i]);
+
+    if ( isnan(distance) )
+    {
+      fprintf (stderr, "RegKNN::predict: NAN value found !!\n");
+      cerr << x << endl;
+      cerr << dataSet[i] << endl;
    }
+// #pragma omp critical
+    distances[i] = distance;
+  }

-  double maxElement = distances.max(); //normalize distances
-  distances.multiply(1.0/maxElement);
-
-  double weightSum = 0.0;
+  std::vector<int> ind;
+  distances.getSortedIndices(ind);
+
+  double response = 0.0;

-  for(uint i = 0; i < K; i++){
-    response += 1.0/distances[ind[i]] * labelSet[ind[i]];
-    weightSum += 1.0/distances[ind[i]];
-  }
-
-  return ( response / weightSum );
+  if ( dataSet.size() < K )
+  {
+    cerr << K << endl;
+    K = dataSet.size();
+    cerr<<"RegKNN: Not enough datapoints! Setting K to: "<< K <<endl;
+  }
+
+  if ( distances[ind[0]] == 0.0 ) {
+    cerr<<"RegKNN: Warning: datapoint was already seen during training... using its label as prediction."<<endl;
+    return labelSet[ind[0]];
+  }
+
+  double maxElement = distances.max(); //normalize distances
+  distances.multiply(1.0/maxElement);
+
+  double weightSum = 0.0;
+
+  for(uint i = 0; i < K; i++)
+  {
+    response += 1.0/distances[ind[i]] * labelSet[ind[i]];
+    weightSum += 1.0/distances[ind[i]];
+  }
+
+  return ( response / weightSum );
}
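
For reference, a minimal usage sketch of the interface touched by this patch (constructor, teach, predict). It is not part of the diff: the include paths, the OBJREC namespace, and the default-constructed Config (which leaves K at its default of 1, see the gI call above) are assumptions; only the member functions shown in the patch are relied on.

// Minimal usage sketch, assuming NICE-style headers and the OBJREC namespace.
#include <iostream>

#include <core/basics/Config.h>    // assumed NICE header paths
#include <core/vector/VVector.h>
#include "RegKNN.h"                // hypothetical include for the class patched above

int main ()
{
  NICE::Config conf;                        // empty config: K falls back to 1 (cf. gI("RegKNN", "K", 1))
  OBJREC::RegKNN regressor ( &conf, NULL ); // NULL selects EuclidianDistance, as in the constructor

  // two toy training samples with scalar labels
  NICE::VVector trainData;
  trainData.push_back ( NICE::Vector ( 3, 0.0 ) );
  trainData.push_back ( NICE::Vector ( 3, 1.0 ) );

  NICE::Vector trainLabels ( 2 );
  trainLabels[0] = 0.0;
  trainLabels[1] = 1.0;

  regressor.teach ( trainData, trainLabels );

  // predict() returns the inverse-distance-weighted average of the K nearest labels
  NICE::Vector query ( 3, 0.5 );
  std::cerr << "prediction: " << regressor.predict ( query ) << std::endl;

  return 0;
}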