|
@@ -57,27 +57,27 @@ ClassificationResult VCNearestNeighbour::classify ( const NICE::Vector & x ) con
|
|
|
priority_queue< pair<double, int> > examples;
|
|
|
LOOP_ALL(teachSet)
|
|
|
{
|
|
|
- EACH(classno,y)
|
|
|
-
|
|
|
- double distance = distancefunc->calculate ( x, y );
|
|
|
-
|
|
|
- if ( isnan(distance) )
|
|
|
- {
|
|
|
- fprintf (stderr, "VCNearestNeighbour::classify: NAN value found !!\n");
|
|
|
- cerr << x << endl;
|
|
|
- cerr << y << endl;
|
|
|
- }
|
|
|
-
|
|
|
- if ( mindists[classno] > distance )
|
|
|
- mindists[classno] = distance;
|
|
|
-
|
|
|
- if ( mindist > distance )
|
|
|
- {
|
|
|
- minclass = classno;
|
|
|
- mindist = distance;
|
|
|
- }
|
|
|
- if ( K > 1 )
|
|
|
- examples.push ( pair<double, int> ( -distance, classno ) );
|
|
|
+ EACH(classno,y)
|
|
|
+
|
|
|
+ double distance = distancefunc->calculate ( x, y );
|
|
|
+
|
|
|
+ if ( isnan(distance) )
|
|
|
+ {
|
|
|
+ fprintf (stderr, "VCNearestNeighbour::classify: NAN value found !!\n");
|
|
|
+ cerr << x << endl;
|
|
|
+ cerr << y << endl;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ( mindists[classno] > distance )
|
|
|
+ mindists[classno] = distance;
|
|
|
+
|
|
|
+ if ( mindist > distance )
|
|
|
+ {
|
|
|
+ minclass = classno;
|
|
|
+ mindist = distance;
|
|
|
+ }
|
|
|
+ if ( K > 1 )
|
|
|
+ examples.push ( pair<double, int> ( -distance, classno ) );
|
|
|
}
|
|
|
|
|
|
if ( mindist == 0.0 )
|
|
@@ -88,24 +88,28 @@ ClassificationResult VCNearestNeighbour::classify ( const NICE::Vector & x ) con
|
|
|
fprintf (stderr, "class %d : %f\n", i, mindists.get(i) );
|
|
|
#endif
|
|
|
|
|
|
- if ( K > 1 ) {
|
|
|
- FullVector votes ( maxClassNo + 1 );
|
|
|
- votes.set(0.0);
|
|
|
- for ( int k = 0 ; k < K ; k++ )
|
|
|
- {
|
|
|
- const pair<double, int> & t = examples.top();
|
|
|
- votes[ t.second ]++;
|
|
|
- examples.pop();
|
|
|
- }
|
|
|
- votes.normalize();
|
|
|
- return ClassificationResult ( votes.maxElement(), votes );
|
|
|
- } else {
|
|
|
- for ( int i = 0 ; i < mindists.size() ; i++ )
|
|
|
- {
|
|
|
- mindists[i] = 1.0 / (mindists[i] + 1.0);
|
|
|
- }
|
|
|
- mindists.normalize();
|
|
|
- return ClassificationResult ( minclass, mindists );
|
|
|
+ if ( K > 1 )
|
|
|
+ {
|
|
|
+ FullVector votes ( maxClassNo + 1 );
|
|
|
+ votes.set(0.0);
|
|
|
+ for ( int k = 0 ; k < K ; k++ )
|
|
|
+ {
|
|
|
+ const pair<double, int> & t = examples.top();
|
|
|
+ votes[ t.second ]++;
|
|
|
+ examples.pop();
|
|
|
+ }
|
|
|
+ votes.normalize();
|
|
|
+ return ClassificationResult ( votes.maxElement(), votes );
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ //do we really want to do this? Only useful, if we want to obtain probability like scores
|
|
|
+// for ( int i = 0 ; i < mindists.size() ; i++ )
|
|
|
+// {
|
|
|
+// mindists[i] = 1.0 / (mindists[i] + 1.0);
|
|
|
+// }
|
|
|
+ //mindists.normalize();
|
|
|
+ return ClassificationResult ( minclass, mindists );
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -114,24 +118,45 @@ void VCNearestNeighbour::teach ( const LabeledSetVector & _teachSet )
|
|
|
{
|
|
|
fprintf (stderr, "teach using all !\n");
|
|
|
maxClassNo = _teachSet.getMaxClassno();
|
|
|
- teachSet = _teachSet;
|
|
|
+ //NOTE this is crucial if we clear _teachSet afterwards!
|
|
|
+ //therefore, take care NOT to call _techSet.clear() somewhere out of this method
|
|
|
+ this->teachSet = _teachSet;
|
|
|
+
|
|
|
+ std::cerr << "number of known training samples: " << this->teachSet.begin()->second.size() << std::endl;
|
|
|
+
|
|
|
+// //just for testing - remove everything but the first element
|
|
|
+// map< int, vector<NICE::Vector *> >::iterator it = this->teachSet.begin();
|
|
|
+// it++;
|
|
|
+// this->teachSet.erase(it, this->teachSet.end());
|
|
|
+// std::cerr << "keep " << this->teachSet.size() << " elements" << std::endl;
|
|
|
+
|
|
|
+
|
|
|
}
|
|
|
|
|
|
|
|
|
void VCNearestNeighbour::teach ( int classno, const NICE::Vector & x )
|
|
|
{
|
|
|
- fprintf (stderr, "teach!\n");
|
|
|
+ std::cerr << "VCNearestNeighbour::teach one new example" << std::endl;
|
|
|
+
|
|
|
for ( size_t i = 0 ; i < x.size() ; i++ )
|
|
|
- if ( isnan(x[i]) )
|
|
|
- {
|
|
|
- fprintf (stderr, "There is a NAN value in within this vector: x[%d] = %f\n", (int)i, x[i]);
|
|
|
- cerr << x << endl;
|
|
|
- exit(-1);
|
|
|
- }
|
|
|
+ if ( isnan(x[i]) )
|
|
|
+ {
|
|
|
+ fprintf (stderr, "There is a NAN value in within this vector: x[%d] = %f\n", (int)i, x[i]);
|
|
|
+ cerr << x << endl;
|
|
|
+ exit(-1);
|
|
|
+ }
|
|
|
|
|
|
if ( classno > maxClassNo ) maxClassNo = classno;
|
|
|
|
|
|
teachSet.add ( classno, x );
|
|
|
+
|
|
|
+ std::cerr << "adden class " << classno << " with feature " << x << std::endl;
|
|
|
+ int tmpCnt(0);
|
|
|
+ for (LabeledSetVector::const_iterator it = this->teachSet.begin(); it != this->teachSet.end(); it++)
|
|
|
+ {
|
|
|
+ tmpCnt += it->second.size();
|
|
|
+ }
|
|
|
+ std::cerr << "number of known training samples: " << tmpCnt << std::endl;
|
|
|
}
|
|
|
|
|
|
void VCNearestNeighbour::finishTeaching()
|
|
@@ -159,5 +184,3 @@ void VCNearestNeighbour::restore ( std::istream & is, int format )
|
|
|
teachSet.restore ( is, format );
|
|
|
maxClassNo = teachSet.getMaxClassno();
|
|
|
}
|
|
|
-
|
|
|
-
|