Browse Source

raw classifier debug

Erik Rodner 9 years ago
Parent
commit
99ca76a8c5
5 changed files with 104 additions and 37 deletions
  1. GMHIKernelRaw.cpp (+27 -22)
  2. GMHIKernelRaw.h (+1 -0)
  3. GPHIKRawClassifier.cpp (+32 -12)
  4. GPHIKRawClassifier.h (+2 -0)
  5. tests/TestGPHIKOnlineLearnable.cpp (+42 -3)

+ 27 - 22
GMHIKernelRaw.cpp

@@ -157,33 +157,38 @@ void GMHIKernelRaw::copyTable(double **src, double **dst) const
     }
 }
 
+void GMHIKernelRaw::updateTables ( const NICE::Vector _x ) const
+{
+    for (uint dim = 0; dim < this->num_dimension; dim++)
+    {
+      double alpha_sum = 0.0;
+      double alpha_times_x_sum = 0.0;
+      uint nnz = nnz_per_dimension[dim];
+
+      // loop through all elements in sorted order
+      sparseVectorElement *training_values_in_dim = examples_raw[dim];
+      for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
+      {
+        // index of the feature
+        int index = training_values_in_dim->example_index;
+        // element of the feature
+        double elem = training_values_in_dim->value;
+
+        alpha_times_x_sum += _x[index] * elem;
+        this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
+
+        alpha_sum += _x[index];
+        this->table_B[dim][cntNonzeroFeat] = alpha_sum;
+      }
+    }
+
+}
 
 
 /** multiply with a vector: A*x = y */
 void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
 {
   // STEP 1: initialize tables A and B
-  for (uint dim = 0; dim < this->num_dimension; dim++)
-  {
-    double alpha_sum = 0.0;
-    double alpha_times_x_sum = 0.0;
-    uint nnz = nnz_per_dimension[dim];
-
-    // loop through all elements in sorted order
-    sparseVectorElement *training_values_in_dim = examples_raw[dim];
-    for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
-    {
-      // index of the feature
-      int index = training_values_in_dim->example_index;
-      // element of the feature
-      double elem = training_values_in_dim->value;
-
-      alpha_times_x_sum += _x[index] * elem;
-      this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
-
-      alpha_sum += _x[index];
-      this->table_B[dim][cntNonzeroFeat] = alpha_sum;
-    }
-  }
+  updateTables(_x);
 
 
   _y.resize( this->num_examples );
   _y.set(0.0);
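
For reference, the tables filled by the new updateTables method are plain per-dimension cumulative sums over the training values sorted by feature value: table_A accumulates alpha_i * x_i and table_B accumulates alpha_i. A minimal standalone sketch of that computation for a single dimension (Entry and buildTables are illustrative names, not the NICE API):

    #include <cstddef>
    #include <vector>

    // One nonzero training value in a given dimension, as in sparseVectorElement.
    struct Entry { double value; int example_index; };

    // Cumulative tables for one dimension, assuming entries are sorted by value:
    // table_A[k] = sum of alpha[i] * x_i over the k+1 smallest entries,
    // table_B[k] = sum of alpha[i]       over the same entries.
    void buildTables(const std::vector<Entry> &sorted_entries,
                     const std::vector<double> &alpha,
                     std::vector<double> &table_A,
                     std::vector<double> &table_B)
    {
        table_A.resize(sorted_entries.size());
        table_B.resize(sorted_entries.size());

        double alpha_times_x_sum = 0.0;
        double alpha_sum = 0.0;
        for (std::size_t k = 0; k < sorted_entries.size(); ++k)
        {
            alpha_times_x_sum += alpha[sorted_entries[k].example_index] * sorted_entries[k].value;
            alpha_sum         += alpha[sorted_entries[k].example_index];
            table_A[k] = alpha_times_x_sum;
            table_B[k] = alpha_sum;
        }
    }

Given these tables, sum_i alpha_i * min(x_i, v) for a query value v reduces to table_A at the insertion position of v plus v times the remaining alpha mass (total alpha sum minus table_B at that position), which is what the binary search with upper_bound in classify() exploits.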

+ 1 - 0
GMHIKernelRaw.h

@@ -71,6 +71,7 @@ class GMHIKernelRaw : public GenericMatrix
     virtual ~GMHIKernelRaw();
 
     sparseVectorElement **getDataMatrix() const { return examples_raw; };
+    void updateTables ( const NICE::Vector _x ) const;
 };
 
 }

+ 32 - 12
GPHIKRawClassifier.cpp

@@ -95,6 +95,7 @@ void GPHIKRawClassifier::initFromConfig(const Config *_conf,
   this->confSection = _confSection;
   this->b_verbose   = _conf->gB( _confSection, "verbose", false);
   this->b_debug     = _conf->gB( _confSection, "debug", false);
+  this->f_tolerance = _conf->gD( _confSection, "f_tolerance", 1e-10);
 
 
   string ilssection = "FMKGPHyperparameterOptimization";
   uint ils_max_iterations = _conf->gI( ilssection, "ils_max_iterations", 1000 );
@@ -139,7 +140,7 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
   {
     uint classno = i->first;
     maxClassNo = std::max ( maxClassNo, classno );
-    double beta;
+    double beta = 0;
 
 
     if ( this->q != NULL ) {
       std::map<uint, double *>::const_iterator j = this->precomputedT.find ( classno );
@@ -157,7 +158,6 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
       std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
       const PrecomputedType & B = j->second;
 
-      beta = 0.0;
       for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++)
       {
         uint dim = i->first;
@@ -177,9 +177,6 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
         GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
         position = distance ( dataMatrix[dim], it );
 
-
-
-
         bool posIsZero ( position == 0 );
         if ( !posIsZero )
             position--;
@@ -200,7 +197,25 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
 
 
     _scores[ classno ] = beta;
   }
-  _scores.setDim ( maxClassNo + 1 );
+  _scores.setDim ( *this->knownClasses.rbegin() + 1 );
+
+
+  if ( this->knownClasses.size() > 2 )
+  { // multi-class classification
+    _result = _scores.maxElement();
+  }
+  else if ( this->knownClasses.size() == 2 ) // binary setting
+  {
+    uint class1 = *(this->knownClasses.begin());
+    uint class2 = *(this->knownClasses.rbegin());
+    uint class_for_which_we_have_a_score = _scores.begin()->first;
+    uint class_for_which_we_dont_have_a_score = (class1 == class_for_which_we_have_a_score ? class2 : class1);
+
+    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];
+
+    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
+  }
+
 }
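
In the binary setting only one regressor is trained, so classify() initially has a raw score for just one of the two known classes; the branch added above mirrors that score for the other class and decides by its sign. The same logic in isolation (illustrative names, not the classifier's interface):

    #include <cstdint>
    #include <map>

    // Sketch: derive the binary decision from a single class score.
    // 'scores' initially holds one entry, the score of 'scoredClass';
    // the other class receives the mirrored (negated) score.
    uint32_t binaryDecision(std::map<uint32_t, double> &scores,
                            uint32_t scoredClass,
                            uint32_t otherClass)
    {
        scores[otherClass] = -scores[scoredClass];
        return (scores[scoredClass] > 0.0) ? scoredClass : otherClass;
    }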
 
 
 
 
@@ -216,12 +231,12 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   }
   this->num_examples = _examples.size();
 
-  set<uint> classes;
+  this->knownClasses.clear();
   for ( uint i = 0; i < _labels.size(); i++ )
-    classes.insert((uint)_labels[i]);
+    this->knownClasses.insert((uint)_labels[i]);
 
 
   std::map<uint, NICE::Vector> binLabels;
-  for ( set<uint>::const_iterator j = classes.begin(); j != classes.end(); j++ )
+  for ( set<uint>::const_iterator j = knownClasses.begin(); j != knownClasses.end(); j++ )
   {
     uint current_class = *j;
     Vector labels_binary ( _labels.size() );
@@ -232,7 +247,7 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   }
 
   // handle special binary case
-  if ( classes.size() == 2 )
+  if ( knownClasses.size() == 2 )
   {
     std::map<uint, NICE::Vector>::iterator it = binLabels.begin();
     it++;
@@ -279,12 +294,17 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
           i != _binLabels.end(); i++ )
   {
     uint classno = i->first;
+    if (b_verbose)
+        std::cerr << "Training for class " << classno << endl;
     const Vector & y = i->second;
     Vector alpha;
     solver->solveLin( *gm, y, alpha );
     // TODO: get lookup tables, A, B, etc. and store them
-    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, gm->getTableA() ) );
-    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, gm->getTableB() ) );
+    gm->updateTables(alpha);
+    double **A = gm->getTableA();
+    double **B = gm->getTableB();
+    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, A ) );
+    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, B ) );
   }
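
The training loop above follows a one-vs-all scheme: one binary label vector per known class, one linear solve per class, and per-class lookup tables built from the resulting alpha via updateTables. A condensed sketch of the label-vector construction, assuming the usual +1/-1 encoding and that the binary special case keeps only one of the two mirrored vectors (both details are assumptions, not shown in this hunk):

    #include <cstddef>
    #include <map>
    #include <set>
    #include <vector>

    // Sketch: build one +/-1 label vector per known class (one-vs-all).
    // The encoding and the binary special case are assumptions for illustration.
    std::map<unsigned int, std::vector<double>>
    buildBinaryLabels(const std::vector<unsigned int> &labels)
    {
        std::set<unsigned int> knownClasses(labels.begin(), labels.end());

        std::map<unsigned int, std::vector<double>> binLabels;
        for (unsigned int c : knownClasses)
        {
            std::vector<double> y(labels.size());
            for (std::size_t i = 0; i < labels.size(); ++i)
                y[i] = (labels[i] == c) ? 1.0 : -1.0;
            binLabels[c] = y;
        }

        // Binary case: the two label vectors are mirror images, so a single
        // regressor suffices and only one vector needs to be kept.
        if (knownClasses.size() == 2)
            binLabels.erase(binLabels.begin());

        return binLabels;
    }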
 
 
 
 

+ 2 - 0
GPHIKRawClassifier.h

@@ -17,6 +17,7 @@
 #include <core/vector/SparseVectorT.h>
 #include <core/algebra/IterativeLinearSolver.h>
 //
+#include <set>
 #include "quantization/Quantization.h"
 #include "quantization/Quantization.h"
 #include "GMHIKernelRaw.h"
 #include "GMHIKernelRaw.h"
 
 
@@ -85,6 +86,7 @@ class GPHIKRawClassifier //: public NICE::Persistent
     double f_tolerance;
 
     GMHIKernelRaw *gm;
+    std::set<uint> knownClasses;
 
 
     /////////////////////////
     /////////////////////////

+ 42 - 3
tests/TestGPHIKOnlineLearnable.cpp

@@ -17,6 +17,7 @@
 
 
 // gp-hik-core includes
 #include "gp-hik-core/GPHIKClassifier.h"
+#include "gp-hik-core/GPHIKRawClassifier.h"
 
 
 #include "TestGPHIKOnlineLearnable.h"
 #include "TestGPHIKOnlineLearnable.h"
 
 
@@ -116,6 +117,33 @@ void evaluateClassifier ( NICE::Matrix & confusionMatrix,
   }
 }
 
+void evaluateClassifierRaw ( NICE::Matrix & confusionMatrix,
+                          const NICE::GPHIKRawClassifier * classifier,
+                          const NICE::Matrix & data,
+                          const NICE::Vector & yMulti,
+                          const std::map< uint,uint > & mapClNoToIdxTrain,
+                          const std::map< uint,uint > & mapClNoToIdxTest
+                        )
+{
+  int i_loopEnd  ( (int)data.rows() );
+
+  for (int i = 0; i < i_loopEnd ; i++)
+  {
+    NICE::Vector example_nonsparse ( data.getRow(i) );
+    NICE::SparseVector example (example_nonsparse);
+    NICE::SparseVector scores;
+    uint result;
+
+    // classify with incrementally trained classifier
+    classifier->classify( &example, result, scores );
+
+    uint gtlabel = mapClNoToIdxTest.find(yMulti[i])->second;
+    uint predlabel = mapClNoToIdxTrain.find(result)->second;
+    confusionMatrix( gtlabel, predlabel ) += 1.0;
+  }
+}
+
+
 void compareClassifierOutputs ( const NICE::GPHIKClassifier * classifier,
                                 const NICE::GPHIKClassifier * classifierScratch, 
                                 const NICE::Matrix & data
@@ -279,6 +307,7 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
   
   
   conf.sB ( "GPHIKClassifier", "eig_verbose", false);
   conf.sS ( "GPHIKClassifier", "optimization_method", "downhillsimplex");
+  conf.sB ( "GPHIKClassifier", "verbose", true);
   
   
   std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
   
@@ -335,6 +364,8 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
   NICE::GPHIKClassifier * classifierScratch = new NICE::GPHIKClassifier ( &conf );
   classifierScratch->train ( examplesTrain, yBinTrain );
   
+  NICE::GPHIKRawClassifier * classifierScratchRaw = new NICE::GPHIKRawClassifier ( &conf );
+  classifierScratchRaw->train ( examplesTrain, yBinTrain );
     
     
   // TEST both classifiers to produce equal results
   
@@ -363,17 +394,19 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
   
   
   NICE::Matrix confusionMatrix         ( mapClNoToIdxTrain.size(), mapClNoToIdxTest.size(), 0.0);
   NICE::Matrix confusionMatrixScratch  ( mapClNoToIdxTrain.size(), mapClNoToIdxTest.size(), 0.0);
-  
+  NICE::Matrix confusionMatrixScratchRaw  ( mapClNoToIdxTrain.size(), mapClNoToIdxTest.size(), 0.0);
     
     
   // ------------------------------------------
   // ------------- CLASSIFICATION --------------
   // ------------------------------------------  
   evaluateClassifier ( confusionMatrix, classifier, dataTest, yBinTest,
-                          mapClNoToIdxTrain,mapClNoToIdxTest ); 
+                          mapClNoToIdxTrain, mapClNoToIdxTest );
   
   
   evaluateClassifier ( confusionMatrixScratch, classifierScratch, dataTest, yBinTest,
-                          mapClNoToIdxTrain,mapClNoToIdxTest );  
+                          mapClNoToIdxTrain, mapClNoToIdxTest );
   
   
+  evaluateClassifierRaw ( confusionMatrixScratchRaw, classifierScratchRaw, dataTest, yBinTest,
+                          mapClNoToIdxTrain, mapClNoToIdxTest );
     
     
   // post-process confusion matrices
   confusionMatrix.normalizeColumnsL1();
@@ -382,20 +415,26 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
   confusionMatrixScratch.normalizeColumnsL1();
   double arrScratch ( confusionMatrixScratch.trace()/confusionMatrixScratch.cols() );
 
+  confusionMatrixScratchRaw.normalizeColumnsL1();
+  double arrScratchRaw ( confusionMatrixScratchRaw.trace()/confusionMatrixScratchRaw.cols() );
   
   
   if ( verbose ) 
   {
     std::cerr << "confusionMatrix: " << confusionMatrix  << std::endl;
   
     std::cerr << "confusionMatrixScratch: " << confusionMatrixScratch << std::endl;
+
+    std::cerr << "confusionMatrixScratchRaw: " << confusionMatrixScratchRaw << std::endl;
   } 
   
   CPPUNIT_ASSERT_DOUBLES_EQUAL( arr, arrScratch, 1e-8);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( arrScratch, arrScratchRaw, 1e-8);
   
   
   // don't waste memory
   
   delete classifier;
   delete classifierScratch;  
+  delete classifierScratchRaw;
   
   
   for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
   {