Bläddra i källkod

raw classification (untested)

Erik Rodner 9 år sedan
förälder
incheckning
7d3842af12
4 ändrade filer med 177 tillägg och 44 borttagningar
  1. 55 4
      GMHIKernelRaw.cpp
  2. 11 2
      GMHIKernelRaw.h
  3. 90 25
      GPHIKRawClassifier.cpp
  4. 21 13
      GPHIKRawClassifier.h

+ 55 - 4
GMHIKernelRaw.cpp

@@ -109,6 +109,7 @@ void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_
     }
     }
 
 
     // pre-allocate the A and B matrices
     // pre-allocate the A and B matrices
+    this->table_A = allocateTable();
     this->table_A = new double *[this->num_dimension];
     this->table_A = new double *[this->num_dimension];
     this->table_B = new double *[this->num_dimension];
     this->table_B = new double *[this->num_dimension];
     for (uint i = 0; i < this->num_dimension; i++)
     for (uint i = 0; i < this->num_dimension; i++)
@@ -124,6 +125,37 @@ void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_
     }
     }
 }
 }
 
 
+/**
+ * Allocate a jagged table with one row per feature dimension.
+ * Row d gets nnz_per_dimension[d] doubles, or NULL when that count is zero.
+ * Row contents are left uninitialised. The returned arrays are raw new[]
+ * allocations that this function does not release.
+ */
+double **GMHIKernelRaw::allocateTable() const
+{
+    double **rows = new double *[this->num_dimension];
+    for (uint dim = 0; dim < this->num_dimension; ++dim)
+    {
+        const uint entries = this->nnz_per_dimension[dim];
+        // dimensions without any stored entry get no row at all
+        rows[dim] = (entries > 0) ? new double [ entries ] : NULL;
+    }
+    return rows;
+}
+
+/**
+ * Copy a jagged table element by element.
+ * For each dimension d the first nnz_per_dimension[d] entries of src[d] are
+ * written to dst[d]; when that count is zero, dst[d] is set to NULL instead.
+ * dst must already provide rows of sufficient length.
+ */
+void GMHIKernelRaw::copyTable(double **src, double **dst) const
+{
+    for (uint dim = 0; dim < this->num_dimension; ++dim)
+    {
+        const uint entries = this->nnz_per_dimension[dim];
+        if (entries == 0)
+        {
+            // nothing stored in this dimension: mark the row as absent
+            dst[dim] = NULL;
+            continue;
+        }
+
+        const double *from = src[dim];
+        double *to = dst[dim];
+        for (uint k = 0; k < entries; ++k)
+            to[k] = from[k];
+    }
+}
+
+
 /** multiply with a vector: A*x = y */
 /** multiply with a vector: A*x = y */
 void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
 void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
 {
 {
@@ -158,8 +190,9 @@ void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
   for (uint dim = 0; dim < this->num_dimension; dim++)
   for (uint dim = 0; dim < this->num_dimension; dim++)
   {
   {
     uint nnz = this->nnz_per_dimension[dim];
     uint nnz = this->nnz_per_dimension[dim];
+    uint nz  = this->num_examples - nnz;
 
 
-    if ( nnz == this->num_examples ) {
+    if ( nnz == 0 ) {
       // all values are zero in this dimension :) and we can simply ignore the feature
       // all values are zero in this dimension :) and we can simply ignore the feature
       continue;
       continue;
     }
     }
@@ -172,16 +205,14 @@ void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
       double fval = training_values_in_dim->value;
       double fval = training_values_in_dim->value;
 
 
       double firstPart( this->table_A[dim][inversePosition] );
       double firstPart( this->table_A[dim][inversePosition] );
-      double secondPart( this->table_B[dim][this->num_examples-1-nnz] - this->table_B[dim][inversePosition]);
+      double secondPart( this->table_B[dim][this->num_examples-1-nz] - this->table_B[dim][inversePosition]);
 
 
       _y[cntNonzeroFeat] += firstPart + fval * secondPart;
       _y[cntNonzeroFeat] += firstPart + fval * secondPart;
     }
     }
   }
   }
 
 
   for (uint feat = 0; feat < this->num_examples; feat++)
   for (uint feat = 0; feat < this->num_examples; feat++)
-  {
     _y[feat] += this->d_noise * _x[feat];
     _y[feat] += this->d_noise * _x[feat];
-  }
 
 
 
 
 }
 }
@@ -200,4 +231,24 @@ uint GMHIKernelRaw::cols () const
   return num_examples;
   return num_examples;
 }
 }
 
 
+/**
+ * Return a freshly allocated copy of table_A (same jagged layout as
+ * produced by allocateTable). The copy is not released by this class here.
+ */
+double **GMHIKernelRaw::getTableA() const
+{
+    double **copyOfA = this->allocateTable();
+    this->copyTable(this->table_A, copyOfA);
+    return copyOfA;
+}
+
+/**
+ * Return a freshly allocated copy of table_B (same jagged layout as
+ * produced by allocateTable). The copy is not released by this class here.
+ */
+double **GMHIKernelRaw::getTableB() const
+{
+    double **copyOfB = this->allocateTable();
+    this->copyTable(this->table_B, copyOfB);
+    return copyOfB;
+}
 
 
+/**
+ * Return a newly allocated array of num_dimension entries holding a copy
+ * of nnz_per_dimension. The array is a raw new[] allocation.
+ */
+uint *GMHIKernelRaw::getNNZPerDimension() const
+{
+    uint *counts = new uint[this->num_dimension];
+    uint dim = 0;
+    while (dim < this->num_dimension)
+    {
+        counts[dim] = this->nnz_per_dimension[dim];
+        ++dim;
+    }
+    return counts;
+}

+ 11 - 2
GMHIKernelRaw.h

@@ -21,8 +21,7 @@ namespace NICE {
 
 
 class GMHIKernelRaw : public GenericMatrix
 class GMHIKernelRaw : public GenericMatrix
 {
 {
-
-  protected:
+  public:
     typedef struct sparseVectorElement {
     typedef struct sparseVectorElement {
         uint example_index;
         uint example_index;
         double value;
         double value;
@@ -34,6 +33,8 @@ class GMHIKernelRaw : public GenericMatrix
 
 
     } sparseVectorElement;
     } sparseVectorElement;
 
 
+  protected:
+
     sparseVectorElement **examples_raw;
     sparseVectorElement **examples_raw;
     double **table_A;
     double **table_A;
     double **table_B;
     double **table_B;
@@ -45,6 +46,8 @@ class GMHIKernelRaw : public GenericMatrix
 
 
     void initData ( const std::vector< const NICE::SparseVector *> & examples );
     void initData ( const std::vector< const NICE::SparseVector *> & examples );
     void cleanupData ();
     void cleanupData ();
+    double **allocateTable() const;
+    void copyTable(double **src, double **dst) const;
 
 
   public:
   public:
 
 
@@ -60,8 +63,14 @@ class GMHIKernelRaw : public GenericMatrix
     /** get the number of columns in A */
     /** get the number of columns in A */
     virtual uint cols () const;
     virtual uint cols () const;
 
 
+    double **getTableA() const;
+    double **getTableB() const;
+    uint *getNNZPerDimension() const;
+
     /** simple destructor */
     /** simple destructor */
     virtual ~GMHIKernelRaw();
     virtual ~GMHIKernelRaw();
+
+    sparseVectorElement **getDataMatrix() const { return examples_raw; };
 };
 };
 
 
 }
 }

+ 90 - 25
GPHIKRawClassifier.cpp

@@ -39,6 +39,7 @@ GPHIKRawClassifier::GPHIKRawClassifier( )
 {
 {
   this->b_isTrained = false;
   this->b_isTrained = false;
   this->confSection = "";
   this->confSection = "";
+  this->nnz_per_dimension = NULL;
 
 
   // in order to be sure about all necessary variables be setup with default values, we
   // in order to be sure about all necessary variables be setup with default values, we
   // run initFromConfig with an empty config
   // run initFromConfig with an empty config
@@ -57,6 +58,7 @@ GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
 
 
   this->b_isTrained = false;
   this->b_isTrained = false;
   this->confSection = "";
   this->confSection = "";
+  this->q = NULL;
 
 
   ///////////
   ///////////
   // here comes the new code part different from the empty constructor
   // here comes the new code part different from the empty constructor
@@ -76,6 +78,7 @@ GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
     NICE::Config tmpConfEmpty ;
     NICE::Config tmpConfEmpty ;
     this->initFromConfig ( &tmpConfEmpty, this->confSection );
     this->initFromConfig ( &tmpConfEmpty, this->confSection );
   }
   }
+
 }
 }
 
 
 GPHIKRawClassifier::~GPHIKRawClassifier()
 GPHIKRawClassifier::~GPHIKRawClassifier()
@@ -118,43 +121,86 @@ std::set<uint> GPHIKRawClassifier::getKnownClassNumbers ( ) const
 //                      CLASSIFIER STUFF
 //                      CLASSIFIER STUFF
 ///////////////////// ///////////////////// /////////////////////
 ///////////////////// ///////////////////// /////////////////////
 
 
-void GPHIKRawClassifier::classify ( const SparseVector * _example,
+
+
+// Compute one score per class that has precomputed tables and store it in _scores.
+//
+// _xstar  : sparse test example (pairs of dimension index and feature value)
+// _result : set to the class number with the largest score written to _scores
+// _scores : cleared, then filled with one entry per class in precomputedA;
+//           its dimension is set to (largest class number + 1)
+//
+// Throws an Exception if the classifier is not trained or if a table that is
+// needed for a class is missing.
+//
+// NOTE(review): train() keeps only one label vector in the two-class case, so
+// _result then always names that single class -- the sign of its score has to
+// be interpreted by the caller; confirm the intended binary convention.
+// NOTE(review): dim is taken from _xstar without a range check against the
+// training dimensionality, which is not visible from here.
+void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                  uint & _result,
                                  uint & _result,
                                  SparseVector & _scores
                                  SparseVector & _scores
                                ) const
                                ) const
 {
 {
   if ( ! this->b_isTrained )
   if ( ! this->b_isTrained )
      fthrow(Exception, "Classifier not trained yet -- aborting!" );
      fthrow(Exception, "Classifier not trained yet -- aborting!" );
-
   _scores.clear();
   _scores.clear();
 
 
-  if ( this->b_debug )
+  GMHIKernelRaw::sparseVectorElement **dataMatrix = gm->getDataMatrix();
+
+  uint maxClassNo = 0;
+  // running arg-max over the scores, so that _result is assigned whenever a score exists
+  bool haveBestScore = false;
+  double bestScore = 0.0;
+  for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
   {
   {
-    std::cerr << "GPHIKRawClassifier::classify (sparse)" << std::endl;
-    _example->store( std::cerr );
-  }
+    uint classno = i->first;
+    maxClassNo = std::max ( maxClassNo, classno );
+    // must start at zero: both branches below only accumulate into beta
+    double beta = 0.0;
 
 
-  // MAGIC happens here....
+    if ( this->q != NULL ) {
+      // quantized case: one lookup per non-zero feature of the test example
+      std::map<uint, double *>::const_iterator j = this->precomputedT.find ( classno );
+      if ( j == this->precomputedT.end() )
+        fthrow(Exception, "No precomputed lookup table T for class " << classno << " -- aborting!" );
+      double *T = j->second;
+      for (SparseVector::const_iterator k = _xstar->begin(); k != _xstar->end(); k++ )
+      {
+        uint dim = k->first;
+        double v = k->second;
+        uint qBin = q->quantize( v, dim );
 
 
+        beta += T[dim * q->getNumberOfBins() + qBin];
+      }
+    } else {
+      const PrecomputedType & A = i->second;
+      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
+      if ( j == this->precomputedB.end() )
+        fthrow(Exception, "No precomputed table B for class " << classno << " -- aborting!" );
+      const PrecomputedType & B = j->second;
 
 
-  // ...
-  if ( this->b_debug )
-  {
-    _scores.store ( std::cerr );
-    std::cerr << "_result: " << _result << std::endl;
-  }
+      for (SparseVector::const_iterator k = _xstar->begin(); k != _xstar->end(); k++)
+      {
+        uint dim = k->first;
+        double fval = k->second;
 
 
-  if ( _scores.size() == 0 ) {
-    fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << _example->size() );
-  }
-}
+        // number of stored entries in this dimension; the rows of A, B and
+        // dataMatrix are indexed 0 .. nnz-1
+        uint nnz = this->nnz_per_dimension[dim];
 
 
-void GPHIKRawClassifier::classify ( const NICE::Vector * _example,
-                                 uint & _result,
-                                 SparseVector & _scores
-                               ) const
-{
-    fthrow(Exception, "GPHIKRawClassifier::classify( Vector ... ) not yet implemented");
+        if ( nnz == 0 ) continue;
+        if ( fval < this->f_tolerance ) continue;
+
+        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
+        // position = number of stored entries that do not compare greater than
+        // fval, i.e. a value in 0 .. nnz
+        // (assumes sparseVectorElement is ordered by its value -- TODO confirm,
+        //  the comparison operator is not visible here)
+        GMHIKernelRaw::sparseVectorElement fval_element;
+        fval_element.value = fval;
+        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
+        uint position = distance ( dataMatrix[dim], it );
+
+        // The search runs over the nnz stored entries only, so the table index
+        // is position-1 directly. The previous code additionally subtracted
+        // the number of zero entries, which wraps around for unsigned values
+        // and reads outside the nnz-sized rows.
+        if ( position == 0 )
+        {
+          // no stored entry at or below fval: only the total of B contributes
+          beta += fval * B[dim][nnz-1];
+        }
+        else
+        {
+          double firstPart = A[dim][position-1];
+          double secondPart = B[dim][nnz-1] - B[dim][position-1];
+
+          // but apply using the transformed one
+          beta += firstPart + secondPart* fval;
+        }
+      }
+    }
+
+    _scores[ classno ] = beta;
+    if ( !haveBestScore || beta > bestScore )
+    {
+      haveBestScore = true;
+      bestScore = beta;
+      _result = classno;
+    }
+  }
+  _scores.setDim ( maxClassNo + 1 );
 }
 }
 
 
 
 
@@ -168,6 +214,7 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   {
   {
     fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
     fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
   }
   }
+  this->num_examples = _examples.size();
 
 
   set<uint> classes;
   set<uint> classes;
   for ( uint i = 0; i < _labels.size(); i++ )
   for ( uint i = 0; i < _labels.size(); i++ )
@@ -184,6 +231,13 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     binLabels.insert ( pair<uint, NICE::Vector>( current_class, labels_binary) );
     binLabels.insert ( pair<uint, NICE::Vector>( current_class, labels_binary) );
   }
   }
 
 
+  // handle special binary case
+  if ( classes.size() == 2 )
+  {
+    std::map<uint, NICE::Vector>::iterator it = binLabels.begin();
+    it++;
+    binLabels.erase( binLabels.begin(), it );
+  }
 
 
   train ( _examples, binLabels );
   train ( _examples, binLabels );
 }
 }
@@ -210,18 +264,27 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   Timer t;
   Timer t;
   t.start();
   t.start();
 
 
+  precomputedA.clear();
+  precomputedB.clear();
+  precomputedT.clear();
+
   // sort examples in each dimension and "transpose" the feature matrix
   // sort examples in each dimension and "transpose" the feature matrix
   // set up the GenericMatrix interface
   // set up the GenericMatrix interface
-  GMHIKernelRaw gm ( _examples, this->d_noise );
+  gm = new GMHIKernelRaw ( _examples, this->d_noise );
+  nnz_per_dimension = gm->getNNZPerDimension();
 
 
   // solve linear equations for each class
   // solve linear equations for each class
+  // be careful when parallising this!
   for ( map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
   for ( map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
           i != _binLabels.end(); i++ )
           i != _binLabels.end(); i++ )
   {
   {
+    uint classno = i->first;
     const Vector & y = i->second;
     const Vector & y = i->second;
     Vector alpha;
     Vector alpha;
-    solver->solveLin( gm, y, alpha );
+    solver->solveLin( *gm, y, alpha );
     // TODO: get lookup tables, A, B, etc. and store them
     // TODO: get lookup tables, A, B, etc. and store them
+    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, gm->getTableA() ) );
+    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, gm->getTableB() ) );
   }
   }
 
 
 
 
@@ -236,6 +299,8 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   // clean up all examples ??
   // clean up all examples ??
   if ( this->b_verbose )
   if ( this->b_verbose )
     std::cerr << "Learning finished" << std::endl;
     std::cerr << "Learning finished" << std::endl;
+
+
 }
 }
 
 
 
 

+ 21 - 13
GPHIKRawClassifier.h

@@ -17,6 +17,8 @@
 #include <core/vector/SparseVectorT.h>
 #include <core/vector/SparseVectorT.h>
 #include <core/algebra/IterativeLinearSolver.h>
 #include <core/algebra/IterativeLinearSolver.h>
 //
 //
+#include "quantization/Quantization.h"
+#include "GMHIKernelRaw.h"
 
 
 namespace NICE {
 namespace NICE {
 
 
@@ -64,6 +66,25 @@ class GPHIKRawClassifier //: public NICE::Persistent
     double d_noise;
     double d_noise;
 
 
     IterativeLinearSolver *solver;
     IterativeLinearSolver *solver;
+    /** object performing feature quantization */
+    NICE::Quantization *q;
+
+    typedef double ** PrecomputedType;
+
+    /** precomputed arrays A (1 per class) needed for classification without quantization  */
+    std::map< uint, PrecomputedType > precomputedA;
+    /** precomputed arrays B (1 per class) needed for classification without quantization  */
+    std::map< uint, PrecomputedType > precomputedB;
+
+    /** precomputed LUTs (1 per class) needed for classification with quantization  */
+    std::map< uint, double * > precomputedT;
+
+    uint *nnz_per_dimension;
+    uint num_examples;
+
+    double f_tolerance;
+
+    GMHIKernelRaw *gm;
 
 
     /////////////////////////
     /////////////////////////
     /////////////////////////
     /////////////////////////
@@ -126,19 +147,6 @@ class GPHIKRawClassifier //: public NICE::Persistent
                     NICE::SparseVector & _scores
                     NICE::SparseVector & _scores
                   ) const;
                   ) const;
 
 
-    /**
-     * @brief classify a given example with the previously learnt model
-     * NOTE: whenever possible, you should the sparse version to obtain significantly smaller computation times*
-     * @author Alexander Freytag, Erik Rodner
-     * @param example (non-sparse Vector) to be classified given in a non-sparse representation
-     * @param result (int) class number of most likely class
-     * @param scores (SparseVector) classification scores for known classes
-     */
-    void classify ( const NICE::Vector * _example,
-                    uint & _result,
-                    NICE::SparseVector & _scores
-                  ) const;
-
     /**
     /**
      * @brief train this classifier using a given set of examples and a given set of binary label vectors
      * @brief train this classifier using a given set of examples and a given set of binary label vectors
      * @date 18-10-2012 (dd-mm-yyyy)
      * @date 18-10-2012 (dd-mm-yyyy)