Bladeren bron

first structure for the raw classifier

Erik Rodner 9 jaren geleden
bovenliggende
commit
c84ad85728
4 gewijzigde bestanden met toevoegingen van 564 en 0 verwijderingen
  1. 101 0
      GMHIKernelRaw.cpp
  2. 64 0
      GMHIKernelRaw.h
  3. 235 0
      GPHIKRawClassifier.cpp
  4. 164 0
      GPHIKRawClassifier.h

+ 101 - 0
GMHIKernelRaw.cpp

@@ -0,0 +1,101 @@
+/**
+* @file GMHIKernelRaw.cpp
+* @brief Fast multiplication with histogram intersection kernel matrices (Implementation)
+* @author Erik Rodner, Alexander Freytag
+* @date 01/02/2012
+
+*/
+#include <algorithm>
+#include <cstddef>
+#include <iostream>
+#include <vector>
+
+#include <core/vector/VVector.h>
+#include <core/basics/Timer.h>
+
+#include "GMHIKernelRaw.h"
+
+using namespace NICE;
+using namespace std;
+
+
+/**
+ * Build the raw per-dimension representation from the given examples.
+ * All members are first put into a defined (empty) state, then initData()
+ * fills them. initData() throws an Exception if no examples are given.
+ *
+ * @param _examples training examples in sparse representation
+ */
+GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples )
+    : examples_raw(NULL), nnz_per_dimension(NULL), num_dimension(0), num_examples(0)
+{
+    this->initData(_examples);
+}
+
+/**
+ * Release the arrays that initData() allocates with new[]: one element array
+ * per dimension, the array of pointers to them, and the per-dimension counters.
+ * NOTE(review): no user-defined copy operations are visible for this class, so
+ * an instance must not be copied -- both copies would free the same arrays.
+ */
+GMHIKernelRaw::~GMHIKernelRaw()
+{
+    if ( this->examples_raw != NULL )
+    {
+        for ( uint d = 0; d < this->num_dimension; d++ )
+            delete [] this->examples_raw[d];
+
+        delete [] this->examples_raw;
+        this->examples_raw = NULL;
+    }
+
+    // delete [] on a NULL pointer is a no-op
+    delete [] this->nnz_per_dimension;
+    this->nnz_per_dimension = NULL;
+}
+
+/**
+ * Convert the examples into a per-dimension ("transposed") representation:
+ * for every feature dimension d, examples_raw[d] holds the non-zero entries of
+ * that dimension (value + index of the example it belongs to), sorted by value,
+ * and nnz_per_dimension[d] holds their number.
+ *
+ * The number of dimensions is taken from the first example. The input is
+ * validated and counted in a first pass, so that nothing is allocated when the
+ * data is rejected and every dimension gets an array of exactly the size it
+ * needs.
+ *
+ * @param _examples training examples in sparse representation
+ * @throws Exception if no examples are given, an example pointer is NULL, or a
+ *         feature index is not smaller than the dimension of the first example
+ */
+void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_examples )
+{
+    if ( _examples.empty() )
+        fthrow(Exception, "No examples given for learning");
+
+    if ( _examples[0] == NULL )
+        fthrow(Exception, "Example 0 is a NULL pointer");
+
+    // TODO: clean up data if it exists
+
+    const uint dimension = _examples[0]->getDim();
+
+    // pass 1: validate the input and count the non-zero entries of every dimension
+    std::vector<uint> entries_per_dimension ( dimension, 0 );
+    uint example_index = 0;
+    for (std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
+            i != _examples.end(); i++, example_index++)
+    {
+        const NICE::SparseVector *x = *i;
+        if ( x == NULL )
+            fthrow(Exception, "Example " << example_index << " is a NULL pointer");
+
+        for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
+        {
+            const uint index = j->first;
+            if ( index >= dimension )
+                fthrow(Exception, "Feature index " << index << " of example " << example_index
+                                  << " is out of range (dimension is " << dimension << ")");
+            entries_per_dimension[index]++;
+        }
+    }
+
+    this->num_dimension = dimension;
+    this->num_examples = _examples.size();
+    this->examples_raw = new sparseVectorElement *[this->num_dimension];
+    this->nnz_per_dimension = new uint [this->num_dimension];
+
+    for (uint d = 0; d < this->num_dimension; d++)
+    {
+        // exactly as many slots as this dimension has non-zero entries
+        this->examples_raw[d] = new sparseVectorElement [ entries_per_dimension[d] ];
+        // doubles as write cursor in pass 2 and ends up as the final count
+        this->nnz_per_dimension[d] = 0;
+    }
+
+    // pass 2: scatter the entries of every example into their dimension
+    example_index = 0;
+    for (std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
+            i != _examples.end(); i++, example_index++)
+    {
+        const NICE::SparseVector *x = *i;
+        for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
+        {
+            const uint index = j->first;
+            sparseVectorElement & slot = this->examples_raw[index][ this->nnz_per_dimension[index] ];
+            slot.value = j->second;
+            slot.example_index = example_index;
+            // move to the next element
+            this->nnz_per_dimension[index]++;
+        }
+    }
+
+    // sort along each dimension (ascending by value, see sparseVectorElement::operator<)
+    for (uint d = 0; d < this->num_dimension; d++)
+    {
+        std::sort( this->examples_raw[d], this->examples_raw[d] + this->nnz_per_dimension[d] );
+    }
+}
+
+/**
+ * multiply with a vector: A*x = y
+ *
+ * A is the histogram intersection kernel matrix of the stored examples, i.e.
+ * A(i,j) = sum_d min( X(i,d), X(j,d) ). The product is computed dimension by
+ * dimension on the sorted per-dimension arrays: for the entry at sorted
+ * position k, all entries up to k contribute coefficient * their own value,
+ * all later entries contribute coefficient * the value at position k.
+ * Examples without an entry in a dimension are treated as zero there and
+ * neither contribute nor receive anything.
+ * NOTE(review): this assumes non-negative feature values (histograms) --
+ * confirm against the callers.
+ *
+ * @param y result vector, overwritten with A*x
+ * @param x vector to multiply with; needs one entry per example
+ * @throws Exception if the size of x differs from the number of examples
+ */
+void GMHIKernelRaw::multiply (NICE::Vector & y, const NICE::Vector & x) const
+{
+    if ( x.size() != this->num_examples )
+        fthrow(Exception, "GMHIKernelRaw::multiply: size of x (" << x.size()
+                          << ") does not match the number of examples ("
+                          << this->num_examples << ")");
+
+    // accumulate into a local vector so that the call also works if y and x
+    // refer to the same object
+    NICE::Vector result ( this->num_examples );
+    for ( uint i = 0; i < this->num_examples; i++ )
+        result[i] = 0.0;
+
+    // coeff_after[k]: sum of the coefficients of all entries behind position k
+    std::vector<double> coeff_after;
+
+    for ( uint d = 0; d < this->num_dimension; d++ )
+    {
+        const uint nnz = this->nnz_per_dimension[d];
+        if ( nnz == 0 )
+            continue;
+
+        const sparseVectorElement *column = this->examples_raw[d];
+
+        coeff_after.assign ( nnz, 0.0 );
+        double coeff_sum = 0.0;
+        for ( uint k = nnz; k-- > 0; )
+        {
+            coeff_after[k] = coeff_sum;
+            coeff_sum += x[ column[k].example_index ];
+        }
+
+        // sum of coefficient * value over all entries up to and including k
+        double weighted_before = 0.0;
+        for ( uint k = 0; k < nnz; k++ )
+        {
+            const sparseVectorElement & element = column[k];
+            weighted_before += x[ element.example_index ] * element.value;
+            result[ element.example_index ] += weighted_before + element.value * coeff_after[k];
+        }
+    }
+
+    y = result;
+}
+
+/** Number of rows of A, which equals the number of stored examples. */
+uint GMHIKernelRaw::rows () const
+{
+  return this->num_examples;
+}
+
+/** Number of columns of A, which equals the number of stored examples. */
+uint GMHIKernelRaw::cols () const
+{
+  return this->num_examples;
+}
+
+

+ 64 - 0
GMHIKernelRaw.h

@@ -0,0 +1,64 @@
+/**
+* @file GMHIKernelRaw.h
+* @author Erik Rodner, Alexander Freytag
+* @brief Fast multiplication with histogram intersection kernel matrices (Interface)
+
+*/
+#ifndef _NICE_GMHIKERNELRAWINCLUDE
+#define _NICE_GMHIKERNELRAWINCLUDE
+
+#include <vector>
+
+#include <core/algebra/GenericMatrix.h>
+
+namespace NICE {
+
+ /**
+ * @class GMHIKernelRaw
+ * @brief Fast multiplication with histogram intersection kernel matrices,
+ *        based on raw per-dimension arrays of the non-zero feature entries
+ * @author Erik Rodner, Alexander Freytag
+ */
+
+class GMHIKernelRaw : public GenericMatrix
+{
+
+  protected:
+    /** a single non-zero feature entry: its value and the example it belongs to */
+    struct sparseVectorElement {
+        uint example_index;
+        double value;
+
+        /** order entries by their feature value (used to sort each dimension) */
+        bool operator< (const sparseVectorElement & a) const
+        {
+            return value < a.value;
+        }
+
+    };
+
+    /** examples_raw[d]: array with the non-zero entries of dimension d (allocated in initData) */
+    sparseVectorElement **examples_raw;
+
+    /** nnz_per_dimension[d]: number of entries stored in examples_raw[d] */
+    uint *nnz_per_dimension;
+    /** number of feature dimensions (taken from the first example) */
+    uint num_dimension;
+    /** number of examples, i.e., number of rows and columns of the matrix */
+    uint num_examples;
+
+    /** set up the per-dimension arrays from the given examples */
+    void initData ( const std::vector< const NICE::SparseVector *> &_examples );
+
+  public:
+
+    /** simple constructor */
+    GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples );
+
+    /** multiply with a vector: A*x = y */
+    virtual void multiply (NICE::Vector & y, const NICE::Vector & x) const;
+
+    /** get the number of rows in A */
+    virtual uint rows () const;
+
+    /** get the number of columns in A */
+    virtual uint cols () const;
+
+    /** simple destructor */
+    virtual ~GMHIKernelRaw();
+
+  private:
+
+    /**
+     * Copying is disabled (declared, but intentionally not implemented): the
+     * members are raw pointers to arrays allocated in initData, so a
+     * member-wise copy would only duplicate the pointers.
+     */
+    GMHIKernelRaw( const GMHIKernelRaw & );
+    GMHIKernelRaw & operator= ( const GMHIKernelRaw & );
+};
+
+}
+#endif

+ 235 - 0
GPHIKRawClassifier.cpp

@@ -0,0 +1,235 @@
+/**
+* @file GPHIKRawClassifier.cpp
+* @brief Main interface for our GP HIK classifier (similar to the feature pool classifier interface in vislearning) (Implementation)
+* @author Erik Rodner, Alexander Freytag
+* @date 02/01/2012
+
+*/
+
+// STL includes
+#include <iostream>
+
+// NICE-core includes
+#include <core/basics/numerictools.h>
+#include <core/basics/Timer.h>
+
+#include <core/algebra/ILSConjugateGradients.h>
+
+// gp-hik-core includes
+#include "GPHIKRawClassifier.h"
+#include "GMHIKernelRaw.h"
+
+using namespace std;
+using namespace NICE;
+
+/////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
+//                 PROTECTED METHODS
+/////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
+//                 PUBLIC METHODS
+/////////////////////////////////////////////////////
+/////////////////////////////////////////////////////
+/**
+ * Default constructor: marks the classifier as untrained, uses an empty
+ * config section name and takes every setting from its default value.
+ */
+GPHIKRawClassifier::GPHIKRawClassifier( )
+  : confSection ( "" ), b_isTrained ( false )
+{
+  // an empty config makes initFromConfig fall back to the default of every setting
+  NICE::Config emptyConfig;
+  this->initFromConfig ( &emptyConfig, this->confSection );
+}
+
+/**
+ * Standard constructor: marks the classifier as untrained and reads the
+ * settings from the given section of the given config.
+ *
+ * @param _conf config to read the settings from; if NULL, an empty config is
+ *              used, i.e., every setting takes its default value
+ * @param _confSection name of the config section holding the settings
+ */
+GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
+                                  const string & _confSection
+                                )
+  : confSection ( _confSection ), b_isTrained ( false )
+{
+  if ( _conf == NULL )
+  {
+    // no config given: an empty one yields the default values
+    NICE::Config emptyConfig;
+    this->initFromConfig ( &emptyConfig, this->confSection );
+    return;
+  }
+
+  this->initFromConfig( _conf, _confSection );
+}
+
+/** Destructor; contains no clean-up code of its own. */
+GPHIKRawClassifier::~GPHIKRawClassifier() {}
+
+/**
+ * Read the settings of this classifier from a config section and remember the
+ * section name. Settings read: "noise" (default 0.01), "verbose" (default
+ * false) and "debug" (default false).
+ *
+ * @param _conf config to read from; if NULL, an empty config is used so that
+ *              every setting takes its default value (same fallback as in the
+ *              constructor)
+ * @param _confSection name of the config section holding the settings
+ */
+void GPHIKRawClassifier::initFromConfig(const Config *_conf,
+                                     const string & _confSection
+                                    )
+{
+  // never dereference a NULL config: fall back to an empty one
+  NICE::Config emptyConfig;
+  const Config *conf = ( _conf != NULL ) ? _conf : &emptyConfig;
+
+  this->d_noise     = conf->gD( _confSection, "noise", 0.01);
+
+  this->confSection = _confSection;
+  this->b_verbose   = conf->gB( _confSection, "verbose", false);
+  this->b_debug     = conf->gB( _confSection, "debug", false);
+}
+
+///////////////////// ///////////////////// /////////////////////
+//                         GET / SET
+///////////////////// ///////////////////// /////////////////////
+
+/**
+ * Return the currently known class numbers.
+ * Not implemented yet: the function always throws an Exception, with a
+ * different message depending on whether the classifier has been trained.
+ */
+std::set<uint> GPHIKRawClassifier::getKnownClassNumbers ( ) const
+{
+  if ( this->b_isTrained )
+  {
+    fthrow(Exception, "GPHIKRawClassifier::getKnownClassNumbers() not yet implemented");
+  }
+
+  fthrow(Exception, "Classifier not trained yet -- aborting!" );
+}
+
+
+///////////////////// ///////////////////// /////////////////////
+//                      CLASSIFIER STUFF
+///////////////////// ///////////////////// /////////////////////
+
+/**
+ * Classify an example given in sparse representation.
+ * The score computation itself is not implemented yet: _scores is cleared and
+ * nothing fills it afterwards, so the final check currently always throws.
+ *
+ * @param _example example to classify, must not be NULL
+ * @param _result class number of the most likely class (not written yet)
+ * @param _scores classification scores, cleared at the beginning
+ * @throws Exception if the classifier is not trained, if _example is NULL, or
+ *         if no scores were computed
+ */
+void GPHIKRawClassifier::classify ( const SparseVector * _example,
+                                 uint & _result,
+                                 SparseVector & _scores
+                               ) const
+{
+  if ( ! this->b_isTrained )
+     fthrow(Exception, "Classifier not trained yet -- aborting!" );
+
+  // the example is dereferenced below (debug output and error message)
+  if ( _example == NULL )
+     fthrow(Exception, "Given example is a NULL pointer -- aborting!" );
+
+  _scores.clear();
+
+  if ( this->b_debug )
+  {
+    std::cerr << "GPHIKRawClassifier::classify (sparse)" << std::endl;
+    _example->store( std::cerr );
+  }
+
+  // MAGIC happens here....
+
+
+  // ...
+  if ( this->b_debug )
+  {
+    _scores.store ( std::cerr );
+    std::cerr << "_result: " << _result << std::endl;
+  }
+
+  if ( _scores.size() == 0 ) {
+    fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << _example->size() );
+  }
+}
+
+/**
+ * Classify an example given as non-sparse vector.
+ * Not implemented yet: the function always throws an Exception.
+ */
+void GPHIKRawClassifier::classify ( const NICE::Vector * _example,
+                                 uint & _result,
+                                 SparseVector & _scores
+                               ) const
+{
+  // placeholder: none of the arguments is touched before the exception is raised
+  fthrow(Exception, "GPHIKRawClassifier::classify( Vector ... ) not yet implemented");
+}
+
+
+/**
+ * Training with multi-class labels: every distinct class number gets a binary
+ * label vector (+1 for the examples of that class, -1 for all others), and the
+ * binary-label variant of train() is called with them.
+ *
+ * Every label is converted to its class number exactly once (truncation to
+ * uint), and the same class number is used for collecting the classes and for
+ * building the binary vectors.
+ *
+ * @param _examples training examples in sparse representation
+ * @param _labels one class label per example, must not be negative
+ * @throws Exception if the number of examples and labels differs or a label
+ *         is negative (or not a number)
+ */
+void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
+                              const NICE::Vector & _labels
+                            )
+{
+  // security-check: examples and labels have to be of same size
+  if ( _examples.size() != _labels.size() )
+  {
+    fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
+  }
+
+  std::vector<uint> class_of_example ( _labels.size() );
+  std::set<uint> classes;
+  for ( uint i = 0; i < _labels.size(); i++ )
+  {
+    // converting a negative (or NaN) floating point value to an unsigned type
+    // is undefined, so such labels are rejected
+    if ( ! ( _labels[i] >= 0.0 ) )
+    {
+      fthrow(Exception, "Invalid class label " << _labels[i] << " for example " << i << " -- aborting!" );
+    }
+
+    class_of_example[i] = static_cast<uint> ( _labels[i] );
+    classes.insert ( class_of_example[i] );
+  }
+
+  std::map<uint, NICE::Vector> binLabels;
+  for ( std::set<uint>::const_iterator j = classes.begin(); j != classes.end(); j++ )
+  {
+    const uint current_class = *j;
+    Vector labels_binary ( _labels.size() );
+    for ( uint i = 0; i < _labels.size(); i++ )
+        labels_binary[i] = ( class_of_example[i] == current_class ) ? 1.0 : -1.0;
+
+    binLabels.insert ( std::pair<uint, NICE::Vector>( current_class, labels_binary) );
+  }
+
+
+  this->train ( _examples, binLabels );
+}
+
+/**
+ * Training with binary label vectors, one per class: sets up the kernel
+ * matrix interface for the examples and solves one linear system per class
+ * with conjugate gradients.
+ * The solutions are not stored yet (see the TODO below); the classifier is
+ * nevertheless marked as trained at the end.
+ *
+ * @param _examples training examples in sparse representation
+ * @param _binLabels binary label vector for every class number
+ * @throws Exception if a label vector does not have one entry per example
+ */
+void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
+                              std::map<uint, NICE::Vector> & _binLabels
+                            )
+{
+  // security-check: examples and labels have to be of same size
+  for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
+        binLabIt != _binLabels.end();
+        binLabIt++
+      )
+  {
+    if ( _examples.size() != binLabIt->second.size() )
+    {
+      fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
+    }
+  }
+
+  if ( this->b_verbose )
+    std::cerr << "GPHIKRawClassifier::train" << std::endl;
+
+  Timer t;
+  t.start();
+
+  // sort examples in each dimension and "transpose" the feature matrix
+  // set up the GenericMatrix interface
+  GMHIKernelRaw gm ( _examples );
+
+  // the solver lives on the stack, so it is released on every exit path,
+  // including an exception thrown while solving
+  ILSConjugateGradients conjugateGradients;
+  IterativeLinearSolver & ils = conjugateGradients;
+
+  // solve linear equations for each class
+  for ( std::map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
+          i != _binLabels.end(); i++ )
+  {
+    const Vector & y = i->second;
+    Vector alpha;
+    ils.solveLin( gm, y, alpha );
+    // TODO: get lookup tables, A, B, etc. and store them
+  }
+
+  t.stop();
+  if ( this->b_verbose )
+    std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;
+
+
+  //indicate that we finished training successfully
+  this->b_isTrained = true;
+
+  // clean up all examples ??
+  if ( this->b_verbose )
+    std::cerr << "Learning finished" << std::endl;
+}
+
+

+ 164 - 0
GPHIKRawClassifier.h

@@ -0,0 +1,164 @@
+/**
+* @file GPHIKRawClassifier.h
+* @brief Main interface for the raw GP HIK classifier (Interface)
+* @author Erik Rodner
+* @date 16-09-2015 (dd-mm-yyyy)
+*/
+#ifndef _NICE_GPHIKRAWCLASSIFIERINCLUDE
+#define _NICE_GPHIKRAWCLASSIFIERINCLUDE
+
+// STL includes
+#include <string>
+#include <limits>
+
+// NICE-core includes
+#include <core/basics/Config.h>
+#include <core/basics/Persistent.h>
+#include <core/vector/SparseVectorT.h>
+//
+
+namespace NICE {
+
+ /**
+ * @class GPHIKRawClassifier
+ * @brief Main interface of the raw GP HIK classifier: configuration, training and classification
+ * NOTE(review): std::set, std::map and std::vector appear in the declarations below,
+ * but this header does not include <set>, <map> or <vector> itself.
+ * @author Erik Rodner
+ */
+
+class GPHIKRawClassifier //: public NICE::Persistent
+{
+
+  protected:
+
+    /////////////////////////
+    /////////////////////////
+    // PROTECTED VARIABLES //
+    /////////////////////////
+    /////////////////////////
+
+    ///////////////////////////////////
+    // output/debug related settings //
+    ///////////////////////////////////
+
+    /** verbose flag for useful output (config key "verbose") */
+    bool b_verbose;
+    /** debug flag for several outputs useful for debugging (config key "debug") */
+    bool b_debug;
+
+    //////////////////////////////////////
+    //      general specifications      //
+    //////////////////////////////////////
+
+    /** Name of the section in the config file where the variable settings are stored */
+    std::string confSection;
+
+    //////////////////////////////////////
+    // classification related variables //
+    //////////////////////////////////////
+    /** memorize whether the classifier was already trained */
+    bool b_isTrained;
+
+
+    /** Gaussian label noise for model regularization (config key "noise") */
+    double d_noise;
+
+    /////////////////////////
+    /////////////////////////
+    //  PROTECTED METHODS  //
+    /////////////////////////
+    /////////////////////////
+
+
+  public:
+
+    /**
+     * @brief default constructor, all settings take their default values
+     */
+    GPHIKRawClassifier( );
+
+
+    /**
+     * @brief standard constructor
+     * @param _conf config to read the settings from; if NULL, default values are used
+     * @param s_confSection name of the config section holding the settings
+     */
+    GPHIKRawClassifier( const NICE::Config *_conf ,
+                     const std::string & s_confSection = "GPHIKClassifier"
+                   );
+
+    /**
+     * @brief simple destructor
+     */
+    ~GPHIKRawClassifier();
+
+    /**
+    * @brief Set up the internal variables from a config
+    * @param _conf config that specifies the variable settings
+    * @param s_confSection name of the config section holding the settings
+    */
+    void initFromConfig(const NICE::Config *_conf,
+                        const std::string & s_confSection
+                       );
+
+    ///////////////////// ///////////////////// /////////////////////
+    //                         GET / SET
+    ///////////////////// ///////////////////// /////////////////////
+
+    /**
+     * @brief Return currently known class numbers
+     */
+    std::set<uint> getKnownClassNumbers ( ) const;
+
+    ///////////////////// ///////////////////// /////////////////////
+    //                      CLASSIFIER STUFF
+    ///////////////////// ///////////////////// /////////////////////
+
+    /**
+     * @brief classify a given example with the previously learned model
+     * @author Alexander Freytag, Erik Rodner
+     * @param _example (SparseVector) example to be classified, given in a sparse representation
+     * @param _result (uint) class number of the most likely class
+     * @param _scores (SparseVector) classification scores for known classes
+     */
+    void classify ( const NICE::SparseVector * _example,
+                    uint & _result,
+                    NICE::SparseVector & _scores
+                  ) const;
+
+    /**
+     * @brief classify a given example with the previously learned model
+     * NOTE: whenever possible, you should use the sparse version to obtain significantly smaller computation times
+     * @author Alexander Freytag, Erik Rodner
+     * @param _example (non-sparse Vector) example to be classified, given in a non-sparse representation
+     * @param _result (uint) class number of the most likely class
+     * @param _scores (SparseVector) classification scores for known classes
+     */
+    void classify ( const NICE::Vector * _example,
+                    uint & _result,
+                    NICE::SparseVector & _scores
+                  ) const;
+
+    /**
+     * @brief train this classifier using a given set of examples and the corresponding multi-class labels
+     * @date 18-10-2012 (dd-mm-yyyy)
+     * @author Alexander Freytag, Erik Rodner
+     * @param _examples (std::vector< const NICE::SparseVector *>) training data given in a sparse representation
+     * @param _labels (Vector) class labels (multi-class), one per example
+     */
+    void train ( const std::vector< const NICE::SparseVector *> & _examples,
+                 const NICE::Vector & _labels
+               );
+
+    /**
+     * @brief train this classifier using a given set of examples and a given set of binary label vectors
+     * @author Alexander Freytag, Erik Rodner
+     * @param _examples examples to use, given in a sparse data structure
+     * @param _binLabels corresponding binary labels (1,-1) per class number. There is no need that every example has only one positive entry in this set
+     */
+    void train ( const std::vector< const NICE::SparseVector *> & _examples,
+                 std::map<uint, NICE::Vector> & _binLabels
+               );
+
+};
+
+}
+
+#endif