Эх сурвалжийг харах

added pre-conditioning and initial alpha guess

Alexander Freytag 9 жил өмнө
parent
commit
2018fb5119

+ 6 - 1
FMKGPHyperparameterOptimization.cpp

@@ -23,7 +23,6 @@
 #include <core/basics/Exception.h>
 // 
 #include <core/vector/Algorithms.h>
-#include <core/vector/Eigen.h>
 // 
 #include <core/optimization/blackbox/DownhillSimplexOptimizer.h>
 
@@ -827,6 +826,12 @@ void FMKGPHyperparameterOptimization::computeMatricesAndLUTs ( const GPLikelihoo
     PrecomputedType A;
     PrecomputedType B;
 
+    if ( this->b_debug &&  i->first == 1)
+    {
+        std::cerr << "Training for class " << i->first << endl;
+        std::cerr << "  " << i->second << std::endl;
+    }
+
     fmk->hik_prepare_alpha_multiplications ( i->second, A, B );
     A.setIoUntilEndOfFile ( false );
     B.setIoUntilEndOfFile ( false );

+ 31 - 4
GMHIKernelRaw.cpp

@@ -83,21 +83,42 @@ void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_
         this->nnz_per_dimension[d] = 0;
     }
 
+    // additionally allocate a Vector with as many entries as examples
+    // this vector will contain the L1 norm values of all examples + noise
+    // thereby, it represents the diagonal entries of our kernel matrix for
+    // the special case of the minimum (histogram intersection) kernel
+    this->diagonalElements.resize ( this->num_examples );
+    this->diagonalElements.set ( this->d_noise );
+
+
     uint example_index = 0;
-    for (std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
-            i != _examples.end(); i++, example_index++)
+    NICE::Vector::iterator itDiagEl = this->diagonalElements.begin();
+
+    // minor optimization: hoist loop-local declarations out of the loop
+    uint index;
+    double value;
+    double l1norm;
+
+    for ( std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
+          i != _examples.end();
+          i++, example_index++, itDiagEl++
+        )
     {
+        l1norm = 0.0;
         const NICE::SparseVector *x = *i;
         for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
         {
-            uint index = j->first;
-            double value = j->second;
+            index = j->first;
+            value = j->second;
             examples_raw_increment[index]->value = value;
             examples_raw_increment[index]->example_index = example_index;
             // move to the next element
             examples_raw_increment[index]++;
             this->nnz_per_dimension[index]++;
+
+            l1norm = l1norm + value;
         }
+        *itDiagEl = *itDiagEl + l1norm;
     }
 
     delete [] examples_raw_increment;
@@ -258,3 +279,9 @@ uint *GMHIKernelRaw::getNNZPerDimension() const
         v[i] = this->nnz_per_dimension[i];
     return v;
 }
+
+
+void NICE::GMHIKernelRaw::getDiagonalElements( NICE::Vector & _diagonalElements) const
+{
+    _diagonalElements = this->diagonalElements;
+}

+ 6 - 0
GMHIKernelRaw.h

@@ -39,6 +39,8 @@ class GMHIKernelRaw : public GenericMatrix
     double **table_A;
     double **table_B;
 
+    NICE::Vector diagonalElements;
+
     uint *nnz_per_dimension;
     uint num_dimension;
     uint num_examples;
@@ -72,6 +74,10 @@ class GMHIKernelRaw : public GenericMatrix
 
     sparseVectorElement **getDataMatrix() const { return examples_raw; };
     void updateTables ( const NICE::Vector _x ) const;
+
+    /** get the diagonal elements of the current matrix */
+    void getDiagonalElements ( NICE::Vector & _diagonalElements ) const;
+
 };
 
 }

+ 61 - 5
GPHIKRawClassifier.cpp

@@ -14,6 +14,7 @@
 #include <core/basics/Timer.h>
 
 #include <core/algebra/ILSConjugateGradients.h>
+#include <core/algebra/EigValues.h>
 
 // gp-hik-core includes
 #include "GPHIKRawClassifier.h"
@@ -272,7 +273,7 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     binLabels.erase( binLabels.begin(), it );
   }
 
-  train ( _examples, binLabels );
+  this->train ( _examples, binLabels );
 }
 
 void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
@@ -306,17 +307,72 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   gm = new GMHIKernelRaw ( _examples, this->d_noise );
   nnz_per_dimension = gm->getNNZPerDimension();
 
+  // compute largest eigenvalue of our kernel matrix
+  // note: this guy is shared among all categories,
+  //       since the kernel matrix is shared as well
+  NICE::Vector eigenMax;
+  NICE::Matrix eigenMaxV;
+  // for reproducibility during debugging
+  srand ( 0 );
+  srand48 ( 0 );
+  NICE::EigValues * eig = new EVArnoldi ( false /* verbose flag */,
+                                        10 /*_maxiterations*/
+                                      );
+  eig->getEigenvalues( *gm, eigenMax, eigenMaxV, 1 /*rank*/ );
+
+  delete eig;
+
+  std::cerr << " largest eigenvalue: " << eigenMax[0] << std::endl;
+  // set simple jacobi pre-conditioning
+  NICE::Vector diagonalElements;
+  gm->getDiagonalElements ( diagonalElements );
+  solver->setJacobiPreconditioner ( diagonalElements );
+
   // solve linear equations for each class
   // be careful when parallising this!
-  for ( map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
-          i != _binLabels.end(); i++ )
+  for ( std::map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
+        i != _binLabels.end();
+        i++
+      )
   {
     uint classno = i->first;
     if (b_verbose)
         std::cerr << "Training for class " << classno << endl;
-    const Vector & y = i->second;
-    Vector alpha;
+    const NICE::Vector & y = i->second;
+    NICE::Vector alpha;
+
+
+  /** About finding a good initial solution (see also GPLikelihoodApproximation)
+    * K~ = K + sigma^2 I
+    *
+    * K~ \approx lambda_max v v^T
+    * \lambda_max v v^T * alpha = k_*     | multiply with v^T from left
+    * => \lambda_max v^T alpha = v^T k_*
+    * => alpha = k_* / lambda_max could be a good initial start
+    * If we put everything in the first equation this gives us
+    * v = k_*
+    *  This reduces the number of iterations by 5 or 8
+    */
+    alpha = (y * (1.0 / eigenMax[0]) );
+
+    //DEBUG!!!
+    if ( this->b_debug && classno == 1 )
+    {
+        std::cerr << "Training for class " << classno << endl;
+        std::cerr << y << std::endl;
+        std::cerr << " alpha before and after linsolve" << classno << endl;
+        std::cerr << "  " << alpha << std::endl;
+    }
+
     solver->solveLin( *gm, y, alpha );
+
+    //DEBUG!!!
+    if ( this->b_debug && classno == 1 )
+    {
+//        std::cerr << "Training for class " << classno << endl;
+        std::cerr << "  " << alpha << std::endl;
+    }
+
     // TODO: get lookup tables, A, B, etc. and store them
     gm->updateTables(alpha);
     double **A = gm->getTableA();

+ 6 - 3
GPHIKRawClassifier.h

@@ -10,14 +10,15 @@
 // STL includes
 #include <string>
 #include <limits>
+#include <set>
 
 // NICE-core includes
 #include <core/basics/Config.h>
 #include <core/basics/Persistent.h>
 #include <core/vector/SparseVectorT.h>
-#include <core/algebra/IterativeLinearSolver.h>
+#include <core/algebra/ILSConjugateGradients.h>
+
 //
-#include <set>
 #include "quantization/Quantization.h"
 #include "GMHIKernelRaw.h"
 
@@ -66,7 +67,7 @@ class GPHIKRawClassifier //: public NICE::Persistent
     /** Gaussian label noise for model regularization */
     double d_noise;
 
-    IterativeLinearSolver *solver;
+    ILSConjugateGradients *solver;
     /** object performing feature quantization */
     NICE::Quantization *q;
 
@@ -133,6 +134,8 @@ class GPHIKRawClassifier //: public NICE::Persistent
      */
     std::set<uint> getKnownClassNumbers ( ) const;
 
+
+
     ///////////////////// ///////////////////// /////////////////////
     //                      CLASSIFIER STUFF
     ///////////////////// ///////////////////// /////////////////////