فهرست منبع

first running version of the raw interface

Erik Rodner 9 سال پیش
والد
کامیت
74ece93cfa
3فایلهای تغییر یافته به همراه122 افزوده شده و 36 حذف شده
  1. 80 14
      GMHIKernelRaw.cpp
  2. 5 2
      GMHIKernelRaw.h
  3. 37 20
      tests/TestFastHIK.cpp

+ 80 - 14
GMHIKernelRaw.cpp

@@ -16,10 +16,10 @@ using namespace NICE;
 using namespace std;
 
 
-GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples )
+GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples, const double _d_noise )
 {
     initData(_examples);
-
+    this->d_noise = _d_noise;
 }
 
 GMHIKernelRaw::~GMHIKernelRaw()
@@ -40,9 +40,9 @@ void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_
 
     // waste memory and allocate a non-sparse data block
     sparseVectorElement **examples_raw_increment = new sparseVectorElement *[num_dimension];
-    for (uint d = 0; d < num_dimension; d++)
+    for (uint d = 0; d < this->num_dimension; d++)
     {
-        this->examples_raw[d] = new sparseVectorElement [ this->num_dimension ];
+        this->examples_raw[d] = new sparseVectorElement [ this->num_examples ];
         examples_raw_increment[d] = this->examples_raw[d];
         this->nnz_per_dimension[d] = 0;
     }
@@ -67,21 +67,87 @@ void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_
     // sort along each dimension
     for (uint d = 0; d < this->num_dimension; d++)
     {
-        std::sort( this->examples_raw[d], this->examples_raw[d] + this->nnz_per_dimension[d] );
+        uint nnz = this->nnz_per_dimension[d];
+        if ( nnz > 1 )
+            std::sort( this->examples_raw[d], this->examples_raw[d] + nnz );
+    }
+
+    // pre-allocate the A and B matrices
+    this->table_A = new double *[this->num_dimension];
+    this->table_B = new double *[this->num_dimension];
+    for (uint i = 0; i < this->num_dimension; i++)
+    {
+        uint nnz = this->nnz_per_dimension[i];
+        if (nnz>0) {
+            this->table_A[i] = new double [ nnz ];
+            this->table_B[i] = new double [ nnz ];
+        } else {
+            this->table_A[i] = NULL;
+            this->table_B[i] = NULL;
+        }
     }
 }
 
 /** multiply with a vector: A*x = y */
-void GMHIKernelRaw::multiply (NICE::Vector & y, const NICE::Vector & x) const
+void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
 {
-    /*
-    NICE::VVector A;
-    NICE::VVector B;
-    // prepare to calculate sum_i x_i K(x,x_i)
-    fmk->hik_prepare_alpha_multiplications(x, A, B);
-
-    fmk->hik_kernel_multiply(A, B, x, y);
-    */
+  // STEP 1: initialize tables A and B
+  for (uint dim = 0; dim < this->num_dimension; dim++)
+  {
+    double alpha_sum = 0.0;
+    double alpha_times_x_sum = 0.0;
+    uint nnz = nnz_per_dimension[dim];
+
+    // loop through all elements in sorted order
+    sparseVectorElement *training_values_in_dim = examples_raw[dim];
+    for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
+    {
+      // index of the feature
+      int index = training_values_in_dim->example_index;
+      // element of the feature
+      double elem = training_values_in_dim->value;
+
+      alpha_times_x_sum += _x[index] * elem;
+      this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
+
+      alpha_sum += _x[index];
+      this->table_B[dim][cntNonzeroFeat] = alpha_sum;
+      cntNonzeroFeat++;
+    }
+  }
+
+  _y.resize( this->num_examples );
+  _y.set(0.0);
+
+  for (uint dim = 0; dim < this->num_dimension; dim++)
+  {
+    uint nnz = this->nnz_per_dimension[dim];
+
+    if ( nnz == this->num_examples ) {
+      // all values are zero in this dimension :) and we can simply ignore the feature
+      continue;
+    }
+
+    sparseVectorElement *training_values_in_dim = examples_raw[dim];
+    for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
+    {
+      uint feat = training_values_in_dim->example_index;
+      uint inversePosition = cntNonzeroFeat;
+      double fval = training_values_in_dim->value;
+
+      double firstPart( this->table_A[dim][inversePosition] );
+      double secondPart( this->table_B[dim][this->num_examples-1-nnz] - this->table_B[dim][inversePosition]);
+
+      _y[cntNonzeroFeat] += firstPart + fval * secondPart;
+    }
+  }
+
+  for (uint feat = 0; feat < this->num_examples; feat++)
+  {
+    _y[feat] += this->d_noise * _x[feat];
+  }
+
+
 }
 
 /** get the number of rows in A */

+ 5 - 2
GMHIKernelRaw.h

@@ -35,17 +35,20 @@ class GMHIKernelRaw : public GenericMatrix
     } sparseVectorElement;
 
     sparseVectorElement **examples_raw;
+    double **table_A;
+    double **table_B;
 
     uint *nnz_per_dimension;
     uint num_dimension;
     uint num_examples;
+    double d_noise;
 
-    void initData ( const std::vector< const NICE::SparseVector *> &_examples );
+    void initData ( const std::vector< const NICE::SparseVector *> & examples );
 
   public:
 
     /** simple constructor */
-    GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples );
+    GMHIKernelRaw( const std::vector< const NICE::SparseVector *> & examples, const double d_noise = 0.1 );
 
     /** multiply with a vector: A*x = y */
     virtual void multiply (NICE::Vector & y, const NICE::Vector & x) const;

+ 37 - 20
tests/TestFastHIK.cpp

@@ -20,12 +20,12 @@
 
 #include "TestFastHIK.h"
 
-const bool b_debug = false;
+const bool b_debug = true;
 const bool verbose = true;
 const bool verboseStartEnd = true;
 const bool solveLinWithoutRand = false;
-const uint n = 1500;//1500;//1500;//10;
-const uint d = 200;//200;//2;
+const uint n = 5000;//1500;//1500;//10;
+const uint d = 1000;//200;//2;
 const uint numBins = 11;//1001;//1001;
 const uint solveLinMaxIterations = 1000;
 const double sparse_prob = 0.6;
@@ -140,12 +140,22 @@ void TestFastHIK::testKernelMultiplication()
   for ( uint i = 0; i < y.size(); i++ )
     y[i] = sin(i);
 
+
+  // Test the GMHIKernel interface
   Vector alpha;
 
+  t.start();
   gmk.multiply ( alpha, y );
+  t.stop();
+  if (verbose)
+      std::cerr << "Time for kernel multiplication with GMHIKernel: " << t.getLast() << std::endl;
+
 
+  // convert data structures to test the GMHIKernelRaw interface
+  std::vector<std::vector<double> > dataMatrix_transposed (dataMatrix);
+  transposeVectorOfVectors(dataMatrix_transposed);
   std::vector< const NICE::SparseVector * > dataMatrix_sparse;
-  for ( std::vector< std::vector<double> >::const_iterator i = dataMatrix.begin(); i != dataMatrix.end(); i++ )
+  for ( std::vector< std::vector<double> >::const_iterator i = dataMatrix_transposed.begin(); i != dataMatrix_transposed.end(); i++ )
   {
     Vector w ( *i );
     SparseVector *v = new SparseVector ( w );
@@ -153,20 +163,26 @@ void TestFastHIK::testKernelMultiplication()
   }
 
   t.start();
-  GMHIKernelRaw gmk_raw ( dataMatrix_sparse );
+  GMHIKernelRaw gmk_raw ( dataMatrix_sparse, noise );
   t.stop();
   if (verbose)
-    std::cerr << "Time for GMHIKernelRaw setup: " << t.getLast() << endl;
+    std::cerr << "Time for GMHIKernelRaw setup: " << t.getLast() << std::endl;
 
   Vector alpha_raw;
+  t.start();
   gmk_raw.multiply ( alpha_raw, y );
+  t.stop();
+  if (verbose)
+      std::cerr << "Time for kernel multiplication with GMHIKernelRaw: " << t.getLast() << std::endl;
+
+
 
+
+  // compute the kernel matrix multiplication exactly
   NICE::IntersectionKernelFunction<double> hikSlow;
 
   // tic
   time_t  slow_start = clock();
-  std::vector<std::vector<double> > dataMatrix_transposed (dataMatrix);
-  transposeVectorOfVectors(dataMatrix_transposed);
   NICE::Matrix K (hikSlow.computeKernelMatrix(dataMatrix_transposed, noise));
   //toc
   float time_slowComputation = (float) (clock() - slow_start);
@@ -190,9 +206,10 @@ void TestFastHIK::testKernelMultiplication()
   Vector alpha_slow = K*y;
 
   if (b_debug)
-    std::cerr << "Sparse multiplication [alpha, alpha_slow]: " << std::endl <<  alpha << std::endl << alpha_slow << std::endl << std::endl;
+    std::cerr << "Sparse multiplication [alpha, alpha_slow, alpha_raw]: " << std::endl <<  alpha << std::endl << alpha_slow << std::endl << alpha_raw << std::endl << std::endl;
 
   CPPUNIT_ASSERT_DOUBLES_EQUAL((alpha-alpha_slow).normL1(), 0.0, 1e-8);
+  CPPUNIT_ASSERT_DOUBLES_EQUAL((alpha_raw-alpha_slow).normL1(), 0.0, 1e-8);
 
   // test the case, where we first transform and then use the multiply stuff
   NICE::GeneralizedIntersectionKernelFunction<double> ghikSlow ( 1.2 );
@@ -291,7 +308,6 @@ void TestFastHIK::testKernelMultiplicationFast()
   NICE::Matrix gK ( ghikSlow.computeKernelMatrix(dataMatrix_transposed, noise) );
   pf->parameters()[0] = 1.2;
   fmk.applyFunctionToFeatureMatrix( pf );
-//   pf->applyFunctionToFeatureMatrix ( fmk.featureMatrix() );
 
   Vector galphaFast;
   gmkFast.multiply ( galphaFast, y );
@@ -335,7 +351,7 @@ void TestFastHIK::testKernelSum()
       }
   }
 
-  if ( verbose ) {
+  if ( b_debug ) {
     cerr << "data matrix: " << endl;
     printMatrix ( dataMatrix );
     cerr << endl;
@@ -414,7 +430,7 @@ void TestFastHIK::testKernelSumFast()
     dataMatrix.push_back(v);
   }
 
-  if ( verbose ) {
+  if ( b_debug ) {
     cerr << "data matrix: " << endl;
     printMatrix ( dataMatrix );
     cerr << endl;
@@ -423,7 +439,7 @@ void TestFastHIK::testKernelSumFast()
   double noise = 1.0;
   FastMinKernel fmk ( dataMatrix, noise );
   Vector alpha = Vector::UniformRandom( n, 0.0, 1.0, 0 );
-  if ( verbose )
+  if ( b_debug )
     std::cerr << "alpha = " << alpha << endl;
 
   // generate xstar
@@ -441,7 +457,7 @@ void TestFastHIK::testKernelSumFast()
   for ( uint i = 0 ; i < d; i++ )
     xstar_stl[i] = xstar[i];
 
-  if ( verbose )
+  if ( b_debug )
     cerr << "xstar = " << xstar << endl;
 
   for ( double gamma = 1.0 ; gamma < 2.0; gamma += 0.5 )
@@ -460,15 +476,16 @@ void TestFastHIK::testKernelSumFast()
       std::cerr << "fmk.hik_prepare_alpha_multiplications ( alpha, A, B ) " << std::endl;
     fmk.hik_prepare_alpha_multiplications ( alpha, A, B );
 
-    if (verbose)
+    if (b_debug)
       //std::cerr << "double *Tlookup = fmk.hik_prepare_alpha_multiplications_fast( A, B, q )" << std::endl;
       std::cerr << "double *Tlookup = fmk.hik_prepare_alpha_multiplications_fast_alltogether( alpha, q, &pf )" << std::endl;
+
     double *TlookupOld = fmk.hik_prepare_alpha_multiplications_fast( A, B, q, &pf );
     double *TlookupNew = fmk.hikPrepareLookupTable( alpha, q, &pf );
 
     int maxAcces(numBins*d);
 
-    if (verbose)
+    if (b_debug)
     {
       std::cerr << "TlookupOld:  " << std::endl;
       for (int i = 0; i < maxAcces; i++)
@@ -510,7 +527,7 @@ void TestFastHIK::testKernelSumFast()
     for ( uint i = 0 ; i < n; i++ )
       beta_slow += kstar_stl[i] * alpha[i];
 
-    if (verbose)
+    if (b_debug)
       std::cerr << "beta_slow: " << beta_slow << std::endl << "beta_fast: " << beta_fast << std::endl << "beta_fast_sparse: " << beta_fast_sparse << std::endl << "betaSparse: " << betaSparse<< std::endl;
 
     // clean-up
@@ -556,7 +573,7 @@ void TestFastHIK::testLUTUpdate()
     dataMatrix.push_back(v);
   }
 
-  if ( verbose ) {
+  if ( b_debug ) {
     cerr << "data matrix: " << endl;
     printMatrix ( dataMatrix );
     cerr << endl;
@@ -695,7 +712,7 @@ void TestFastHIK::testLinSolve()
     dataMatrix.push_back(v);
   }
 
-  if ( verbose ) {
+  if ( b_debug ) {
     std::cerr << "data matrix: " << std::endl;
     printMatrix ( dataMatrix );
     std::cerr << std::endl;
@@ -793,7 +810,7 @@ void TestFastHIK::testKernelVector()
   std::vector<double> dim2; dim2.push_back(0.3);dim2.push_back(0.6);dim2.push_back(1.0);dim2.push_back(0.4);dim2.push_back(0.3); dataMatrix.push_back(dim2);
   std::vector<double> dim3; dim3.push_back(0.5);dim3.push_back(0.3);dim3.push_back(0.0);dim3.push_back(0.6);dim3.push_back(0.3); dataMatrix.push_back(dim3);
 
-  if ( verbose ) {
+  if ( b_debug ) {
     std::cerr << "data matrix: " << std::endl;
     printMatrix ( dataMatrix );
     std::cerr << endl;