9 năm trước cách đây · 6b50a4117d
--- a/FastMinKernel.cpp
+++ b/FastMinKernel.cpp
@@ -147,6 +147,9 @@ void FastMinKernel::hik_prepare_alpha_multiplications(const NICE::Vector & _alph
 
				                                                       NICE::VVector & _A,
			
 
				                                                       NICE::VVector & _B) const
			
 
				 {
			
 
				+//  //debug
			
 
				+//    std::cerr << "alpha: " << _alpha << std::endl;
			
 
				+
			
 
				   _A.resize( this->ui_d );
			
 
				   _B.resize( this->ui_d );
			
 
				 
			
@@ -193,29 +196,29 @@ void FastMinKernel::hik_prepare_alpha_multiplications(const NICE::Vector & _alph
 
				 
			
 
				   for (uint dim = 0; dim < this->ui_d; dim++)
			
 
				   {
			
 
				-    double alpha_sum(0.0);
			
 
				-    double alpha_times_x_sum(0.0);
			
 
				-
			
 
				-    uint cntNonzeroFeat(0);
			
 
				+    double alpha_sum         = 0.0;
			
 
				+    double alpha_times_x_sum = 0.0;
			
 
				 
			
 
				-    const multimap< double, SortedVectorSparse<double>::dataelement> & nonzeroElements = this->X_sorted.getFeatureValues(dim).nonzeroElements();
			
 
				+    //////////
			
 
				     // loop through all elements in sorted order
			
 
				-    for ( SortedVectorSparse<double>::const_elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
			
 
				+    const multimap< double, SortedVectorSparse<double>::dataelement> & nonzeroElements = this->X_sorted.getFeatureValues(dim).nonzeroElements();
			
 
				+    uint cntNonzeroFeat = 0;
			
 
				+    for ( SortedVectorSparse<double>::const_elementpointer i = nonzeroElements.begin();
			
 
				+          i != nonzeroElements.end();
			
 
				+          i++, cntNonzeroFeat++ )
			
 
				     {
			
 
				       const SortedVectorSparse<double>::dataelement & de = i->second;
			
 
				 
			
 
				       // index of the feature
			
 
				-      int index = de.first;
			
 
				-      // transformed element of the feature
			
 
				-      //
			
 
				-      double elem( de.second );
			
 
				+      int index   = de.first;
			
 
				+      // element of the feature
			
 
				+      double elem = de.second;
			
 
				 
			
 
				       alpha_times_x_sum += _alpha[index] * elem;
			
 
				-      _A[dim][cntNonzeroFeat] = alpha_times_x_sum;
			
 
				+      alpha_sum         += _alpha[index];
			
 
				 
			
 
				-      alpha_sum += _alpha[index];
			
 
				+      _A[dim][cntNonzeroFeat] = alpha_times_x_sum;
			
 
				       _B[dim][cntNonzeroFeat] = alpha_sum;
			
 
				-      cntNonzeroFeat++;
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -232,10 +235,13 @@ double *FastMinKernel::hik_prepare_alpha_multiplications_fast(const NICE::VVecto
 
				   // number of quantization bins
			
 
				   uint hmax = _q->getNumberOfBins();
			
 
				 
			
 
				-  // store (transformed) prototypes
			
 
				-  double * prototypes   = new double [ hmax * this->ui_d ];
			
 
				-  double * p_prototypes = prototypes;
			
 
				+  double * prototypes;
			
 
				+  prototypes   = new double [ hmax * this->ui_d ];
			
 
				+
			
 
				+  double * p_prototypes;
			
 
				+  p_prototypes = prototypes;
			
 
				 
			
 
				+  // compute all prototypes to compare against lateron
			
 
				   for (uint dim = 0; dim < this->ui_d; dim++)
			
 
				   {
			
 
				     for ( uint i = 0 ; i < hmax ; i++ )
			
@@ -252,17 +258,26 @@ double *FastMinKernel::hik_prepare_alpha_multiplications_fast(const NICE::VVecto
 
				     }
			
 
				   }
			
 
				 
			
 
				-  // creating the lookup table as pure C, which might be beneficial
			
 
				-  // for fast evaluation
			
 
				+  // allocate memory for LUT T
			
 
				   double *Tlookup = new double [ hmax * this->ui_d ];
			
 
				 
			
 
				-
			
 
				-  // loop through all dimensions
			
 
				+  // start the actual computation of  T
			
 
				   for ( uint dim = 0; dim < this->ui_d; dim++ )
			
 
				   {
			
 
				-    uint nrZeroIndices = this->X_sorted.getNumberOfZeroElementsPerDimension(dim);
			
 
				-    if ( nrZeroIndices == this->ui_n )
			
 
				-      continue;
			
 
				+    // nz == nrZeroIndices
			
 
				+    uint nz    = this->X_sorted.getNumberOfZeroElementsPerDimension(dim);
			
 
				+    // nnz == nrNonZeroIndices
			
 
				+    uint nnz  = this->ui_n-nz;
			
 
				+
			
 
				+    if ( nz == this->ui_n )
			
 
				+    {
			
 
				+        double * itT = Tlookup + dim*hmax;
			
 
				+        for ( uint idxProto = 0; idxProto < hmax; idxProto++, itT++ )
			
 
				+        {
			
 
				+            *itT = 0;
			
 
				+        }
			
 
				+        continue;
			
 
				+    }
			
 
				 
			
 
				     const multimap< double, SortedVectorSparse<double>::dataelement> & nonzeroElements = this->X_sorted.getFeatureValues(dim).nonzeroElements();
			
 
				 
			
@@ -270,54 +285,107 @@ double *FastMinKernel::hik_prepare_alpha_multiplications_fast(const NICE::VVecto
 
				     SortedVectorSparse<double>::const_elementpointer iPredecessor = nonzeroElements.begin();
			
 
				 
			
 
				     // index of the element, which is always bigger than the current value fval
			
 
				-    uint index = 0;
			
 
				+    int indexElem = 0;
			
 
				+    // element of the feature
			
 
				+    double elem = i->first;
			
 
				+
			
 
				     // we use the quantization of the original features! the transformed feature were
			
 
				     // already used to calculate A and B, this of course assumes monotonic functions!!!
			
 
				-    uint qBin = _q->quantize ( i->first, dim );
			
 
				-
			
 
				-    // the next loop is linear in max(hmax, n)
			
 
				-    // REMARK: this could be changed to hmax*log(n), when
			
 
				-    // we use binary search
			
 
				-
			
 
				-    for (uint j = 0; j < hmax; j++)
			
 
				+    uint idxProtoElem = _q->quantize ( elem, dim );// denotes the bin number in dim i of a quantized example, previously termed qBin
			
 
				+
			
 
				+    uint idxProto;
			
 
				+    double * itProtoVal = prototypes + dim*hmax;
			
 
				+    double * itT = Tlookup + dim*hmax;
			
 
				+
			
 
				+    // special case 1:
			
 
				+    // loop over all prototypes smaller then the smallest quantized example in this dimension
			
 
				+    for ( idxProto = 0;
			
 
				+          idxProto < idxProtoElem;
			
 
				+          idxProto++, itProtoVal++, itT++
			
 
				+        ) // idxProto previously j
			
 
				     {
			
 
				-      double fval = prototypes[ dim*hmax + j ];
			
 
				-      double t;
			
 
				+      // current prototype is smaller than all known examples
			
 
				+      // -> resulting value = fval * sum_l=1^n alpha_l
			
 
				+      (*itT) = (*itProtoVal) * ( _B[ dim ][ nnz-1 ] );
			
 
				+    }//for-loop over prototypes -- special case 1
			
 
				 
			
 
				-      if (  (index == 0) && (j < qBin) ) {
			
 
				-        // current element is smaller than everything else
			
 
				-        // resulting value = fval * sum_l=1^n alpha_l
			
 
				-        t = fval*( _B[dim][this->ui_n-1 - nrZeroIndices] );
			
 
				-      } else {
			
 
				-
			
 
				-         // move to next example, if necessary
			
 
				-        while ( (j >= qBin) && ( index < (this->ui_n-1-nrZeroIndices)) )
			
 
				+    // standard case: prototypes larger then the smallest element, but smaller then the largest one in the corrent dimension
			
 
				+    for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
			
 
				+    {
			
 
				+        //move to next example, which is smaller then the current prototype after quantization
			
 
				+        // pay attentation to not loop over the number of non-zero elements
			
 
				+        while ( (idxProto >= idxProtoElem) && ( indexElem  < ( nnz - 1)  ) ) //(this->ui_n-1-nrZeroIndices)) )
			
 
				         {
			
 
				-          index++;
			
 
				+          indexElem++;
			
 
				           iPredecessor = i;
			
 
				           i++;
			
 
				 
			
 
				+          // only quantize if value changed
			
 
				           if ( i->first !=  iPredecessor->first )
			
 
				-            qBin = _q->quantize ( i->first, dim );
			
 
				+          {
			
 
				+            idxProtoElem = _q->quantize ( i->first, dim );
			
 
				+          }
			
 
				         }
			
 
				-        // compute current element in the lookup table and keep in mind that
			
 
				-        // index is the next element and not the previous one
			
 
				-        //NOTE pay attention: this is only valid if all entries are positive! -
			
 
				-        // If not, ask whether the current feature is greater than zero. If so, subtract the nrZeroIndices, if not do not
			
 
				-        if ( (j >= qBin) && ( index==(this->ui_n-1-nrZeroIndices) ) ) {
			
 
				-          // the current element (fval) is equal or bigger to the element indexed by index
			
 
				-          // in fact, the term B[dim][this->n-1-nrZeroIndices] - B[dim][index] is equal to zero and vanishes, which is logical, since all elements are smaller than j!
			
 
				-          t = _A[dim][index];// + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index] );
			
 
				-        } else {
			
 
				-          // standard case
			
 
				-          t = _A[dim][index-1] + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index-1] );
			
 
				+
			
 
				+        // did we looped over the largest element in this dimension?
			
 
				+        if ( indexElem==( nnz-1 ) )
			
 
				+        {
			
 
				+          break;
			
 
				         }
			
 
				-      }
			
 
				 
			
 
				-      Tlookup[ dim*hmax + j ] = t;
			
 
				-    }
			
 
				-  }
			
 
				+        (*itT) = _A[ dim ][ indexElem-1 ] + (*itProtoVal)*( _B[ dim ][ nnz-1 ] - _B[ dim ][ indexElem-1 ] );
			
 
				+    }//for-loop over prototypes -- standard case
			
 
				 
			
 
				+    // special case 2:
			
 
				+    // the current prototype is equal to or larger than the largest training example in this dimension
			
 
				+    // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the remaining prototypes!
			
 
				+
			
 
				+    for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
			
 
				+    {
			
 
				+      (*itT) = _A[ dim ][ indexElem ];
			
 
				+    }//for-loop over prototypes -- special case 2
			
 
				+
			
 
				+//    for (uint j = 0; j < hmax; j++)
			
 
				+//    {
			
 
				+//      double fval = prototypes[ dim*hmax + j ];
			
 
				+//      double t;
			
 
				+
			
 
				+//      if (  (index == 0) && (j < idxProtoElem) ) {
			
 
				+//        // current element is smaller than everything else
			
 
				+//        // resulting value = fval * sum_l=1^n alpha_l
			
 
				+//        t = fval*( _B[dim][this->ui_n-1 - nrZeroIndices] );
			
 
				+//      } else {
			
 
				+
			
 
				+//         // move to next example, if necessary
			
 
				+//        while ( (j >= idxProtoElem) && ( index < (this->ui_n-1-nrZeroIndices)) )
			
 
				+//        {
			
 
				+//          index++;
			
 
				+//          iPredecessor = i;
			
 
				+//          i++;
			
 
				+
			
 
				+//          if ( i->first !=  iPredecessor->first )
			
 
				+//            idxProtoElem = _q->quantize ( i->first, dim );
			
 
				+//        }
			
 
				+//        // compute current element in the lookup table and keep in mind that
			
 
				+//        // index is the next element and not the previous one
			
 
				+//        //NOTE pay attention: this is only valid if all entries are positive! -
			
 
				+//        // If not, ask whether the current feature is greater than zero. If so, subtract the nrZeroIndices, if not do not
			
 
				+//        if ( (j >= idxProtoElem) && ( index==(this->ui_n-1-nrZeroIndices) ) ) {
			
 
				+//          // the current element (fval) is equal or bigger to the element indexed by index
			
 
				+//          // in fact, the term B[dim][this->n-1-nrZeroIndices] - B[dim][index] is equal to zero and vanishes, which is logical, since all elements are smaller than j!
			
 
				+//          t = _A[dim][index];// + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index] );
			
 
				+//        } else {
			
 
				+//          // standard case
			
 
				+//          t = _A[dim][index-1] + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index-1] );
			
 
				+//        }
			
 
				+//      }
			
 
				+
			
 
				+//      Tlookup[ dim*hmax + j ] = t;
			
 
				+//    }
			
 
				+
			
 
				+  }//for-loop over dimensions
			
 
				+
			
 
				+  // clean-up prototypes
			
 
				   delete [] prototypes;
			
 
				 
			
 
				   return Tlookup;
			
--- a/GMHIKernelRaw.cpp
+++ b/GMHIKernelRaw.cpp
@@ -290,6 +290,16 @@ void GMHIKernelRaw::updateTableT ( const NICE::Vector _x ) const
 
				     {
			
 
				       uint nnz = nnz_per_dimension[dim];
			
 
				 
			
 
				+      if ( nnz == 0 )
			
 
				+      {
			
 
				+          double * itT = this->table_T + dim*hmax;
			
 
				+          for ( uint idxProto = 0; idxProto < hmax; idxProto++, itT++ )
			
 
				+          {
			
 
				+              *itT = 0;
			
 
				+          }
			
 
				+          continue;
			
 
				+      }
			
 
				+
			
 
				         uint idxProtoElem; // denotes the bin number in dim i of a quantized example, previously termed qBin
			
 
				 
			
 
				         sparseVectorElement * i            = examples_raw[dim];
			
@@ -359,6 +369,14 @@ void GMHIKernelRaw::updateTableT ( const NICE::Vector _x ) const
 
				     {
			
 
				       delete [] prototypes;
			
 
				     }
			
 
				+
			
 
				+//    //debug
			
 
				+//    double * p_t = table_T;
			
 
				+//    for ( uint i=0; i < hmax; i++ , p_t++)
			
 
				+//    {
			
 
				+//        std::cerr << " " << *p_t;
			
 
				+//    }
			
 
				+//    std::cerr << std::endl;
			
 
				 }
			
 
				 
			
 
				 /** multiply with a vector: A*x = y */
			
--- a/GPHIKRawClassifier.cpp
+++ b/GPHIKRawClassifier.cpp
@@ -371,7 +371,7 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
 
				             // standard case: new example is larger then the smallest element, but smaller then the largest one in the corrent dimension        
			
 
				             else
			
 
				             {
			
 
				-              beta += A[ dim ][ position - 1 ] + fval * B[ dim ][ position - 1 ];
			
 
				+                beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
			
 
				             }
			
 
				             
			
 
				 //             // correct upper bound to correct position, only possible if new example is not the smallest value in this dimension
			
@@ -543,6 +543,9 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
 
				 
			
 
				     this->solver->solveLin( *gm, y, alpha );
			
 
				 
			
 
				+//    //debug
			
 
				+//      std::cerr << "alpha: " << alpha << std::endl;
			
 
				+
			
 
				     // get lookup tables, A, B, etc. and store them
			
 
				     this->gm->updateTablesAandB( alpha );
			
 
				     double **A = this->gm->getTableA();