corrected merge conflict

Alexander Freytag, 9 years ago
commit 2d9d1b70ba
2 files changed, 150 insertions, 114 deletions
  1. GMHIKernelRaw.cpp        (+68 −58)
  2. GPHIKRawClassifier.cpp   (+82 −56)

GMHIKernelRaw.cpp  (+68 −58)

@@ -198,10 +198,13 @@ void GMHIKernelRaw::copyTableAorB(double **src, double **dst) const
     for (uint i = 0; i < this->num_dimension; i++)
     {
         uint nnz = this->nnz_per_dimension[i];
-        if (nnz>0) {
+        if (nnz>0)
+        {
             for (uint j = 0; j < nnz; j++)
                 dst[i][j] = src[i][j];
-        } else {
+        }
+        else
+        {
             dst[i] = NULL;
         }
     }
@@ -211,7 +214,10 @@ void GMHIKernelRaw::copyTableT(double *_src, double *_dst) const
 {
   double * p_src = _src;
   double * p_dst = _dst;
-  for ( int i = 0; i < this->num_dimension * this->q->getNumberOfBins(); i++, p_src++, p_dst++ )
+  for ( int i = 0;
+        i < this->num_dimension * this->q->getNumberOfBins();
+        i++, p_src++, p_dst++
+      )
   {
     *p_dst = *p_src;
   }
@@ -222,25 +228,28 @@ void GMHIKernelRaw::updateTablesAandB ( const NICE::Vector _x ) const
     // start the actual computations of A, B, and optionally T
     for (uint dim = 0; dim < this->num_dimension; dim++)
     {
-      double alpha_sum = 0.0;
+      double alpha_sum         = 0.0;
       double alpha_times_x_sum = 0.0;
-      uint nnz = nnz_per_dimension[dim];
+      uint nnz                 = nnz_per_dimension[dim];

       //////////
       // loop through all elements in sorted order
       sparseVectorElement *training_values_in_dim = examples_raw[dim];
-      for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
+      for ( uint cntNonzeroFeat = 0;
+            cntNonzeroFeat < nnz;
+            cntNonzeroFeat++, training_values_in_dim++
+          )
       {
         // index of the feature
-        int index = training_values_in_dim->example_index;
+        int index   = training_values_in_dim->example_index;
         // element of the feature
         double elem = training_values_in_dim->value;

         alpha_times_x_sum += _x[index] * elem;
-        this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
-
-        alpha_sum += _x[index];
+        alpha_sum         += _x[index];
+
+        this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
         this->table_B[dim][cntNonzeroFeat] = alpha_sum;
       }
     }
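
Note on the reordered loop body: table_A and table_B are per-dimension prefix sums over the training examples sorted ascending by feature value; A accumulates alpha_l * x_l, B accumulates alpha_l. A minimal standalone sketch of the same construction, using plain std::vector instead of the NICE types (Elem and buildTablesAB are illustrative names, not library API):

    #include <cstddef>
    #include <vector>

    struct Elem { int example_index; double value; };   // mirrors sparseVectorElement

    // One dimension, entries sorted ascending by value:
    //   A[k] = sum_{l<=k} alpha[idx_l] * x_l   (weighted prefix sum)
    //   B[k] = sum_{l<=k} alpha[idx_l]         (plain prefix sum)
    void buildTablesAB ( const std::vector<Elem>   & sorted,
                         const std::vector<double> & alpha,
                         std::vector<double>       & A,
                         std::vector<double>       & B )
    {
        A.resize ( sorted.size() );
        B.resize ( sorted.size() );
        double alpha_sum = 0.0, alpha_times_x_sum = 0.0;
        for ( std::size_t k = 0; k < sorted.size(); k++ )
        {
            alpha_times_x_sum += alpha[ sorted[k].example_index ] * sorted[k].value;
            alpha_sum         += alpha[ sorted[k].example_index ];
            A[k] = alpha_times_x_sum;
            B[k] = alpha_sum;
        }
    }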
@@ -290,57 +299,58 @@ void GMHIKernelRaw::updateTableT ( const NICE::Vector _x ) const
         int indexElem = 0;
         // element of the feature
         double elem = i->value;
-
-        for (uint idxProto = 0; idxProto < hmax; idxProto++) // previously j
+
+        idxProtoElem = this->q->quantize ( elem, dim );
+
+        uint idxProto;
+        double * itProtoVal = prototypes + dim*hmax;
+        double * itT = this->table_T + dim*hmax;
+
+        // special case 1:
+        // loop over all prototypes smaller than the smallest quantized example in this dimension
+        for ( idxProto = 0; idxProto < idxProtoElem; idxProto++, itProtoVal++, itT++) // idxProto previously j
         {
-          double fvalProto = prototypes[ dim*hmax + idxProto ];
-          double t;
-
-          idxProtoElem = this->q->quantize ( elem, dim );
+          // current prototype is smaller than all known examples
+          // -> resulting value = fval * sum_l=1^n alpha_l
+          (*itT) = (*itProtoVal) * ( this->table_B[ dim ][ nnz-1 ] );
+        }//for-loop over prototypes -- special case 1

-          if (  (indexElem == 0) && (idxProto < idxProtoElem) )
-          {
-            // current prototype is smaller than everything else
-            // resulting value = fval * sum_l=1^n alpha_l
-            t = fvalProto*( this->table_B[ dim ][ nnz-1 ] );
-          }
-          else
-          {
-            //move to next example, which is smaller than the current prototype (if necessary)
+        // standard case: prototypes larger than the smallest element, but smaller than the largest one in the current dimension
+        for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
+        {
+            //move to next example, which is smaller than the current prototype after quantization
             // pay attention to not loop over the number of non-zero elements
-               while ( (idxProto >= idxProtoElem) && ( indexElem < ( nnz - 1 ) ) ) //(this->ui_n-1-nrZeroIndices)) )
-               {
-                 indexElem++;
-                 iPredecessor = i;
-                 i++;
-
-                 if ( i->value !=  iPredecessor->value )
-                 {
-                   idxProtoElem = this->q->quantize ( i->value, dim );
-                 }
-               }
-               // compute current element in the lookup table and keep in mind that
-               // indexElem is the next element and not the previous one
-
-               if ( (idxProto >= idxProtoElem) && ( indexElem==( nnz-1 ) ) )
-               {
-                 // the current prototype is equal to or larger than the largest training example in this dimension
-                 // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than j!
-                 t = table_A[ dim ][ indexElem ];
-               }
-               else
-               {
-                 // standard case
-                 t = table_A[ dim ][ indexElem-1 ] + fvalProto*( table_B[ dim ][ nnz-1 ] - table_B[ dim ][ indexElem-1 ] );
-               }
-
-           }
-
-           this->table_T[ dim*hmax + idxProto ] = t;
-        }//for-loop over prototypes
+            while ( (idxProto >= idxProtoElem) && ( indexElem < ( nnz - 1 ) ) ) //(this->ui_n-1-nrZeroIndices)) )
+            {
+              indexElem++;
+              iPredecessor = i;
+              i++;
+
+              // only quantize if value changed
+              if ( i->value !=  iPredecessor->value )
+              {
+                idxProtoElem = this->q->quantize ( i->value, dim );
+              }
+            }
+
+            // did we loop past the largest element in this dimension?
+            if ( indexElem==( nnz-1 ) )
+            {
+              break;
+            }
+
+            (*itT) = table_A[ dim ][ indexElem-1 ] + (*itProtoVal)*( table_B[ dim ][ nnz-1 ] - table_B[ dim ][ indexElem-1 ] );
+        }//for-loop over prototypes -- standard case
+
+        // special case 2:
+        // the current prototype is equal to or larger than the largest training example in this dimension
+        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the remaining prototypes!
+        for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
+        {
+          (*itT) = table_A[ dim ][ indexElem ];
+        }//for-loop over prototypes -- special case 2
+
     }//for-loop over dimensions
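
For reference, the three loops above fill table_T with the histogram-intersection sum in closed form. With v the prototype value and r the number of training values in this dimension lying below v after quantization, the entry is (a sketch of the underlying identity, in the notation of the tables above):

    \sum_{l=1}^{nnz} \alpha_l \min(x_l, v)
      = \underbrace{\sum_{x_l \le v} \alpha_l x_l}_{A[\mathrm{dim}][r-1]}
      + v \cdot \underbrace{\sum_{x_l > v} \alpha_l}_{B[\mathrm{dim}][nnz-1] - B[\mathrm{dim}][r-1]}

Special case 1 is r = 0 (only the v-weighted term survives, giving v * B[dim][nnz-1]); special case 2 is r = nnz (the second term vanishes, leaving A[dim][nnz-1]).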
 
 
 
 

GPHIKRawClassifier.cpp  (+82 −56)

@@ -90,15 +90,17 @@ void GPHIKRawClassifier::clearSetsOfTablesT( )
 /////////////////////////////////////////////////////
 GPHIKRawClassifier::GPHIKRawClassifier( )
 {
-  this->b_isTrained   = false;
-  this->confSection   = "";
+  this->b_isTrained       = false;
+  this->confSection       = "";

   this->nnz_per_dimension = NULL;
-  this->num_examples  = 0;
-  this->num_dimension = 0;
+  this->num_examples      = 0;
+  this->num_dimension     = 0;
+
+  this->solver            = NULL;
+  this->q                 = NULL;
+  this->gm                = NULL;

-  this->q             = NULL;
-  this->gm            = NULL;

   // in order to be sure that all necessary variables are set up with default values, we
@@ -116,15 +118,16 @@ GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
   // same code as in empty constructor - duplication can be avoided with C++11 allowing for constructor delegation
   ///////////

-  this->b_isTrained = false;
-  this->confSection = "";
+  this->b_isTrained       = false;
+  this->confSection       = "";

   this->nnz_per_dimension = NULL;
-  this->num_examples  = 0;
-  this->num_dimension = 0;
+  this->num_examples      = 0;
+  this->num_dimension     = 0;

-  this->q = NULL;
-  this->gm = NULL;
+  this->solver            = NULL;
+  this->q                 = NULL;
+  this->gm                = NULL;

   ///////////
   // here comes the new code part different from the empty constructor
@@ -292,8 +295,8 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,

           for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++ )
           {
-            uint dim = i->first;
-            double v = i->second;
+            uint dim  = i->first;
+            double v  = i->second;
             uint qBin = this->q->quantize( v, dim );

             beta += T[dim * this->q->getNumberOfBins() + qBin];
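
With table_T filled per (dimension, bin), scoring a sparse test vector is one table access per nonzero dimension, as in the loop above. A self-contained sketch of this lookup, where uniformQuantize is an illustrative stand-in for this->q->quantize (assuming a uniform quantizer on [0,1]; not the library API):

    #include <map>

    // Illustrative stand-in for this->q->quantize: uniform bins on [0,1].
    unsigned uniformQuantize ( double v, unsigned numBins )
    {
        if ( v >= 1.0 ) return numBins - 1;
        if ( v <= 0.0 ) return 0;
        return static_cast<unsigned> ( v * numBins );
    }

    // score(x*) = sum over nonzero dims of T[ dim * numBins + bin(x*_dim) ]
    double scoreQuantized ( const std::map<unsigned, double> & xstar,
                            const double * T, unsigned numBins )
    {
        double beta = 0.0;
        for ( std::map<unsigned, double>::const_iterator i = xstar.begin();
              i != xstar.end(); ++i )
            beta += T[ i->first * numBins + uniformQuantize ( i->second, numBins ) ];
        return beta;
    }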
@@ -312,8 +315,7 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
           uint classno = i->first;
           maxClassNo   = std::max ( maxClassNo, classno );
           double beta  = 0;
-
-          GMHIKernelRaw::sparseVectorElement **dataMatrix = gm->getDataMatrix();
+          GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();

           const PrecomputedType & A = i->second;
           std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
@@ -344,26 +346,49 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,

             GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
             position = distance ( dataMatrix[dim], it );
-            // add zero elements
-            if ( fval_element.value > 0.0 )
-                position += nz;
+
+//             /*// add zero elements
+//             if ( fval_element.value > 0.0 )
+//                 position += nz;*/

             bool posIsZero ( position == 0 );
-            if ( !posIsZero )
-                position--;
-
-            double firstPart = 0.0;
-            if ( !posIsZero && ((position-nz) < this->num_examples) )
-              firstPart = (A[dim][position-nz]);
-
-            double secondPart( B[dim][this->num_examples-1-nz]);
-            if ( !posIsZero && (position >= nz) )
-                secondPart -= B[dim][position-nz];
-
-            // but apply using the transformed one
-            beta += firstPart + secondPart* fval;
+
+            // special case 1:
+            // new example is smaller than all known examples
+            // -> resulting value = fval * sum_l=1^n alpha_l
+            if ( position == 0 )
+            {
+              beta += fval * B[ dim ][ nnz - 1 ];
+            }
+            // special case 2:
+            // new example is equal to or larger than the largest training example in this dimension
+            // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the new example!
+            else if ( position == nnz )
+            {
+              beta += A[ dim ][ nnz - 1 ];
+            }
+            // standard case: new example is larger than the smallest element, but smaller than the largest one in the current dimension
+            else
+            {
+              beta += A[ dim ][ position - 1 ] + fval * B[ dim ][ position - 1 ];
+            }
+
+//             // correct upper bound to correct position, only possible if new example is not the smallest value in this dimension
+//             if ( !posIsZero )
+//                 position--;
+//
+//             double firstPart = 0.0;
+//             if ( !posIsZero  )
+//               firstPart = ( A[ dim ][ position ] );
+//
+//             double secondPart( B[ dim ][ this->num_examples-1-nz ]);
+//             if ( !posIsZero && (position >= nz) )
+//                 secondPart -= B[dim][ position ];
+//
+//             // but apply using the transformed one
+//             beta += firstPart + secondPart* fval;
           }//for-loop over dimensions of test input

           _scores[ classno ] = beta;
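
The rewritten branch mirrors the three cases of updateTableT, with position = number of stored training values not larger than fval (via upper_bound). One point worth double-checking against updateTableT: the standard case there adds fval * ( B[dim][nnz-1] - B[dim][position-1] ), whereas the line above adds fval * B[dim][position-1]. The sketch below follows the updateTableT variant; minKernelSum and sortedValues are illustrative names, not library API:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Case analysis for sum_l alpha_l * min(x_l, fval) in one dimension;
    // sortedValues stands in for the ascending dataMatrix[dim],
    // A and B for the precomputed prefix-sum tables.
    double minKernelSum ( double fval,
                          const std::vector<double> & sortedValues,
                          const std::vector<double> & A,
                          const std::vector<double> & B )
    {
        std::size_t nnz = sortedValues.size();
        // position = number of training values <= fval
        std::size_t position =
            std::upper_bound ( sortedValues.begin(), sortedValues.end(), fval )
            - sortedValues.begin();

        if ( position == 0 )    // special case 1: fval below all training values
            return fval * B[ nnz - 1 ];
        if ( position == nnz )  // special case 2: fval >= all training values
            return A[ nnz - 1 ];
        // standard case, as in updateTableT
        return A[ position - 1 ] + fval * ( B[ nnz - 1 ] - B[ position - 1 ] );
    }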
@@ -415,9 +440,11 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     uint current_class = *j;
     Vector labels_binary ( _labels.size() );
     for ( uint i = 0; i < _labels.size(); i++ )
+    {
         labels_binary[i] = ( _labels[i] == current_class ) ? 1.0 : -1.0;
+    }

-    binLabels.insert ( pair<uint, NICE::Vector>( current_class, labels_binary) );
+    binLabels.insert ( std::pair<uint, NICE::Vector>( current_class, labels_binary) );
   }

   // handle special binary case
@@ -432,8 +459,8 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
 }

 void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
-                              std::map<uint, NICE::Vector> & _binLabels
-                            )
+                                 std::map<uint, NICE::Vector> & _binLabels
+                               )
 {
   // security-check: examples and labels have to be of same size
   for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
@@ -459,12 +486,12 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>

   // sort examples in each dimension and "transpose" the feature matrix
   // set up the GenericMatrix interface
-  if (gm != NULL)
-    delete gm;
+  if ( this->gm != NULL )
+    delete this->gm;

-  gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
-  this->nnz_per_dimension = gm->getNNZPerDimension();
-  this->num_dimension     = gm->getNumberOfDimensions();
+  this->gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
+  this->nnz_per_dimension = this->gm->getNNZPerDimension();
+  this->num_dimension     = this->gm->getNumberOfDimensions();

   // compute largest eigenvalue of our kernel matrix
@@ -473,18 +500,19 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   NICE::Vector eigenMax;
   NICE::Matrix eigenMaxV;
   // for reproducibility during debugging
+  //FIXME
   srand ( 0 );
   srand48 ( 0 );
   NICE::EigValues * eig = new EVArnoldi ( false /* verbose flag */,
-                                        10 /*_maxiterations*/
-                                      );
+                                          10 /*_maxiterations*/
+                                        );
   eig->getEigenvalues( *gm, eigenMax, eigenMaxV, 1 /*rank*/ );
   delete eig;

   // set simple jacobi pre-conditioning
   NICE::Vector diagonalElements;
-  gm->getDiagonalElements ( diagonalElements );
-  solver->setJacobiPreconditioner ( diagonalElements );
+  this->gm->getDiagonalElements ( diagonalElements );
+  this->solver->setJacobiPreconditioner ( diagonalElements );

   // solve linear equations for each class
   // be careful when parallelising this!
@@ -513,22 +541,22 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     */
     alpha = (y * (1.0 / eigenMax[0]) );

-    solver->solveLin( *gm, y, alpha );
+    this->solver->solveLin( *gm, y, alpha );

     // get lookup tables, A, B, etc. and store them
-    gm->updateTablesAandB( alpha );
-    double **A = gm->getTableA();
-    double **B = gm->getTableB();
+    this->gm->updateTablesAandB( alpha );
+    double **A = this->gm->getTableA();
+    double **B = this->gm->getTableB();

-    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, A ) );
-    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, B ) );
+    this->precomputedA.insert ( std::pair<uint, PrecomputedType> ( classno, A ) );
+    this->precomputedB.insert ( std::pair<uint, PrecomputedType> ( classno, B ) );

     // Quantization for classification?
     if ( this->q != NULL )
     {
-      gm->updateTableT( alpha );
-      double *T = gm->getTableT ( );
-      precomputedT.insert( pair<uint, double * > ( classno, T ) );
+      this->gm->updateTableT( alpha );
+      double *T = this->gm->getTableT ( );
+      this->precomputedT.insert( std::pair<uint, double * > ( classno, T ) );

     }
   }
@@ -553,5 +581,3 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>


 }
-
-