more modifications, clean-up, consistency, etc.

Alexander Freytag 9 years ago
parent
commit
e6c6fb1682
2 changed files with 99 additions and 64 deletions
  1. +19 -10
      GMHIKernelRaw.cpp
  2. +80 -54
      GPHIKRawClassifier.cpp

+ 19 - 10
GMHIKernelRaw.cpp

@@ -198,10 +198,13 @@ void GMHIKernelRaw::copyTableAorB(double **src, double **dst) const
     for (uint i = 0; i < this->num_dimension; i++)
     {
         uint nnz = this->nnz_per_dimension[i];
-        if (nnz>0) {
+        if (nnz>0)
+        {
             for (uint j = 0; j < nnz; j++)
                 dst[i][j] = src[i][j];
-        } else {
+        }
+        else
+        {
             dst[i] = NULL;
         }
     }
@@ -211,7 +214,10 @@ void GMHIKernelRaw::copyTableT(double *_src, double *_dst) const
 {
   double * p_src = _src;
   double * p_dst = _dst;
-  for ( int i = 0; i < this->num_dimension * this->q->getNumberOfBins(); i++, p_src++, p_dst++ )
+  for ( int i = 0; 
+        i < this->num_dimension * this->q->getNumberOfBins(); 
+        i++, p_src++, p_dst++ 
+      )
   {
     *p_dst = *p_src;
   }
@@ -222,25 +228,28 @@ void GMHIKernelRaw::updateTablesAandB ( const NICE::Vector _x ) const
     // start the actual computations of A, B, and optionally T
     for (uint dim = 0; dim < this->num_dimension; dim++)
     {
-      double alpha_sum = 0.0;
+      double alpha_sum         = 0.0;
       double alpha_times_x_sum = 0.0;
-      uint nnz = nnz_per_dimension[dim];
+      uint nnz                 = nnz_per_dimension[dim];
       
 
       //////////
       // loop through all elements in sorted order
       sparseVectorElement *training_values_in_dim = examples_raw[dim];
-      for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
+      for ( uint cntNonzeroFeat = 0; 
+            cntNonzeroFeat < nnz; 
+            cntNonzeroFeat++, training_values_in_dim++ 
+          )
       {
        // index of the example
-        int index = training_values_in_dim->example_index;
+        int index   = training_values_in_dim->example_index;
         // element of the feature
         double elem = training_values_in_dim->value;
 
         alpha_times_x_sum += _x[index] * elem;
-        this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
-
-        alpha_sum += _x[index];
+        alpha_sum         += _x[index];
+        
+        this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;        
         this->table_B[dim][cntNonzeroFeat] = alpha_sum;
       }      
     }
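
For reference, a minimal standalone sketch of what updateTablesAandB computes per dimension: table A holds the running prefix sum of alpha times the sorted feature values, table B the running prefix sum of alpha alone. SparseElem and buildTablesForDim are illustrative names, not part of NICE.

    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for GMHIKernelRaw::sparseVectorElement.
    struct SparseElem {
        int    example_index; // index of the training example
        double value;         // feature value in this dimension
    };

    // Per-dimension prefix sums, mirroring the loop in updateTablesAandB:
    //   A[k] = sum_{j<=k} alpha[idx_j] * x_j
    //   B[k] = sum_{j<=k} alpha[idx_j]
    void buildTablesForDim(const std::vector<SparseElem> &sortedElems, // ascending by value
                           const std::vector<double>     &alpha,
                           std::vector<double>           &A,
                           std::vector<double>           &B)
    {
        double alpha_sum         = 0.0;
        double alpha_times_x_sum = 0.0;
        A.resize(sortedElems.size());
        B.resize(sortedElems.size());
        for (std::size_t k = 0; k < sortedElems.size(); ++k)
        {
            alpha_times_x_sum += alpha[sortedElems[k].example_index] * sortedElems[k].value;
            alpha_sum         += alpha[sortedElems[k].example_index];
            A[k] = alpha_times_x_sum;
            B[k] = alpha_sum;
        }
    }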

+ 80 - 54
GPHIKRawClassifier.cpp

@@ -90,15 +90,17 @@ void GPHIKRawClassifier::clearSetsOfTablesT( )
 /////////////////////////////////////////////////////
 GPHIKRawClassifier::GPHIKRawClassifier( )
 {
-  this->b_isTrained   = false;
-  this->confSection   = "";
+  this->b_isTrained       = false;
+  this->confSection       = "";
 
   this->nnz_per_dimension = NULL;
-  this->num_examples  = 0;
-  this->num_dimension = 0;
+  this->num_examples      = 0;
+  this->num_dimension     = 0;
+
+  this->solver            = NULL;    
+  this->q                 = NULL;
+  this->gm                = NULL;
 
-  this->q             = NULL;
-  this->gm            = NULL;
 
 
  // in order to be sure that all necessary variables are set up with default values, we
@@ -116,15 +118,16 @@ GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
  // same code as in the empty constructor - with C++11, the duplication could be avoided via constructor delegation
   ///////////
 
-  this->b_isTrained = false;
-  this->confSection = "";
+  this->b_isTrained       = false;
+  this->confSection       = "";
 
   this->nnz_per_dimension = NULL;
-  this->num_examples  = 0;
-  this->num_dimension = 0;
+  this->num_examples      = 0;
+  this->num_dimension     = 0;
 
-  this->q = NULL;
-  this->gm = NULL;
+  this->solver            = NULL;    
+  this->q                 = NULL;
+  this->gm                = NULL;
 
   ///////////
   // here comes the new code part different from the empty constructor
@@ -296,7 +299,6 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
             double v  = i->second;
             uint qBin = this->q->quantize( v, dim );
 
-            //FIXME do we have problems indexing with uints if number of bins is quite large?
             beta += T[dim * this->q->getNumberOfBins() + qBin];
           }//for-loop over dimensions of test input
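
The quantized branch above turns each dimension's contribution into a single lookup in the flattened table T of size num_dimension * numberOfBins. A hedged sketch of that indexing follows; the uniform quantizer below is an assumption for illustration, not the NICE Quantization class.

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Hypothetical uniform quantizer: maps v, assumed in [0,1], to one of numBins bins.
    inline unsigned int quantize(double v, unsigned int numBins)
    {
        int bin = static_cast<int>(v * numBins);
        return static_cast<unsigned int>(
            std::min(std::max(bin, 0), static_cast<int>(numBins) - 1));
    }

    // Score a sparse test vector ((dim, value) pairs) against the flat table T,
    // where T[dim * numBins + qBin] holds the precomputed contribution.
    double scoreWithTableT(const std::vector<std::pair<unsigned int, double> > &xstar,
                           const std::vector<double> &T,
                           unsigned int numBins)
    {
        double beta = 0.0;
        for (std::size_t i = 0; i < xstar.size(); ++i)
        {
            unsigned int dim  = xstar[i].first;
            unsigned int qBin = quantize(xstar[i].second, numBins);
            beta += T[dim * numBins + qBin]; // row-major 2-D indexing into the flat array
        }
        return beta;
    }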
 
@@ -313,7 +315,7 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
           uint classno = i->first;
           maxClassNo = std::max ( maxClassNo, classno );
           double beta = 0;
-          GMHIKernelRaw::sparseVectorElement **dataMatrix = gm->getDataMatrix();
+          GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();
 
           const PrecomputedType & A = i->second;
           std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
@@ -344,26 +346,49 @@ void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
 
             GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
             position = distance ( dataMatrix[dim], it );
-            // add zero elements
-            if ( fval_element.value > 0.0 )
-                position += nz;
+            
+//             /*// add zero elements
+//             if ( fval_element.value > 0.0 )
+//                 position += nz;*/
 
 
             bool posIsZero ( position == 0 );
-            if ( !posIsZero )
-                position--;
-
-
-            double firstPart = 0.0;
-            if ( !posIsZero && ((position-nz) < this->num_examples) )
-              firstPart = (A[dim][position-nz]);
-
-            double secondPart( B[dim][this->num_examples-1-nz]);
-            if ( !posIsZero && (position >= nz) )
-                secondPart -= B[dim][position-nz];
-
-            // but apply using the transformed one
-            beta += firstPart + secondPart* fval;
+            
+            // special case 1:
+            // new example is smaller than all known examples
+            // -> resulting value = fval * sum_l=1^n alpha_l               
+            if ( position == 0 )
+            {
+              beta += fval * B[ dim ][ nnz - 1 ];  
+            }
+            // special case 2:
+            // new example is equal to or larger than the largest training example in this dimension
+            // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ position-1 ] is equal to zero and vanishes, which is logical, since no training example in this dimension is larger than the test value
+            else if ( position == nnz )
+            {
+              beta += A[ dim ][ nnz - 1 ];
+            }
+            // standard case: new example is larger than the smallest element, but smaller than the largest one in the current dimension
+            else
+            {
+              beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
+            }
+            
+//             // correct upper bound to correct position, only possible if new example is not the smallest value in this dimension
+//             if ( !posIsZero )
+//                 position--;
+// 
+// 
+//             double firstPart = 0.0;
+//             if ( !posIsZero  )
+//               firstPart = ( A[ dim ][ position ] );
+// 
+//             double secondPart( B[ dim ][ this->num_examples-1-nz ]);
+//             if ( !posIsZero && (position >= nz) )
+//                 secondPart -= B[dim][ position ];
+// 
+//             // but apply using the transformed one
+//             beta += firstPart + secondPart* fval;
           }//for-loop over dimensions of test input
 
           _scores[ classno ] = beta;
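
To make the three cases concrete, the following standalone sketch evaluates one dimension's contribution in O(log nnz): std::upper_bound locates the test value among the sorted training values, and the prefix-sum tables A and B built as in updateTablesAandB supply the two partial sums. SparseElem, lessByValue, and betaForDim are illustrative names.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct SparseElem {
        int    example_index;
        double value;
    };

    // Comparison by feature value, for use with std::upper_bound.
    static bool lessByValue(const SparseElem &a, const SparseElem &b)
    {
        return a.value < b.value;
    }

    // Contribution of one dimension to beta, mirroring the three cases above:
    //   position == 0   : test value below all training values -> fval * B[nnz-1]
    //   position == nnz : test value at or above all of them   -> A[nnz-1]
    //   otherwise       : A[position-1] + fval * (B[nnz-1] - B[position-1])
    double betaForDim(const std::vector<SparseElem> &sorted, // ascending by value
                      const std::vector<double> &A,
                      const std::vector<double> &B,
                      double fval)
    {
        const std::size_t nnz = sorted.size();
        if (nnz == 0) return 0.0;

        SparseElem probe;
        probe.example_index = -1;
        probe.value         = fval;
        std::size_t position =
            std::upper_bound(sorted.begin(), sorted.end(), probe, lessByValue)
            - sorted.begin();

        if (position == 0)   return fval * B[nnz - 1];
        if (position == nnz) return A[nnz - 1];
        return A[position - 1] + fval * (B[nnz - 1] - B[position - 1]);
    }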
@@ -415,9 +440,11 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     uint current_class = *j;
     Vector labels_binary ( _labels.size() );
     for ( uint i = 0; i < _labels.size(); i++ )
+    {
         labels_binary[i] = ( _labels[i] == current_class ) ? 1.0 : -1.0;
+    }
 
-    binLabels.insert ( pair<uint, NICE::Vector>( current_class, labels_binary) );
+    binLabels.insert ( std::pair<uint, NICE::Vector>( current_class, labels_binary) );
   }
 
   // handle special binary case
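
The loop above builds one-vs-all targets: +1 for the current class, -1 for every other example. A minimal sketch, with std::vector<double> standing in for NICE::Vector:

    #include <cstddef>
    #include <map>
    #include <utility>
    #include <vector>

    // Build one binary (+1/-1) label vector per class, keyed by class number.
    std::map<unsigned int, std::vector<double> >
    buildBinaryLabels(const std::vector<unsigned int> &labels,
                      const std::vector<unsigned int> &classes)
    {
        std::map<unsigned int, std::vector<double> > binLabels;
        for (std::size_t c = 0; c < classes.size(); ++c)
        {
            std::vector<double> y(labels.size());
            for (std::size_t i = 0; i < labels.size(); ++i)
                y[i] = (labels[i] == classes[c]) ? 1.0 : -1.0;
            binLabels.insert(std::make_pair(classes[c], y));
        }
        return binLabels;
    }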
@@ -432,8 +459,8 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
 }
 
 void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
-                              std::map<uint, NICE::Vector> & _binLabels
-                            )
+                                 std::map<uint, NICE::Vector> & _binLabels
+                               )
 {
   // security-check: examples and labels have to be of same size
   for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
@@ -459,12 +486,12 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
 
   // sort examples in each dimension and "transpose" the feature matrix
   // set up the GenericMatrix interface
-  if (gm != NULL)
-    delete gm;
+  if ( this->gm != NULL )
+    delete this->gm;
 
-  gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
-  this->nnz_per_dimension = gm->getNNZPerDimension();
-  this->num_dimension     = gm->getNumberOfDimensions();
+  this->gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
+  this->nnz_per_dimension = this->gm->getNNZPerDimension();
+  this->num_dimension     = this->gm->getNumberOfDimensions();
 
 
   // compute largest eigenvalue of our kernel matrix
@@ -473,18 +500,19 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
   NICE::Vector eigenMax;
   NICE::Matrix eigenMaxV;
  // for reproducibility during debugging
+  //FIXME
   srand ( 0 );
   srand48 ( 0 );
   NICE::EigValues * eig = new EVArnoldi ( false /* verbose flag */,
-                                        10 /*_maxiterations*/
-                                      );
+                                          10 /*_maxiterations*/
+                                        );
   eig->getEigenvalues( *gm, eigenMax, eigenMaxV, 1 /*rank*/ );
   delete eig;
 
   // set simple jacobi pre-conditioning
   NICE::Vector diagonalElements;
-  gm->getDiagonalElements ( diagonalElements );
-  solver->setJacobiPreconditioner ( diagonalElements );
+  this->gm->getDiagonalElements ( diagonalElements );
+  this->solver->setJacobiPreconditioner ( diagonalElements );
 
   // solve linear equations for each class
  // be careful when parallelising this!
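
Jacobi preconditioning as set up here amounts to rescaling a residual by the inverse of the kernel diagonal. A sketch of applying such a diagonal preconditioner (illustrative only, not the NICE solver API):

    #include <cstddef>
    #include <vector>

    // Apply a Jacobi (diagonal) preconditioner: z = D^{-1} * r, where diag
    // holds the kernel diagonal (strictly positive for HIK plus noise).
    void applyJacobi(const std::vector<double> &diag,
                     const std::vector<double> &r,
                     std::vector<double>       &z)
    {
        z.resize(r.size());
        for (std::size_t i = 0; i < r.size(); ++i)
            z[i] = r[i] / diag[i];
    }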
@@ -513,22 +541,22 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
     */
     alpha = (y * (1.0 / eigenMax[0]) );
 
-    solver->solveLin( *gm, y, alpha );
+    this->solver->solveLin( *gm, y, alpha );
 
     // get lookup tables, A, B, etc. and store them
-    gm->updateTablesAandB( alpha );
-    double **A = gm->getTableA();
-    double **B = gm->getTableB();
+    this->gm->updateTablesAandB( alpha );
+    double **A = this->gm->getTableA();
+    double **B = this->gm->getTableB();
 
-    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, A ) );
-    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, B ) );
+    this->precomputedA.insert ( std::pair<uint, PrecomputedType> ( classno, A ) );
+    this->precomputedB.insert ( std::pair<uint, PrecomputedType> ( classno, B ) );
 
     // Quantization for classification?
     if ( this->q != NULL )
     {
-      gm->updateTableT( alpha );
-      double *T = gm->getTableT ( );
-      precomputedT.insert( pair<uint, double * > ( classno, T ) );
+      this->gm->updateTableT( alpha );
+      double *T = this->gm->getTableT ( );
+      this->precomputedT.insert( std::pair<uint, double * > ( classno, T ) );
 
     }
   }
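
The per-class solve above warm-starts the iterative solver at alpha = y / lambda_max, where lambda_max is the largest kernel eigenvalue obtained via EVArnoldi. A small sketch of that initialization (initialAlpha is an illustrative name):

    #include <cstddef>
    #include <vector>

    // Cheap warm start for solving (K + noise*I) * alpha = y iteratively:
    // if y were aligned with the dominant eigenvector, K * (y / lambda_max)
    // would already equal y, so y / lambda_max is a sensible starting point.
    std::vector<double> initialAlpha(const std::vector<double> &y, double lambdaMax)
    {
        std::vector<double> alpha(y.size());
        for (std::size_t i = 0; i < y.size(); ++i)
            alpha[i] = y[i] / lambdaMax;
        return alpha;
    }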
@@ -553,5 +581,3 @@ void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *>
 
 
 }
-
-