
minor corrections for IL when going from OCC to binary and from binary to MC

Alexander Freytag 11 years ago
parent
commit
95ba8b79ec

+ 53 - 11
FMKGPHyperparameterOptimization.cpp

@@ -92,6 +92,22 @@ void FMKGPHyperparameterOptimization::updateAfterIncrement (
     // alpha = (binaryLabels[classCnt] * (1.0 / eigenmax[0]) );
     double factor ( 1.0 / this->eigenMax[0] );
     
+    // if we came from an OCC setting and are going to a binary setting,
+    // we have to be aware that the 'positive' label is always the one associated with the previous alpha;
+    // otherwise, we would get into trouble when going to more classes...
+    // note that this is needed since knownClasses is a map, so we lose the order of insertion
+    if ( ( this->previousAlphas.size () == 1 ) && ( this->knownClasses.size () == 2 ) )
+    {
+      // if the first class has a larger value than the currently added second class, we have to 
+      // switch the index, which unfortunately is not so easy with a map
+      if ( this->previousAlphas.begin()->first == this->binaryLabelNegative )
+      {
+        this->previousAlphas.insert( std::pair<int, NICE::Vector> ( this->binaryLabelPositive, this->previousAlphas.begin()->second) );
+        this->previousAlphas.erase( this->binaryLabelNegative );
+      }
+    }
+    
+    
     std::map<int, NICE::Vector>::const_iterator binaryLabelsIt = binaryLabels.begin();
     
     for ( std::map<int, NICE::Vector>::iterator prevAlphaIt = this->previousAlphas.begin();
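
A note on the hunk above: std::map offers no way to rename a key in place, so moving the previous alpha from the negative to the positive label uses the standard insert-then-erase idiom. A minimal, self-contained sketch of that idiom (not part of the commit), with std::vector<double> standing in for NICE::Vector and hypothetical label values:

#include <cassert>
#include <map>
#include <vector>

int main()
{
    // stand-in for previousAlphas: the single alpha vector from the OCC phase,
    // currently stored under the label that just became the negative class
    std::map<int, std::vector<double> > previousAlphas;
    const int binaryLabelNegative = 0;  // hypothetical label values
    const int binaryLabelPositive = 2;

    previousAlphas[ binaryLabelNegative ] = std::vector<double>( 3, 1.0 );

    // re-key: copy the value under the new key, then erase the old entry
    if ( previousAlphas.begin()->first == binaryLabelNegative )
    {
        previousAlphas.insert( std::make_pair( binaryLabelPositive,
                                               previousAlphas.begin()->second ) );
        previousAlphas.erase( binaryLabelNegative );
    }

    assert( previousAlphas.size() == 1 );
    assert( previousAlphas.begin()->first == binaryLabelPositive );
    return 0;
}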
@@ -847,7 +863,7 @@ int FMKGPHyperparameterOptimization::prepareBinaryLabels ( std::map<int, NICE::V
   if ( nrOfClasses > 2 )
   {
     //resize every labelVector and set all entries to -1.0
-    for ( set<int>::const_iterator k = myClasses.begin(); k != myClasses.end(); k++ )
+    for ( std::set<int>::const_iterator k = myClasses.begin(); k != myClasses.end(); k++ )
     {
       binaryLabels[ *k ].resize ( y.size() );
       binaryLabels[ *k ].set ( -1.0 );
@@ -860,12 +876,12 @@ int FMKGPHyperparameterOptimization::prepareBinaryLabels ( std::map<int, NICE::V
   }
   else if ( nrOfClasses == 2 )
   {
-    //binary setting -- prepare two binary label vectors with opposite signs
+    //binary setting -- prepare a binary label vector
     NICE::Vector yb ( y );
 
-    binaryLabelNegative = *(myClasses.begin());
+    this->binaryLabelNegative = *(myClasses.begin());
     std::set<int>::const_iterator classIt = myClasses.begin(); classIt++;
-    binaryLabelPositive = *classIt;
+    this->binaryLabelPositive = *classIt;
     
     if ( verbose )
       std::cerr << "positiveClass : " << binaryLabelPositive << " negativeClass: " << binaryLabelNegative << std::endl;
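
The assignment above means the positive/negative roles are fixed by std::set's ascending key order, not by which class was seen first. A small sketch (labels 0 and 2 mirror the non-consecutive labels used in the test below):

#include <iostream>
#include <set>

int main()
{
    // class labels arrive in arbitrary order; std::set keeps them sorted ascending
    std::set<int> myClasses;
    myClasses.insert( 2 );  // inserted first, but not the smallest
    myClasses.insert( 0 );

    // the smallest label becomes the negative class, the next one the positive class
    const int binaryLabelNegative = *( myClasses.begin() );
    std::set<int>::const_iterator classIt = myClasses.begin();
    classIt++;
    const int binaryLabelPositive = *classIt;

    std::cout << "positiveClass : " << binaryLabelPositive
              << " negativeClass: " << binaryLabelNegative << std::endl;
    // prints: positiveClass : 2 negativeClass: 0
    return 0;
}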
@@ -2359,7 +2375,14 @@ void FMKGPHyperparameterOptimization::addExample( const NICE::SparseVector * exa
   {
     this->knownClasses.insert( label );
     newClasses.insert( label );
-  }    
+  }
+  
+  // If we have been in a binary setting so far, we now have to take care
+  // that we also compute an alpha vector for the second class, which previously 
+  // could be dealt with implicitly.
+  // Therefore, we insert its label here...
+  if ( (newClasses.size() > 0 ) && ( (this->knownClasses.size() - newClasses.size() ) == 2 ) )
+    newClasses.insert( binaryLabelNegative );    
 
   // add the new example to our data structure
  // It is necessary to do this already here and not later on for internal reasons (see GMHIKernel for more details)
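
Because the new labels were already inserted into knownClasses in the loop above, knownClasses.size() - newClasses.size() equals the number of classes known before this increment; a value of 2 therefore detects the binary-to-multi-class transition. A sketch of the arithmetic with hypothetical labels:

#include <cassert>
#include <set>

int main()
{
    // two classes known so far (binary setting), hypothetical labels
    std::set<int> knownClasses;
    knownClasses.insert( 0 );
    knownClasses.insert( 2 );

    // a third class arrives with the new example
    std::set<int> newClasses;
    knownClasses.insert( 7 );
    newClasses.insert( 7 );

    // classes known before the increment: 3 - 1 == 2  ->  we were binary,
    // so the formerly implicit negative class now needs its own alpha vector
    assert( ( newClasses.size() > 0 ) && ( knownClasses.size() - newClasses.size() == 2 ) );
    return 0;
}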
@@ -2416,15 +2439,34 @@ void FMKGPHyperparameterOptimization::addMultipleExamples( const std::vector< co
   if ( !this->b_performRegression)
   {
     for ( NICE::Vector::const_iterator vecIt = newLabels.begin(); 
-	vecIt != newLabels.end(); vecIt++
-	)
+          vecIt != newLabels.end();
+          vecIt++
+      )
     {  
-	if ( this->knownClasses.find( *vecIt ) == this->knownClasses.end() )
+      if ( this->knownClasses.find( *vecIt ) == this->knownClasses.end() )
       {
-	this->knownClasses.insert( *vecIt );
-	newClasses.insert( *vecIt );
+        this->knownClasses.insert( *vecIt );
+        newClasses.insert( *vecIt );
       } 
     }
+
+    // If we have been in an OCC setting so far and only add a single new class,
+    // we have to take care that we remain efficient, i.e., that we solve for alpha
+    // only once, since scores are symmetric in binary cases.
+    // Therefore, we remove the label of the second class from newClasses to skip
+    // alpha computations for this class later on...
+    if ( (newClasses.size() == 1 ) && ( (this->knownClasses.size() - newClasses.size() ) == 1 ) )
+      newClasses.clear();
+
+    // If we have been in a binary setting so far, we now have to take care
+    // that we also compute an alpha vector for the second class, which previously 
+    // could be dealt with implicitly.
+    // Therefore, we insert its label here...
+    if ( (newClasses.size() > 0 ) && ( (this->knownClasses.size() - newClasses.size() ) == 2 ) )
+      newClasses.insert( binaryLabelNegative );      
+      
   }
   // in a regression setting, we do not have to remember any "class labels"
   else{}
@@ -2440,7 +2482,7 @@ void FMKGPHyperparameterOptimization::addMultipleExamples( const std::vector< co
   
  // add examples to all implicit kernel matrices we currently use
   this->ikmsum->addMultipleExamples ( newExamples, newLabels, performOptimizationAfterIncrement );
-    
+  
   // update the corresponding matrices A, B and lookup tables T  
   // optional: do the optimization again using the previously known solutions as initialization
   this->updateAfterIncrement ( newClasses, performOptimizationAfterIncrement );

+ 1 - 1
FMKGPHyperparameterOptimization.h

@@ -415,7 +415,7 @@ class FMKGPHyperparameterOptimization : public NICE::Persistent, public NICE::On
     
     /**
     * @brief classify an example that is given as non-sparse vector
-    * NOTE: whenever possible, you should sparse vectors to obtain significantly smaller computation times
+    * NOTE: whenever possible, you should use sparse vectors to obtain significantly smaller computation times
     * 
     * @date 18-06-2013 (dd-mm-yyyy)
     * @author Alexander Freytag

+ 8 - 1
GPLikelihoodApprox.cpp

@@ -308,7 +308,14 @@ double GPLikelihoodApprox::evaluate(const OPTIMIZATION::matrix_type & x)
     NICE::Vector alpha;
     if ( this->initialAlphaGuess != NULL )
     {
-      alpha = this->initialAlphaGuess->find(classCnt)->second;
+      std::map<int, NICE::Vector>::iterator myIt = this->initialAlphaGuess->find(classCnt);
+      if ( myIt != this->initialAlphaGuess->end() )
+        alpha = myIt->second;
+      else
+      {
+        //NOTE this should never happen in theory...
+        alpha = (binaryLabels[classCnt] * (1.0 / eigenmax[0]) );
+      }
     }
     else
     {
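
The guard above matters because the old one-liner dereferenced the result of find() unconditionally, which is undefined behavior whenever classCnt has no stored guess. A reduced sketch of the check-before-dereference pattern, again with std::vector<double> standing in for NICE::Vector:

#include <iostream>
#include <map>
#include <vector>

int main()
{
    std::map<int, std::vector<double> > initialAlphaGuess;
    initialAlphaGuess[ 0 ] = std::vector<double>( 3, 0.5 );

    const int classCnt = 1;  // a class without a stored guess

    // always compare find() against end() before dereferencing
    std::map<int, std::vector<double> >::const_iterator myIt =
        initialAlphaGuess.find( classCnt );
    if ( myIt != initialAlphaGuess.end() )
        std::cout << "reusing previous alpha as initialization" << std::endl;
    else
        std::cout << "no guess stored, falling back to label-based init" << std::endl;
    return 0;
}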

+ 56 - 7
tests/TestGPHIKOnlineLearnable.cpp

@@ -112,6 +112,46 @@ void evaluateClassifier ( NICE::Matrix & confusionMatrix,
   }
 }
 
+void compareClassifierOutputs ( const NICE::GPHIKClassifier * classifier,
+                                const NICE::GPHIKClassifier * classifierScratch, 
+                                const NICE::Matrix & data
+                              )
+{
+  int i_loopEnd  ( (int)data.rows() );  
+  
+  for (int i = 0; i < i_loopEnd ; i++)
+  {
+    NICE::Vector example ( data.getRow(i) );
+    
+    NICE::SparseVector scores;
+    int result;    
+    
+    // classify with incrementally trained classifier 
+    classifier->classify( &example, result, scores );
+
+    
+    NICE::SparseVector scoresScratch;
+    int resultScratch;
+    classifierScratch->classify( &example, resultScratch, scoresScratch );
+    
+    
+    bool equal(true);
+    NICE::SparseVector::const_iterator itScores        = scores.begin();
+    NICE::SparseVector::const_iterator itScoresScratch = scoresScratch.begin();
+    // note: this lockstep iteration assumes both classifiers return scores
+    // for exactly the same set of classes
+    for ( ; itScores != scores.end(); itScores++, itScoresScratch++)
+    {
+      if ( fabs( itScores->second - itScoresScratch->second ) > 10e-3)
+      {
+        std::cerr << " itScores->second: " << itScores->second << " itScoresScratch->second: " << itScoresScratch->second << std::endl;
+        equal = false;
+        break;
+      }        
+    }
+    
+    CPPUNIT_ASSERT_EQUAL ( equal, true );     
+  }  
+}
+
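
The compareClassifierOutputs helper added above is what the binary-to-multi-class test further down relies on: for every test example, it checks that the incrementally trained classifier and the classifier trained from scratch produce scores that agree up to the given tolerance.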
 void TestGPHIKOnlineLearnable::testOnlineLearningStartEmpty()
 {
   if (verboseStartEnd)
@@ -145,7 +185,7 @@ void TestGPHIKOnlineLearnable::testOnlineLearningStartEmpty()
   //create classifier object
   NICE::GPHIKClassifier * classifier;
   classifier = new NICE::GPHIKClassifier ( &conf );  
-  bool performOptimizationAfterIncrement ( false );
+  bool performOptimizationAfterIncrement ( true );
 
   // add training samples, but without running training method first
   classifier->addMultipleExamples ( examplesTrain,yMultiTrain, performOptimizationAfterIncrement );  
@@ -253,6 +293,10 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
   
   examplesTrain.resize( dataTrain.rows() );
   
+  // to check whether non-consecutive and even wrongly ordered class numbers work as well
+  int clNoFirst  ( 2 );
+  int clNoSecond ( 0 );
+  
   std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
   for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
   {
@@ -261,20 +305,22 @@ void TestGPHIKOnlineLearnable::testOnlineLearningOCCtoBinary()
     if ( yBinTrain[i] == 1 )
     {
       examplesTrainPlus.push_back ( *exTrainIt );
+      yBinTrain[i] = clNoFirst;
     }
     else
     {
        examplesTrainMinus.push_back ( *exTrainIt );
+       yBinTrain[i] = clNoSecond;
     }
   }
-  NICE::Vector yBinPlus  ( examplesTrainPlus.size(), 1 ) ;
-  NICE::Vector yBinMinus ( examplesTrainMinus.size(), 0 );
+  NICE::Vector yBinPlus  ( examplesTrainPlus.size(), clNoFirst ) ;
+  NICE::Vector yBinMinus ( examplesTrainMinus.size(), clNoSecond );
   
   
   //create classifier object
   NICE::GPHIKClassifier * classifier;
   classifier = new NICE::GPHIKClassifier ( &conf );  
-  bool performOptimizationAfterIncrement ( false );
+  bool performOptimizationAfterIncrement ( true );
 
   // training with examples for positive class only
   classifier->train ( examplesTrainPlus, yBinPlus );
@@ -365,6 +411,7 @@ void TestGPHIKOnlineLearnable::testOnlineLearningBinarytoMultiClass()
   
   conf.sB ( "GPHIKClassifier", "eig_verbose", false);
   conf.sS ( "GPHIKClassifier", "optimization_method", "downhillsimplex");
+//   conf.sS ( "GPHIKClassifier", "optimization_method", "none");
   
   std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
   
@@ -413,11 +460,11 @@ void TestGPHIKOnlineLearnable::testOnlineLearningBinarytoMultiClass()
   //create classifier object
   NICE::GPHIKClassifier * classifier;
   classifier = new NICE::GPHIKClassifier ( &conf );  
-  bool performOptimizationAfterIncrement ( false );
+  bool performOptimizationAfterIncrement ( true );
 
-  // training with examples for positive class only
+  // training with examples for the first and second class only
   classifier->train ( examplesTrain12, yMulti12 );
-  // add samples for negative class, thereby going from OCC to binary setting
+  // add samples for the third class, thereby going from binary to multi-class setting
   classifier->addMultipleExamples ( examplesTrain3, yMulti3, performOptimizationAfterIncrement );  
   
   // create second object trained in the standard way
@@ -457,6 +504,8 @@ void TestGPHIKOnlineLearnable::testOnlineLearningBinarytoMultiClass()
   // ------------------------------------------
   // ------------- CLASSIFICATION --------------
   // ------------------------------------------  
+  
+  compareClassifierOutputs ( classifier, classifierScratch, dataTest ); 
   evaluateClassifier ( confusionMatrix, classifier, dataTest, yMultiTest,
                           mapClNoToIdxTrain,mapClNoToIdxTest );