Bläddra i källkod

GPHIKRegression - test cases for adding multiple examples, minor bug fix

Alexander Freytag 11 år sedan
förälder
incheckning
146cda50d2
3 ändrade filer med 181 tillägg och 15 borttagningar
  1. 14 9
      FMKGPHyperparameterOptimization.cpp
  2. 162 3
      tests/TestGPHIKRegression.cpp
  3. 5 3
      tests/TestGPHIKRegression.h

+ 14 - 9
FMKGPHyperparameterOptimization.cpp

@@ -1897,16 +1897,21 @@ void FMKGPHyperparameterOptimization::addMultipleExamples( const std::vector< co
   
   this->labels.append ( newLabels );
   //have we seen this class already?
-  for ( NICE::Vector::const_iterator vecIt = newLabels.begin(); 
-       vecIt != newLabels.end(); vecIt++
-      )
-  {  
-      if ( this->knownClasses.find( *vecIt ) == this->knownClasses.end() )
-    {
-      this->knownClasses.insert( *vecIt );
-      newClasses.insert( *vecIt );
-    } 
+  if ( !this->b_performRegression)
+  {
+    for ( NICE::Vector::const_iterator vecIt = newLabels.begin(); 
+	vecIt != newLabels.end(); vecIt++
+	)
+    {  
+	if ( this->knownClasses.find( *vecIt ) == this->knownClasses.end() )
+      {
+	this->knownClasses.insert( *vecIt );
+	newClasses.insert( *vecIt );
+      } 
+    }
   }
+  // in a regression setting, we do not have to remember any "class labels"
+  else{}
   
   // add the new example to our data structure
   // It is necessary to do this already here and not later on for internal reasons (see GMHIKernel for more details)

+ 162 - 3
tests/TestGPHIKRegression.cpp

@@ -226,10 +226,10 @@ void TestGPHIKRegression::testRegressionHoldOutData()
     std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData done ===================== " << std::endl;     
 }
     
-void TestGPHIKRegression::testRegressionOnlineLearning()
+void TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example()
 {
   if (verboseStartEnd)
-    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearning ===================== " << std::endl;  
+    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example ===================== " << std::endl;  
 
   NICE::Config conf;
   
@@ -359,7 +359,166 @@ void TestGPHIKRegression::testRegressionOnlineLearning()
 
   
   if (verboseStartEnd)
-    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearning done ===================== " << std::endl;   
+    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example done ===================== " << std::endl;   
 }
 
+void TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples()
+{ // Checks that a regressor extended via addMultipleExamples reaches the same hold-out loss as one trained from scratch on all examples.
+  if (verboseStartEnd)
+    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples ===================== " << std::endl;  
+
+  NICE::Config conf;
+  
+  conf.sB ( "GPHIKRegressionMethod", "eig_verbose", false);
+  conf.sS ( "GPHIKRegressionMethod", "optimization_method", "downhillsimplex");//downhillsimplex greedy
+  // set higher built-in noise for hold-out regression estimation
+  conf.sD ( "GPHIKRegression", "noise", 1e-4 );  
+  
+  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
+  
+  //------------- read the training data --------------
+  
+  NICE::Matrix dataTrain;
+  NICE::Vector yValuesTrain; 
+  
+  readData ( s_trainData, dataTrain, yValuesTrain );
+  
+  //----------------- convert data to sparse data structures ---------
+  std::vector< const NICE::SparseVector *> examplesTrain;
+  std::vector< const NICE::SparseVector *> examplesTrainPlus;
+  std::vector< const NICE::SparseVector *> examplesTrainMinus;
+  
+  examplesTrain.resize( dataTrain.rows() );
+  NICE::Vector yValuesPlus( dataTrain.rows() );
+  NICE::Vector yValuesMinus( dataTrain.rows() );  
+  
+  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
+  
+  int cntPlus ( 0 );
+  int cntMinus ( 0 );
+  // note: the incrementally trained regressor sees the examples in a different order than the regressor trained from scratch
+  // (first the "plus" subset, then the "minus" subset) - this should not result in any difference of behaviour...
+  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
+  {
+    *exTrainIt =  new NICE::SparseVector( dataTrain.getRow(i) );
+    
+    if ( ( yValuesTrain[i] == 1 ) || ( yValuesTrain[i] == 2 ) ) // target values 1 and 2 form the initial training set
+    {
+      examplesTrainPlus.push_back ( *exTrainIt );
+      yValuesPlus[cntPlus] = yValuesTrain[i];
+      cntPlus++;
+    }
+    else // every other example is added incrementally afterwards
+    {
+       examplesTrainMinus.push_back ( *exTrainIt );
+      yValuesMinus[cntMinus] = yValuesTrain[i];
+      cntMinus++;      
+    }
+  }
+  
+  yValuesPlus.resize ( examplesTrainPlus.size()  ) ; // both vectors were allocated with dataTrain.rows() entries; cut them down to the number actually filled
+  yValuesMinus.resize( examplesTrainMinus.size() );  
+
+  
+  // TRAIN INITIAL REGRESSOR FROM SCRATCH (using only the "plus" subset)
+  NICE::GPHIKRegression * regressionMethod;
+  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
+  
+  regressionMethod->train ( examplesTrainPlus , yValuesPlus );
+  
+  if ( verbose ) 
+  {
+    std::cerr << "Initial values: " << yValuesPlus << std::endl;
+    std::cerr << "Values to add: " << yValuesMinus << std::endl;
+  }
+  
+  
+  // RUN INCREMENTAL LEARNING (add the complete "minus" subset in a single call)
+  
+  bool performOptimizationAfterIncrement ( true );
+  
+  regressionMethod->addMultipleExamples ( examplesTrainMinus, yValuesMinus, performOptimizationAfterIncrement );
+  
+  
+  // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
+
+  NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
+  regressionMethodScratch->train ( examplesTrain, yValuesTrain );
+  
+  if ( verbose )
+    std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;
+  
+  
+  // STORE both regressionMethods to files (NOTE: the files are only written here, their contents are not compared by this test)
+   std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );
+  
+  std::filebuf fbOut;
+  fbOut.open ( s_destination_save_IL.c_str(), ios::out );
+  std::ostream os (&fbOut);
+  // write the incrementally trained regressor
+  regressionMethod->store( os );
+  //   
+  fbOut.close(); 
+  
+  std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );
+  
+  std::filebuf fbOutScratch;
+  fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
+  std::ostream osScratch (&fbOutScratch);
+  // write the regressor trained from scratch
+  regressionMethodScratch->store( osScratch );
+  //   
+  fbOutScratch.close(); 
+  
+  
+  // TEST both regressionMethods to produce equal results
+  
+  //------------- read the test data --------------
+  
+  
+  NICE::Matrix dataTest;
+  NICE::Vector yValuesTest; 
+  
+  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );  
+  
+  readData ( s_testData, dataTest, yValuesTest );
+  
+
+  // ------------------------------------------
+  // ------------- REGRESSION --------------
+  // ------------------------------------------  
+
+
+  double holdOutLossIL ( 0.0 );
+  double holdOutLossScratch ( 0.0 );
+  
+  evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest ); 
+  
+  evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );  
+  
+    
+  if ( verbose ) 
+  {
+    std::cerr << "holdOutLossIL: " << holdOutLossIL  << std::endl;
+  
+    std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
+  }
+  
+  
+  CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4); // incremental and from-scratch hold-out losses must agree up to 1e-4
+  
+  // clean up: every SparseVector was allocated into examplesTrain; the Plus/Minus vectors only hold copies of those pointers
+  
+  delete regressionMethod;
+  delete regressionMethodScratch;
+  
+  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
+  {
+    delete *exTrainIt;
+  }   
+  
+  if (verboseStartEnd)
+    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples done ===================== " << std::endl;   
+}    
+
 #endif

+ 5 - 3
tests/TestGPHIKRegression.h

@@ -15,7 +15,9 @@ class TestGPHIKRegression : public CppUnit::TestFixture {
     CPPUNIT_TEST_SUITE( TestGPHIKRegression );
       CPPUNIT_TEST(testRegressionHoldInData);
       CPPUNIT_TEST(testRegressionHoldOutData);
-      CPPUNIT_TEST(testRegressionOnlineLearning);
+      
+      CPPUNIT_TEST(testRegressionOnlineLearnableAdd1Example);
+      CPPUNIT_TEST(testRegressionOnlineLearnableAddMultipleExamples);
       
     CPPUNIT_TEST_SUITE_END();
   
@@ -28,8 +30,8 @@ class TestGPHIKRegression : public CppUnit::TestFixture {
     void testRegressionHoldInData();
     void testRegressionHoldOutData();    
     
-    void testRegressionOnlineLearning();
-    
+    void testRegressionOnlineLearnableAdd1Example();
+    void testRegressionOnlineLearnableAddMultipleExamples();    
 };
 
 #endif // _TESTGPHIKREGRESSION_H