- /**
- * @file TestGPHIKRegression.cpp
- * @brief CppUnit test case to verify that GPHIKRegression works as desired.
- * @author Alexander Freytag
- * @date 16-01-2014 (dd-mm-yyyy)
- */
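- //
- // The suite covers four scenarios: hold-in regression on the training data, hold-out
- // regression on separate test data, and two incremental-learning cases (addExample and
- // addMultipleExamples) that are required to produce the same hold-out loss as training
- // from scratch.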
- #ifdef NICE_USELIB_CPPUNIT
- // STL includes
- #include <cmath>
- #include <fstream>
- #include <iostream>
- #include <vector>
- // NICE-core includes
- #include <core/basics/Config.h>
- #include <core/basics/Timer.h>
- // gp-hik-core includes
- #include "gp-hik-core/GPHIKRegression.h"
- #include "TestGPHIKRegression.h"
- using namespace std; // C++ standard library
- using namespace NICE; // nice-core
- const bool verboseStartEnd = true;
- const bool verbose = false;
- const bool writeRegressionObjectsForVerification = false;
- CPPUNIT_TEST_SUITE_REGISTRATION( TestGPHIKRegression );
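- // CPPUNIT_TEST_SUITE_REGISTRATION adds this fixture to CppUnit's global test factory
- // registry, so any TestRunner built from that registry picks these tests up automatically.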
- void TestGPHIKRegression::setUp() {
- }
- void TestGPHIKRegression::tearDown() {
- }
- void readData ( const std::string & filename, NICE::Matrix & data, NICE::Vector & yValues )
- {
- std::ifstream ifs ( filename.c_str() , ios::in );
- if ( ifs.good() )
- {
- NICE::Vector tmp;
- ifs >> data;
- ifs >> tmp; // binary labels -- read to advance the stream, but not needed for regression
- ifs >> yValues;
- ifs.close();
- }
- else
- {
- std::cerr << "Unable to read data from file " << filename << " -- aborting." << std::endl;
- CPPUNIT_ASSERT ( ifs.good() );
- }
- }
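- // The data files are expected to contain, in this order, a feature matrix, a vector of
- // binary labels (skipped above) and a vector of real-valued targets, all readable via the
- // NICE stream operators -- matching the layout of the toyExample*.data files used below.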
- void evaluateRegressionMethod ( double & regressionLoss,
- const NICE::GPHIKRegression * regressionMethod,
- const NICE::Matrix & data,
- const NICE::Vector & yValues
- )
- {
- regressionLoss = 0.0;
-
- int i_loopEnd ( (int)data.rows() );
-
- for (int i = 0; i < i_loopEnd ; i++)
- {
- NICE::Vector example ( data.getRow(i) );
- double result;
-
- // estimate with the previously trained regression method
- regressionMethod->estimate( &example, result );
-
- if ( verbose )
- std::cerr << "i: " << i << " gt: " << yValues[i] << " result: " << result << std::endl;
-
- //use L2-loss for evaluation
- regressionLoss += pow( yValues[i] - result, 2 );
- }
- }
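- // Note: the accumulated value is the plain sum of squared errors, SSE = sum_i (y_i - f(x_i))^2,
- // not an average -- the tests below therefore scale their acceptance thresholds by the number
- // of evaluated examples where appropriate.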
- void TestGPHIKRegression::testRegressionHoldInData()
- {
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData ===================== " << std::endl;
-
- NICE::Config conf;
-
- conf.sB ( "GPHIKRegression", "eig_verbose", false);
- conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
- // set pretty low built-in noise for hold-in regression estimation
- conf.sD ( "GPHIKRegression", "noise", 1e-6 );
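- // With such a small noise term the GP posterior mean (nearly) interpolates the training
- // targets, which is what makes the near-zero hold-in loss asserted below achievable.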
-
- std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
-
- //------------- read the training data --------------
-
- NICE::Matrix dataTrain;
- NICE::Vector yValues;
-
- readData ( s_trainData, dataTrain, yValues );
-
- //----------------- convert data to sparse data structures ---------
- std::vector< const NICE::SparseVector *> examplesTrain;
- examplesTrain.resize( dataTrain.rows() );
-
- std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
- for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
- {
- *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
- }
-
- //create regressionMethod object
- NICE::GPHIKRegression * regressionMethod;
- regressionMethod = new NICE::GPHIKRegression ( &conf );
- regressionMethod->train ( examplesTrain , yValues );
- if (verbose)
- {
- std::cerr << " yValues used for training regression object" << std::endl;
- std::cerr << yValues << std::endl;
- }
-
- double holdInLoss ( 0.0 );
-
-
- // ------------------------------------------
- // ------------- REGRESSION --------------
- // ------------------------------------------
- evaluateRegressionMethod ( holdInLoss, regressionMethod, dataTrain, yValues );
-
-
- if ( verbose )
- {
- std::cerr << " holdInLoss: " << holdInLoss << std::endl;
- }
-
- CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, holdInLoss, 1e-8);
-
- // don't waste memory
-
- delete regressionMethod;
-
- for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
- {
- delete *exTrainIt;
- }
-
-
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData done ===================== " << std::endl;
- }
- void TestGPHIKRegression::testRegressionHoldOutData()
- {
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData ===================== " << std::endl;
- NICE::Config conf;
-
- conf.sB ( "GPHIKRegression", "eig_verbose", false);
- conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
- // set higher built-in noise for hold-out regression estimation
- conf.sD ( "GPHIKRegression", "noise", 1e-4 );
-
- std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
-
- //------------- read the training data --------------
-
- NICE::Matrix dataTrain;
- NICE::Vector yValues;
-
- readData ( s_trainData, dataTrain, yValues );
-
- //----------------- convert data to sparse data structures ---------
- std::vector< const NICE::SparseVector *> examplesTrain;
- examplesTrain.resize( dataTrain.rows() );
-
- std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
- for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
- {
- *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
- }
-
- //create regressionMethod object
- NICE::GPHIKRegression * regressionMethod;
- regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
- regressionMethod->train ( examplesTrain , yValues );
-
- //------------- read the test data --------------
-
-
- NICE::Matrix dataTest;
- NICE::Vector yValuesTest;
-
- std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
-
- readData ( s_testData, dataTest, yValuesTest );
-
- double holdOutLoss ( 0.0 );
-
-
- // ------------------------------------------
- // ------------- REGRESSION --------------
- // ------------------------------------------
- evaluateRegressionMethod ( holdOutLoss, regressionMethod, dataTest, yValuesTest );
- // acceptable difference for every estimated y-value on average
- double diffOkay ( 0.4 );
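- // i.e. the assertion below tolerates an SSE of at most diffOkay^2 * #testExamples,
- // which corresponds to a root-mean-square deviation of at most 0.4 per estimate.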
-
- if ( verbose )
- {
- std::cerr << " holdOutLoss: " << holdOutLoss << " accepting: " << pow(diffOkay,2)*yValuesTest.size() << std::endl;
- }
-
- CPPUNIT_ASSERT( pow(diffOkay,2)*yValuesTest.size() - holdOutLoss > 0.0);
-
- // don't waste memory
-
- delete regressionMethod;
-
- for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
- {
- delete *exTrainIt;
- }
-
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData done ===================== " << std::endl;
- }
-
- void TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example()
- {
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example ===================== " << std::endl;
- NICE::Config conf;
-
- conf.sB ( "GPHIKRegression", "eig_verbose", false);
- conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); // options: downhillsimplex, greedy
- // set higher built-in noise for hold-out regression estimation
- conf.sD ( "GPHIKRegression", "noise", 1e-4 );
-
- std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
-
- //------------- read the training data --------------
-
- NICE::Matrix dataTrain;
- NICE::Vector yValuesTrain;
-
- readData ( s_trainData, dataTrain, yValuesTrain );
- //----------------- convert data to sparse data structures ---------
- std::vector< const NICE::SparseVector *> examplesTrain;
- examplesTrain.resize( dataTrain.rows()-1 );
-
- std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
- for (int i = 0; i < (int)dataTrain.rows()-1; i++, exTrainIt++)
- {
- *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
- }
-
- // TRAIN INITIAL REGRESSOR FROM SCRATCH
- NICE::GPHIKRegression * regressionMethod;
- regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
- //use all but the last example for training and add the last one later on
- NICE::Vector yValuesRelevantTrain ( yValuesTrain.getRangeRef( 0, yValuesTrain.size()-2 ) );
-
- regressionMethod->train ( examplesTrain , yValuesRelevantTrain );
-
-
- // RUN INCREMENTAL LEARNING
-
- bool performOptimizationAfterIncrement ( true );
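- // The flag controls whether the hyperparameter optimization is re-run after the incremental
- // update; re-optimizing is presumably what allows the online-updated regressor to match the
- // scratch-trained one in the equality check further below.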
-
- NICE::SparseVector * exampleToAdd = new NICE::SparseVector ( dataTrain.getRow( (int)dataTrain.rows()-1 ) );
-
-
- regressionMethod->addExample ( exampleToAdd, yValuesTrain[ (int)dataTrain.rows()-1 ], performOptimizationAfterIncrement );
-
- if ( verbose )
- std::cerr << "label of example to add: " << yValuesTrain[ (int)dataTrain.rows()-1 ] << std::endl;
-
- // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL NUMBER OF EXAMPLES
- examplesTrain.push_back( exampleToAdd );
- NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
- regressionMethodScratch->train ( examplesTrain, yValuesTrain );
-
- if ( verbose )
- std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;
-
-
- // TEST that both regressionMethods produce equal store-files
- if ( writeRegressionObjectsForVerification )
- {
- std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );
-
- std::filebuf fbOut;
- fbOut.open ( s_destination_save_IL.c_str(), ios::out );
- std::ostream os (&fbOut);
- //
- regressionMethod->store( os );
- //
- fbOut.close();
-
- std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );
-
- std::filebuf fbOutScratch;
- fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
- std::ostream osScratch (&fbOutScratch);
- //
- regressionMethodScratch->store( osScratch );
- //
- fbOutScratch.close();
- }
-
-
- // TEST both regressionMethods to produce equal results
-
- //------------- read the test data --------------
-
-
- NICE::Matrix dataTest;
- NICE::Vector yValuesTest;
-
- std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
-
- readData ( s_testData, dataTest, yValuesTest );
-
- // ------------------------------------------
- // ------------- REGRESSION --------------
- // ------------------------------------------
- double holdOutLossIL ( 0.0 );
- double holdOutLossScratch ( 0.0 );
-
- evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
-
- evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );
-
-
- if ( verbose )
- {
- std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
-
- std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
- }
-
-
- CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4);
-
- // don't waste memory
-
- delete regressionMethod;
- delete regressionMethodScratch;
-
- for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
- {
- delete *exTrainIt;
- }
-
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example done ===================== " << std::endl;
- }
- void TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples()
- {
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples ===================== " << std::endl;
- NICE::Config conf;
-
- conf.sB ( "GPHIKRegression", "eig_verbose", false);
- conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); // options: downhillsimplex, greedy
- // set higher built-in noise for hold-out regression estimation
- conf.sD ( "GPHIKRegression", "noise", 1e-4 );
-
- std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
-
- //------------- read the training data --------------
-
- NICE::Matrix dataTrain;
- NICE::Vector yValuesTrain;
-
- readData ( s_trainData, dataTrain, yValuesTrain );
-
- //----------------- convert data to sparse data structures ---------
- std::vector< const NICE::SparseVector *> examplesTrain;
- std::vector< const NICE::SparseVector *> examplesTrainPlus;
- std::vector< const NICE::SparseVector *> examplesTrainMinus;
-
- examplesTrain.resize( dataTrain.rows() );
- NICE::Vector yValuesPlus( dataTrain.rows() );
- NICE::Vector yValuesMinus( dataTrain.rows() );
-
- std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
-
- int cntPlus ( 0 );
- int cntMinus ( 0 );
- // note: we also slightly shuffle the order in which examples are added compared to the scratch regressor...
- // this should not result in any difference in behaviour...
- for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
- {
- *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
-
- if ( ( yValuesTrain[i] == 1 ) || ( yValuesTrain[i] == 2 ) )
- {
- examplesTrainPlus.push_back ( *exTrainIt );
- yValuesPlus[cntPlus] = yValuesTrain[i];
- cntPlus++;
- }
- else
- {
- examplesTrainMinus.push_back ( *exTrainIt );
- yValuesMinus[cntMinus] = yValuesTrain[i];
- cntMinus++;
- }
- }
-
- yValuesPlus.resize ( examplesTrainPlus.size() ) ;
- yValuesMinus.resize( examplesTrainMinus.size() );
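- // At this point examplesTrainPlus / examplesTrainMinus partition the full training set by
- // target value (targets equal to 1 or 2 vs. everything else); together they contain exactly
- // the data the scratch regressor sees, only added in a different order.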
-
- // TRAIN INITIAL REGRESSOR FROM SCRATCH
- NICE::GPHIKRegression * regressionMethod;
- regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
-
- regressionMethod->train ( examplesTrainPlus , yValuesPlus );
-
- if ( verbose )
- {
- std::cerr << "Initial values: " << yValuesPlus << std::endl;
- std::cerr << "Values to add: " << yValuesMinus << std::endl;
- }
-
-
- // RUN INCREMENTAL LEARNING
-
- bool performOptimizationAfterIncrement ( true );
-
- regressionMethod->addMultipleExamples ( examplesTrainMinus, yValuesMinus, performOptimizationAfterIncrement );
-
-
- // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL NUMBER OF EXAMPLES
- NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
- regressionMethodScratch->train ( examplesTrain, yValuesTrain );
-
- if ( verbose )
- std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;
-
-
- // TEST that both regressionMethods produce equal store-files
- if ( writeRegressionObjectsForVerification )
- {
- std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );
-
- std::filebuf fbOut;
- fbOut.open ( s_destination_save_IL.c_str(), ios::out );
- std::ostream os (&fbOut);
- //
- regressionMethod->store( os );
- //
- fbOut.close();
-
- std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );
-
- std::filebuf fbOutScratch;
- fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
- std::ostream osScratch (&fbOutScratch);
- //
- regressionMethodScratch->store( osScratch );
- //
- fbOutScratch.close();
- }
-
-
- // TEST both regressionMethods to produce equal results
-
- //------------- read the test data --------------
-
-
- NICE::Matrix dataTest;
- NICE::Vector yValuesTest;
-
- std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
-
- readData ( s_testData, dataTest, yValuesTest );
-
- // ------------------------------------------
- // ------------- REGRESSION --------------
- // ------------------------------------------
- double holdOutLossIL ( 0.0 );
- double holdOutLossScratch ( 0.0 );
-
- evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
-
- evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );
-
-
- if ( verbose )
- {
- std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
-
- std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
- }
-
-
- CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4);
-
- // don't waste memory
-
- delete regressionMethod;
- delete regressionMethodScratch;
-
- for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
- {
- delete *exTrainIt;
- }
-
- if (verboseStartEnd)
- std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples done ===================== " << std::endl;
- }
- #endif