TestGPHIKOnlineLearnable.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /**
  2. * @file TestGPHIKOnlineLearnable.cpp
  3. * @brief CppUnit-Testcase to verify that the online-learning methods of GPHIKClassifier (addExample) work as desired.
  4. * @author Alexander Freytag
  5. * @date 21-12-2013
  6. */
  7. #ifdef NICE_USELIB_CPPUNIT
  8. // STL includes
  9. #include <iostream>
  10. #include <vector>
  11. // NICE-core includes
  12. #include <core/basics/Config.h>
  13. #include <core/basics/Timer.h>
  14. // gp-hik-core includes
  15. #include "gp-hik-core/GPHIKClassifier.h"
  16. #include "TestGPHIKOnlineLearnable.h"
  17. using namespace std; //C basics
  18. using namespace NICE; // nice-core
  19. const bool verboseStartEnd = true;
  20. CPPUNIT_TEST_SUITE_REGISTRATION( TestGPHIKOnlineLearnable );
  21. void TestGPHIKOnlineLearnable::setUp() {
  22. }
  23. void TestGPHIKOnlineLearnable::tearDown() {
  24. }
  25. void TestGPHIKOnlineLearnable::testOnlineLearningMethods()
  26. {
  27. if (verboseStartEnd)
  28. std::cerr << "================== TestGPHIKOnlineLearnable::testOnlineLearningMethods ===================== " << std::endl;
  29. NICE::Config conf;
  30. conf.sB ( "GPHIKClassifier", "eig_verbose", false);
  31. conf.sS ( "GPHIKClassifier", "optimization_method", "downhillsimplex");
  32. std::string trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
  33. NICE::GPHIKClassifier * classifier;
  34. //------------- read the training data --------------
  35. NICE::Matrix dataTrain;
  36. NICE::Vector yBinTrain;
  37. NICE::Vector yMultiTrain;
  38. std::ifstream ifsTrain ( trainData.c_str() , ios::in );
  39. if ( ifsTrain.good() )
  40. {
  41. ifsTrain >> dataTrain;
  42. ifsTrain >> yBinTrain;
  43. ifsTrain >> yMultiTrain;
  44. ifsTrain.close();
  45. }
  46. else
  47. {
  48. std::cerr << "Unable to read training data from file " << trainData << " -- aborting." << std::endl;
  49. CPPUNIT_ASSERT ( ifsTrain.good() );
  50. }
  51. //----------------- convert data to sparse data structures ---------
  52. std::vector< NICE::SparseVector *> examplesTrain;
  53. examplesTrain.resize( dataTrain.rows()-1 );
  54. std::vector< NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  55. for (int i = 0; i < (int)dataTrain.rows()-1; i++, exTrainIt++)
  56. {
  57. *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  58. }
  59. // TRAIN INITIAL CLASSIFIER FROM SCRATCH
  60. classifier = new NICE::GPHIKClassifier ( &conf );
  61. //use all but the first example for training and add the first one lateron
  62. NICE::Vector yMultiRelevantTrain ( yMultiTrain.getRangeRef( 0, yMultiTrain.size()-2 ) );
  63. std::cerr << "yMultiRelevantTrain: " << yMultiRelevantTrain << std::endl;
  64. classifier->train ( examplesTrain , yMultiRelevantTrain );
  65. std::cerr << "Training done -- start incremental learning " << std::endl;
  66. // RUN INCREMENTAL LEARNING
  67. bool performOptimizationAfterIncrement ( false );
  68. NICE::SparseVector * exampleToAdd = new NICE::SparseVector ( dataTrain.getRow( (int)dataTrain.rows()-1 ) );
  69. classifier->addExample ( exampleToAdd, yMultiTrain[ (int)dataTrain.rows()-2 ], performOptimizationAfterIncrement );
  70. std::cerr << "label of example to add: " << yMultiTrain[ (int)dataTrain.rows()-1 ] << std::endl;
  71. // TRAIN SECOND CLASSIFIER FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  72. examplesTrain.push_back( exampleToAdd );
  73. NICE::GPHIKClassifier * classifierScratch = new NICE::GPHIKClassifier ( &conf );
  74. classifierScratch->train ( examplesTrain, yMultiTrain );
  75. std::cerr << "trained both classifiers - now start evaluating them" << std::endl;
  76. // TEST that both classifiers produce equal store-files
  77. std::string s_destination_save_IL ( "myClassifierIL.txt" );
  78. std::filebuf fbOut;
  79. fbOut.open ( s_destination_save_IL.c_str(), ios::out );
  80. std::ostream os (&fbOut);
  81. //
  82. classifier->store( os );
  83. //
  84. fbOut.close();
  85. std::string s_destination_save_scratch ( "myClassifierScratch.txt" );
  86. std::filebuf fbOutScratch;
  87. fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
  88. std::ostream osScratch (&fbOutScratch);
  89. //
  90. classifierScratch->store( osScratch );
  91. //
  92. fbOutScratch.close();
  93. // TEST both classifiers to produce equal results
  94. //------------- read the test data --------------
  95. NICE::Matrix dataTest;
  96. NICE::Vector yBinTest;
  97. NICE::Vector yMultiTest;
  98. std::string testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  99. std::ifstream ifsTest ( testData.c_str(), ios::in );
  100. if ( ifsTest.good() )
  101. {
  102. ifsTest >> dataTest;
  103. ifsTest >> yBinTest;
  104. ifsTest >> yMultiTest;
  105. ifsTest.close();
  106. }
  107. else
  108. {
  109. std::cerr << "Unable to read test data, aborting." << std::endl;
  110. CPPUNIT_ASSERT ( ifsTest.good() );
  111. }
  112. // ------------------------------------------
  113. // ------------- PREPARATION --------------
  114. // ------------------------------------------
  115. // determine classes known during training and corresponding mapping
  116. // thereby allow for non-continous class labels
  117. std::set<int> classesKnownTraining = classifier->getKnownClassNumbers();
  118. int noClassesKnownTraining ( classesKnownTraining.size() );
  119. std::map<int,int> mapClNoToIdxTrain;
  120. std::set<int>::const_iterator clTrIt = classesKnownTraining.begin();
  121. for ( int i=0; i < noClassesKnownTraining; i++, clTrIt++ )
  122. mapClNoToIdxTrain.insert ( std::pair<int,int> ( *clTrIt, i ) );
  123. // determine classes known during testing and corresponding mapping
  124. // thereby allow for non-continous class labels
  125. std::set<int> classesKnownTest;
  126. classesKnownTest.clear();
  127. // determine which classes we have in our label vector
  128. // -> MATLAB: myClasses = unique(y);
  129. for ( NICE::Vector::const_iterator it = yMultiTest.begin(); it != yMultiTest.end(); it++ )
  130. {
  131. if ( classesKnownTest.find ( *it ) == classesKnownTest.end() )
  132. {
  133. classesKnownTest.insert ( *it );
  134. }
  135. }
  136. int noClassesKnownTest ( classesKnownTest.size() );
  137. std::map<int,int> mapClNoToIdxTest;
  138. std::set<int>::const_iterator clTestIt = classesKnownTest.begin();
  139. for ( int i=0; i < noClassesKnownTest; i++, clTestIt++ )
  140. mapClNoToIdxTest.insert ( std::pair<int,int> ( *clTestIt, i ) );
  141. NICE::Matrix confusionMatrix ( noClassesKnownTraining, noClassesKnownTest, 0.0);
  142. NICE::Matrix confusionMatrixScratch ( noClassesKnownTraining, noClassesKnownTest, 0.0);
  143. std::cerr << "data preparation for testing is done "<< std::endl;
  144. int i_loopEnd ( (int)dataTest.rows() );
  145. for (int i = 0; i < i_loopEnd ; i++)
  146. {
  147. NICE::Vector example ( dataTest.getRow(i) );
  148. NICE::SparseVector scores;
  149. int result;
  150. // classify with incrementally trained classifier
  151. classifier->classify( &example, result, scores );
  152. std::cerr << "results with IL classifier: " << std::endl;
  153. scores.store ( std::cerr );
  154. confusionMatrix( mapClNoToIdxTrain.find(result)->second, mapClNoToIdxTest.find(yMultiTest[i])->second ) += 1.0;
  155. // classify with classifier learned from scratch
  156. scores.clear();
  157. classifierScratch->classify( &example, result, scores );
  158. std::cerr << "Results with scratch classifier: " << std::endl;
  159. scores.store( std::cerr );
  160. std::cerr << std::endl;
  161. confusionMatrixScratch( mapClNoToIdxTrain.find(result)->second, mapClNoToIdxTest.find(yMultiTest[i])->second ) += 1.0;
  162. }
  163. //TODO also check that both classifiers result in the same store-files
  164. std::cerr << "postprocess confusion matrices " << std::endl;
  165. confusionMatrix.normalizeColumnsL1();
  166. double arr ( confusionMatrix.trace()/confusionMatrix.cols() );
  167. confusionMatrixScratch.normalizeColumnsL1();
  168. double arrScratch ( confusionMatrixScratch.trace()/confusionMatrixScratch.cols() );
  169. CPPUNIT_ASSERT_DOUBLES_EQUAL( arr, arrScratch, 1e-8);
  170. // don't waste memory
  171. //TODO clean up of training data, also in TestGPHIKPersistent
  172. delete classifier;
  173. delete classifierScratch;
  174. if (verboseStartEnd)
  175. std::cerr << "================== TestGPHIKOnlineLearnable::testOnlineLearningMethods done ===================== " << std::endl;
  176. }
  177. #endif