TestGPHIKRegression.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /**
  2. * @file TestGPHIKRegression.cpp
  3. * @brief CppUnit-Testcase to verify that GPHIKRegression works as desired.
  4. * @author Alexander Freytag
  5. * @date 16-01-2014 (dd-mm-yyyy)
  6. */
  7. #ifdef NICE_USELIB_CPPUNIT
// STL includes
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// NICE-core includes
#include <core/basics/Config.h>
#include <core/basics/Timer.h>
// gp-hik-core includes
#include "gp-hik-core/GPHIKRegression.h"
#include "TestGPHIKRegression.h"
  17. using namespace std; //C basics
  18. using namespace NICE; // nice-core
  19. const bool verboseStartEnd = true;
  20. const bool verbose = false;
  21. CPPUNIT_TEST_SUITE_REGISTRATION( TestGPHIKRegression );
  22. void TestGPHIKRegression::setUp() {
  23. }
  24. void TestGPHIKRegression::tearDown() {
  25. }
  26. void readData ( const std::string filename, NICE::Matrix & data, NICE::Vector & yValues )
  27. {
  28. std::ifstream ifs ( filename.c_str() , ios::in );
  29. if ( ifs.good() )
  30. {
  31. NICE::Vector tmp;
  32. ifs >> data;
  33. ifs >> tmp; //yBin;
  34. ifs >> yValues;
  35. ifs.close();
  36. }
  37. else
  38. {
  39. std::cerr << "Unable to read data from file " << filename << " -- aborting." << std::endl;
  40. CPPUNIT_ASSERT ( ifs.good() );
  41. }
  42. }
  43. void evaluateRegressionMethod ( double & regressionLoss,
  44. const NICE::GPHIKRegression * regressionMethod,
  45. const NICE::Matrix & data,
  46. const NICE::Vector & yValues
  47. )
  48. {
  49. regressionLoss = 0.0;
  50. int i_loopEnd ( (int)data.rows() );
  51. for (int i = 0; i < i_loopEnd ; i++)
  52. {
  53. NICE::Vector example ( data.getRow(i) );
  54. double result;
  55. // classify with previously trained regression method
  56. regressionMethod->estimate( &example, result );
  57. if ( verbose )
  58. std::cerr << "i: " << i << " gt: " << yValues[i] << " result: " << result << std::endl;
  59. //use L2-loss for evaluation
  60. regressionLoss += pow( yValues[i] - result, 2 );
  61. }
  62. }
  63. void TestGPHIKRegression::testRegressionHoldInData()
  64. {
  65. if (verboseStartEnd)
  66. std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData ===================== " << std::endl;
  67. NICE::Config conf;
  68. conf.sB ( "GPHIKRegression", "eig_verbose", false);
  69. conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  70. // set pretty low built-in noise for hold-in regression estimation
  71. conf.sD ( "GPHIKRegression", "noise", 1e-6 );
  72. std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
  73. //------------- read the training data --------------
  74. NICE::Matrix dataTrain;
  75. NICE::Vector yValues;
  76. readData ( s_trainData, dataTrain, yValues );
  77. //----------------- convert data to sparse data structures ---------
  78. std::vector< const NICE::SparseVector *> examplesTrain;
  79. examplesTrain.resize( dataTrain.rows() );
  80. std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  81. for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  82. {
  83. *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  84. }
  85. //create regressionMethod object
  86. NICE::GPHIKRegression * regressionMethod;
  87. regressionMethod = new NICE::GPHIKRegression ( &conf );
  88. regressionMethod->train ( examplesTrain , yValues );
  89. double holdInLoss ( 0.0 );
  90. // ------------------------------------------
  91. // ------------- REGRESSION --------------
  92. // ------------------------------------------
  93. evaluateRegressionMethod ( holdInLoss, regressionMethod, dataTrain, yValues );
  94. if ( verbose )
  95. {
  96. std::cerr << " holdInLoss: " << holdInLoss << std::endl;
  97. }
  98. CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, holdInLoss, 1e-8);
  99. // don't waste memory
  100. delete regressionMethod;
  101. for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  102. {
  103. delete *exTrainIt;
  104. }
  105. if (verboseStartEnd)
  106. std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData done ===================== " << std::endl;
  107. }
  108. void TestGPHIKRegression::testRegressionHoldOutData()
  109. {
  110. if (verboseStartEnd)
  111. std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData ===================== " << std::endl;
  112. NICE::Config conf;
  113. conf.sB ( "GPHIKRegression", "eig_verbose", false);
  114. conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  115. // set higher built-in noise for hold-out regression estimation
  116. conf.sD ( "GPHIKRegression", "noise", 1e-4 );
  117. std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
  118. //------------- read the training data --------------
  119. NICE::Matrix dataTrain;
  120. NICE::Vector yValues;
  121. readData ( s_trainData, dataTrain, yValues );
  122. //----------------- convert data to sparse data structures ---------
  123. std::vector< const NICE::SparseVector *> examplesTrain;
  124. examplesTrain.resize( dataTrain.rows() );
  125. std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  126. for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  127. {
  128. *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  129. }
  130. //create regressionMethod object
  131. NICE::GPHIKRegression * regressionMethod;
  132. regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  133. regressionMethod->train ( examplesTrain , yValues );
  134. //------------- read the test data --------------
  135. NICE::Matrix dataTest;
  136. NICE::Vector yValuesTest;
  137. std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  138. readData ( s_testData, dataTest, yValuesTest );
  139. double holdOutLoss ( 0.0 );
  140. // ------------------------------------------
  141. // ------------- REGRESSION --------------
  142. // ------------------------------------------
  143. evaluateRegressionMethod ( holdOutLoss, regressionMethod, dataTest, yValuesTest );
  144. // acceptable difference for every estimated y-value on average
  145. double diffOkay ( 0.4 );
  146. if ( verbose )
  147. {
  148. std::cerr << " holdOutLoss: " << holdOutLoss << " accepting: " << pow(diffOkay,2)*yValuesTest.size() << std::endl;
  149. }
  150. CPPUNIT_ASSERT( pow(diffOkay,2)*yValuesTest.size() - holdOutLoss > 0.0);
  151. // don't waste memory
  152. delete regressionMethod;
  153. for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  154. {
  155. delete *exTrainIt;
  156. }
  157. if (verboseStartEnd)
  158. std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData done ===================== " << std::endl;
  159. }
  160. void TestGPHIKRegression::testRegressionOnlineLearning()
  161. {
  162. if (verboseStartEnd)
  163. std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearning ===================== " << std::endl;
  164. NICE::Config conf;
  165. conf.sB ( "GPHIKRegressionMethod", "eig_verbose", false);
  166. conf.sS ( "GPHIKRegressionMethod", "optimization_method", "downhillsimplex");//downhillsimplex greedy
  167. // set higher built-in noise for hold-out regression estimation
  168. conf.sD ( "GPHIKRegression", "noise", 1e-4 );
  169. std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );
  170. //------------- read the training data --------------
  171. NICE::Matrix dataTrain;
  172. NICE::Vector yValuesTrain;
  173. readData ( s_trainData, dataTrain, yValuesTrain );
  174. //----------------- convert data to sparse data structures ---------
  175. std::vector< const NICE::SparseVector *> examplesTrain;
  176. examplesTrain.resize( dataTrain.rows()-1 );
  177. std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  178. for (int i = 0; i < (int)dataTrain.rows()-1; i++, exTrainIt++)
  179. {
  180. *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  181. }
  182. // TRAIN INITIAL CLASSIFIER FROM SCRATCH
  183. NICE::GPHIKRegression * regressionMethod;
  184. regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  185. //use all but the first example for training and add the first one lateron
  186. NICE::Vector yValuesRelevantTrain ( yValuesTrain.getRangeRef( 0, yValuesTrain.size()-2 ) );
  187. regressionMethod->train ( examplesTrain , yValuesRelevantTrain );
  188. // RUN INCREMENTAL LEARNING
  189. bool performOptimizationAfterIncrement ( true );
  190. NICE::SparseVector * exampleToAdd = new NICE::SparseVector ( dataTrain.getRow( (int)dataTrain.rows()-1 ) );
  191. regressionMethod->addExample ( exampleToAdd, yValuesTrain[ (int)dataTrain.rows()-2 ], performOptimizationAfterIncrement );
  192. if ( verbose )
  193. std::cerr << "label of example to add: " << yValuesTrain[ (int)dataTrain.rows()-1 ] << std::endl;
  194. // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  195. examplesTrain.push_back( exampleToAdd );
  196. NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  197. regressionMethodScratch->train ( examplesTrain, yValuesTrain );
  198. if ( verbose )
  199. std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;
  200. // TEST that both regressionMethods produce equal store-files
  201. std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );
  202. std::filebuf fbOut;
  203. fbOut.open ( s_destination_save_IL.c_str(), ios::out );
  204. std::ostream os (&fbOut);
  205. //
  206. regressionMethod->store( os );
  207. //
  208. fbOut.close();
  209. std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );
  210. std::filebuf fbOutScratch;
  211. fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
  212. std::ostream osScratch (&fbOutScratch);
  213. //
  214. regressionMethodScratch->store( osScratch );
  215. //
  216. fbOutScratch.close();
  217. // TEST both regressionMethods to produce equal results
  218. //------------- read the test data --------------
  219. NICE::Matrix dataTest;
  220. NICE::Vector yValuesTest;
  221. std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  222. readData ( s_testData, dataTest, yValuesTest );
  223. // ------------------------------------------
  224. // ------------- REGRESSION --------------
  225. // ------------------------------------------
  226. double holdOutLossIL ( 0.0 );
  227. double holdOutLossScratch ( 0.0 );
  228. evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
  229. evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );
  230. if ( verbose )
  231. {
  232. std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
  233. std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
  234. }
  235. CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4);
  236. // don't waste memory
  237. delete regressionMethod;
  238. delete regressionMethodScratch;
  239. for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  240. {
  241. delete *exTrainIt;
  242. }
  243. if (verboseStartEnd)
  244. std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearning done ===================== " << std::endl;
  245. }
  246. #endif