/**
 * @file TestGPHIKRegression.cpp
 * @brief Unit tests (Google Test) to verify that GPHIKRegression works as desired.
 * @author Alexander Freytag
 * @date 16-01-2014 (dd-mm-yyyy)
 */
#ifdef NICE_USELIB_CPPUNIT

// STL includes
#include <fstream>  // std::ifstream, std::filebuf
#include <iostream>
#include <vector>

// NICE-core includes
#include <core/basics/Config.h>
#include <core/basics/Timer.h>

// gp-hik-core includes
#include "gp-hik-core/GPHIKRegression.h"

#include <gtest/gtest.h>

using namespace std;  // C basics
using namespace NICE; // nice-core

const bool verboseStartEnd = true;
const bool verbose = false;
const bool writeRegressionObjectsForVerification = false;
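
/**
 * Reads a dense data matrix, a (binary) label vector -- unused here -- and the
 * real-valued regression targets from the given file, in exactly this order.
 */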
void readData ( const std::string filename, NICE::Matrix & data, NICE::Vector & yValues )
{
  std::ifstream ifs ( filename.c_str() , ios::in );

  if ( ifs.good() )
  {
    NICE::Vector tmp;
    ifs >> data;
    ifs >> tmp; //yBin;
    ifs >> yValues;
    ifs.close();
  }
  else
  {
    std::cerr << "Unable to read data from file " << filename << " -- aborting." << std::endl;
    ASSERT_TRUE( ifs.good() );
  }
}
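
/**
 * Runs estimate() on every row of data and accumulates the squared deviation from the
 * ground-truth targets; regressionLoss returns the summed (not averaged) L2 loss.
 */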
void evaluateRegressionMethod ( double & regressionLoss,
                                const NICE::GPHIKRegression * regressionMethod,
                                const NICE::Matrix & data,
                                const NICE::Vector & yValues
                              )
{
  regressionLoss = 0.0;

  int i_loopEnd ( (int)data.rows() );

  for (int i = 0; i < i_loopEnd ; i++)
  {
    NICE::Vector example ( data.getRow(i) );
    double result;

    // estimate with the previously trained regression method
    regressionMethod->estimate( &example, result );

    if ( verbose )
      std::cerr << "i: " << i << " gt: " << yValues[i] << " result: " << result << std::endl;

    // use the L2 loss for evaluation
    regressionLoss += pow( yValues[i] - result, 2 );
  }
}
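
// Hold-in sanity check: train with a very small noise level and evaluate on the
// training data itself -- the summed L2 loss is expected to be numerically zero.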
TEST(TestGPHIKRegression, testRegressionHoldInData)
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  // set pretty low built-in noise for hold-in regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-6 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------
  NICE::Matrix dataTrain;
  NICE::Vector yValues;

  readData ( s_trainData, dataTrain, yValues );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }

  // create regressionMethod object
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf );
  regressionMethod->train ( examplesTrain , yValues );

  if (verbose)
  {
    std::cerr << " yValues used for training regression object" << std::endl;
    std::cerr << yValues << std::endl;
  }

  double holdInLoss ( 0.0 );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------
  evaluateRegressionMethod ( holdInLoss, regressionMethod, dataTrain, yValues );

  if ( verbose )
  {
    std::cerr << " holdInLoss: " << holdInLoss << std::endl;
  }

  ASSERT_NEAR( 0.0, holdInLoss, 1e-8);

  // don't waste memory
  delete regressionMethod;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData done ===================== " << std::endl;
}
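
// Hold-out check: train on the toy training data with a slightly larger noise level,
// evaluate on separate test data, and require the summed L2 loss to stay below an
// average deviation of diffOkay per test example.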
TEST(TestGPHIKRegression, testRegressionHoldOutData)
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------
  NICE::Matrix dataTrain;
  NICE::Vector yValues;

  readData ( s_trainData, dataTrain, yValues );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }

  // create regressionMethod object
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethod->train ( examplesTrain , yValues );

  //------------- read the test data --------------
  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  double holdOutLoss ( 0.0 );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------
  evaluateRegressionMethod ( holdOutLoss, regressionMethod, dataTest, yValuesTest );

  // acceptable difference for every estimated y-value on average
  double diffOkay ( 0.4 );

  if ( verbose )
  {
    std::cerr << " holdOutLoss: " << holdOutLoss << " accepting: " << pow(diffOkay,2)*yValuesTest.size() << std::endl;
  }

  ASSERT_TRUE( pow(diffOkay,2)*yValuesTest.size() - holdOutLoss > 0.0);

  // don't waste memory
  delete regressionMethod;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData done ===================== " << std::endl;
}
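
// Incremental-learning check: train on all but the last training example, add the last
// example via addExample(), and compare against a regressor trained from scratch on all
// examples -- both have to yield (nearly) identical hold-out losses.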
TEST(TestGPHIKRegression, testRegressionOnlineLearnableAdd1Example)
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); // downhillsimplex or greedy
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );
  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------
  NICE::Matrix dataTrain;
  NICE::Vector yValuesTrain;

  readData ( s_trainData, dataTrain, yValuesTrain );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows()-1 );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows()-1; i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }
  // TRAIN INITIAL REGRESSOR FROM SCRATCH
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );

  // use all but the last example for training and add the last one later on
  NICE::Vector yValuesRelevantTrain ( yValuesTrain.getRangeRef( 0, yValuesTrain.size()-2 ) );

  regressionMethod->train ( examplesTrain , yValuesRelevantTrain );

  // RUN INCREMENTAL LEARNING
  bool performOptimizationAfterIncrement ( true );

  NICE::SparseVector * exampleToAdd = new NICE::SparseVector ( dataTrain.getRow( (int)dataTrain.rows()-1 ) );
  // add the held-out last example together with its own ground-truth label
  regressionMethod->addExample ( exampleToAdd, yValuesTrain[ (int)dataTrain.rows()-1 ], performOptimizationAfterIncrement );

  if ( verbose )
    std::cerr << "label of example to add: " << yValuesTrain[ (int)dataTrain.rows()-1 ] << std::endl;
  // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  examplesTrain.push_back( exampleToAdd );

  NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethodScratch->train ( examplesTrain, yValuesTrain );

  if ( verbose )
    std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;

  // TEST that both regressionMethods produce equal store-files
  if ( writeRegressionObjectsForVerification )
  {
    std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );

    std::filebuf fbOut;
    fbOut.open ( s_destination_save_IL.c_str(), ios::out );
    std::ostream os (&fbOut);
    //
    regressionMethod->store( os );
    //
    fbOut.close();

    std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );

    std::filebuf fbOutScratch;
    fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
    std::ostream osScratch (&fbOutScratch);
    //
    regressionMethodScratch->store( osScratch );
    //
    fbOutScratch.close();
  }
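
  // Note: the two files written above are only meant for manual inspection (diffing them
  // should show identical models). If an automated round-trip check is ever wanted, one
  // could presumably reload them via the Persistent interface -- something along the lines
  // of regressionMethod->restore( is ) on an std::istream opened on these files. This is
  // only a sketch and assumes GPHIKRegression provides restore() analogously to store().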
  // TEST both regressionMethods to produce equal results

  //------------- read the test data --------------
  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------
  double holdOutLossIL ( 0.0 );
  double holdOutLossScratch ( 0.0 );

  evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
  evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );

  if ( verbose )
  {
    std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
    std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
  }

  ASSERT_NEAR( holdOutLossIL, holdOutLossScratch, 1e-4);

  // don't waste memory
  delete regressionMethod;
  delete regressionMethodScratch;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example done ===================== " << std::endl;
}
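
// Incremental-learning check with multiple examples: split the training set into two parts,
// train on the first part, add the second part via addMultipleExamples(), and compare
// against a regressor trained from scratch on the full set.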
TEST(TestGPHIKRegression, testRegressionOnlineLearnableAddMultipleExamples)
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); // downhillsimplex or greedy
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );
  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------
  NICE::Matrix dataTrain;
  NICE::Vector yValuesTrain;

  readData ( s_trainData, dataTrain, yValuesTrain );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  std::vector< const NICE::SparseVector *> examplesTrainPlus;
  std::vector< const NICE::SparseVector *> examplesTrainMinus;

  examplesTrain.resize( dataTrain.rows() );

  NICE::Vector yValuesPlus( dataTrain.rows() );
  NICE::Vector yValuesMinus( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();

  int cntPlus ( 0 );
  int cntMinus ( 0 );

  // note: we also slightly shuffle the order of how examples are added compared to the scratch regressor...
  // this should not result in any difference of behaviour...
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );

    if ( ( yValuesTrain[i] == 1 ) || ( yValuesTrain[i] == 2 ) )
    {
      examplesTrainPlus.push_back ( *exTrainIt );
      yValuesPlus[cntPlus] = yValuesTrain[i];
      cntPlus++;
    }
    else
    {
      examplesTrainMinus.push_back ( *exTrainIt );
      yValuesMinus[cntMinus] = yValuesTrain[i];
      cntMinus++;
    }
  }
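
  // yValuesPlus and yValuesMinus were allocated with the full number of rows above,
  // so shrink them to the number of examples that actually ended up in each subset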
  yValuesPlus.resize ( examplesTrainPlus.size() );
  yValuesMinus.resize( examplesTrainMinus.size() );
  // TRAIN INITIAL REGRESSOR FROM SCRATCH
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethod->train ( examplesTrainPlus , yValuesPlus );

  if ( verbose )
  {
    std::cerr << "Initial values: " << yValuesPlus << std::endl;
    std::cerr << "Values to add: " << yValuesMinus << std::endl;
  }

  // RUN INCREMENTAL LEARNING
  bool performOptimizationAfterIncrement ( true );

  regressionMethod->addMultipleExamples ( examplesTrainMinus, yValuesMinus, performOptimizationAfterIncrement );

  // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethodScratch->train ( examplesTrain, yValuesTrain );

  if ( verbose )
    std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;
  // TEST that both regressionMethods produce equal store-files
  if ( writeRegressionObjectsForVerification )
  {
    std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );

    std::filebuf fbOut;
    fbOut.open ( s_destination_save_IL.c_str(), ios::out );
    std::ostream os (&fbOut);
    //
    regressionMethod->store( os );
    //
    fbOut.close();

    std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );

    std::filebuf fbOutScratch;
    fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
    std::ostream osScratch (&fbOutScratch);
    //
    regressionMethodScratch->store( osScratch );
    //
    fbOutScratch.close();
  }

  // TEST both regressionMethods to produce equal results

  //------------- read the test data --------------
  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------
  double holdOutLossIL ( 0.0 );
  double holdOutLossScratch ( 0.0 );

  evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
  evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );

  if ( verbose )
  {
    std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
    std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
  }

  ASSERT_NEAR( holdOutLossIL, holdOutLossScratch, 1e-4);

  // don't waste memory
  delete regressionMethod;
  delete regressionMethodScratch;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples done ===================== " << std::endl;
}

#endif