/**
* @file TestGPHIKRegression.cpp
* @brief CppUnit-Testcase to verify that GPHIKRegression works as desired.
* @author Alexander Freytag
* @date 16-01-2014 (dd-mm-yyyy)
*/
#ifdef NICE_USELIB_CPPUNIT

// STL includes
#include <fstream>   // std::ifstream, std::filebuf
#include <iostream>
#include <vector>

// NICE-core includes
#include <core/basics/Config.h>
#include <core/basics/Timer.h>

// gp-hik-core includes
#include "gp-hik-core/GPHIKRegression.h"

#include "TestGPHIKRegression.h"

using namespace std;  // STL basics
using namespace NICE; // nice-core

const bool verboseStartEnd = true;
const bool verbose = false;
const bool writeRegressionObjectsForVerification = false;

CPPUNIT_TEST_SUITE_REGISTRATION( TestGPHIKRegression );
void TestGPHIKRegression::setUp() {
}

void TestGPHIKRegression::tearDown() {
}
void readData ( const std::string filename, NICE::Matrix & data, NICE::Vector & yValues )
{
  std::ifstream ifs ( filename.c_str() , ios::in );

  if ( ifs.good() )
  {
    NICE::Vector tmp;
    ifs >> data;
    ifs >> tmp; //yBin;
    ifs >> yValues;
    ifs.close();
  }
  else
  {
    std::cerr << "Unable to read data from file " << filename << " -- aborting." << std::endl;
    CPPUNIT_ASSERT ( ifs.good() );
  }
}
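// Note on the expected file layout (inferred from the reads above, assuming the
// usual NICE stream operators): each toy data file contains a feature matrix,
// followed by a vector of binary labels (read into a temporary and discarded here,
// since we do regression), followed by the vector of real-valued targets.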
void evaluateRegressionMethod ( double & regressionLoss,
                                const NICE::GPHIKRegression * regressionMethod,
                                const NICE::Matrix & data,
                                const NICE::Vector & yValues
                              )
{
  regressionLoss = 0.0;

  int i_loopEnd ( (int)data.rows() );

  for (int i = 0; i < i_loopEnd ; i++)
  {
    NICE::Vector example ( data.getRow(i) );

    double result;

    // estimate with the previously trained regression method
    regressionMethod->estimate( &example, result );

    if ( verbose )
      std::cerr << "i: " << i << " gt: " << yValues[i] << " result: " << result << std::endl;

    // use the L2 loss for evaluation
    regressionLoss += pow( yValues[i] - result, 2 );
  }
}
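// Note: regressionLoss accumulates the *summed* squared error over all examples
// (it is not averaged). The hold-out test below therefore scales its per-example
// tolerance diffOkay accordingly, i.e. it checks against pow(diffOkay,2) * #examples.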
void TestGPHIKRegression::testRegressionHoldInData()
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  // set pretty low built-in noise for hold-in regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-6 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------

  NICE::Matrix dataTrain;
  NICE::Vector yValues;

  readData ( s_trainData, dataTrain, yValues );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }

  //create regressionMethod object
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf );
  regressionMethod->train ( examplesTrain , yValues );

  if (verbose)
  {
    std::cerr << " yValues used for training regression object" << std::endl;
    std::cerr << yValues << std::endl;
  }

  double holdInLoss ( 0.0 );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------

  evaluateRegressionMethod ( holdInLoss, regressionMethod, dataTrain, yValues );

  if ( verbose )
  {
    std::cerr << " holdInLoss: " << holdInLoss << std::endl;
  }

  CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.0, holdInLoss, 1e-8);

  // don't waste memory
  delete regressionMethod;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldInData done ===================== " << std::endl;
}
void TestGPHIKRegression::testRegressionHoldOutData()
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex");
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------

  NICE::Matrix dataTrain;
  NICE::Vector yValues;

  readData ( s_trainData, dataTrain, yValues );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }

  //create regressionMethod object
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethod->train ( examplesTrain , yValues );

  //------------- read the test data --------------

  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  double holdOutLoss ( 0.0 );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------

  evaluateRegressionMethod ( holdOutLoss, regressionMethod, dataTest, yValuesTest );

  // acceptable difference for every estimated y-value on average
  double diffOkay ( 0.4 );

  if ( verbose )
  {
    std::cerr << " holdOutLoss: " << holdOutLoss << " accepting: " << pow(diffOkay,2)*yValuesTest.size() << std::endl;
  }

  CPPUNIT_ASSERT( pow(diffOkay,2)*yValuesTest.size() - holdOutLoss > 0.0);

  // don't waste memory
  delete regressionMethod;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionHoldOutData done ===================== " << std::endl;
}
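// The two following tests check the online-learning capability of GPHIKRegression:
// a regressor trained on part of the data and then extended via addExample /
// addMultipleExamples should yield (up to a small numerical tolerance) the same
// hold-out loss as a regressor trained from scratch on all examples.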
void TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example()
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); //downhillsimplex, greedy
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------

  NICE::Matrix dataTrain;
  NICE::Vector yValuesTrain;

  readData ( s_trainData, dataTrain, yValuesTrain );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  examplesTrain.resize( dataTrain.rows()-1 );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();
  for (int i = 0; i < (int)dataTrain.rows()-1; i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );
  }

  // TRAIN INITIAL REGRESSOR FROM SCRATCH
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );

  // use all but the last example for training and add the last one later on
  NICE::Vector yValuesRelevantTrain ( yValuesTrain.getRangeRef( 0, yValuesTrain.size()-2 ) );

  regressionMethod->train ( examplesTrain , yValuesRelevantTrain );

  // RUN INCREMENTAL LEARNING
  bool performOptimizationAfterIncrement ( true );

  NICE::SparseVector * exampleToAdd = new NICE::SparseVector ( dataTrain.getRow( (int)dataTrain.rows()-1 ) );

  regressionMethod->addExample ( exampleToAdd, yValuesTrain[ (int)dataTrain.rows()-1 ], performOptimizationAfterIncrement );

  if ( verbose )
    std::cerr << "label of example to add: " << yValuesTrain[ (int)dataTrain.rows()-1 ] << std::endl;

  // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  examplesTrain.push_back( exampleToAdd );

  NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethodScratch->train ( examplesTrain, yValuesTrain );

  if ( verbose )
    std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;

  // TEST that both regressionMethods produce equal store-files
  if ( writeRegressionObjectsForVerification )
  {
    std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );

    std::filebuf fbOut;
    fbOut.open ( s_destination_save_IL.c_str(), ios::out );
    std::ostream os (&fbOut);
    //
    regressionMethod->store( os );
    //
    fbOut.close();

    std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );

    std::filebuf fbOutScratch;
    fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
    std::ostream osScratch (&fbOutScratch);
    //
    regressionMethodScratch->store( osScratch );
    //
    fbOutScratch.close();
  }

  // TEST that both regressionMethods produce equal results
  //------------- read the test data --------------

  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------

  double holdOutLossIL ( 0.0 );
  double holdOutLossScratch ( 0.0 );

  evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
  evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );

  if ( verbose )
  {
    std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
    std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
  }

  CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4);

  // don't waste memory
  delete regressionMethod;
  delete regressionMethodScratch;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAdd1Example done ===================== " << std::endl;
}
void TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples()
{
  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples ===================== " << std::endl;

  NICE::Config conf;
  conf.sB ( "GPHIKRegression", "eig_verbose", false);
  conf.sS ( "GPHIKRegression", "optimization_method", "downhillsimplex"); //downhillsimplex, greedy
  // set higher built-in noise for hold-out regression estimation
  conf.sD ( "GPHIKRegression", "noise", 1e-4 );

  std::string s_trainData = conf.gS( "main", "trainData", "toyExampleSmallScaleTrain.data" );

  //------------- read the training data --------------

  NICE::Matrix dataTrain;
  NICE::Vector yValuesTrain;

  readData ( s_trainData, dataTrain, yValuesTrain );

  //----------------- convert data to sparse data structures ---------
  std::vector< const NICE::SparseVector *> examplesTrain;
  std::vector< const NICE::SparseVector *> examplesTrainPlus;
  std::vector< const NICE::SparseVector *> examplesTrainMinus;

  examplesTrain.resize( dataTrain.rows() );

  NICE::Vector yValuesPlus( dataTrain.rows() );
  NICE::Vector yValuesMinus( dataTrain.rows() );

  std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin();

  int cntPlus ( 0 );
  int cntMinus ( 0 );

  // note: we also slightly shuffle the order in which examples are added compared to the scratch regressor...
  // this should not result in any difference of behaviour...
  for (int i = 0; i < (int)dataTrain.rows(); i++, exTrainIt++)
  {
    *exTrainIt = new NICE::SparseVector( dataTrain.getRow(i) );

    if ( ( yValuesTrain[i] == 1 ) || ( yValuesTrain[i] == 2 ) )
    {
      examplesTrainPlus.push_back ( *exTrainIt );
      yValuesPlus[cntPlus] = yValuesTrain[i];
      cntPlus++;
    }
    else
    {
      examplesTrainMinus.push_back ( *exTrainIt );
      yValuesMinus[cntMinus] = yValuesTrain[i];
      cntMinus++;
    }
  }

  yValuesPlus.resize ( examplesTrainPlus.size() );
  yValuesMinus.resize( examplesTrainMinus.size() );

  // TRAIN INITIAL REGRESSOR FROM SCRATCH
  NICE::GPHIKRegression * regressionMethod;
  regressionMethod = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );

  regressionMethod->train ( examplesTrainPlus , yValuesPlus );

  if ( verbose )
  {
    std::cerr << "Initial values: " << yValuesPlus << std::endl;
    std::cerr << "Values to add: " << yValuesMinus << std::endl;
  }

  // RUN INCREMENTAL LEARNING
  bool performOptimizationAfterIncrement ( true );

  regressionMethod->addMultipleExamples ( examplesTrainMinus, yValuesMinus, performOptimizationAfterIncrement );

  // TRAIN SECOND REGRESSOR FROM SCRATCH USING THE SAME OVERALL AMOUNT OF EXAMPLES
  NICE::GPHIKRegression * regressionMethodScratch = new NICE::GPHIKRegression ( &conf, "GPHIKRegression" );
  regressionMethodScratch->train ( examplesTrain, yValuesTrain );

  if ( verbose )
    std::cerr << "trained both regressionMethods - now start evaluating them" << std::endl;

  // TEST that both regressionMethods produce equal store-files
  if ( writeRegressionObjectsForVerification )
  {
    std::string s_destination_save_IL ( "myRegressionMethodIL.txt" );

    std::filebuf fbOut;
    fbOut.open ( s_destination_save_IL.c_str(), ios::out );
    std::ostream os (&fbOut);
    //
    regressionMethod->store( os );
    //
    fbOut.close();

    std::string s_destination_save_scratch ( "myRegressionMethodScratch.txt" );

    std::filebuf fbOutScratch;
    fbOutScratch.open ( s_destination_save_scratch.c_str(), ios::out );
    std::ostream osScratch (&fbOutScratch);
    //
    regressionMethodScratch->store( osScratch );
    //
    fbOutScratch.close();
  }

  // TEST that both regressionMethods produce equal results
  //------------- read the test data --------------

  NICE::Matrix dataTest;
  NICE::Vector yValuesTest;

  std::string s_testData = conf.gS( "main", "testData", "toyExampleTest.data" );
  readData ( s_testData, dataTest, yValuesTest );

  // ------------------------------------------
  // ------------- REGRESSION --------------
  // ------------------------------------------

  double holdOutLossIL ( 0.0 );
  double holdOutLossScratch ( 0.0 );

  evaluateRegressionMethod ( holdOutLossIL, regressionMethod, dataTest, yValuesTest );
  evaluateRegressionMethod ( holdOutLossScratch, regressionMethodScratch, dataTest, yValuesTest );

  if ( verbose )
  {
    std::cerr << "holdOutLossIL: " << holdOutLossIL << std::endl;
    std::cerr << "holdOutLossScratch: " << holdOutLossScratch << std::endl;
  }

  CPPUNIT_ASSERT_DOUBLES_EQUAL( holdOutLossIL, holdOutLossScratch, 1e-4);

  // don't waste memory
  delete regressionMethod;
  delete regressionMethodScratch;

  for (std::vector< const NICE::SparseVector *>::iterator exTrainIt = examplesTrain.begin(); exTrainIt != examplesTrain.end(); exTrainIt++)
  {
    delete *exTrainIt;
  }

  if (verboseStartEnd)
    std::cerr << "================== TestGPHIKRegression::testRegressionOnlineLearnableAddMultipleExamples done ===================== " << std::endl;
}
#endif