GPHIKRegression.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. /**
  2. * @file GPHIKRegression.cpp
  3. * @brief Main interface for our GP HIK regression implementation (Implementation)
  4. * @author Alexander Freytag
  5. * @date 15-01-2014 (dd-mm-yyyy)
  6. */
  7. // STL includes
  8. #include <iostream>
  9. // NICE-core includes
  10. #include <core/basics/numerictools.h>
  11. #include <core/basics/Timer.h>
  12. // gp-hik-core includes
  13. #include "GPHIKRegression.h"
  14. #include "gp-hik-core/parameterizedFunctions/PFAbsExp.h"
  15. #include "gp-hik-core/parameterizedFunctions/PFExp.h"
  16. #include "gp-hik-core/parameterizedFunctions/PFMKL.h"
  17. using namespace std;
  18. using namespace NICE;
  19. /////////////////////////////////////////////////////
  20. /////////////////////////////////////////////////////
  21. // PROTECTED METHODS
  22. /////////////////////////////////////////////////////
  23. /////////////////////////////////////////////////////
  24. /////////////////////////////////////////////////////
  25. /////////////////////////////////////////////////////
  26. // PUBLIC METHODS
  27. /////////////////////////////////////////////////////
  28. /////////////////////////////////////////////////////
  29. GPHIKRegression::GPHIKRegression( )
  30. {
  31. this->b_isTrained = false;
  32. this->confSection = "";
  33. this->gphyper = new NICE::FMKGPHyperparameterOptimization();
  34. // in order to be sure about all necessary variables be setup with default values, we
  35. // run initFromConfig with an empty config
  36. NICE::Config tmpConfEmpty ;
  37. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  38. //indicate that we perform regression here
  39. this->gphyper->setPerformRegression ( true );
  40. }
  41. GPHIKRegression::GPHIKRegression( const Config *conf, const string & s_confSection )
  42. {
  43. ///////////
  44. // same code as in empty constructor - duplication can be avoided with C++11 allowing for constructor delegation
  45. ///////////
  46. this->b_isTrained = false;
  47. this->confSection = "";
  48. this->gphyper = new NICE::FMKGPHyperparameterOptimization();
  49. ///////////
  50. // here comes the new code part different from the empty constructor
  51. ///////////
  52. this->confSection = s_confSection;
  53. // if no config file was given, we either restore the classifier from an external file, or run ::init with
  54. // an emtpy config (using default values thereby) when calling the train-method
  55. if ( conf != NULL )
  56. {
  57. this->initFromConfig( conf, confSection );
  58. }
  59. else
  60. {
  61. // if no config was given, we create an empty one
  62. NICE::Config tmpConfEmpty ;
  63. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  64. }
  65. //indicate that we perform regression here
  66. this->gphyper->setPerformRegression ( true );
  67. }
  68. GPHIKRegression::~GPHIKRegression()
  69. {
  70. if ( gphyper != NULL )
  71. delete gphyper;
  72. }
  73. void GPHIKRegression::initFromConfig(const Config *conf, const string & s_confSection)
  74. {
  75. this->noise = conf->gD(confSection, "noise", 0.01);
  76. this->confSection = confSection;
  77. this->verbose = conf->gB(confSection, "verbose", false);
  78. this->debug = conf->gB(confSection, "debug", false);
  79. this->uncertaintyPredictionForRegression = conf->gB( confSection, "uncertaintyPredictionForRegression", false );
  80. //how do we approximate the predictive variance for regression uncertainty?
  81. string s_varianceApproximation = conf->gS(confSection, "varianceApproximation", "approximate_fine"); //default: fine approximative uncertainty prediction
  82. if ( (s_varianceApproximation.compare("approximate_rough") == 0) || ((s_varianceApproximation.compare("1") == 0)) )
  83. {
  84. this->varianceApproximation = APPROXIMATE_ROUGH;
  85. //no additional eigenvalue is needed here at all.
  86. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  87. }
  88. else if ( (s_varianceApproximation.compare("approximate_fine") == 0) || ((s_varianceApproximation.compare("2") == 0)) )
  89. {
  90. this->varianceApproximation = APPROXIMATE_FINE;
  91. //security check - compute at least one eigenvalue for this approximation strategy
  92. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( std::max( conf->gI(confSection, "nrOfEigenvaluesToConsiderForVarApprox", 1 ), 1) );
  93. }
  94. else if ( (s_varianceApproximation.compare("exact") == 0) || ((s_varianceApproximation.compare("3") == 0)) )
  95. {
  96. this->varianceApproximation = EXACT;
  97. //no additional eigenvalue is needed here at all.
  98. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  99. }
  100. else
  101. {
  102. this->varianceApproximation = NONE;
  103. //no additional eigenvalue is needed here at all.
  104. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  105. }
  106. if ( this->verbose )
  107. std::cerr << "varianceApproximationStrategy: " << s_varianceApproximation << std::endl;
  108. //NOTE init all member pointer variables here as well
  109. this->gphyper->initFromConfig ( conf, confSection /*possibly delete the handing of confSection*/);
  110. }
  111. ///////////////////// ///////////////////// /////////////////////
  112. // GET / SET
  113. ///////////////////// ///////////////////// /////////////////////
  114. ///////////////////// ///////////////////// /////////////////////
  115. // REGRESSION STUFF
  116. ///////////////////// ///////////////////// /////////////////////
  117. void GPHIKRegression::estimate ( const SparseVector * example, double & result ) const
  118. {
  119. double tmpUncertainty;
  120. this->estimate( example, result, tmpUncertainty );
  121. }
  122. void GPHIKRegression::estimate ( const NICE::Vector * example, double & result ) const
  123. {
  124. double tmpUncertainty;
  125. this->estimate( example, result, tmpUncertainty );
  126. }
  127. void GPHIKRegression::estimate ( const SparseVector * example, double & result, double & uncertainty ) const
  128. {
  129. if ( ! this->b_isTrained )
  130. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  131. NICE::SparseVector scores;
  132. scores.clear();
  133. gphyper->classify ( *example, scores );
  134. if ( scores.size() == 0 ) {
  135. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  136. }
  137. // the internal gphyper object returns for regression a sparse vector with a single entry only
  138. result = scores.begin()->second;
  139. if (uncertaintyPredictionForRegression)
  140. {
  141. if (varianceApproximation != NONE)
  142. {
  143. this->predictUncertainty( example, uncertainty );
  144. }
  145. else
  146. {
  147. //do nothing
  148. uncertainty = std::numeric_limits<double>::max();
  149. }
  150. }
  151. else
  152. {
  153. //do nothing
  154. uncertainty = std::numeric_limits<double>::max();
  155. }
  156. }
  157. void GPHIKRegression::estimate ( const NICE::Vector * example, double & result, double & uncertainty ) const
  158. {
  159. if ( ! this->b_isTrained )
  160. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  161. NICE::SparseVector scores;
  162. scores.clear();
  163. gphyper->classify ( *example, scores );
  164. if ( scores.size() == 0 ) {
  165. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  166. }
  167. // the internal gphyper object returns for regression a sparse vector with a single entry only
  168. result = scores.begin()->second;
  169. if (uncertaintyPredictionForRegression)
  170. {
  171. if (varianceApproximation != NONE)
  172. {
  173. this->predictUncertainty( example, uncertainty );
  174. }
  175. else
  176. {
  177. //do nothing
  178. uncertainty = std::numeric_limits<double>::max();
  179. }
  180. }
  181. else
  182. {
  183. //do nothing
  184. uncertainty = std::numeric_limits<double>::max();
  185. }
  186. }
  187. /** training process */
  188. void GPHIKRegression::train ( const std::vector< const NICE::SparseVector *> & examples, const NICE::Vector & labels )
  189. {
  190. // security-check: examples and labels have to be of same size
  191. if ( examples.size() != labels.size() )
  192. {
  193. fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  194. }
  195. if (verbose)
  196. {
  197. std::cerr << "GPHIKRegression::train" << std::endl;
  198. }
  199. Timer t;
  200. t.start();
  201. FastMinKernel *fmk = new FastMinKernel ( examples, noise, this->debug );
  202. gphyper->setFastMinKernel ( fmk );
  203. t.stop();
  204. if (verbose)
  205. std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;
  206. if (verbose)
  207. cerr << "Learning ..." << endl;
  208. // go go go
  209. gphyper->optimize ( labels );
  210. if (verbose)
  211. std::cerr << "optimization done" << std::endl;
  212. if ( ( varianceApproximation != NONE ) )
  213. {
  214. switch (varianceApproximation)
  215. {
  216. case APPROXIMATE_ROUGH:
  217. {
  218. gphyper->prepareVarianceApproximationRough();
  219. break;
  220. }
  221. case APPROXIMATE_FINE:
  222. {
  223. gphyper->prepareVarianceApproximationFine();
  224. break;
  225. }
  226. case EXACT:
  227. {
  228. //nothing to prepare
  229. break;
  230. }
  231. default:
  232. {
  233. //nothing to prepare
  234. }
  235. }
  236. }
  237. //indicate that we finished training successfully
  238. this->b_isTrained = true;
  239. // clean up all examples ??
  240. if (verbose)
  241. std::cerr << "Learning finished" << std::endl;
  242. }
/**
 * @brief Deep copy -- not implemented yet; always throws.
 * @return never returns normally (the trailing NULL only silences compilers)
 * @throws Exception unconditionally
 */
GPHIKRegression *GPHIKRegression::clone () const
{
  fthrow(Exception, "GPHIKRegression: clone() not yet implemented" );
  return NULL;
}
  248. void GPHIKRegression::predictUncertainty( const NICE::SparseVector * example, double & uncertainty ) const
  249. {
  250. if ( ! this->b_isTrained )
  251. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  252. switch (varianceApproximation)
  253. {
  254. case APPROXIMATE_ROUGH:
  255. {
  256. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  257. break;
  258. }
  259. case APPROXIMATE_FINE:
  260. {
  261. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  262. break;
  263. }
  264. case EXACT:
  265. {
  266. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  267. break;
  268. }
  269. default:
  270. {
  271. fthrow(Exception, "GPHIKRegression - your settings disabled the variance approximation needed for uncertainty prediction.");
  272. }
  273. }
  274. }
  275. void GPHIKRegression::predictUncertainty( const NICE::Vector * example, double & uncertainty ) const
  276. {
  277. if ( ! this->b_isTrained )
  278. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  279. switch (varianceApproximation)
  280. {
  281. case APPROXIMATE_ROUGH:
  282. {
  283. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  284. break;
  285. }
  286. case APPROXIMATE_FINE:
  287. {
  288. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  289. break;
  290. }
  291. case EXACT:
  292. {
  293. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  294. break;
  295. }
  296. default:
  297. {
  298. fthrow(Exception, "GPHIKRegression - your settings disabled the variance approximation needed for uncertainty prediction.");
  299. }
  300. }
  301. }
  302. ///////////////////// INTERFACE PERSISTENT /////////////////////
  303. // interface specific methods for store and restore
  304. ///////////////////// INTERFACE PERSISTENT /////////////////////
  305. void GPHIKRegression::restore ( std::istream & is, int format )
  306. {
  307. //delete everything we knew so far...
  308. this->clear();
  309. bool b_restoreVerbose ( false );
  310. #ifdef B_RESTOREVERBOSE
  311. b_restoreVerbose = true;
  312. #endif
  313. if ( is.good() )
  314. {
  315. if ( b_restoreVerbose )
  316. std::cerr << " restore GPHIKRegression" << std::endl;
  317. std::string tmp;
  318. is >> tmp; //class name
  319. if ( ! this->isStartTag( tmp, "GPHIKRegression" ) )
  320. {
  321. std::cerr << " WARNING - attempt to restore GPHIKRegression, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  322. throw;
  323. }
  324. if (gphyper != NULL)
  325. {
  326. delete gphyper;
  327. gphyper = NULL;
  328. }
  329. is.precision (numeric_limits<double>::digits10 + 1);
  330. bool b_endOfBlock ( false ) ;
  331. while ( !b_endOfBlock )
  332. {
  333. is >> tmp; // start of block
  334. if ( this->isEndTag( tmp, "GPHIKRegression" ) )
  335. {
  336. b_endOfBlock = true;
  337. continue;
  338. }
  339. tmp = this->removeStartTag ( tmp );
  340. if ( b_restoreVerbose )
  341. std::cerr << " currently restore section " << tmp << " in GPHIKRegression" << std::endl;
  342. if ( tmp.compare("confSection") == 0 )
  343. {
  344. is >> confSection;
  345. is >> tmp; // end of block
  346. tmp = this->removeEndTag ( tmp );
  347. }
  348. else if ( tmp.compare("gphyper") == 0 )
  349. {
  350. if ( gphyper == NULL )
  351. gphyper = new NICE::FMKGPHyperparameterOptimization();
  352. //then, load everything that we stored explicitely,
  353. // including precomputed matrices, LUTs, eigenvalues, ... and all that stuff
  354. gphyper->restore(is, format);
  355. is >> tmp; // end of block
  356. tmp = this->removeEndTag ( tmp );
  357. }
  358. else if ( tmp.compare("b_isTrained") == 0 )
  359. {
  360. is >> b_isTrained;
  361. is >> tmp; // end of block
  362. tmp = this->removeEndTag ( tmp );
  363. }
  364. else if ( tmp.compare("noise") == 0 )
  365. {
  366. is >> noise;
  367. is >> tmp; // end of block
  368. tmp = this->removeEndTag ( tmp );
  369. }
  370. else if ( tmp.compare("verbose") == 0 )
  371. {
  372. is >> verbose;
  373. is >> tmp; // end of block
  374. tmp = this->removeEndTag ( tmp );
  375. }
  376. else if ( tmp.compare("debug") == 0 )
  377. {
  378. is >> debug;
  379. is >> tmp; // end of block
  380. tmp = this->removeEndTag ( tmp );
  381. }
  382. else if ( tmp.compare("uncertaintyPredictionForRegression") == 0 )
  383. {
  384. is >> uncertaintyPredictionForRegression;
  385. is >> tmp; // end of block
  386. tmp = this->removeEndTag ( tmp );
  387. }
  388. else if ( tmp.compare("varianceApproximation") == 0 )
  389. {
  390. unsigned int ui_varianceApproximation;
  391. is >> ui_varianceApproximation;
  392. varianceApproximation = static_cast<VarianceApproximation> ( ui_varianceApproximation );
  393. is >> tmp; // end of block
  394. tmp = this->removeEndTag ( tmp );
  395. }
  396. else
  397. {
  398. std::cerr << "WARNING -- unexpected GPHIKRegression object -- " << tmp << " -- for restoration... aborting" << std::endl;
  399. throw;
  400. }
  401. }
  402. }
  403. else
  404. {
  405. std::cerr << "GPHIKRegression::restore -- InStream not initialized - restoring not possible!" << std::endl;
  406. throw;
  407. }
  408. }
/**
 * @brief Serialize the object to a stream in a tag-delimited format.
 *        NOTE: the section names and their order must stay in sync with
 *        restore(), which parses the very same tags.
 * @param os     output stream to write to
 * @param format serialization format flag, forwarded to gphyper->store()
 * @throws Exception if no internal gphyper object exists (untrained/cleared)
 */
void GPHIKRegression::store ( std::ostream & os, int format ) const
{
  // refuse to serialize without an internal model object
  if (gphyper == NULL)
    fthrow(Exception, "Regression object not trained yet -- aborting!" );

  if (os.good())
  {
    // show starting point
    os << this->createStartTag( "GPHIKRegression" ) << std::endl;

    // full double precision so restore() reproduces the exact values
    os.precision (numeric_limits<double>::digits10 + 1);

    os << this->createStartTag( "confSection" ) << std::endl;
    os << confSection << std::endl;
    os << this->createEndTag( "confSection" ) << std::endl;

    os << this->createStartTag( "gphyper" ) << std::endl;
    //store the underlying data
    //will be done in gphyper->store(of,format)
    //store the optimized parameter values and all that stuff
    gphyper->store(os, format);
    os << this->createEndTag( "gphyper" ) << std::endl;

    os << this->createStartTag( "b_isTrained" ) << std::endl;
    os << b_isTrained << std::endl;
    os << this->createEndTag( "b_isTrained" ) << std::endl;

    os << this->createStartTag( "noise" ) << std::endl;
    os << noise << std::endl;
    os << this->createEndTag( "noise" ) << std::endl;

    os << this->createStartTag( "verbose" ) << std::endl;
    os << verbose << std::endl;
    os << this->createEndTag( "verbose" ) << std::endl;

    os << this->createStartTag( "debug" ) << std::endl;
    os << debug << std::endl;
    os << this->createEndTag( "debug" ) << std::endl;

    os << this->createStartTag( "uncertaintyPredictionForRegression" ) << std::endl;
    os << uncertaintyPredictionForRegression << std::endl;
    os << this->createEndTag( "uncertaintyPredictionForRegression" ) << std::endl;

    // the enum is written as its integer value; restore() casts it back
    os << this->createStartTag( "varianceApproximation" ) << std::endl;
    os << varianceApproximation << std::endl;
    os << this->createEndTag( "varianceApproximation" ) << std::endl;

    // done
    os << this->createEndTag( "GPHIKRegression" ) << std::endl;
  }
  else
  {
    std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  }
}
  453. void GPHIKRegression::clear ()
  454. {
  455. if ( gphyper != NULL )
  456. {
  457. delete gphyper;
  458. gphyper = NULL;
  459. }
  460. }
  461. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  462. // interface specific methods for incremental extensions
  463. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  464. void GPHIKRegression::addExample( const NICE::SparseVector * example,
  465. const double & label,
  466. const bool & performOptimizationAfterIncrement
  467. )
  468. {
  469. if ( ! this->b_isTrained )
  470. {
  471. //call train method instead
  472. std::cerr << "Regression object not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  473. std::vector< const NICE::SparseVector *> examplesVec;
  474. examplesVec.push_back ( example );
  475. NICE::Vector labelsVec ( 1 , label );
  476. this->train ( examplesVec, labelsVec );
  477. }
  478. else
  479. {
  480. this->gphyper->addExample( example, label, performOptimizationAfterIncrement );
  481. }
  482. }
  483. void GPHIKRegression::addMultipleExamples( const std::vector< const NICE::SparseVector * > & newExamples,
  484. const NICE::Vector & newLabels,
  485. const bool & performOptimizationAfterIncrement
  486. )
  487. {
  488. //are new examples available? If not, nothing has to be done
  489. if ( newExamples.size() < 1)
  490. return;
  491. if ( this->gphyper == NULL )
  492. {
  493. //call train method instead
  494. std::cerr << "Regression object not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  495. this->train ( newExamples, newLabels );
  496. }
  497. else
  498. {
  499. this->gphyper->addMultipleExamples( newExamples, newLabels, performOptimizationAfterIncrement );
  500. }
  501. }