GPHIKRegression.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. /**
  2. * @file GPHIKRegression.cpp
  3. * @brief Main interface for our GP HIK regression implementation (Implementation)
  4. * @author Alexander Freytag
  5. * @date 15-01-2014 (dd-mm-yyyy)
  6. */
  7. // STL includes
  8. #include <iostream>
  9. // NICE-core includes
  10. #include <core/basics/numerictools.h>
  11. #include <core/basics/Timer.h>
  12. // gp-hik-core includes
  13. #include "GPHIKRegression.h"
  14. #include "gp-hik-core/parameterizedFunctions/PFAbsExp.h"
  15. #include "gp-hik-core/parameterizedFunctions/PFExp.h"
  16. #include "gp-hik-core/parameterizedFunctions/PFMKL.h"
  17. using namespace std;
  18. using namespace NICE;
  19. /////////////////////////////////////////////////////
  20. /////////////////////////////////////////////////////
  21. // PROTECTED METHODS
  22. /////////////////////////////////////////////////////
  23. /////////////////////////////////////////////////////
  24. void GPHIKRegression::init(const Config *conf, const string & s_confSection)
  25. {
  26. //copy the given config to have it accessible lateron
  27. if ( this->confCopy != conf )
  28. {
  29. if ( this->confCopy != NULL )
  30. delete this->confCopy;
  31. this->confCopy = new Config ( *conf );
  32. //we do not want to read until end of file for restoring
  33. this->confCopy->setIoUntilEndOfFile(false);
  34. }
  35. double parameterUpperBound = confCopy->gD(confSection, "parameter_upper_bound", 5.0 );
  36. double parameterLowerBound = confCopy->gD(confSection, "parameter_lower_bound", 1.0 );
  37. this->noise = confCopy->gD(confSection, "noise", 0.01);
  38. string transform = confCopy->gS(confSection, "transform", "absexp" );
  39. if (pf == NULL)
  40. {
  41. if ( transform == "absexp" )
  42. {
  43. this->pf = new PFAbsExp( 1.0, parameterLowerBound, parameterUpperBound );
  44. } else if ( transform == "exp" ) {
  45. this->pf = new PFExp( 1.0, parameterLowerBound, parameterUpperBound );
  46. }else if ( transform == "MKL" ) {
  47. //TODO generic, please :) load from a separate file or something like this!
  48. std::set<int> steps; steps.insert(4000); steps.insert(6000); //specific for VISAPP
  49. this->pf = new PFMKL( steps, parameterLowerBound, parameterUpperBound );
  50. } else {
  51. fthrow(Exception, "Transformation type is unknown " << transform);
  52. }
  53. }
  54. else
  55. {
  56. //we already know the pf from the restore-function
  57. }
  58. this->confSection = confSection;
  59. this->verbose = confCopy->gB(confSection, "verbose", false);
  60. this->debug = confCopy->gB(confSection, "debug", false);
  61. this->uncertaintyPredictionForRegression = confCopy->gB( confSection, "uncertaintyPredictionForRegression", false );
  62. //how do we approximate the predictive variance for regression uncertainty?
  63. string s_varianceApproximation = confCopy->gS(confSection, "varianceApproximation", "approximate_fine"); //default: fine approximative uncertainty prediction
  64. if ( (s_varianceApproximation.compare("approximate_rough") == 0) || ((s_varianceApproximation.compare("1") == 0)) )
  65. {
  66. this->varianceApproximation = APPROXIMATE_ROUGH;
  67. //no additional eigenvalue is needed here at all.
  68. this->confCopy->sI ( confSection, "nrOfEigenvaluesToConsiderForVarApprox", 0 );
  69. }
  70. else if ( (s_varianceApproximation.compare("approximate_fine") == 0) || ((s_varianceApproximation.compare("2") == 0)) )
  71. {
  72. this->varianceApproximation = APPROXIMATE_FINE;
  73. //security check - compute at least one eigenvalue for this approximation strategy
  74. this->confCopy->sI ( confSection, "nrOfEigenvaluesToConsiderForVarApprox", std::max( confCopy->gI(confSection, "nrOfEigenvaluesToConsiderForVarApprox", 1 ), 1) );
  75. }
  76. else if ( (s_varianceApproximation.compare("exact") == 0) || ((s_varianceApproximation.compare("3") == 0)) )
  77. {
  78. this->varianceApproximation = EXACT;
  79. //no additional eigenvalue is needed here at all.
  80. this->confCopy->sI ( confSection, "nrOfEigenvaluesToConsiderForVarApprox", 1 );
  81. }
  82. else
  83. {
  84. this->varianceApproximation = NONE;
  85. //no additional eigenvalue is needed here at all.
  86. this->confCopy->sI ( confSection, "nrOfEigenvaluesToConsiderForVarApprox", 1 );
  87. }
  88. if ( this->verbose )
  89. std::cerr << "varianceApproximationStrategy: " << s_varianceApproximation << std::endl;
  90. }
  91. /////////////////////////////////////////////////////
  92. /////////////////////////////////////////////////////
  93. // PUBLIC METHODS
  94. /////////////////////////////////////////////////////
  95. /////////////////////////////////////////////////////
  96. GPHIKRegression::GPHIKRegression( const Config *conf, const string & s_confSection )
  97. {
  98. //default settings, may be overwritten lateron
  99. gphyper = NULL;
  100. pf = NULL;
  101. confCopy = NULL;
  102. //just a default value
  103. uncertaintyPredictionForRegression = false;
  104. this->confSection = s_confSection;
  105. // if no config file was given, we either restore the classifier from an external file, or run ::init with
  106. // an emtpy config (using default values thereby) when calling the train-method
  107. if ( conf != NULL )
  108. {
  109. this->init(conf, confSection);
  110. }
  111. }
  112. GPHIKRegression::~GPHIKRegression()
  113. {
  114. if ( gphyper != NULL )
  115. delete gphyper;
  116. if (pf != NULL)
  117. delete pf;
  118. if ( confCopy != NULL )
  119. delete confCopy;
  120. }
  121. ///////////////////// ///////////////////// /////////////////////
  122. // GET / SET
  123. ///////////////////// ///////////////////// /////////////////////
  124. ///////////////////// ///////////////////// /////////////////////
  125. // REGRESSION STUFF
  126. ///////////////////// ///////////////////// /////////////////////
  127. void GPHIKRegression::estimate ( const SparseVector * example, double & result ) const
  128. {
  129. double tmpUncertainty;
  130. this->estimate( example, result, tmpUncertainty );
  131. }
  132. void GPHIKRegression::estimate ( const NICE::Vector * example, double & result ) const
  133. {
  134. double tmpUncertainty;
  135. this->estimate( example, result, tmpUncertainty );
  136. }
  137. void GPHIKRegression::estimate ( const SparseVector * example, double & result, double & uncertainty ) const
  138. {
  139. if (gphyper == NULL)
  140. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  141. NICE::SparseVector scores;
  142. scores.clear();
  143. gphyper->classify ( *example, scores );
  144. if ( scores.size() == 0 ) {
  145. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  146. }
  147. // the internal gphyper object returns for regression a sparse vector with a single entry only
  148. result = scores.begin()->second;
  149. if (uncertaintyPredictionForRegression)
  150. {
  151. if (varianceApproximation != NONE)
  152. {
  153. this->predictUncertainty( example, uncertainty );
  154. }
  155. else
  156. {
  157. //do nothing
  158. uncertainty = std::numeric_limits<double>::max();
  159. }
  160. }
  161. else
  162. {
  163. //do nothing
  164. uncertainty = std::numeric_limits<double>::max();
  165. }
  166. }
  167. void GPHIKRegression::estimate ( const NICE::Vector * example, double & result, double & uncertainty ) const
  168. {
  169. if (gphyper == NULL)
  170. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  171. NICE::SparseVector scores;
  172. scores.clear();
  173. gphyper->classify ( *example, scores );
  174. if ( scores.size() == 0 ) {
  175. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  176. }
  177. // the internal gphyper object returns for regression a sparse vector with a single entry only
  178. result = scores.begin()->second;
  179. if (uncertaintyPredictionForRegression)
  180. {
  181. if (varianceApproximation != NONE)
  182. {
  183. this->predictUncertainty( example, uncertainty );
  184. }
  185. else
  186. {
  187. //do nothing
  188. uncertainty = std::numeric_limits<double>::max();
  189. }
  190. }
  191. else
  192. {
  193. //do nothing
  194. uncertainty = std::numeric_limits<double>::max();
  195. }
  196. }
/**
 * Training process: builds a FastMinKernel structure from the given examples
 * and runs the GP hyperparameter optimization in regression mode. A previously
 * trained model is discarded and replaced.
 *
 * @param examples sparse feature vectors, one per training sample
 * @param labels   regression targets; must match examples in size
 * @throws Exception if examples and labels differ in size
 */
void GPHIKRegression::train ( const std::vector< const NICE::SparseVector *> & examples, const NICE::Vector & labels )
{
  // security-check: examples and labels have to be of same size
  if ( examples.size() != labels.size() )
  {
    fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  }

  if (verbose)
  {
    std::cerr << "GPHIKRegression::train" << std::endl;
  }

  //TODO add flag fpr gphyper that only regression is performed, or add a new method for this.
  // thereby, all the binary-label-stuff should be skipped :)
  // also think about the internal stuff, like initialization of alpha vectors and stuff like that .... :(
  // in the worst case, stuff has to be re-written...

  // lazy initialization: train() may be called before any config was supplied
  if ( this->confCopy == NULL )
  {
    std::cerr << "WARNING -- No config used so far, initialize values with empty config file now..." << std::endl;
    NICE::Config tmpConfEmpty ;
    this->init ( &tmpConfEmpty, this->confSection );
  }

  Timer t;
  t.start();
  // NOTE(review): the raw fmk pointer is handed to FMKGPHyperparameterOptimization
  // below -- presumably that object takes ownership; confirm to rule out a leak.
  FastMinKernel *fmk = new FastMinKernel ( examples, noise, this->debug );
  t.stop();
  if (verbose)
    std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;

  // re-training replaces any previously learned model
  if (gphyper != NULL)
    delete gphyper;

  // eigenvalues are only needed for the fine variance approximation strategy
  if ( ( varianceApproximation != APPROXIMATE_FINE) )
    confCopy->sI ( confSection, "nrOfEigenvaluesToConsiderForVarApprox", 0);

  // tell the hyperparameter optimization to operate in regression mode
  confCopy->sB ( confSection, "b_performRegression", true );

  gphyper = new FMKGPHyperparameterOptimization ( confCopy, pf, fmk, confSection );

  if (verbose)
    cerr << "Learning ..." << endl;

  // go go go
  gphyper->optimize ( labels );

  if (verbose)
    std::cerr << "optimization done" << std::endl;

  // precompute whatever the chosen variance-approximation strategy needs at
  // prediction time (eigenvectors, quantization tables, ...)
  if ( ( varianceApproximation != NONE ) )
  {
    switch (varianceApproximation)
    {
      case APPROXIMATE_ROUGH:
      {
        gphyper->prepareVarianceApproximationRough();
        break;
      }
      case APPROXIMATE_FINE:
      {
        gphyper->prepareVarianceApproximationFine();
        break;
      }
      case EXACT:
      {
        //nothing to prepare
        break;
      }
      default:
      {
        //nothing to prepare
      }
    }
  }

  // clean up all examples ??
  if (verbose)
    std::cerr << "Learning finished" << std::endl;
}
  266. GPHIKRegression *GPHIKRegression::clone () const
  267. {
  268. fthrow(Exception, "GPHIKRegression: clone() not yet implemented" );
  269. return NULL;
  270. }
  271. void GPHIKRegression::predictUncertainty( const NICE::SparseVector * example, double & uncertainty ) const
  272. {
  273. if (gphyper == NULL)
  274. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  275. switch (varianceApproximation)
  276. {
  277. case APPROXIMATE_ROUGH:
  278. {
  279. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  280. break;
  281. }
  282. case APPROXIMATE_FINE:
  283. {
  284. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  285. break;
  286. }
  287. case EXACT:
  288. {
  289. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  290. break;
  291. }
  292. default:
  293. {
  294. fthrow(Exception, "GPHIKRegression - your settings disabled the variance approximation needed for uncertainty prediction.");
  295. }
  296. }
  297. }
  298. void GPHIKRegression::predictUncertainty( const NICE::Vector * example, double & uncertainty ) const
  299. {
  300. if (gphyper == NULL)
  301. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  302. switch (varianceApproximation)
  303. {
  304. case APPROXIMATE_ROUGH:
  305. {
  306. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  307. break;
  308. }
  309. case APPROXIMATE_FINE:
  310. {
  311. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  312. break;
  313. }
  314. case EXACT:
  315. {
  316. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  317. break;
  318. }
  319. default:
  320. {
  321. fthrow(Exception, "GPHIKRegression - your settings disabled the variance approximation needed for uncertainty prediction.");
  322. }
  323. }
  324. }
  325. ///////////////////// INTERFACE PERSISTENT /////////////////////
  326. // interface specific methods for store and restore
  327. ///////////////////// INTERFACE PERSISTENT /////////////////////
/**
 * Restore the object from a stream previously written by store().
 *
 * Parses the tagged block format: an enclosing "GPHIKRegression" block
 * containing "confSection", "pf", "ConfigCopy" and "gphyper" sub-blocks (in
 * any order). Afterwards re-runs init() and re-initializes gphyper from the
 * restored config.
 *
 * @param is     input stream positioned at a stored GPHIKRegression block
 * @param format storage format, forwarded to the sub-objects' restore methods
 */
void GPHIKRegression::restore ( std::istream & is, int format )
{
  //delete everything we knew so far...
  this->clear();

  bool b_restoreVerbose ( false );
#ifdef B_RESTOREVERBOSE
  b_restoreVerbose = true;
#endif

  if ( is.good() )
  {
    if ( b_restoreVerbose )
      std::cerr << " restore GPHIKRegression" << std::endl;

    std::string tmp;
    is >> tmp; //class name

    // the very first token must be our own start tag
    if ( ! this->isStartTag( tmp, "GPHIKRegression" ) )
    {
      std::cerr << " WARNING - attempt to restore GPHIKRegression, but start flag " << tmp << " does not match! Aborting... " << std::endl;
      throw;
    }

    // drop any leftover state (clear() above should have done this already;
    // these are possibly redundant safety checks)
    if (pf != NULL)
    {
      delete pf;
      pf = NULL;
    }
    if ( confCopy != NULL )
    {
      delete confCopy;
      confCopy = NULL;
    }
    if (gphyper != NULL)
    {
      delete gphyper;
      gphyper = NULL;
    }

    is.precision (numeric_limits<double>::digits10 + 1);

    bool b_endOfBlock ( false ) ;

    // read sub-blocks until we hit our own end tag
    while ( !b_endOfBlock )
    {
      is >> tmp; // start of block

      if ( this->isEndTag( tmp, "GPHIKRegression" ) )
      {
        b_endOfBlock = true;
        continue;
      }

      tmp = this->removeStartTag ( tmp );

      if ( b_restoreVerbose )
        std::cerr << " currently restore section " << tmp << " in GPHIKRegression" << std::endl;

      if ( tmp.compare("confSection") == 0 )
      {
        // a single token: the name of the config section we were stored with
        is >> confSection;
        is >> tmp; // end of block
        tmp = this->removeEndTag ( tmp );
      }
      else if ( tmp.compare("pf") == 0 )
      {
        is >> tmp; // start of block

        // an immediately-following end tag means no function was stored
        if ( this->isEndTag( tmp, "pf" ) )
        {
          std::cerr << " ParameterizedFunction object can not be restored. Aborting..." << std::endl;
          throw;
        }

        // the inner start tag names the concrete ParameterizedFunction type
        std::string transform = this->removeStartTag ( tmp );

        if ( transform == "PFAbsExp" )
        {
          this->pf = new PFAbsExp ();
        } else if ( transform == "PFExp" ) {
          this->pf = new PFExp ();
        } else {
          fthrow(Exception, "Transformation type is unknown " << transform);
        }

        pf->restore(is, format);

        is >> tmp; // end of block
        tmp = this->removeEndTag ( tmp );
      }
      else if ( tmp.compare("ConfigCopy") == 0 )
      {
        // possibly obsolete safety checks
        if ( confCopy == NULL )
          confCopy = new Config;
        confCopy->clear();

        //we do not want to read until the end of the file
        confCopy->setIoUntilEndOfFile( false );
        //load every options we determined explicitely
        confCopy->restore(is, format);

        is >> tmp; // end of block
        tmp = this->removeEndTag ( tmp );
      }
      else if ( tmp.compare("gphyper") == 0 )
      {
        if ( gphyper == NULL )
          gphyper = new NICE::FMKGPHyperparameterOptimization();

        //then, load everything that we stored explicitely,
        // including precomputed matrices, LUTs, eigenvalues, ... and all that stuff
        gphyper->restore(is, format);

        is >> tmp; // end of block
        tmp = this->removeEndTag ( tmp );
      }
      else
      {
        // unknown sub-block: we cannot recover a consistent state, bail out
        std::cerr << "WARNING -- unexpected GPHIKRegression object -- " << tmp << " -- for restoration... aborting" << std::endl;
        throw;
      }
    }

    //load every settings as well as default options
    std::cerr << "run this->init" << std::endl;
    this->init(confCopy, confSection);
    std::cerr << "run gphyper->initialize" << std::endl;
    // NULL: no new FastMinKernel is built -- gphyper keeps its restored data
    gphyper->initialize ( confCopy, pf, NULL, confSection );
  }
  else
  {
    std::cerr << "GPHIKRegression::restore -- InStream not initialized - restoring not possible!" << std::endl;
    throw;
  }
}
  443. void GPHIKRegression::store ( std::ostream & os, int format ) const
  444. {
  445. if (gphyper == NULL)
  446. fthrow(Exception, "Regression object not trained yet -- aborting!" );
  447. if (os.good())
  448. {
  449. // show starting point
  450. os << this->createStartTag( "GPHIKRegression" ) << std::endl;
  451. os.precision (numeric_limits<double>::digits10 + 1);
  452. os << this->createStartTag( "confSection" ) << std::endl;
  453. os << confSection << std::endl;
  454. os << this->createEndTag( "confSection" ) << std::endl;
  455. os << this->createStartTag( "pf" ) << std::endl;
  456. pf->store(os, format);
  457. os << this->createEndTag( "pf" ) << std::endl;
  458. os << this->createStartTag( "ConfigCopy" ) << std::endl;
  459. //we do not want to read until end of file for restoring
  460. confCopy->setIoUntilEndOfFile(false);
  461. confCopy->store(os,format);
  462. os << this->createEndTag( "ConfigCopy" ) << std::endl;
  463. os << this->createStartTag( "gphyper" ) << std::endl;
  464. //store the underlying data
  465. //will be done in gphyper->store(of,format)
  466. //store the optimized parameter values and all that stuff
  467. gphyper->store(os, format);
  468. os << this->createEndTag( "gphyper" ) << std::endl;
  469. // done
  470. os << this->createEndTag( "GPHIKRegression" ) << std::endl;
  471. }
  472. else
  473. {
  474. std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  475. }
  476. }
  477. void GPHIKRegression::clear ()
  478. {
  479. if ( gphyper != NULL )
  480. {
  481. delete gphyper;
  482. gphyper = NULL;
  483. }
  484. if (pf != NULL)
  485. {
  486. delete pf;
  487. pf = NULL;
  488. }
  489. if ( confCopy != NULL )
  490. {
  491. delete confCopy;
  492. confCopy = NULL;
  493. }
  494. }
  495. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  496. // interface specific methods for incremental extensions
  497. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  498. void GPHIKRegression::addExample( const NICE::SparseVector * example,
  499. const double & label,
  500. const bool & performOptimizationAfterIncrement
  501. )
  502. {
  503. if ( this->gphyper == NULL )
  504. {
  505. //call train method instead
  506. std::cerr << "Regression object not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  507. std::vector< const NICE::SparseVector *> examplesVec;
  508. examplesVec.push_back ( example );
  509. NICE::Vector labelsVec ( 1 , label );
  510. this->train ( examplesVec, labelsVec );
  511. }
  512. else
  513. {
  514. this->gphyper->addExample( example, label, performOptimizationAfterIncrement );
  515. }
  516. }
  517. void GPHIKRegression::addMultipleExamples( const std::vector< const NICE::SparseVector * > & newExamples,
  518. const NICE::Vector & newLabels,
  519. const bool & performOptimizationAfterIncrement
  520. )
  521. {
  522. //are new examples available? If not, nothing has to be done
  523. if ( newExamples.size() < 1)
  524. return;
  525. if ( this->gphyper == NULL )
  526. {
  527. //call train method instead
  528. std::cerr << "Regression object not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  529. this->train ( newExamples, newLabels );
  530. }
  531. else
  532. {
  533. this->gphyper->addMultipleExamples( newExamples, newLabels, performOptimizationAfterIncrement );
  534. }
  535. }