GPHIKClassifier.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683
  1. /**
  2. * @file GPHIKClassifier.cpp
  3. * @brief Main interface for our GP HIK classifier (similar to the feature pool classifier interface in vislearning) (Implementation)
  4. * @author Erik Rodner, Alexander Freytag
  5. * @date 02/01/2012
  6. */
  7. // STL includes
  8. #include <iostream>
  9. // NICE-core includes
  10. #include <core/basics/numerictools.h>
  11. #include <core/basics/Timer.h>
  12. // gp-hik-core includes
  13. #include "GPHIKClassifier.h"
  14. using namespace std;
  15. using namespace NICE;
  16. /////////////////////////////////////////////////////
  17. /////////////////////////////////////////////////////
  18. // PROTECTED METHODS
  19. /////////////////////////////////////////////////////
  20. /////////////////////////////////////////////////////
  21. /////////////////////////////////////////////////////
  22. /////////////////////////////////////////////////////
  23. // PUBLIC METHODS
  24. /////////////////////////////////////////////////////
  25. /////////////////////////////////////////////////////
  26. GPHIKClassifier::GPHIKClassifier( )
  27. {
  28. this->b_isTrained = false;
  29. this->confSection = "";
  30. this->gphyper = new NICE::FMKGPHyperparameterOptimization();
  31. // in order to be sure about all necessary variables be setup with default values, we
  32. // run initFromConfig with an empty config
  33. NICE::Config tmpConfEmpty ;
  34. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  35. }
  36. GPHIKClassifier::GPHIKClassifier( const Config *conf, const string & s_confSection )
  37. {
  38. ///////////
  39. // same code as in empty constructor - duplication can be avoided with C++11 allowing for constructor delegation
  40. ///////////
  41. this->b_isTrained = false;
  42. this->confSection = "";
  43. this->gphyper = new NICE::FMKGPHyperparameterOptimization();
  44. ///////////
  45. // here comes the new code part different from the empty constructor
  46. ///////////
  47. this->confSection = s_confSection;
  48. // if no config file was given, we either restore the classifier from an external file, or run ::init with
  49. // an emtpy config (using default values thereby) when calling the train-method
  50. if ( conf != NULL )
  51. {
  52. this->initFromConfig( conf, confSection );
  53. }
  54. else
  55. {
  56. // if no config was given, we create an empty one
  57. NICE::Config tmpConfEmpty ;
  58. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  59. }
  60. }
  61. GPHIKClassifier::~GPHIKClassifier()
  62. {
  63. if ( gphyper != NULL )
  64. delete gphyper;
  65. }
  66. void GPHIKClassifier::initFromConfig(const Config *conf, const string & s_confSection)
  67. {
  68. this->noise = conf->gD(confSection, "noise", 0.01);
  69. this->confSection = confSection;
  70. this->verbose = conf->gB(confSection, "verbose", false);
  71. this->debug = conf->gB(confSection, "debug", false);
  72. this->uncertaintyPredictionForClassification = conf->gB( confSection, "uncertaintyPredictionForClassification", false );
  73. //how do we approximate the predictive variance for classification uncertainty?
  74. string s_varianceApproximation = conf->gS(confSection, "varianceApproximation", "approximate_fine"); //default: fine approximative uncertainty prediction
  75. if ( (s_varianceApproximation.compare("approximate_rough") == 0) || ((s_varianceApproximation.compare("1") == 0)) )
  76. {
  77. this->varianceApproximation = APPROXIMATE_ROUGH;
  78. //no additional eigenvalue is needed here at all.
  79. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  80. }
  81. else if ( (s_varianceApproximation.compare("approximate_fine") == 0) || ((s_varianceApproximation.compare("2") == 0)) )
  82. {
  83. this->varianceApproximation = APPROXIMATE_FINE;
  84. //security check - compute at least one eigenvalue for this approximation strategy
  85. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( std::max( conf->gI(confSection, "nrOfEigenvaluesToConsiderForVarApprox", 1 ), 1) );
  86. }
  87. else if ( (s_varianceApproximation.compare("exact") == 0) || ((s_varianceApproximation.compare("3") == 0)) )
  88. {
  89. this->varianceApproximation = EXACT;
  90. //no additional eigenvalue is needed here at all.
  91. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  92. }
  93. else
  94. {
  95. this->varianceApproximation = NONE;
  96. //no additional eigenvalue is needed here at all.
  97. this->gphyper->setNrOfEigenvaluesToConsiderForVarApprox ( 0 );
  98. }
  99. if ( this->verbose )
  100. std::cerr << "varianceApproximationStrategy: " << s_varianceApproximation << std::endl;
  101. //NOTE init all member pointer variables here as well
  102. this->gphyper->initFromConfig ( conf, confSection /*possibly delete the handing of confSection*/);
  103. }
  104. ///////////////////// ///////////////////// /////////////////////
  105. // GET / SET
  106. ///////////////////// ///////////////////// /////////////////////
  107. std::set<int> GPHIKClassifier::getKnownClassNumbers ( ) const
  108. {
  109. if ( ! this->b_isTrained )
  110. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  111. return gphyper->getKnownClassNumbers();
  112. }
  113. ///////////////////// ///////////////////// /////////////////////
  114. // CLASSIFIER STUFF
  115. ///////////////////// ///////////////////// /////////////////////
  116. void GPHIKClassifier::classify ( const SparseVector * example, int & result, SparseVector & scores ) const
  117. {
  118. double tmpUncertainty;
  119. this->classify( example, result, scores, tmpUncertainty );
  120. }
  121. void GPHIKClassifier::classify ( const NICE::Vector * example, int & result, SparseVector & scores ) const
  122. {
  123. double tmpUncertainty;
  124. this->classify( example, result, scores, tmpUncertainty );
  125. }
  126. void GPHIKClassifier::classify ( const SparseVector * example, int & result, SparseVector & scores, double & uncertainty ) const
  127. {
  128. if ( ! this->b_isTrained )
  129. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  130. scores.clear();
  131. result = gphyper->classify ( *example, scores );
  132. if ( scores.size() == 0 ) {
  133. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << example->size() );
  134. }
  135. if (uncertaintyPredictionForClassification)
  136. {
  137. if (varianceApproximation != NONE)
  138. {
  139. this->predictUncertainty( example, uncertainty );
  140. }
  141. else
  142. {
  143. //do nothing
  144. uncertainty = std::numeric_limits<double>::max();
  145. }
  146. }
  147. else
  148. {
  149. //do nothing
  150. uncertainty = std::numeric_limits<double>::max();
  151. }
  152. }
  153. void GPHIKClassifier::classify ( const NICE::Vector * _example,
  154. int & _result,
  155. SparseVector & _scores,
  156. double & _uncertainty
  157. ) const
  158. {
  159. if ( ! this->b_isTrained )
  160. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  161. _scores.clear();
  162. _result = gphyper->classify ( *_example, _scores );
  163. if ( _scores.size() == 0 ) {
  164. fthrow(Exception, "Zero scores, something is likely to be wrong here: svec.size() = " << _example->size() );
  165. }
  166. if (uncertaintyPredictionForClassification)
  167. {
  168. if (varianceApproximation != NONE)
  169. {
  170. this->predictUncertainty( _example, _uncertainty );
  171. }
  172. else
  173. {
  174. //do nothing
  175. _uncertainty = std::numeric_limits<double>::max();
  176. }
  177. }
  178. else
  179. {
  180. //do nothing
  181. _uncertainty = std::numeric_limits<double>::max();
  182. }
  183. }
  184. /** training process */
  185. void GPHIKClassifier::train ( const std::vector< const NICE::SparseVector *> & examples, const NICE::Vector & labels )
  186. {
  187. //FIXME add check whether the classifier has been trained already. if so, discard all previous results.
  188. // security-check: examples and labels have to be of same size
  189. if ( examples.size() != labels.size() )
  190. {
  191. fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  192. }
  193. if (verbose)
  194. {
  195. std::cerr << "GPHIKClassifier::train" << std::endl;
  196. }
  197. Timer t;
  198. t.start();
  199. FastMinKernel *fmk = new FastMinKernel ( examples, noise, this->debug );
  200. gphyper->setFastMinKernel ( fmk );
  201. t.stop();
  202. if (verbose)
  203. std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;
  204. if (verbose)
  205. cerr << "Learning ..." << endl;
  206. // go go go
  207. gphyper->optimize ( labels );
  208. if (verbose)
  209. std::cerr << "optimization done" << std::endl;
  210. if ( ( varianceApproximation != NONE ) )
  211. {
  212. switch (varianceApproximation)
  213. {
  214. case APPROXIMATE_ROUGH:
  215. {
  216. gphyper->prepareVarianceApproximationRough();
  217. break;
  218. }
  219. case APPROXIMATE_FINE:
  220. {
  221. gphyper->prepareVarianceApproximationFine();
  222. break;
  223. }
  224. case EXACT:
  225. {
  226. //nothing to prepare
  227. break;
  228. }
  229. default:
  230. {
  231. //nothing to prepare
  232. }
  233. }
  234. }
  235. //indicate that we finished training successfully
  236. this->b_isTrained = true;
  237. // clean up all examples ??
  238. if (verbose)
  239. std::cerr << "Learning finished" << std::endl;
  240. }
  241. /** training process */
  242. void GPHIKClassifier::train ( const std::vector< const NICE::SparseVector *> & examples, std::map<int, NICE::Vector> & binLabels )
  243. {
  244. // security-check: examples and labels have to be of same size
  245. for ( std::map< int, NICE::Vector >::const_iterator binLabIt = binLabels.begin();
  246. binLabIt != binLabels.end();
  247. binLabIt++
  248. )
  249. {
  250. if ( examples.size() != binLabIt->second.size() )
  251. {
  252. fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  253. }
  254. }
  255. if (verbose)
  256. std::cerr << "GPHIKClassifier::train" << std::endl;
  257. Timer t;
  258. t.start();
  259. FastMinKernel *fmk = new FastMinKernel ( examples, noise, this->debug );
  260. gphyper->setFastMinKernel ( fmk );
  261. t.stop();
  262. if (verbose)
  263. std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;
  264. if (verbose)
  265. cerr << "Learning ..." << endl;
  266. // go go go
  267. gphyper->optimize ( binLabels );
  268. if (verbose)
  269. std::cerr << "optimization done, now prepare for the uncertainty prediction" << std::endl;
  270. if ( ( varianceApproximation != NONE ) )
  271. {
  272. switch (varianceApproximation)
  273. {
  274. case APPROXIMATE_ROUGH:
  275. {
  276. gphyper->prepareVarianceApproximationRough();
  277. break;
  278. }
  279. case APPROXIMATE_FINE:
  280. {
  281. gphyper->prepareVarianceApproximationFine();
  282. break;
  283. }
  284. case EXACT:
  285. {
  286. //nothing to prepare
  287. break;
  288. }
  289. default:
  290. {
  291. //nothing to prepare
  292. }
  293. }
  294. }
  295. //indicate that we finished training successfully
  296. this->b_isTrained = true;
  297. // clean up all examples ??
  298. if (verbose)
  299. std::cerr << "Learning finished" << std::endl;
  300. }
  301. GPHIKClassifier *GPHIKClassifier::clone () const
  302. {
  303. fthrow(Exception, "GPHIKClassifier: clone() not yet implemented" );
  304. return NULL;
  305. }
  306. void GPHIKClassifier::predictUncertainty( const NICE::SparseVector * example, double & uncertainty ) const
  307. {
  308. if (gphyper == NULL)
  309. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  310. //we directly store the predictive variances in the vector, that contains the classification uncertainties lateron to save storage
  311. switch (varianceApproximation)
  312. {
  313. case APPROXIMATE_ROUGH:
  314. {
  315. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  316. break;
  317. }
  318. case APPROXIMATE_FINE:
  319. {
  320. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  321. break;
  322. }
  323. case EXACT:
  324. {
  325. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  326. break;
  327. }
  328. default:
  329. {
  330. fthrow(Exception, "GPHIKClassifier - your settings disabled the variance approximation needed for uncertainty prediction.");
  331. }
  332. }
  333. }
  334. void GPHIKClassifier::predictUncertainty( const NICE::Vector * example, double & uncertainty ) const
  335. {
  336. if (gphyper == NULL)
  337. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  338. //we directly store the predictive variances in the vector, that contains the classification uncertainties lateron to save storage
  339. switch (varianceApproximation)
  340. {
  341. case APPROXIMATE_ROUGH:
  342. {
  343. gphyper->computePredictiveVarianceApproximateRough( *example, uncertainty );
  344. break;
  345. }
  346. case APPROXIMATE_FINE:
  347. {
  348. gphyper->computePredictiveVarianceApproximateFine( *example, uncertainty );
  349. break;
  350. }
  351. case EXACT:
  352. {
  353. gphyper->computePredictiveVarianceExact( *example, uncertainty );
  354. break;
  355. }
  356. default:
  357. {
  358. fthrow(Exception, "GPHIKClassifier - your settings disabled the variance approximation needed for uncertainty prediction.");
  359. }
  360. }
  361. }
  362. ///////////////////// INTERFACE PERSISTENT /////////////////////
  363. // interface specific methods for store and restore
  364. ///////////////////// INTERFACE PERSISTENT /////////////////////
  365. void GPHIKClassifier::restore ( std::istream & is, int format )
  366. {
  367. //delete everything we knew so far...
  368. this->clear();
  369. bool b_restoreVerbose ( false );
  370. #ifdef B_RESTOREVERBOSE
  371. b_restoreVerbose = true;
  372. #endif
  373. if ( is.good() )
  374. {
  375. if ( b_restoreVerbose )
  376. std::cerr << " restore GPHIKClassifier" << std::endl;
  377. std::string tmp;
  378. is >> tmp; //class name
  379. if ( ! this->isStartTag( tmp, "GPHIKClassifier" ) )
  380. {
  381. std::cerr << " WARNING - attempt to restore GPHIKClassifier, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  382. throw;
  383. }
  384. if (gphyper != NULL)
  385. {
  386. delete gphyper;
  387. gphyper = NULL;
  388. }
  389. is.precision (numeric_limits<double>::digits10 + 1);
  390. bool b_endOfBlock ( false ) ;
  391. while ( !b_endOfBlock )
  392. {
  393. is >> tmp; // start of block
  394. if ( this->isEndTag( tmp, "GPHIKClassifier" ) )
  395. {
  396. b_endOfBlock = true;
  397. continue;
  398. }
  399. tmp = this->removeStartTag ( tmp );
  400. if ( b_restoreVerbose )
  401. std::cerr << " currently restore section " << tmp << " in GPHIKClassifier" << std::endl;
  402. if ( tmp.compare("confSection") == 0 )
  403. {
  404. is >> confSection;
  405. is >> tmp; // end of block
  406. tmp = this->removeEndTag ( tmp );
  407. }
  408. else if ( tmp.compare("gphyper") == 0 )
  409. {
  410. if ( gphyper == NULL )
  411. gphyper = new NICE::FMKGPHyperparameterOptimization();
  412. //then, load everything that we stored explicitely,
  413. // including precomputed matrices, LUTs, eigenvalues, ... and all that stuff
  414. gphyper->restore(is, format);
  415. is >> tmp; // end of block
  416. tmp = this->removeEndTag ( tmp );
  417. }
  418. else if ( tmp.compare("b_isTrained") == 0 )
  419. {
  420. is >> b_isTrained;
  421. is >> tmp; // end of block
  422. tmp = this->removeEndTag ( tmp );
  423. }
  424. else if ( tmp.compare("noise") == 0 )
  425. {
  426. is >> noise;
  427. is >> tmp; // end of block
  428. tmp = this->removeEndTag ( tmp );
  429. }
  430. else if ( tmp.compare("verbose") == 0 )
  431. {
  432. is >> verbose;
  433. is >> tmp; // end of block
  434. tmp = this->removeEndTag ( tmp );
  435. }
  436. else if ( tmp.compare("debug") == 0 )
  437. {
  438. is >> debug;
  439. is >> tmp; // end of block
  440. tmp = this->removeEndTag ( tmp );
  441. }
  442. else if ( tmp.compare("uncertaintyPredictionForClassification") == 0 )
  443. {
  444. is >> uncertaintyPredictionForClassification;
  445. is >> tmp; // end of block
  446. tmp = this->removeEndTag ( tmp );
  447. }
  448. else if ( tmp.compare("varianceApproximation") == 0 )
  449. {
  450. unsigned int ui_varianceApproximation;
  451. is >> ui_varianceApproximation;
  452. varianceApproximation = static_cast<VarianceApproximation> ( ui_varianceApproximation );
  453. is >> tmp; // end of block
  454. tmp = this->removeEndTag ( tmp );
  455. }
  456. else
  457. {
  458. std::cerr << "WARNING -- unexpected GPHIKClassifier object -- " << tmp << " -- for restoration... aborting" << std::endl;
  459. throw;
  460. }
  461. }
  462. }
  463. else
  464. {
  465. std::cerr << "GPHIKClassifier::restore -- InStream not initialized - restoring not possible!" << std::endl;
  466. throw;
  467. }
  468. }
  469. void GPHIKClassifier::store ( std::ostream & os, int format ) const
  470. {
  471. if (os.good())
  472. {
  473. // show starting point
  474. os << this->createStartTag( "GPHIKClassifier" ) << std::endl;
  475. os.precision (numeric_limits<double>::digits10 + 1);
  476. os << this->createStartTag( "confSection" ) << std::endl;
  477. os << confSection << std::endl;
  478. os << this->createEndTag( "confSection" ) << std::endl;
  479. os << this->createStartTag( "gphyper" ) << std::endl;
  480. //store the underlying data
  481. //will be done in gphyper->store(of,format)
  482. //store the optimized parameter values and all that stuff
  483. gphyper->store(os, format);
  484. os << this->createEndTag( "gphyper" ) << std::endl;
  485. /////////////////////////////////////////////////////////
  486. // store variables which we previously set via config
  487. /////////////////////////////////////////////////////////
  488. os << this->createStartTag( "b_isTrained" ) << std::endl;
  489. os << b_isTrained << std::endl;
  490. os << this->createEndTag( "b_isTrained" ) << std::endl;
  491. os << this->createStartTag( "noise" ) << std::endl;
  492. os << noise << std::endl;
  493. os << this->createEndTag( "noise" ) << std::endl;
  494. os << this->createStartTag( "verbose" ) << std::endl;
  495. os << verbose << std::endl;
  496. os << this->createEndTag( "verbose" ) << std::endl;
  497. os << this->createStartTag( "debug" ) << std::endl;
  498. os << debug << std::endl;
  499. os << this->createEndTag( "debug" ) << std::endl;
  500. os << this->createStartTag( "uncertaintyPredictionForClassification" ) << std::endl;
  501. os << uncertaintyPredictionForClassification << std::endl;
  502. os << this->createEndTag( "uncertaintyPredictionForClassification" ) << std::endl;
  503. os << this->createStartTag( "varianceApproximation" ) << std::endl;
  504. os << varianceApproximation << std::endl;
  505. os << this->createEndTag( "varianceApproximation" ) << std::endl;
  506. // done
  507. os << this->createEndTag( "GPHIKClassifier" ) << std::endl;
  508. }
  509. else
  510. {
  511. std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  512. }
  513. }
  514. void GPHIKClassifier::clear ()
  515. {
  516. if ( gphyper != NULL )
  517. {
  518. delete gphyper;
  519. gphyper = NULL;
  520. }
  521. }
  522. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  523. // interface specific methods for incremental extensions
  524. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  525. void GPHIKClassifier::addExample( const NICE::SparseVector * example,
  526. const double & label,
  527. const bool & performOptimizationAfterIncrement
  528. )
  529. {
  530. if ( ! this->b_isTrained )
  531. {
  532. //call train method instead
  533. std::cerr << "Classifier not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  534. std::vector< const NICE::SparseVector *> examplesVec;
  535. examplesVec.push_back ( example );
  536. NICE::Vector labelsVec ( 1 , label );
  537. this->train ( examplesVec, labelsVec );
  538. }
  539. else
  540. {
  541. this->gphyper->addExample( example, label, performOptimizationAfterIncrement );
  542. }
  543. }
  544. void GPHIKClassifier::addMultipleExamples( const std::vector< const NICE::SparseVector * > & newExamples,
  545. const NICE::Vector & newLabels,
  546. const bool & performOptimizationAfterIncrement
  547. )
  548. {
  549. //are new examples available? If not, nothing has to be done
  550. if ( newExamples.size() < 1)
  551. return;
  552. if ( ! this->b_isTrained )
  553. {
  554. //call train method instead
  555. std::cerr << "Classifier not initially trained yet -- run initial training instead of incremental extension!" << std::endl;
  556. this->train ( newExamples, newLabels );
  557. }
  558. else
  559. {
  560. this->gphyper->addMultipleExamples( newExamples, newLabels, performOptimizationAfterIncrement );
  561. }
  562. }