FMKGPHyperparameterOptimization.h
  1. /**
  2. * @file FMKGPHyperparameterOptimization.h
  3. * @brief Heart of the framework to set up everything, perform optimization, classification, and variance prediction (Interface)
  4. * @author Alexander Freytag, Erik Rodner
  5. * @date 01-02-2012 (dd-mm-yyyy)
  6. */
  7. #ifndef _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
  8. #define _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
  9. // STL includes
  10. #include <vector>
  11. #include <set>
  12. #include <map>
  13. // NICE-core includes
  14. #include <core/algebra/EigValues.h>
  15. #include <core/algebra/IterativeLinearSolver.h>
  16. #include <core/basics/Config.h>
  17. #include <core/basics/Persistent.h>
  18. #include <core/vector/VectorT.h>
  19. #ifdef NICE_USELIB_MATIO
  20. #include <core/matlabAccess/MatFileIO.h>
  21. #endif
  22. // gp-hik-core includes
  23. #include "gp-hik-core/FastMinKernel.h"
  24. #include "gp-hik-core/GPLikelihoodApprox.h"
  25. #include "gp-hik-core/IKMLinearCombination.h"
  26. #include "gp-hik-core/OnlineLearnable.h"
  27. #include "gp-hik-core/Quantization.h"
  28. #include "gp-hik-core/parameterizedFunctions/ParameterizedFunction.h"
  29. namespace NICE {
  30. /**
  31. * @class FMKGPHyperparameterOptimization
  32. * @brief Heart of the framework to set up everything, perform optimization, classification, and variance prediction
  33. * @author Alexander Freytag, Erik Rodner
  34. */
  35. class FMKGPHyperparameterOptimization : public NICE::Persistent, public NICE::OnlineLearnable
  36. {
  37. protected:
  38. /////////////////////////
  39. /////////////////////////
  40. // PROTECTED VARIABLES //
  41. /////////////////////////
  42. /////////////////////////
  43. ///////////////////////////////////
  44. // output/debug related settings //
  45. ///////////////////////////////////
  46. /** verbose flag */
  47. bool b_verbose;
  48. /** verbose flag for time measurement outputs */
  49. bool b_verboseTime;
  50. /** debug flag for several outputs useful for debugging*/
  51. bool b_debug;
  52. //////////////////////////////////////
  53. // classification related variables //
  54. //////////////////////////////////////
  55. /** per default, we perform classification, if not stated otherwise */
  56. bool b_performRegression;
  57. /** object storing sorted data and providing fast hik methods */
  58. NICE::FastMinKernel *fmk;
  59. /** object performing feature quantization */
  60. NICE::Quantization *q;
  61. /** upper bound for hyper parameters (ParameterizedFunction) to optimize */
  62. double d_parameterUpperBound;
  63. /** lower bound for hyper parameters (ParameterizedFunction) to optimize */
  64. double d_parameterLowerBound;
  65. /** the parameterized function we use within the minimum kernel */
  66. NICE::ParameterizedFunction *pf;
  67. /** Simple type definition for precomputation matrices used for fast classification */
  68. typedef VVector PrecomputedType;
  69. /** precomputed arrays A (1 per class) needed for classification without quantization */
  70. std::map< uint, PrecomputedType > precomputedA;
  71. /** precomputed arrays B (1 per class) needed for classification without quantization */
  72. std::map< uint, PrecomputedType > precomputedB;
  73. /** precomputed LUTs (1 per class) needed for classification with quantization */
  74. std::map< uint, double * > precomputedT;
  75. //! storing the labels is needed for Incremental Learning (re-optimization)
  76. NICE::Vector labels;
  77. //! store the class number of the positive class (i.e., larger class no), only used in binary settings
  78. int i_binaryLabelPositive;
  79. //! store the class number of the negative class (i.e., smaller class no), only used in binary settings
  80. int i_binaryLabelNegative;
  81. //! contains all class numbers of the currently known classes
  82. std::set<uint> knownClasses;
  83. //! container for multiple kernel matrices (e.g., a data-containing kernel matrix (GMHIKernel) and a noise matrix (IKMNoise) )
  84. NICE::IKMLinearCombination * ikmsum;
  85. //////////////////////////////////////////////
  86. // Iterative Linear Solver //
  87. //////////////////////////////////////////////
  88. /** method for solving linear equation systems - needed to compute K^-1 \times y */
  89. IterativeLinearSolver *linsolver;
  90. /** Max. number of iterations the iterative linear solver is allowed to run */
  91. int ils_max_iterations;
  92. /////////////////////////////////////
  93. // optimization related parameters //
  94. /////////////////////////////////////
  95. enum OPTIMIZATIONTECHNIQUE{
  96. OPT_GREEDY = 0,
  97. OPT_DOWNHILLSIMPLEX,
  98. OPT_NONE
  99. };
  100. /** specify the optimization method used (see corresponding enum) */
  101. OPTIMIZATIONTECHNIQUE optimizationMethod;
  102. //! whether or not to optimize noise with the GP likelihood
  103. bool optimizeNoise;
  104. // specific to greedy optimization
  105. /** step size used in grid based greedy optimization technique */
  106. double parameterStepSize;
  107. // specific to downhill simplex optimization
  108. /** Max. number of iterations the downhill simplex optimizer is allowed to run */
  109. int downhillSimplexMaxIterations;
  110. /** Max. time the downhill simplex optimizer is allowed to run */
  111. double downhillSimplexTimeLimit;
  112. /** Max. number of iterations the iterative linear solver is allowed to run */
  113. double downhillSimplexParamTol;
  114. //////////////////////////////////////////////
  115. // likelihood computation related variables //
  116. //////////////////////////////////////////////
  117. /** whether to compute the exact likelihood by computing the exact kernel matrix (not recommended - only for debugging/comparison purpose) */
  118. bool verifyApproximation;
  119. /** method computing eigenvalues and eigenvectors*/
  120. NICE::EigValues *eig;
  121. /** number of Eigenvalues to consider in the approximation of |K|_F used for approximating the likelihood */
  122. int nrOfEigenvaluesToConsider;
  123. //! k largest eigenvalues of the kernel matrix (k == nrOfEigenvaluesToConsider)
  124. NICE::Vector eigenMax;
  125. //! eigenvectors corresponding to k largest eigenvalues (k == nrOfEigenvaluesToConsider) -- format: nxk
  126. NICE::Matrix eigenMaxVectors;
  127. ////////////////////////////////////////////
  128. // variance computation related variables //
  129. ////////////////////////////////////////////
  130. /** number of Eigenvalues to consider in the fine approximation of the predictive variance (fine approximation only) */
  131. int nrOfEigenvaluesToConsiderForVarApprox;
  132. /** precomputed array needed for rough variance approximation without quantization */
  133. PrecomputedType precomputedAForVarEst;
  134. /** precomputed LUT needed for rough variance approximation with quantization */
  135. double * precomputedTForVarEst;
  136. /////////////////////////////////////////////////////
  137. // online / incremental learning related variables //
  138. /////////////////////////////////////////////////////
  139. /** whether or not to use previous alpha solutions as initialization after adding new examples*/
  140. bool b_usePreviousAlphas;
  141. //! store alpha vectors for good initializations in the IL setting, if activated
  142. std::map<uint, NICE::Vector> previousAlphas;
  143. /////////////////////////
  144. /////////////////////////
  145. // PROTECTED METHODS //
  146. /////////////////////////
  147. /////////////////////////
  148. /**
  149. * @brief calculate binary label vectors using a multi-class label vector
  150. * @author Alexander Freytag
  151. */
  152. uint prepareBinaryLabels ( std::map<uint, NICE::Vector> & _binaryLabels,
  153. const NICE::Vector & _y ,
  154. std::set<uint> & _myClasses
  155. );
  156. /**
  157. * @brief prepare the GPLike object for given binary labels and already given ikmsum-object
  158. * @author Alexander Freytag
  159. */
  160. inline void setupGPLikelihoodApprox( GPLikelihoodApprox * & _gplike,
  161. const std::map<uint, NICE::Vector> & _binaryLabels,
  162. uint & _parameterVectorSize
  163. );
  164. /**
  165. * @brief update eigenvectors and eigenvalues for given ikmsum-objects and a method to compute eigenvalues
  166. * @author Alexander Freytag
  167. */
  168. inline void updateEigenDecomposition( const int & _noEigenValues );
  169. /**
  170. * @brief core of the optimize-functions
  171. * @author Alexander Freytag
  172. */
  173. inline void performOptimization( GPLikelihoodApprox & gplike,
  174. const uint & parameterVectorSize
  175. );
  176. /**
  177. * @brief apply the optimized transformation values to the underlying features
  178. * @author Alexander Freytag
  179. */
  180. inline void transformFeaturesWithOptimalParameters(const GPLikelihoodApprox & _gplike,
  181. const uint & _parameterVectorSize
  182. );
  183. /**
  184. * @brief build the resulting matrices A and B as well as lookup tables T for fast evaluations using the optimized parameter settings
  185. * @author Alexander Freytag
  186. */
  187. inline void computeMatricesAndLUTs( const GPLikelihoodApprox & _gplike);
  188. /**
  189. * @brief Update matrices (A, B, LUTs) and optionally find optimal parameters after adding (a) new example(s).
  190. * @author Alexander Freytag
  191. */
  192. void updateAfterIncrement (
  193. const std::set<uint> _newClasses,
  194. const bool & _performOptimizationAfterIncrement = false
  195. );
  196. public:
  197. /**
  198. * @brief default constructor
  199. * @author Alexander Freytag
  200. */
  201. FMKGPHyperparameterOptimization( );
  202. /**
  203. * @brief simple constructor
  204. * @author Alexander Freytag
  205. * @param b_performRegression
  206. */
  207. FMKGPHyperparameterOptimization( const bool & _performRegression );
  208. /**
  209. * @brief recommended constructor, only calls this->initialize with same input arguments
  210. * @author Alexander Freytag
  211. * @param conf
  212. * @param confSection
  213. *
  214. */
  215. FMKGPHyperparameterOptimization( const Config *_conf,
  216. const std::string & _confSection = "FMKGPHyperparameterOptimization"
  217. );
  218. /**
  219. * @brief recommended constructor, only calls this->initialize with same input arguments
  220. * @author Alexander Freytag
  221. *
  222. * @param conf
  223. * @param fmk pointer to a pre-initialized structure (will be deleted)
  224. * @param confSection
  225. */
  226. FMKGPHyperparameterOptimization( const Config *_conf,
  227. FastMinKernel *_fmk,
  228. const std::string & _confSection = "FMKGPHyperparameterOptimization"
  229. );
  230. /**
  231. * @brief standard destructor
  232. * @author Alexander Freytag
  233. */
  234. virtual ~FMKGPHyperparameterOptimization();
  235. /**
  236. * @brief Set variables and parameters to default or config-specified values
  237. * @author Alexander Freytag
  238. */
  239. void initFromConfig( const Config *_conf,
  240. const std::string & _confSection = "FMKGPHyperparameterOptimization"
  241. );
  242. ///////////////////// ///////////////////// /////////////////////
  243. // GET / SET
  244. ///////////////////// ///////////////////// /////////////////////
  245. /**
  246. * @brief Set lower bound for hyper parameters to optimize
  247. * @author Alexander Freytag
  248. */
  249. void setParameterUpperBound(const double & _parameterUpperBound);
  250. /**
  251. * @brief Set upper bound for hyper parameters to optimize
  252. * @author Alexander Freytag
  253. */
  254. void setParameterLowerBound(const double & _parameterLowerBound);
  255. /**
  256. * @brief Get the currently known class numbers
  257. * @author Alexander Freytag
  258. */
  259. std::set<uint> getKnownClassNumbers ( ) const;
  260. /**
  261. * @brief Change between classification and regression, only allowed if not trained. Otherwise, exceptions will be thrown...
  262. * @author Alexander Freytag
  263. * @date 05-02-2014 (dd-mm-yyyy)
  264. */
  265. void setPerformRegression ( const bool & _performRegression );
  266. /**
  267. * @brief Set the FastMinKernel object. Only allowed if not trained. Otherwise, exceptions will be thrown...
  268. * @author Alexander Freytag
  269. * @date 05-02-2014 (dd-mm-yyyy)
  270. */
  271. void setFastMinKernel ( FastMinKernel *fmk );
  272. /**
  273. * @brief Set the number of EV we considere for variance approximation. Only allowed if not trained. Otherwise, exceptions will be thrown...
  274. * @author Alexander Freytag
  275. * @date 06-02-2014 (dd-mm-yyyy)
  276. */
  277. void setNrOfEigenvaluesToConsiderForVarApprox ( const int & _nrOfEigenvaluesToConsiderForVarApprox );
  278. ///////////////////// ///////////////////// /////////////////////
  279. // CLASSIFIER STUFF
  280. ///////////////////// ///////////////////// /////////////////////
  281. #ifdef NICE_USELIB_MATIO
  282. /**
  283. * @brief Perform hyperparameter optimization
  284. * @author Alexander Freytag
  285. *
  286. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  287. * @param y label vector (arbitrary), will be converted into a binary label vector
  288. * @param positives set of positive examples (indices)
  289. * @param negatives set of negative examples (indices)
  290. */
  291. void optimizeBinary ( const sparse_t & data,
  292. const NICE::Vector & y,
  293. const std::set<uint> & positives,
  294. const std::set<uint> & negatives,
  295. double noise
  296. );
  297. /**
  298. * @brief Perform hyperparameter optimization for GP multi-class or binary problems
  299. * @author Alexander Freytag
  300. *
  301. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  302. * @param y label vector with multi-class labels
  303. * @param examples mapping of example index to new index
  304. */
  305. void optimize ( const sparse_t & data,
  306. const NICE::Vector & y,
  307. const std::map<uint, uint> & examples,
  308. double noise
  309. );
  310. #endif
  311. /**
  312. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  313. * @author Alexander Freytag
  314. *
  315. * @param y label vector (multi-class as well as binary labels supported)
  316. */
  317. void optimize ( const NICE::Vector & y );
  318. /**
  319. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  320. *
  321. * @param binLabels vector of binary label vectors (1,-1) and corresponding class no.
  322. */
  323. void optimize ( std::map<uint, NICE::Vector> & _binaryLabels );
  324. /**
  325. * @brief Compute the necessary variables for appxorimations of predictive variance (LUTs), assuming an already initialized fmk object
  326. * @author Alexander Freytag
  327. * @date 11-04-2012 (dd-mm-yyyy)
  328. */
  329. void prepareVarianceApproximationRough();
  330. /**
  331. * @brief Compute the necessary variables for fine appxorimations of predictive variance (EVs), assuming an already initialized fmk object
  332. * @author Alexander Freytag
  333. * @date 11-04-2012 (dd-mm-yyyy)
  334. */
  335. void prepareVarianceApproximationFine();
  336. /**
  337. * @brief classify an example
  338. *
  339. * @param x input example (sparse vector)
  340. * @param scores scores for each class number
  341. *
  342. * @return class number achieving the best score
  343. */
  344. uint classify ( const NICE::SparseVector & _x,
  345. SparseVector & _scores
  346. ) const;
  347. /**
  348. * @brief classify an example that is given as non-sparse vector
  349. * NOTE: whenever possible, you should use sparse vectors to obtain significantly smaller computation times
  350. *
  351. * @date 18-06-2013 (dd-mm-yyyy)
  352. * @author Alexander Freytag
  353. *
  354. * @param x input example (non-sparse vector)
  355. * @param scores scores for each class number
  356. *
  357. * @return class number achieving the best score
  358. */
  359. uint classify ( const NICE::Vector & _x,
  360. SparseVector & _scores
  361. ) const;
  362. //////////////////////////////////////////
  363. // variance computation: sparse inputs
  364. //////////////////////////////////////////
  365. /**
  366. * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
  367. * @author Alexander Freytag
  368. * @date 10-04-2012 (dd-mm-yyyy)
  369. * @param x input example
  370. * @param predVariance contains the approximation of the predictive variance
  371. *
  372. */
  373. void computePredictiveVarianceApproximateRough(const NICE::SparseVector & _x,
  374. double & _predVariance
  375. ) const;
  376. /**
  377. * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
  378. * @author Alexander Freytag
  379. * @date 18-04-2012 (dd-mm-yyyy)
  380. * @param x input example
  381. * @param predVariance contains the approximation of the predictive variance
  382. *
  383. */
  384. void computePredictiveVarianceApproximateFine(const NICE::SparseVector & _x,
  385. double & _predVariance
  386. ) const;
  387. /**
  388. * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
  389. * @author Alexander Freytag
  390. * @date 10-04-2012 (dd-mm-yyyy)
  391. * @param x input example
  392. * @param predVariance contains the approximation of the predictive variance
  393. *
  394. */
  395. void computePredictiveVarianceExact(const NICE::SparseVector & _x,
  396. double & _predVariance
  397. ) const;
  398. //////////////////////////////////////////
  399. // variance computation: non-sparse inputs
  400. //////////////////////////////////////////
  401. /**
  402. * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
  403. * @author Alexander Freytag
  404. * @date 19-12-2013 (dd-mm-yyyy)
  405. * @param x input example
  406. * @param predVariance contains the approximation of the predictive variance
  407. *
  408. */
  409. void computePredictiveVarianceApproximateRough(const NICE::Vector & _x,
  410. double & _predVariance
  411. ) const;
  412. /**
  413. * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
  414. * @author Alexander Freytag
  415. * @date 19-12-2013 (dd-mm-yyyy)
  416. * @param x input example
  417. * @param predVariance contains the approximation of the predictive variance
  418. *
  419. */
  420. void computePredictiveVarianceApproximateFine(const NICE::Vector & _x,
  421. double & _predVariance
  422. ) const;
  423. /**
  424. * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
  425. * @author Alexander Freytag
  426. * @date 19-12-2013 (dd-mm-yyyy)
  427. * @param x input example
  428. * @param predVariance contains the approximation of the predictive variance
  429. *
  430. */
  431. void computePredictiveVarianceExact(const NICE::Vector & _x,
  432. double & _predVariance
  433. ) const;
  434. ///////////////////// INTERFACE PERSISTENT /////////////////////
  435. // interface specific methods for store and restore
  436. ///////////////////// INTERFACE PERSISTENT /////////////////////
  437. /**
  438. * @brief Load current object from external file (stream)
  439. * @author Alexander Freytag
  440. */
  441. void restore ( std::istream & _is,
  442. int _format = 0
  443. );
  444. /**
  445. * @brief Save current object to external file (stream)
  446. * @author Alexander Freytag
  447. */
  448. void store ( std::ostream & _os,
  449. int _format = 0
  450. ) const;
  451. /**
  452. * @brief Clear current object
  453. * @author Alexander Freytag
  454. */
  455. void clear ( ) ;
  456. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  457. // interface specific methods for incremental extensions
  458. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  459. /**
  460. * @brief add a new example
  461. * @author Alexander Freytag
  462. */
  463. virtual void addExample( const NICE::SparseVector * _example,
  464. const double & _label,
  465. const bool & _performOptimizationAfterIncrement = true
  466. );
  467. /**
  468. * @brief add several new examples
  469. * @author Alexander Freytag
  470. */
  471. virtual void addMultipleExamples( const std::vector< const NICE::SparseVector * > & _newExamples,
  472. const NICE::Vector & _newLabels,
  473. const bool & _performOptimizationAfterIncrement = true
  474. );
  475. };
  476. }
  477. #endif