// FMKGPHyperparameterOptimization.h
  1. /**
  2. * @file FMKGPHyperparameterOptimization.h
  3. * @brief Heart of the framework to set up everything, perform optimization, classification, and variance prediction (Interface)
  4. * @author Alexander Freytag, Erik Rodner
  5. * @date 01-02-2012 (dd-mm-yyyy)
  6. */
  7. #ifndef _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
  8. #define _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
  9. // STL includes
  10. #include <vector>
  11. #include <set>
  12. #include <map>
  13. // NICE-core includes
  14. #include <core/algebra/EigValues.h>
  15. #include <core/algebra/IterativeLinearSolver.h>
  16. #include <core/basics/Config.h>
  17. #include <core/basics/Persistent.h>
  18. #include <core/vector/VectorT.h>
  19. #ifdef NICE_USELIB_MATIO
  20. #include <core/matlabAccess/MatFileIO.h>
  21. #endif
  22. // gp-hik-core includes
  23. #include "gp-hik-core/FastMinKernel.h"
  24. #include "gp-hik-core/GPLikelihoodApprox.h"
  25. #include "gp-hik-core/IKMLinearCombination.h"
  26. #include "gp-hik-core/OnlineLearnable.h"
  27. #include "gp-hik-core/Quantization.h"
  28. #include "gp-hik-core/parameterizedFunctions/ParameterizedFunction.h"
  29. namespace NICE {
  30. /**
  31. * @class FMKGPHyperparameterOptimization
  32. * @brief Heart of the framework to set up everything, perform optimization, classification, and variance prediction
  33. * @author Alexander Freytag, Erik Rodner
  34. */
  35. class FMKGPHyperparameterOptimization : public NICE::Persistent, public NICE::OnlineLearnable
  36. {
  37. protected:
  38. /////////////////////////
  39. /////////////////////////
  40. // PROTECTED VARIABLES //
  41. /////////////////////////
  42. /////////////////////////
  43. ///////////////////////////////////
  44. // output/debug related settings //
  45. ///////////////////////////////////
  46. /** verbose flag */
  47. bool verbose;
  48. /** verbose flag for time measurement outputs */
  49. bool verboseTime;
  50. /** debug flag for several outputs useful for debugging*/
  51. bool debug;
  52. //////////////////////////////////////
  53. // classification related variables //
  54. //////////////////////////////////////
  55. /** per default, we perform classification, if not stated otherwise */
  56. bool b_performRegression;
  57. /** object storing sorted data and providing fast hik methods */
  58. NICE::FastMinKernel *fmk;
  59. /** object performing feature quantization */
  60. NICE::Quantization *q;
  61. /** the parameterized function we use within the minimum kernel */
  62. NICE::ParameterizedFunction *pf;
  63. /** method for solving linear equation systems - needed to compute K^-1 \times y */
  64. IterativeLinearSolver *linsolver;
  65. /** Max. number of iterations the iterative linear solver is allowed to run */
  66. int ils_max_iterations;
  67. /** Simple type definition for precomputation matrices used for fast classification */
  68. typedef VVector PrecomputedType;
  69. /** precomputed arrays A (1 per class) needed for classification without quantization */
  70. std::map< int, PrecomputedType > precomputedA;
  71. /** precomputed arrays B (1 per class) needed for classification without quantization */
  72. std::map< int, PrecomputedType > precomputedB;
  73. /** precomputed LUTs (1 per class) needed for classification with quantization */
  74. std::map< int, double * > precomputedT;
  75. //! storing the labels is needed for Incremental Learning (re-optimization)
  76. NICE::Vector labels;
  77. //! store the class number of the positive class (i.e., larger class no), only used in binary settings
  78. int binaryLabelPositive;
  79. //! store the class number of the negative class (i.e., smaller class no), only used in binary settings
  80. int binaryLabelNegative;
  81. //! contains all class numbers of the currently known classes
  82. std::set<int> knownClasses;
  83. //! container for multiple kernel matrices (e.g., a data-containing kernel matrix (GMHIKernel) and a noise matrix (IKMNoise) )
  84. NICE::IKMLinearCombination * ikmsum;
  85. /////////////////////////////////////
  86. // optimization related parameters //
  87. /////////////////////////////////////
  88. enum {
  89. OPT_GREEDY = 0,
  90. OPT_DOWNHILLSIMPLEX,
  91. OPT_NONE
  92. };
  93. /** specify the optimization method used (see corresponding enum) */
  94. int optimizationMethod;
  95. //! whether or not to optimize noise with the GP likelihood
  96. bool optimizeNoise;
  97. /** upper bound for hyper parameters to optimize */
  98. double parameterUpperBound;
  99. /** lower bound for hyper parameters to optimize */
  100. double parameterLowerBound;
  101. // specific to greedy optimization
  102. /** step size used in grid based greedy optimization technique */
  103. double parameterStepSize;
  104. // specific to downhill simplex optimization
  105. /** Max. number of iterations the downhill simplex optimizer is allowed to run */
  106. int downhillSimplexMaxIterations;
  107. /** Max. time the downhill simplex optimizer is allowed to run */
  108. double downhillSimplexTimeLimit;
  109. /** Max. number of iterations the iterative linear solver is allowed to run */
  110. double downhillSimplexParamTol;
  111. // likelihood computation related variables
  112. /** whether to compute the exact likelihood by computing the exact kernel matrix (not recommended - only for debugging/comparison purpose) */
  113. bool verifyApproximation;
  114. /** method computing eigenvalues and eigenvectors*/
  115. NICE::EigValues *eig;
  116. /** number of Eigenvalues to consider in the approximation of |K|_F used for approximating the likelihood */
  117. int nrOfEigenvaluesToConsider;
  118. //! k largest eigenvalues of the kernel matrix (k == nrOfEigenvaluesToConsider)
  119. NICE::Vector eigenMax;
  120. //! eigenvectors corresponding to k largest eigenvalues (k == nrOfEigenvaluesToConsider) -- format: nxk
  121. NICE::Matrix eigenMaxVectors;
  122. ////////////////////////////////////////////
  123. // variance computation related variables //
  124. ////////////////////////////////////////////
  125. /** number of Eigenvalues to consider in the fine approximation of the predictive variance (fine approximation only) */
  126. int nrOfEigenvaluesToConsiderForVarApprox;
  127. /** precomputed array needed for rough variance approximation without quantization */
  128. PrecomputedType precomputedAForVarEst;
  129. /** precomputed LUT needed for rough variance approximation with quantization */
  130. double * precomputedTForVarEst;
  131. /////////////////////////////////////////////////////
  132. // online / incremental learning related variables //
  133. /////////////////////////////////////////////////////
  134. /** whether or not to use previous alpha solutions as initialization after adding new examples*/
  135. bool b_usePreviousAlphas;
  136. //! store alpha vectors for good initializations in the IL setting, if activated
  137. std::map<int, NICE::Vector> previousAlphas;
  138. /////////////////////////
  139. /////////////////////////
  140. // PROTECTED METHODS //
  141. /////////////////////////
  142. /////////////////////////
  143. /**
  144. * @brief calculate binary label vectors using a multi-class label vector
  145. * @author Alexander Freytag
  146. */
  147. int prepareBinaryLabels ( std::map<int, NICE::Vector> & binaryLabels, const NICE::Vector & y , std::set<int> & myClasses);
  148. /**
  149. * @brief prepare the GPLike object for given binary labels and already given ikmsum-object
  150. * @author Alexander Freytag
  151. */
  152. inline void setupGPLikelihoodApprox( GPLikelihoodApprox * & gplike, const std::map<int, NICE::Vector> & binaryLabels, uint & parameterVectorSize);
  153. /**
  154. * @brief update eigenvectors and eigenvalues for given ikmsum-objects and a method to compute eigenvalues
  155. * @author Alexander Freytag
  156. */
  157. inline void updateEigenDecomposition( const int & i_noEigenValues );
  158. /**
  159. * @brief core of the optimize-functions
  160. * @author Alexander Freytag
  161. */
  162. inline void performOptimization( GPLikelihoodApprox & gplike, const uint & parameterVectorSize);
  163. /**
  164. * @brief apply the optimized transformation values to the underlying features
  165. * @author Alexander Freytag
  166. */
  167. inline void transformFeaturesWithOptimalParameters(const GPLikelihoodApprox & gplike, const uint & parameterVectorSize);
  168. /**
  169. * @brief build the resulting matrices A and B as well as lookup tables T for fast evaluations using the optimized parameter settings
  170. * @author Alexander Freytag
  171. */
  172. inline void computeMatricesAndLUTs( const GPLikelihoodApprox & gplike);
  173. /**
  174. * @brief Update matrices (A, B, LUTs) and optionally find optimal parameters after adding (a) new example(s).
  175. * @author Alexander Freytag
  176. */
  177. void updateAfterIncrement (
  178. const std::set<int> newClasses,
  179. const bool & performOptimizationAfterIncrement = false
  180. );
  181. public:
  182. /**
  183. * @brief default constructor
  184. * @author Alexander Freytag
  185. */
  186. FMKGPHyperparameterOptimization( );
  187. /**
  188. * @brief simple constructor
  189. * @author Alexander Freytag
  190. */
  191. FMKGPHyperparameterOptimization( const bool & b_performRegression );
  192. /**
  193. * @brief recommended constructor, only calls this->initialize with same input arguments
  194. *
  195. * @param pf pointer to a parameterized function used within the minimum kernel min(f(x_i), f(x_j)) (will not be deleted)
  196. * @param noise GP label noise
  197. * @param fmk pointer to a pre-initialized structure (will be deleted)
  198. */
  199. FMKGPHyperparameterOptimization( const Config *conf, const std::string & confSection = "GPHIKClassifier" );
  200. /**
  201. * @brief recommended constructor, only calls this->initialize with same input arguments
  202. *
  203. * @param pf pointer to a parameterized function used within the minimum kernel min(f(x_i), f(x_j)) (will not be deleted)
  204. * @param noise GP label noise
  205. * @param fmk pointer to a pre-initialized structure (will be deleted)
  206. */
  207. FMKGPHyperparameterOptimization( const Config *conf, ParameterizedFunction *_pf, FastMinKernel *_fmk, const std::string & confSection = "GPHIKClassifier" );
  208. /**
  209. * @brief standard destructor
  210. * @author Alexander Freytag
  211. */
  212. virtual ~FMKGPHyperparameterOptimization();
  213. /**
  214. * @brief Set variables and parameters to default or config-specified values
  215. * @author Alexander Freytag
  216. */
  217. void initFromConfig( const Config *conf, const std::string & confSection = "GPHIKClassifier" );
  218. ///////////////////// ///////////////////// /////////////////////
  219. // GET / SET
  220. ///////////////////// ///////////////////// /////////////////////
  221. /**
  222. * @brief Set lower bound for hyper parameters to optimize
  223. * @author Alexander Freytag
  224. */
  225. void setParameterUpperBound(const double & _parameterUpperBound);
  226. /**
  227. * @brief Set upper bound for hyper parameters to optimize
  228. * @author Alexander Freytag
  229. */
  230. void setParameterLowerBound(const double & _parameterLowerBound);
  231. /**
  232. * @brief Get the currently known class numbers
  233. * @author Alexander Freytag
  234. */
  235. std::set<int> getKnownClassNumbers ( ) const;
  236. /**
  237. * @brief Change between classification and regression, only allowed if not trained. Otherwise, exceptions will be thrown...
  238. * @author Alexander Freytag
  239. * @date 05-02-2014 (dd-mm-yyyy)
  240. */
  241. void setPerformRegression ( const bool & b_performRegression );
  242. /**
  243. * @brief Set the ParameterizedFunction object. Only allowed if not trained. Otherwise, exceptions will be thrown...
  244. * @author Alexander Freytag
  245. * @date 05-02-2014 (dd-mm-yyyy)
  246. */
  247. void setParameterizedFunction ( ParameterizedFunction *pf );
  248. /**
  249. * @brief Set the FastMinKernel object. Only allowed if not trained. Otherwise, exceptions will be thrown...
  250. * @author Alexander Freytag
  251. * @date 05-02-2014 (dd-mm-yyyy)
  252. */
  253. void setFastMinKernel ( FastMinKernel *fmk );
  254. /**
  255. * @brief Set the number of EV we considere for variance approximation. Only allowed if not trained. Otherwise, exceptions will be thrown...
  256. * @author Alexander Freytag
  257. * @date 06-02-2014 (dd-mm-yyyy)
  258. */
  259. void setNrOfEigenvaluesToConsiderForVarApprox ( const int & i_nrOfEigenvaluesToConsiderForVarApprox );
  260. ///////////////////// ///////////////////// /////////////////////
  261. // CLASSIFIER STUFF
  262. ///////////////////// ///////////////////// /////////////////////
  263. #ifdef NICE_USELIB_MATIO
  264. /**
  265. * @brief Perform hyperparameter optimization
  266. * @author Alexander Freytag
  267. *
  268. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  269. * @param y label vector (arbitrary), will be converted into a binary label vector
  270. * @param positives set of positive examples (indices)
  271. * @param negatives set of negative examples (indices)
  272. */
  273. void optimizeBinary ( const sparse_t & data, const NICE::Vector & y, const std::set<int> & positives, const std::set<int> & negatives, double noise );
  274. /**
  275. * @brief Perform hyperparameter optimization for GP multi-class or binary problems
  276. * @author Alexander Freytag
  277. *
  278. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  279. * @param y label vector with multi-class labels
  280. * @param examples mapping of example index to new index
  281. */
  282. void optimize ( const sparse_t & data, const NICE::Vector & y, const std::map<int, int> & examples, double noise );
  283. #endif
  284. /**
  285. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  286. * @author Alexander Freytag
  287. *
  288. * @param y label vector (multi-class as well as binary labels supported)
  289. */
  290. void optimize ( const NICE::Vector & y );
  291. /**
  292. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  293. *
  294. * @param binLabels vector of binary label vectors (1,-1) and corresponding class no.
  295. */
  296. void optimize ( std::map<int, NICE::Vector> & binaryLabels );
  297. /**
  298. * @brief Compute the necessary variables for appxorimations of predictive variance (LUTs), assuming an already initialized fmk object
  299. * @author Alexander Freytag
  300. * @date 11-04-2012 (dd-mm-yyyy)
  301. */
  302. void prepareVarianceApproximationRough();
  303. /**
  304. * @brief Compute the necessary variables for fine appxorimations of predictive variance (EVs), assuming an already initialized fmk object
  305. * @author Alexander Freytag
  306. * @date 11-04-2012 (dd-mm-yyyy)
  307. */
  308. void prepareVarianceApproximationFine();
  309. /**
  310. * @brief classify an example
  311. *
  312. * @param x input example (sparse vector)
  313. * @param scores scores for each class number
  314. *
  315. * @return class number achieving the best score
  316. */
  317. int classify ( const NICE::SparseVector & x, SparseVector & scores ) const;
  318. /**
  319. * @brief classify an example that is given as non-sparse vector
  320. * NOTE: whenever possible, you should sparse vectors to obtain significantly smaller computation times
  321. *
  322. * @date 18-06-2013 (dd-mm-yyyy)
  323. * @author Alexander Freytag
  324. *
  325. * @param x input example (non-sparse vector)
  326. * @param scores scores for each class number
  327. *
  328. * @return class number achieving the best score
  329. */
  330. int classify ( const NICE::Vector & x, SparseVector & scores ) const;
  331. //////////////////////////////////////////
  332. // variance computation: sparse inputs
  333. //////////////////////////////////////////
  334. /**
  335. * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
  336. * @author Alexander Freytag
  337. * @date 10-04-2012 (dd-mm-yyyy)
  338. * @param x input example
  339. * @param predVariance contains the approximation of the predictive variance
  340. *
  341. */
  342. void computePredictiveVarianceApproximateRough(const NICE::SparseVector & x, double & predVariance ) const;
  343. /**
  344. * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
  345. * @author Alexander Freytag
  346. * @date 18-04-2012 (dd-mm-yyyy)
  347. * @param x input example
  348. * @param predVariance contains the approximation of the predictive variance
  349. *
  350. */
  351. void computePredictiveVarianceApproximateFine(const NICE::SparseVector & x, double & predVariance ) const;
  352. /**
  353. * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
  354. * @author Alexander Freytag
  355. * @date 10-04-2012 (dd-mm-yyyy)
  356. * @param x input example
  357. * @param predVariance contains the approximation of the predictive variance
  358. *
  359. */
  360. void computePredictiveVarianceExact(const NICE::SparseVector & x, double & predVariance ) const;
  361. //////////////////////////////////////////
  362. // variance computation: non-sparse inputs
  363. //////////////////////////////////////////
  364. /**
  365. * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
  366. * @author Alexander Freytag
  367. * @date 19-12-2013 (dd-mm-yyyy)
  368. * @param x input example
  369. * @param predVariance contains the approximation of the predictive variance
  370. *
  371. */
  372. void computePredictiveVarianceApproximateRough(const NICE::Vector & x, double & predVariance ) const;
  373. /**
  374. * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
  375. * @author Alexander Freytag
  376. * @date 19-12-2013 (dd-mm-yyyy)
  377. * @param x input example
  378. * @param predVariance contains the approximation of the predictive variance
  379. *
  380. */
  381. void computePredictiveVarianceApproximateFine(const NICE::Vector & x, double & predVariance ) const;
  382. /**
  383. * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
  384. * @author Alexander Freytag
  385. * @date 19-12-2013 (dd-mm-yyyy)
  386. * @param x input example
  387. * @param predVariance contains the approximation of the predictive variance
  388. *
  389. */
  390. void computePredictiveVarianceExact(const NICE::Vector & x, double & predVariance ) const;
  391. ///////////////////// INTERFACE PERSISTENT /////////////////////
  392. // interface specific methods for store and restore
  393. ///////////////////// INTERFACE PERSISTENT /////////////////////
  394. /**
  395. * @brief Load current object from external file (stream)
  396. * @author Alexander Freytag
  397. */
  398. void restore ( std::istream & is, int format = 0 );
  399. /**
  400. * @brief Save current object to external file (stream)
  401. * @author Alexander Freytag
  402. */
  403. void store ( std::ostream & os, int format = 0 ) const;
  404. /**
  405. * @brief Clear current object
  406. * @author Alexander Freytag
  407. */
  408. void clear ( ) ;
  409. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  410. // interface specific methods for incremental extensions
  411. ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
  412. /**
  413. * @brief add a new example
  414. * @author Alexander Freytag
  415. */
  416. virtual void addExample( const NICE::SparseVector * example,
  417. const double & label,
  418. const bool & performOptimizationAfterIncrement = true
  419. );
  420. /**
  421. * @brief add several new examples
  422. * @author Alexander Freytag
  423. */
  424. virtual void addMultipleExamples( const std::vector< const NICE::SparseVector * > & newExamples,
  425. const NICE::Vector & newLabels,
  426. const bool & performOptimizationAfterIncrement = true
  427. );
  428. };
  429. }
  430. #endif