// FMKGPHyperparameterOptimization.h
/**
* @file FMKGPHyperparameterOptimization.h
* @brief Heart of the framework to set up everything, perform optimization, classification, and variance prediction (Interface)
* @author Erik Rodner, Alexander Freytag
* @date 01/02/2012
*/
#ifndef _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
#define _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE

// STL includes
#include <vector>
#include <set>
#include <map>

// NICE-core includes
#include <core/algebra/EigValues.h>
#include <core/algebra/IterativeLinearSolver.h>
#include <core/basics/Config.h>
#include <core/basics/Persistent.h>
#include <core/vector/VectorT.h>

#ifdef NICE_USELIB_MATIO
#include <core/matlabAccess/MatFileIO.h>
#endif

// gp-hik-core includes
#include "gp-hik-core/FastMinKernel.h"
#include "gp-hik-core/GPLikelihoodApprox.h"
#include "gp-hik-core/IKMLinearCombination.h"
#include "gp-hik-core/OnlineLearnable.h"
#include "gp-hik-core/Quantization.h"
#include "gp-hik-core/parameterizedFunctions/ParameterizedFunction.h"
namespace NICE {

/**
 * @class FMKGPHyperparameterOptimization
 * @brief Heart of the framework to set up everything, perform optimization,
 *        classification, and variance prediction.
 *
 * Combines a FastMinKernel (HIK) data structure with eigenvalue computation,
 * iterative linear solvers, and a parameterized kernel transformation to
 * optimize GP hyperparameters and to classify examples afterwards.
 * Implements Persistent (store/restore) and OnlineLearnable (incremental
 * updates via addExample / addMultipleExamples).
 *
 * @author Erik Rodner, Alexander Freytag
 */
class FMKGPHyperparameterOptimization : public NICE::Persistent, public NICE::OnlineLearnable
{
  protected:
    /** available hyperparameter optimization strategies */
    enum {
      OPT_GREEDY = 0,         //!< grid search with parameterStepSize over [lower, upper] bound
      OPT_DOWNHILLSIMPLEX,    //!< Nelder-Mead downhill-simplex optimization
      OPT_NONE,               //!< no optimization, keep initial parameters
      OPT_NUMBEROFMETHODS     //!< sentinel: number of entries above, not a method itself
    };

    /** optimization method used (one of the OPT_* enum values above) */
    int optimizationMethod;

    /** the parameterized function we use within the minimum kernel (not owned; see constructor docs) */
    ParameterizedFunction *pf;

    /** method computing eigenvalues */
    EigValues *eig;

    /** method for solving linear equation systems */
    IterativeLinearSolver *linsolver;

    /** object which stores our sorted data and provides fast hik functions */
    FastMinKernel *fmk;

    /** object which stores our quantization object */
    Quantization *q;

    /** verbose flag */
    bool verbose;
    /** verbose flag for time measurement outputs */
    bool verboseTime;
    /** debug flag for several outputs useful for debugging */
    bool debug;

    /** optimization parameters: search range and step size for the hyperparameter(s) */
    double parameterUpperBound;
    double parameterLowerBound;
    double parameterStepSize;

    /** maximum number of iterations for the iterative linear solver */
    int ils_max_iterations;

    /** downhill-simplex termination criteria: iteration, wall-clock, and parameter tolerance */
    int downhillSimplexMaxIterations;
    double downhillSimplexTimeLimit;
    double downhillSimplexParamTol;

    /** whether to compute the likelihood with the usual (exact) method to verify the approximation */
    bool verifyApproximation;

    /** number of Eigenvalues to consider in the approximation of |K|_F */
    int nrOfEigenvaluesToConsider;

    /** number of Eigenvalues to consider in the fine approximation of the predictive variance */
    int nrOfEigenvaluesToConsiderForVarApprox;

    typedef VVector PrecomputedType;

    /** precomputed arrays (per class number) and lookup tables for fast classification */
    std::map< int, PrecomputedType > precomputedA;
    std::map< int, PrecomputedType > precomputedB;
    /** lookup tables (raw arrays, per class number) for quantized evaluation */
    std::map< int, double * > precomputedT;

    /** precomputed array used for the rough variance estimate */
    PrecomputedType precomputedAForVarEst;
    /** lookup table used for the rough variance estimate (raw array) */
    double * precomputedTForVarEst;

    //! optimize noise with the GP likelihood
    bool optimizeNoise;

    //! k largest eigenvalues of the kernel matrix (k == nrOfEigenvaluesToConsider)
    NICE::Vector eigenMax;

    //! eigenvectors corresponding to the k largest eigenvalues (k == nrOfEigenvaluesToConsider) -- format: n x k
    NICE::Matrix eigenMaxVectors;

    //! needed for optimization and variance approximation
    IKMLinearCombination * ikmsum;

    //! storing the labels is needed for Incremental Learning (re-optimization)
    NICE::Vector labels;

    //! calculate binary label vectors using a multi-class label vector
    int prepareBinaryLabels ( std::map<int, NICE::Vector> & binaryLabels, const NICE::Vector & y , std::set<int> & myClasses);

    //! prepare the GPLike object for given binary labels and already given ikmsum-object
    inline void setupGPLikelihoodApprox( GPLikelihoodApprox * & gplike, const std::map<int, NICE::Vector> & binaryLabels, uint & parameterVectorSize);

    //! update eigenvectors and eigenvalues for given ikmsum-objects and a method to compute eigenvalues
    inline void updateEigenDecomposition( const int & i_noEigenValues );

    //! core of the optimize-functions
    inline void performOptimization( GPLikelihoodApprox & gplike, const uint & parameterVectorSize);

    //! apply the optimized transformation values to the underlying features
    inline void transformFeaturesWithOptimalParameters(const GPLikelihoodApprox & gplike, const uint & parameterVectorSize);

    //! build the resulting matrices A and B as well as lookup tables T for fast evaluations using the optimized parameter settings
    inline void computeMatricesAndLUTs( const GPLikelihoodApprox & gplike);

    //! store the class number of the positive class (i.e., larger class no), only used in binary settings
    int binaryLabelPositive;
    //! store the class number of the negative class (i.e., smaller class no), only used in binary settings
    int binaryLabelNegative;

    //! contains all class numbers of the currently known classes
    std::set<int> knownClasses;

    //! whether to warm-start re-optimization from previously computed alpha vectors
    bool b_usePreviousAlphas;

    //! we store the alpha vectors for good initializations in the IL setting
    std::map<int, NICE::Vector> lastAlphas;

    //! Update matrices (A, B, LUTs) and optionally find optimal parameters after adding a new example.
    void updateAfterSingleIncrement (
      const NICE::SparseVector & x,
      const std::set<int> newClasses,
      const bool & performOptimizationAfterIncrement = false
    );

    //! Update matrices (A, B, LUTs) and optionally find optimal parameters after adding multiple examples.
    void updateAfterMultipleIncrements (
      const std::vector<const NICE::SparseVector*> & x,
      const std::set<int> newClasses,
      const bool & performOptimizationAfterIncrement = false
    );

  public:
    /** default constructor -- object is not usable until initialize() was called */
    FMKGPHyperparameterOptimization();

    /**
    * @brief standard constructor
    *
    * @param conf configuration object holding all settings (read via confSection)
    * @param pf pointer to a parameterized function used within the minimum kernel min(f(x_i), f(x_j)) (will not be deleted)
    * @param fmk pointer to a pre-initialized structure (will be deleted)
    * @param confSection section name within conf to read the settings from
    */
    FMKGPHyperparameterOptimization( const Config *conf, ParameterizedFunction *pf, FastMinKernel *fmk = NULL, const std::string & confSection = "GPHIKClassifier" );

    /** simple destructor */
    virtual ~FMKGPHyperparameterOptimization();

    ///////////////////// ///////////////////// /////////////////////
    //                       GET / SET
    ///////////////////// ///////////////////// /////////////////////

    /** set the upper bound of the hyperparameter search range */
    void setParameterUpperBound(const double & _parameterUpperBound);
    /** set the lower bound of the hyperparameter search range */
    void setParameterLowerBound(const double & _parameterLowerBound);

    /** return all class numbers currently known to the classifier */
    std::set<int> getKnownClassNumbers ( ) const;

    ///////////////////// ///////////////////// /////////////////////
    //                      CLASSIFIER STUFF
    ///////////////////// ///////////////////// /////////////////////

    /** set up internal structures from a config (same semantics as the standard constructor) */
    void initialize( const Config *conf, ParameterizedFunction *pf, FastMinKernel *fmk = NULL, const std::string & confSection = "GPHIKClassifier" );

#ifdef NICE_USELIB_MATIO
    /**
    * @brief Perform hyperparameter optimization
    *
    * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
    * @param y label vector (arbitrary), will be converted into a binary label vector
    * @param positives set of positive examples (indices)
    * @param negatives set of negative examples (indices)
    * @param noise GP label noise
    */
    void optimizeBinary ( const sparse_t & data, const NICE::Vector & y, const std::set<int> & positives, const std::set<int> & negatives, double noise );

    /**
    * @brief Perform hyperparameter optimization for GP multi-class or binary problems
    *
    * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
    * @param y label vector with multi-class labels
    * @param examples mapping of example index to new index
    * @param noise GP label noise
    */
    void optimize ( const sparse_t & data, const NICE::Vector & y, const std::map<int, int> & examples, double noise );
#endif

    /**
    * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
    *
    * @param y label vector (multi-class as well as binary labels supported)
    */
    void optimize ( const NICE::Vector & y );

    /**
    * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
    *
    * @param binaryLabels map from class number to its binary label vector (entries in {1,-1})
    */
    void optimize ( std::map<int, NICE::Vector> & binaryLabels );

    /**
    * @brief Compute the necessary variables for rough approximations of the predictive variance (LUTs), assuming an already initialized fmk object
    * @author Alexander Freytag
    * @date 11-04-2012 (dd-mm-yyyy)
    */
    void prepareVarianceApproximationRough();

    /**
    * @brief Compute the necessary variables for fine approximations of the predictive variance (EVs), assuming an already initialized fmk object
    * @author Alexander Freytag
    * @date 11-04-2012 (dd-mm-yyyy)
    */
    void prepareVarianceApproximationFine();

    /**
    * @brief classify an example
    *
    * @param x input example (sparse vector)
    * @param scores scores for each class number
    *
    * @return class number achieving the best score
    */
    int classify ( const NICE::SparseVector & x, SparseVector & scores ) const;

    /**
    * @brief classify an example that is given as non-sparse vector
    * NOTE: whenever possible, you should use sparse vectors to obtain significantly smaller computation times
    *
    * @date 18-06-2013 (dd-mm-yyyy)
    * @author Alexander Freytag
    *
    * @param x input example (non-sparse vector)
    * @param scores scores for each class number
    *
    * @return class number achieving the best score
    */
    int classify ( const NICE::Vector & x, SparseVector & scores ) const;

    //////////////////////////////////////////
    // variance computation: sparse inputs
    //////////////////////////////////////////

    /**
    * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
    * @author Alexander Freytag
    * @date 10-04-2012 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceApproximateRough(const NICE::SparseVector & x, double & predVariance ) const;

    /**
    * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
    * @author Alexander Freytag
    * @date 18-04-2012 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceApproximateFine(const NICE::SparseVector & x, double & predVariance ) const;

    /**
    * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
    * @author Alexander Freytag
    * @date 10-04-2012 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceExact(const NICE::SparseVector & x, double & predVariance ) const;

    //////////////////////////////////////////
    // variance computation: non-sparse inputs
    //////////////////////////////////////////

    /**
    * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
    * @author Alexander Freytag
    * @date 19-12-2013 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceApproximateRough(const NICE::Vector & x, double & predVariance ) const;

    /**
    * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
    * @author Alexander Freytag
    * @date 19-12-2013 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceApproximateFine(const NICE::Vector & x, double & predVariance ) const;

    /**
    * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
    * @author Alexander Freytag
    * @date 19-12-2013 (dd-mm-yyyy)
    * @param x input example
    * @param predVariance contains the approximation of the predictive variance
    *
    */
    void computePredictiveVarianceExact(const NICE::Vector & x, double & predVariance ) const;

    ///////////////////// INTERFACE PERSISTENT /////////////////////
    // interface specific methods for store and restore
    ///////////////////// INTERFACE PERSISTENT /////////////////////

    /** restore the object state from a stream (Persistent interface) */
    void restore ( std::istream & is, int format = 0 );
    /** write the object state to a stream (Persistent interface) */
    void store ( std::ostream & os, int format = 0 ) const;
    /** reset the object to an empty, uninitialized state (Persistent interface) */
    void clear ( ) ;

    ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////
    // interface specific methods for incremental extensions
    ///////////////////// INTERFACE ONLINE LEARNABLE /////////////////////

    /** add a single labeled example and update internal structures (OnlineLearnable interface) */
    virtual void addExample( const NICE::SparseVector * example,
                             const double & label,
                             const bool & performOptimizationAfterIncrement = true
                           );

    /** add several labeled examples at once and update internal structures (OnlineLearnable interface) */
    virtual void addMultipleExamples( const std::vector< const NICE::SparseVector * > & newExamples,
                                      const NICE::Vector & newLabels,
                                      const bool & performOptimizationAfterIncrement = true
                                    );
};

}
#endif