/**
* @file FMKGPHyperparameterOptimization.h
* @brief Heart of the framework to set up everything, perform optimization, incremental updates, classification, variance prediction (Interface)
* @author Erik Rodner, Alexander Freytag
* @date 01/02/2012
*/
#ifndef _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE
#define _NICE_FMKGPHYPERPARAMETEROPTIMIZATIONINCLUDE

// STL includes
#include <vector>
#include <set>
#include <map>

// NICE-core includes
#include <core/algebra/EigValues.h>
#include <core/algebra/IterativeLinearSolver.h>
#include <core/basics/Config.h>
#include <core/basics/Persistent.h>
#include <core/vector/VectorT.h>

#ifdef NICE_USELIB_MATIO
#include <core/matlabAccess/MatFileIO.h>
#endif

// gp-hik-core includes
#include "FastMinKernel.h"
#include "GPLikelihoodApprox.h"
#include "IKMLinearCombination.h"
#include "Quantization.h"
#include "gp-hik-core/parameterizedFunctions/ParameterizedFunction.h"
namespace NICE {
  26. /**
  27. * @class FMKGPHyperparameterOptimization
  28. * @brief Heart of the framework to set up everything, perform optimization, incremental updates, classification, variance prediction
  29. * @author Erik Rodner, Alexander Freytag
  30. */
  31. class FMKGPHyperparameterOptimization : NICE::Persistent
  32. {
  33. protected:
  34. enum {
  35. OPT_GREEDY = 0,
  36. OPT_DOWNHILLSIMPLEX,
  37. OPT_NONE,
  38. OPT_NUMBEROFMETHODS
  39. };
  40. /** optimization method used */
  41. int optimizationMethod;
  42. /** the parameterized function we use within the minimum kernel */
  43. ParameterizedFunction *pf;
  44. /** method computing eigenvalues */
  45. EigValues *eig;
  46. /** method for solving linear equation systems */
  47. IterativeLinearSolver *linsolver;
  48. /** object which stores our sorted data and provides fast hik functions */
  49. FastMinKernel *fmk;
  50. /** object which stores our quantization object */
  51. Quantization *q;
  52. /** verbose flag */
  53. bool verbose;
  54. /** verbose flag for time measurement outputs */
  55. bool verboseTime;
  56. /** debug flag for several outputs useful for debugging*/
  57. bool debug;
  58. /** optimization parameters */
  59. double parameterUpperBound;
  60. double parameterLowerBound;
  61. double parameterStepSize;
  62. int ils_max_iterations;
  63. int downhillSimplexMaxIterations;
  64. double downhillSimplexTimeLimit;
  65. double downhillSimplexParamTol;
  66. /** whether to compute the likelihood with the usual method */
  67. bool verifyApproximation;
  68. /** number of Eigenvalues to consider in the approximation of |K|_F */
  69. int nrOfEigenvaluesToConsider;
  70. /** number of Eigenvalues to consider in the fine approximation of the predictive variance */
  71. int nrOfEigenvaluesToConsiderForVarApprox;
  72. typedef VVector PrecomputedType;
  73. /** precomputed arrays and lookup tables */
  74. std::map< int, PrecomputedType > precomputedA;
  75. std::map< int, PrecomputedType > precomputedB;
  76. std::map< int, double * > precomputedT;
  77. PrecomputedType precomputedAForVarEst;
  78. double * precomputedTForVarEst;
  79. //! optimize noise with the GP likelihood
  80. bool optimizeNoise;
  81. //! learn in a balanced manner
  82. bool learnBalanced;
  83. //! k largest eigenvalues for every kernel matrix (k == nrOfEigenvaluesToConsider, if we do not use balanced learning, we have only 1 entry)
  84. std::vector< NICE::Vector> eigenMax;
  85. //! eigenvectors corresponding to k largest eigenvalues for every matrix (k == nrOfEigenvaluesToConsider) -- format: nxk
  86. std::vector< NICE::Matrix> eigenMaxVectors;
  87. //! needed for optimization and variance approximation
  88. std::map<int, IKMLinearCombination * > ikmsums;
  89. //! storing the labels is needed for Incremental Learning (re-optimization)
  90. NICE::Vector labels;
  91. //! we store the alpha vectors for good initializations in the IL setting
  92. std::map<int, NICE::Vector> lastAlphas;
  93. //! calculate binary label vectors using a multi-class label vector
  94. int prepareBinaryLabels ( std::map<int, NICE::Vector> & binaryLabels, const NICE::Vector & y , std::set<int> & myClasses);
  95. //! prepare the GPLike objects for given binary labels and already given ikmsum-objects
  96. inline void setupGPLikelihoodApprox(std::map<int,GPLikelihoodApprox * > & gplikes, const std::map<int, NICE::Vector> & binaryLabels, std::map<int,uint> & parameterVectorSizes);
  97. //! update eigenvectors and eigenvalues for given ikmsum-objects and a method to compute eigenvalues
  98. inline void updateEigenVectors();
  99. //! core of the optimize-functions
  100. inline void performOptimization(std::map<int,GPLikelihoodApprox * > & gplikes, const std::map<int,uint> & parameterVectorSizes, const bool & roughOptimization = true);
  101. //! apply the optimized transformation values to the underlying features
  102. inline void transformFeaturesWithOptimalParameters(const std::map<int,GPLikelihoodApprox * > & gplikes, const std::map<int,uint> & parameterVectorSizes);
  103. //! build the resulting matrices A and B as well as lookup tables T for fast evaluations using the optimized parameter settings
  104. inline void computeMatricesAndLUTs(const std::map<int,GPLikelihoodApprox * > & gplikes);
  105. //! Update matrices (A, B, LUTs) and optionally find optimal parameters after adding a new example.
  106. void updateAfterSingleIncrement (const NICE::SparseVector & x, const bool & performOptimizationAfterIncrement = false);
  107. //! Update matrices (A, B, LUTs) and optionally find optimal parameters after adding multiple examples.
  108. void updateAfterMultipleIncrements (const std::vector<const NICE::SparseVector*> & x, const bool & performOptimizationAfterIncrement = false);
  109. //! use the alphas from the last iteration as initial guess for the ILS?
  110. bool usePreviousAlphas;
  111. //! store the class number of the positive class (i.e., larger class no), only used in binary settings
  112. int binaryLabelPositive;
  113. //! store the class number of the negative class (i.e., smaller class no), only used in binary settings
  114. int binaryLabelNegative;
  115. //! contains all class numbers of the currently known classes
  116. std::set<int> knownClasses;
  117. //! contains the class numbers of new classes - only needed within the increment step
  118. std::set<int> newClasses;
  119. public:
  120. FMKGPHyperparameterOptimization();
  121. /**
  122. * @brief standard constructor
  123. *
  124. * @param pf pointer to a parameterized function used within the minimum kernel min(f(x_i), f(x_j)) (will not be deleted)
  125. * @param noise GP label noise
  126. * @param fmk pointer to a pre-initialized structure (will be deleted)
  127. */
  128. FMKGPHyperparameterOptimization( const Config *conf, ParameterizedFunction *pf, FastMinKernel *fmk = NULL, const std::string & confSection = "GPHIKClassifier" );
  129. /** simple destructor */
  130. virtual ~FMKGPHyperparameterOptimization();
  131. // get and set methods
  132. void setParameterUpperBound(const double & _parameterUpperBound);
  133. void setParameterLowerBound(const double & _parameterLowerBound);
  134. //high level methods
  135. void initialize( const Config *conf, ParameterizedFunction *pf, FastMinKernel *fmk = NULL, const std::string & confSection = "GPHIKClassifier" );
  136. #ifdef NICE_USELIB_MATIO
  137. /**
  138. * @brief Perform hyperparameter optimization
  139. *
  140. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  141. * @param y label vector (arbitrary), will be converted into a binary label vector
  142. * @param positives set of positive examples (indices)
  143. * @param negatives set of negative examples (indices)
  144. */
  145. void optimizeBinary ( const sparse_t & data, const NICE::Vector & y, const std::set<int> & positives, const std::set<int> & negatives, double noise );
  146. /**
  147. * @brief Perform hyperparameter optimization for GP multi-class or binary problems
  148. *
  149. * @param data MATLAB data structure, like a feature matrix loaded from ImageNet
  150. * @param y label vector with multi-class labels
  151. * @param examples mapping of example index to new index
  152. */
  153. void optimize ( const sparse_t & data, const NICE::Vector & y, const std::map<int, int> & examples, double noise );
  154. #endif
  155. /**
  156. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  157. *
  158. * @param y label vector (multi-class as well as binary labels supported)
  159. */
  160. void optimize ( const NICE::Vector & y );
  161. /**
  162. * @brief Perform hyperparameter optimization (multi-class or binary) assuming an already initialized fmk object
  163. *
  164. * @param binLabels vector of binary label vectors (1,-1) and corresponding class no.
  165. */
  166. void optimize ( std::map<int, NICE::Vector> & binaryLabels );
  167. /**
  168. * @brief Compute the necessary variables for appxorimations of predictive variance, assuming an already initialized fmk object
  169. * @author Alexander Freytag
  170. * @date 11-04-2012 (dd-mm-yyyy)
  171. */
  172. void prepareVarianceApproximation();
  173. /**
  174. * @brief classify an example
  175. *
  176. * @param x input example (sparse vector)
  177. * @param scores scores for each class number
  178. *
  179. * @return class number achieving the best score
  180. */
  181. int classify ( const NICE::SparseVector & x, SparseVector & scores ) const;
  182. /**
  183. * @brief classify an example that is given as non-sparse vector
  184. * NOTE: whenever possible, you should sparse vectors to obtain significantly smaller computation times
  185. *
  186. * @date 18-06-2013 (dd-mm-yyyy)
  187. * @author Alexander Freytag
  188. *
  189. * @param x input example (non-sparse vector)
  190. * @param scores scores for each class number
  191. *
  192. * @return class number achieving the best score
  193. */
  194. int classify ( const NICE::Vector & x, SparseVector & scores ) const;
  195. /**
  196. * @brief compute predictive variance for a given test example using a rough approximation: k_{**} - k_*^T (K+\sigma I)^{-1} k_* <= k_{**} - |k_*|^2 * 1 / \lambda_max(K + \sigma I), where we approximate |k_*|^2 by neglecting the mixed terms
  197. * @author Alexander Freytag
  198. * @date 10-04-2012 (dd-mm-yyyy)
  199. * @param x input example
  200. * @param predVariances contains the approximations of the predictive variances
  201. *
  202. */
  203. void computePredictiveVarianceApproximateRough(const NICE::SparseVector & x, NICE::Vector & predVariances) const;
  204. /**
  205. * @brief compute predictive variance for a given test example using a fine approximation (k eigenvalues and eigenvectors to approximate the quadratic term)
  206. * @author Alexander Freytag
  207. * @date 18-04-2012 (dd-mm-yyyy)
  208. * @param x input example
  209. * @param predVariances contains the approximations of the predictive variances
  210. *
  211. */
  212. void computePredictiveVarianceApproximateFine(const NICE::SparseVector & x, NICE::Vector & predVariances) const;
  213. /**
  214. * @brief compute exact predictive variance for a given test example using ILS methods (exact, but more time consuming than approx versions)
  215. * @author Alexander Freytag
  216. * @date 10-04-2012 (dd-mm-yyyy)
  217. * @param x input example
  218. * @param predVariances contains the approximations of the predictive variances
  219. *
  220. */
  221. void computePredictiveVarianceExact(const NICE::SparseVector & x, NICE::Vector & predVariances) const;
  222. /** Persistent interface */
  223. void restore ( std::istream & is, int format = 0 );
  224. void store ( std::ostream & os, int format = 0 ) const;
  225. void clear ( ) ;
  226. void addExample( const NICE::SparseVector & x, const double & label, const bool & performOptimizationAfterIncrement = true);
  227. void addMultipleExamples( const std::vector<const NICE::SparseVector*> & newExamples, const NICE::Vector & labels, const bool & performOptimizationAfterIncrement = false);
  228. };
} // namespace NICE

#endif