FMKGPHyperparameterOptimization.cpp

/**
* @file FMKGPHyperparameterOptimization.cpp
* @brief Heart of the framework: sets everything up and performs optimization, incremental updates, classification, and variance prediction (implementation)
* @author Erik Rodner, Alexander Freytag
* @date 01/02/2012
*/

#include <iostream>
#include <map>

#include <core/algebra/ILSConjugateGradients.h>
#include <core/algebra/ILSConjugateGradientsLanczos.h>
#include <core/algebra/ILSSymmLqLanczos.h>
#include <core/algebra/ILSMinResLanczos.h>
#include <core/algebra/ILSPlainGradient.h>
#include <core/algebra/EigValuesTRLAN.h>
#include <core/algebra/CholeskyRobust.h>
#include <core/vector/Algorithms.h>
#include <core/vector/Eigen.h>
#include <core/basics/Timer.h>
#include <core/basics/ResourceStatistics.h>

#include "core/optimization/blackbox/DownhillSimplexOptimizer.h"

#include "FMKGPHyperparameterOptimization.h"
#include "FastMinKernel.h"
#include "GMHIKernel.h"
#include "IKMNoise.h"

using namespace NICE;
using namespace std;
FMKGPHyperparameterOptimization::FMKGPHyperparameterOptimization()
{
  pf = NULL;
  eig = NULL;
  linsolver = NULL;
  fmk = NULL;
  q = NULL;
  precomputedTForVarEst = NULL;
  verbose = false;
  verboseTime = false;
  debug = false;
}
FMKGPHyperparameterOptimization::FMKGPHyperparameterOptimization ( const Config *_conf, ParameterizedFunction *_pf, FastMinKernel *_fmk, const string & _confSection )
{
  // default settings, may be overwritten later on
  pf = NULL;
  eig = NULL;
  linsolver = NULL;
  fmk = NULL;
  q = NULL;
  precomputedTForVarEst = NULL;

  if ( _fmk == NULL )
    this->initialize ( _conf, _pf ); // then the confSection is also the default value
  //TODO not needed anymore, only kept for backward compatibility
  //   else if ( _confSection.compare ( "HIKGP" ) == 0 )
  //     this->initialize ( _conf, _pf, _fmk );
  else
    this->initialize ( _conf, _pf, _fmk, _confSection );
}
FMKGPHyperparameterOptimization::~FMKGPHyperparameterOptimization()
{
  // pf will be deleted by the calling program
  if ( this->eig != NULL )
    delete this->eig;
  if ( this->linsolver != NULL )
    delete this->linsolver;
  if ( this->fmk != NULL )
    delete this->fmk;
  if ( this->q != NULL )
    delete this->q;

  for ( uint i = 0 ; i < precomputedT.size(); i++ )
    delete [] ( precomputedT[i] );

  // the LUT for the variance estimate is an array as well, so use delete []
  if ( precomputedTForVarEst != NULL )
    delete [] precomputedTForVarEst;

  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
    delete it->second;
}
void FMKGPHyperparameterOptimization::initialize ( const Config *_conf, ParameterizedFunction *_pf, FastMinKernel *_fmk, const std::string & _confSection )
{
  if ( this->fmk != NULL )
    delete this->fmk;
  if ( _fmk != NULL )
    this->fmk = _fmk;
  this->pf = _pf;

  std::cerr << "------------" << std::endl;
  std::cerr << "| set-up |" << std::endl;
  std::cerr << "------------" << std::endl;

  this->eig = new EVArnoldi ( _conf->gB ( _confSection, "eig_verbose", false ) /* verbose flag */, 10 );
  // this->eig = new EigValuesTRLAN();
  // My time measurements show that both methods take roughly the same time; a comparison
  // of their numerical performance has not been done yet.

  this->parameterUpperBound = _conf->gD ( _confSection, "parameter_upper_bound", 2.5 );
  this->parameterLowerBound = _conf->gD ( _confSection, "parameter_lower_bound", 1.0 );
  this->parameterStepSize = _conf->gD ( _confSection, "parameter_step_size", 0.1 );

  this->verifyApproximation = _conf->gB ( _confSection, "verify_approximation", false );
  this->nrOfEigenvaluesToConsider = _conf->gI ( _confSection, "nrOfEigenvaluesToConsider", 1 );
  this->nrOfEigenvaluesToConsiderForVarApprox = _conf->gI ( _confSection, "nrOfEigenvaluesToConsiderForVarApprox", 2 );

  this->verbose = _conf->gB ( _confSection, "verbose", false );
  this->verboseTime = _conf->gB ( _confSection, "verboseTime", false );
  this->debug = _conf->gB ( _confSection, "debug", false );

  bool useQuantization = _conf->gB ( _confSection, "use_quantization", false );
  std::cerr << "_confSection: " << _confSection << std::endl;
  std::cerr << "use_quantization: " << useQuantization << std::endl;
  if ( useQuantization )
  {
    int numBins = _conf->gI ( _confSection, "num_bins", 100 );
    if ( verbose )
      cerr << "FMKGPHyperparameterOptimization: quantization initialized with " << numBins << " bins." << endl;
    this->q = new Quantization ( numBins );
  }
  else
  {
    this->q = NULL;
  }

  bool ils_verbose = _conf->gB ( _confSection, "ils_verbose", false );
  ils_max_iterations = _conf->gI ( _confSection, "ils_max_iterations", 1000 );
  if ( verbose )
    cerr << "FMKGPHyperparameterOptimization: maximum number of iterations is " << ils_max_iterations << endl;

  double ils_min_delta = _conf->gD ( _confSection, "ils_min_delta", 1e-7 );
  double ils_min_residual = _conf->gD ( _confSection, "ils_min_residual", 1e-7 /*1e-2*/ );

  string ils_method = _conf->gS ( _confSection, "ils_method", "CG" );
  if ( ils_method.compare ( "CG" ) == 0 )
  {
    if ( verbose )
      std::cerr << "We use CG with " << ils_max_iterations << " iterations, " << ils_min_delta << " as min delta, and " << ils_min_residual << " as min res " << std::endl;
    this->linsolver = new ILSConjugateGradients ( ils_verbose, ils_max_iterations, ils_min_delta, ils_min_residual );
    if ( verbose )
      cerr << "FMKGPHyperparameterOptimization: using ILS ConjugateGradients" << endl;
  }
  else if ( ils_method.compare ( "CGL" ) == 0 )
  {
    this->linsolver = new ILSConjugateGradientsLanczos ( ils_verbose, ils_max_iterations );
    if ( verbose )
      cerr << "FMKGPHyperparameterOptimization: using ILS ConjugateGradients (Lanczos)" << endl;
  }
  else if ( ils_method.compare ( "SYMMLQ" ) == 0 )
  {
    this->linsolver = new ILSSymmLqLanczos ( ils_verbose, ils_max_iterations );
    if ( verbose )
      cerr << "FMKGPHyperparameterOptimization: using ILS SYMMLQ" << endl;
  }
  else if ( ils_method.compare ( "MINRES" ) == 0 )
  {
    this->linsolver = new ILSMinResLanczos ( ils_verbose, ils_max_iterations );
    if ( verbose )
      cerr << "FMKGPHyperparameterOptimization: using ILS MINRES" << endl;
  }
  else
  {
    cerr << "FMKGPHyperparameterOptimization: " << _confSection << ":ils_method (" << ils_method << ") does not match any type (CG, CGL, SYMMLQ, MINRES), I will use CG" << endl;
    this->linsolver = new ILSConjugateGradients ( ils_verbose, ils_max_iterations, ils_min_delta, ils_min_residual );
  }

  this->usePreviousAlphas = _conf->gB ( _confSection, "usePreviousAlphas", true );

  string optimizationMethod_s = _conf->gS ( _confSection, "optimization_method", "greedy" );
  if ( optimizationMethod_s == "greedy" )
    optimizationMethod = OPT_GREEDY;
  else if ( optimizationMethod_s == "downhillsimplex" )
    optimizationMethod = OPT_DOWNHILLSIMPLEX;
  else if ( optimizationMethod_s == "none" )
    optimizationMethod = OPT_NONE;
  else
    fthrow ( Exception, "Optimization method " << optimizationMethod_s << " is not known." );
  if ( verbose )
    cerr << "Using optimization method: " << optimizationMethod_s << endl;

  downhillSimplexMaxIterations = _conf->gI ( _confSection, "downhillsimplex_max_iterations", 20 );
  // do not run longer than a day :)
  downhillSimplexTimeLimit = _conf->gD ( _confSection, "downhillsimplex_time_limit", 24 * 60 * 60 );
  downhillSimplexParamTol = _conf->gD ( _confSection, "downhillsimplex_delta", 0.01 );

  learnBalanced = _conf->gB ( _confSection, "learn_balanced", false );
  std::cerr << "balanced learning: " << learnBalanced << std::endl;

  optimizeNoise = _conf->gB ( _confSection, "optimize_noise", false );
  if ( verbose )
    cerr << "Optimize noise: " << ( optimizeNoise ? "on" : "off" ) << endl;

  std::cerr << "------------" << std::endl;
  std::cerr << "| start |" << std::endl;
  std::cerr << "------------" << std::endl;
}
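/*
 * For orientation, initialize() reads all of its settings from the given config section.
 * A minimal sketch of such a section is shown below; the key names are exactly the ones
 * queried above, but the section name and all values are only illustrative and depend on
 * the calling application:
 *
 *   [HIKGP]
 *   ils_method = CG                  ; CG, CGL, SYMMLQ, or MINRES
 *   ils_max_iterations = 1000
 *   optimization_method = greedy     ; greedy, downhillsimplex, or none
 *   parameter_lower_bound = 1.0
 *   parameter_upper_bound = 2.5
 *   parameter_step_size = 0.1
 *   use_quantization = false
 *   learn_balanced = false
 *   optimize_noise = false
 *   verbose = false
 */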
void FMKGPHyperparameterOptimization::setParameterUpperBound ( const double & _parameterUpperBound )
{
  parameterUpperBound = _parameterUpperBound;
}

void FMKGPHyperparameterOptimization::setParameterLowerBound ( const double & _parameterLowerBound )
{
  parameterLowerBound = _parameterLowerBound;
}
void FMKGPHyperparameterOptimization::setupGPLikelihoodApprox ( std::map<int, GPLikelihoodApprox * > & gplikes, const std::map<int, NICE::Vector> & binaryLabels, std::map<int, uint> & parameterVectorSizes )
{
  if ( learnBalanced )
  {
    if ( verbose )
    {
      std::cerr << "FMKGPHyperparameterOptimization::setupGPLikelihoodApprox -- balanced setting" << std::endl;
      std::cerr << "number of ikmsum-objects: " << ikmsums.size() << std::endl;
    }
    for ( std::map<int, IKMLinearCombination*>::const_iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
    {
      map<int, NICE::Vector> binaryLabelsSingle;
      binaryLabelsSingle.insert ( *binaryLabels.find ( it->first ) );
      GPLikelihoodApprox *gplike = new GPLikelihoodApprox ( binaryLabelsSingle, it->second, linsolver, eig, verifyApproximation, nrOfEigenvaluesToConsider );
      gplike->setUsePreviousAlphas ( usePreviousAlphas );
      gplike->setDebug ( debug );
      gplike->setVerbose ( verbose );
      gplikes.insert ( std::pair<int, GPLikelihoodApprox * > ( it->first, gplike ) );
      parameterVectorSizes.insert ( std::pair<int, uint> ( it->first, it->second->getNumParameters() ) );
    }
    if ( verbose )
      std::cerr << "resulting number of gplike-objects: " << gplikes.size() << std::endl;
  }
  else
  {
    GPLikelihoodApprox *gplike = new GPLikelihoodApprox ( binaryLabels, ikmsums.begin()->second, linsolver, eig, verifyApproximation, nrOfEigenvaluesToConsider );
    gplike->setUsePreviousAlphas ( usePreviousAlphas );
    gplike->setDebug ( debug );
    gplike->setVerbose ( verbose );
    gplikes.insert ( std::pair<int, GPLikelihoodApprox * > ( 0, gplike ) );
    parameterVectorSizes.insert ( std::pair<int, uint> ( 0, ikmsums.begin()->second->getNumParameters() ) );
  }
}
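/**
* @brief Computes the leading eigenvalues and eigenvectors of the current kernel matrices
* (one ikmsum object per class in the balanced setting, a single one otherwise) and stores
* them in eigenMax and eigenMaxVectors, where they are later used for the predictive
* variance approximations and for initializing new alpha entries.
*/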
void FMKGPHyperparameterOptimization::updateEigenVectors()
{
  if ( verbose )
  {
    std::cerr << "FMKGPHyperparameterOptimization::updateEigenVectors -- size of ikmsums: " << ikmsums.size() << std::endl;
    std::cerr << "class of first object: " << ikmsums.begin()->first << std::endl;
  }
  if ( learnBalanced )
  {
    // compute the eigenvalues and eigenvectors of every kernel matrix for the fine approximation of predictive uncertainties
    std::map<int, IKMLinearCombination * >::iterator ikmsumsIt;
    eigenMax.resize ( ikmsums.size() );
    eigenMaxVectors.resize ( ikmsums.size() );
    int classCnt ( 0 );
    for ( ikmsumsIt = ikmsums.begin(); ikmsumsIt != ikmsums.end(); ikmsumsIt++, classCnt++ )
    {
      eig->getEigenvalues ( * ikmsumsIt->second, eigenMax[classCnt], eigenMaxVectors[classCnt], nrOfEigenvaluesToConsiderForVarApprox );
    }
  }
  else
  {
    std::cerr << "not balanced, consider for VarApprox: " << nrOfEigenvaluesToConsiderForVarApprox << " eigenvalues" << std::endl;
    std::cerr << "and for simple: " << nrOfEigenvaluesToConsider << std::endl;
    if ( nrOfEigenvaluesToConsiderForVarApprox > 1 )
      nrOfEigenvaluesToConsiderForVarApprox = 1;
    // compute the largest eigenvalue of K + noise
    eigenMax.resize ( 1 );
    eigenMaxVectors.resize ( 1 );
    eig->getEigenvalues ( * ( ikmsums.begin()->second ), eigenMax[0], eigenMaxVectors[0], nrOfEigenvaluesToConsiderForVarApprox );
  }
}
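/**
* @brief Runs the hyperparameter optimization according to optimizationMethod:
* OPT_GREEDY evaluates the likelihood on a regular grid between the parameter bounds
* (step size parameterStepSize), OPT_DOWNHILLSIMPLEX starts a downhill simplex search from
* the current parameters, and OPT_NONE evaluates a single fixed value. The best alpha
* vectors found by the GPLikelihoodApprox objects are stored in lastAlphas afterwards.
*/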
void FMKGPHyperparameterOptimization::performOptimization ( std::map<int, GPLikelihoodApprox * > & gplikes, const std::map<int, uint> & parameterVectorSizes, const bool & roughOptimization )
{
  if ( verbose )
    std::cerr << "perform optimization" << std::endl;

  if ( optimizationMethod == OPT_GREEDY )
  {
    if ( verbose )
      std::cerr << "OPT_GREEDY!!! " << std::endl;
    // simple greedy strategy
    if ( ikmsums.begin()->second->getNumParameters() != 1 )
      fthrow ( Exception, "Reduce size of the parameter vector or use downhill simplex!" );
    Vector lB = ikmsums.begin()->second->getParameterLowerBounds();
    Vector uB = ikmsums.begin()->second->getParameterUpperBounds();
    if ( verbose )
      cerr << "lower bound " << lB << " upper bound " << uB << endl;
    if ( learnBalanced )
    {
      if ( lB[0] == uB[0] ) // do we already know a specific parameter?
      {
        for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
        {
          if ( verbose )
            std::cerr << "Optimizing class " << gpLikeIt->first << std::endl;
          OPTIMIZATION::matrix_type hyperp ( 1, 1, lB[0] );
          gpLikeIt->second->evaluate ( hyperp );
        }
      }
      else
      {
        fthrow ( Exception, "HYPERPARAMETER OPTIMIZATION SHOULD NOT BE USED TOGETHER WITH BALANCED LEARNING IN THIS FRAMEWORK!!!" );
      }
    }
    else
    {
      for ( double mypara = lB[0]; mypara <= uB[0]; mypara += this->parameterStepSize )
      {
        OPTIMIZATION::matrix_type hyperp ( 1, 1, mypara );
        gplikes.begin()->second->evaluate ( hyperp );
      }
    }
  }
  else if ( optimizationMethod == OPT_DOWNHILLSIMPLEX )
  {
    if ( learnBalanced )
    {
      if ( verbose )
        std::cerr << "DOWNHILLSIMPLEX WITH BALANCED LEARNING!!! " << std::endl;
      fthrow ( Exception, "HYPERPARAMETER OPTIMIZATION SHOULD NOT BE USED TOGETHER WITH BALANCED LEARNING IN THIS FRAMEWORK!!!" );
      // unfortunately, we suffer from the fact that we only have a single fmk-object;
      // therefore we would either have to copy the fmk-object once per class or do some averaging
    }
    else
    { // standard as before, normal optimization
      if ( verbose )
        std::cerr << "DOWNHILLSIMPLEX WITHOUT BALANCED LEARNING!!! " << std::endl;
      // downhill simplex strategy
      OPTIMIZATION::DownhillSimplexOptimizer optimizer;
      OPTIMIZATION::matrix_type initialParams ( parameterVectorSizes.begin()->second, 1 );
      Vector currentParameters;
      ikmsums.begin()->second->getParameters ( currentParameters );
      for ( uint i = 0 ; i < parameterVectorSizes.begin()->second; i++ )
        initialParams ( i, 0 ) = currentParameters[ i ];
      if ( verbose )
        std::cerr << "Initial parameters: " << initialParams << std::endl;
      //  OPTIMIZATION::matrix_type scales ( parameterVectorSizes.begin()->second, 1);
      if ( roughOptimization ) // should be used when we perform the optimization for the first time
      {
        //  scales.Set(1.0);
      }
      else // should be used when we perform the optimization in an incremental learning scenario, so that we already have a good guess
      {
        //  scales.Set(1.0);
        //  for ( uint i = 0 ; i < parameterVectorSizes.begin()->second; i++ )
        //    scales[i][0] = currentParameters[ i ];
        optimizer.setDownhillParams ( 0.2 /* default: 1.0 */, 0.1 /* default: 0.5 */, 0.2 /* default: 1.0 */ );
      }
      // the scales object does not really matter in the actual implementation of Downhill Simplex
      OPTIMIZATION::SimpleOptProblem optProblem ( gplikes.begin()->second, initialParams, initialParams /* scales */ );
      //  cerr << "OPT: " << mypara << " " << nlikelihood << " " << logdet << " " << dataterm << endl;
      optimizer.setMaxNumIter ( true, downhillSimplexMaxIterations );
      optimizer.setTimeLimit ( true, downhillSimplexTimeLimit );
      optimizer.setParamTol ( true, downhillSimplexParamTol );
      optimizer.optimizeProb ( optProblem );
    }
  }
  else if ( optimizationMethod == OPT_NONE )
  {
    if ( verbose )
      std::cerr << "NO OPTIMIZATION!!! " << std::endl;
    // without optimization
    if ( optimizeNoise )
      fthrow ( Exception, "Deactivate optimize_noise!" );
    if ( verbose )
      std::cerr << "Optimization is deactivated!" << std::endl;
    double value ( 1.0 );
    if ( this->parameterLowerBound == this->parameterUpperBound )
      value = this->parameterLowerBound;
    pf->setParameterLowerBounds ( NICE::Vector ( 1, value ) );
    pf->setParameterUpperBounds ( NICE::Vector ( 1, value ) );
    // we use the standard value
    if ( learnBalanced )
    {
      for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
      {
        OPTIMIZATION::matrix_type hyperp ( 1, 1, value );
        gpLikeIt->second->setParameterLowerBound ( value );
        gpLikeIt->second->setParameterUpperBound ( value );
        gpLikeIt->second->evaluate ( hyperp );
      }
    }
    else
    {
      OPTIMIZATION::matrix_type hyperp ( 1, 1, value );
      gplikes.begin()->second->setParameterLowerBound ( value );
      gplikes.begin()->second->setParameterUpperBound ( value );
      gplikes.begin()->second->evaluate ( hyperp );
    }
  }

  if ( learnBalanced )
  {
    lastAlphas.clear();
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      if ( verbose )
        std::cerr << "Optimal hyperparameter for class " << gpLikeIt->first << " was: " << gpLikeIt->second->getBestParameters() << std::endl;
      lastAlphas = gplikes.begin()->second->getBestAlphas();
    }
  }
  else
  {
    if ( verbose )
      std::cerr << "Optimal hyperparameter was: " << gplikes.begin()->second->getBestParameters() << std::endl;
    lastAlphas.clear();
    lastAlphas = gplikes.begin()->second->getBestAlphas();
  }
}
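/**
* @brief Applies the optimal hyperparameters to the underlying feature data via
* IKMLinearCombination::setParameters. In the balanced setting the per-class optima are
* averaged first, since all classes share a single FastMinKernel object.
*/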
void FMKGPHyperparameterOptimization::transformFeaturesWithOptimalParameters ( const std::map<int, GPLikelihoodApprox * > & gplikes, const std::map<int, uint> & parameterVectorSizes )
{
  if ( verbose )
    std::cerr << "FMKGPHyperparameterOptimization::transformFeaturesWithOptimalParameters" << std::endl;
  // transform all features with the "optimal" parameter
  if ( learnBalanced )
  {
    if ( verbose )
      std::cerr << "learn Balanced" << std::endl;
    double meanValue ( 0.0 );
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      meanValue += gpLikeIt->second->getBestParameters() [0];
    }
    meanValue /= gplikes.size();
    NICE::Vector averagedParams ( parameterVectorSizes.begin()->second, meanValue );
    if ( verbose )
      std::cerr << "averaged Params: " << averagedParams << std::endl;
    // since we only have a single fmk-object, we only have to modify our data a single time
    ikmsums.begin()->second->setParameters ( averagedParams );
  }
  else
  {
    if ( verbose )
    {
      std::cerr << "learn not Balanced" << std::endl;
      std::cerr << "previous best parameters: " << gplikes.begin()->second->getBestParameters() << std::endl;
      //  std::cerr << "previous best alphas: " << gplikes.begin()->second->getBestAlphas() << std::endl;
    }
    ikmsums.begin()->second->setParameters ( gplikes.begin()->second->getBestParameters() );
  }
}
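/**
* @brief For every class, precomputes the A and B structures (and, if quantization is
* enabled, the lookup table T) from the best alpha vector via
* FastMinKernel::hik_prepare_alpha_multiplications. These precomputations are what the
* later calls to classify() rely on for fast evaluation.
*/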
void FMKGPHyperparameterOptimization::computeMatricesAndLUTs ( const std::map<int, GPLikelihoodApprox * > & gplikes )
{
  precomputedA.clear();
  precomputedB.clear();

  if ( learnBalanced )
  {
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      map<int, Vector>::const_iterator i = gpLikeIt->second->getBestAlphas().begin();
      PrecomputedType A;
      PrecomputedType B;
      //  std::cerr << "computeMatricesAndLUTs -- alpha: " << i->second << std::endl;
      fmk->hik_prepare_alpha_multiplications ( i->second, A, B );
      A.setIoUntilEndOfFile ( false );
      B.setIoUntilEndOfFile ( false );
      precomputedA[ gpLikeIt->first ] = A;
      precomputedB[ gpLikeIt->first ] = B;
      if ( q != NULL )
      {
        double *T = fmk->hik_prepare_alpha_multiplications_fast ( A, B, *q, pf );
        // just to be sure that we do not waste space here; the LUT is an array, so use delete []
        if ( precomputedT[ gpLikeIt->first ] != NULL )
          delete [] precomputedT[ gpLikeIt->first ];
        precomputedT[ gpLikeIt->first ] = T;
      }
    }
  }
  else
  { // no GP rebalancing
    for ( map<int, Vector>::const_iterator i = gplikes.begin()->second->getBestAlphas().begin(); i != gplikes.begin()->second->getBestAlphas().end(); i++ )
    {
      PrecomputedType A;
      PrecomputedType B;
      //  std::cerr << "computeMatricesAndLUTs -- alpha: " << i->second << std::endl;
      fmk->hik_prepare_alpha_multiplications ( i->second, A, B );
      A.setIoUntilEndOfFile ( false );
      B.setIoUntilEndOfFile ( false );
      precomputedA[ i->first ] = A;
      precomputedB[ i->first ] = B;
      if ( q != NULL )
      {
        double *T = fmk->hik_prepare_alpha_multiplications_fast ( A, B, *q, pf );
        // just to be sure that we do not waste space here; the LUT is an array, so use delete []
        if ( precomputedT[ i->first ] != NULL )
          delete [] precomputedT[ i->first ];
        precomputedT[ i->first ] = T;
      }
    }
  }
}
#ifdef NICE_USELIB_MATIO
void FMKGPHyperparameterOptimization::optimizeBinary ( const sparse_t & data, const NICE::Vector & yl, const std::set<int> & positives, const std::set<int> & negatives, double noise )
{
  map<int, int> examples;
  Vector y ( yl.size() );
  int ind = 0;
  for ( uint i = 0 ; i < yl.size(); i++ )
  {
    if ( positives.find ( i ) != positives.end() ) {
      y[ examples.size() ] = 1.0;
      examples.insert ( pair<int, int> ( i, ind ) );
      ind++;
    } else if ( negatives.find ( i ) != negatives.end() ) {
      y[ examples.size() ] = -1.0;
      examples.insert ( pair<int, int> ( i, ind ) );
      ind++;
    }
  }
  y.resize ( examples.size() );
  cerr << "Examples: " << examples.size() << endl;

  optimize ( data, y, examples, noise );
}

void FMKGPHyperparameterOptimization::optimize ( const sparse_t & data, const NICE::Vector & y, const std::map<int, int> & examples, double noise )
{
  Timer t;
  t.start();
  cerr << "Initializing data structure ..." << std::endl;
  if ( fmk != NULL ) delete fmk;
  fmk = new FastMinKernel ( data, noise, examples );
  t.stop();
  if ( verboseTime )
    std::cerr << "Time used for initializing the FastMinKernel structure: " << t.getLast() << std::endl;

  optimize ( y );
}
#endif
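/**
* @brief Converts the multi-class label vector y into one +1/-1 vector per class and returns
* the (effective) number of classes. As a small example, y = (1, 2, 2, 1, 3) yields
* binaryLabels[1] = (+1, -1, -1, +1, -1), binaryLabels[2] = (-1, +1, +1, -1, -1) and
* binaryLabels[3] = (-1, -1, -1, -1, +1). With exactly two classes only a single binary
* vector (for the positive class) is kept and the returned count is reduced to 1; with a
* single class the method signals a one-class (OCC) setting.
*/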
int FMKGPHyperparameterOptimization::prepareBinaryLabels ( map<int, NICE::Vector> & binaryLabels, const NICE::Vector & y , std::set<int> & myClasses )
{
  myClasses.clear();
  for ( NICE::Vector::const_iterator it = y.begin(); it != y.end(); it++ )
    if ( myClasses.find ( *it ) == myClasses.end() )
      myClasses.insert ( *it );

  // count how many different classes appear in our data
  int nrOfClasses = myClasses.size();

  binaryLabels.clear();
  // compute the corresponding binary label vectors
  if ( nrOfClasses > 2 )
  {
    // resize every label vector and set all entries to -1.0
    for ( set<int>::const_iterator k = myClasses.begin(); k != myClasses.end(); k++ )
    {
      binaryLabels[ *k ].resize ( y.size() );
      binaryLabels[ *k ].set ( -1.0 );
    }
    // now look at every example and set the entry of its corresponding label vector to 1.0
    // proper existence should not be a problem
    for ( int i = 0 ; i < ( int ) y.size(); i++ )
      binaryLabels[ y[i] ][i] = 1.0;
  }
  else if ( nrOfClasses == 2 )
  {
    std::cerr << "binary setting -- prepare two binary label vectors with opposite signs" << std::endl;
    Vector yb ( y );
    int negativeClass = *(myClasses.begin());
    std::set<int>::const_iterator classIt = myClasses.begin(); classIt++;
    int positiveClass = *classIt;
    std::cerr << "positiveClass: " << positiveClass << " negativeClass: " << negativeClass << std::endl;
    for ( uint i = 0 ; i < yb.size() ; i++ )
      yb[i] = ( y[i] == negativeClass ) ? -1.0 : 1.0;
    binaryLabels[ positiveClass ] = yb;
    //binaryLabels[ 1 ] = yb;

    // uncomment the following if you want to perform real binary computations with 2 classes
    //  // we only need one vector, which already contains +1 and -1, so we need only one computation too
    //  binaryLabels[ negativeClass ] = yb;
    //  binaryLabels[ negativeClass ] *= -1.0;

    std::cerr << "binaryLabels.size(): " << binaryLabels.size() << std::endl;

    //  binaryLabels[ 0 ] = yb;
    //  binaryLabels[ 0 ] *= -1.0;

    // comment out the following if you want to do a real binary computation. It should be pointless, but let's see...
    // we do no real binary computation, but an implicit one with only a single object
    nrOfClasses--;
    std::set<int>::iterator it = myClasses.begin(); it++;
    myClasses.erase ( it );
  }
  else // OCC setting
  {
    // we set the labels to 1, independent of the previously given class number
    Vector yNew ( y.size(), 1 );
    myClasses.clear();
    myClasses.insert ( 1 );
    // we have to indicate that we are in an OCC setting
    nrOfClasses--;
  }
  return nrOfClasses;
}
void FMKGPHyperparameterOptimization::optimize ( const NICE::Vector & y )
{
  if ( fmk == NULL )
    fthrow ( Exception, "FastMinKernel object was not initialized!" );
  this->labels = y;

  std::map<int, NICE::Vector> binaryLabels;
  std::set<int> classesToUse;
  prepareBinaryLabels ( binaryLabels, y, classesToUse );

  // now call the main function :)
  this->optimize ( binaryLabels );
}
void FMKGPHyperparameterOptimization::optimize ( std::map<int, NICE::Vector> & binaryLabels )
{
  Timer t;
  t.start();
  // how many different classes do we have right now?
  int nrOfClasses = binaryLabels.size();
  std::set<int> classesToUse;
  classesToUse.clear();
  for ( std::map<int, NICE::Vector>::const_iterator clIt = binaryLabels.begin(); clIt != binaryLabels.end(); clIt++ )
  {
    classesToUse.insert ( clIt->first );
  }
  if ( verbose )
  {
    std::cerr << "Initial noise level: " << fmk->getNoise() << endl;
    std::cerr << "Number of classes (=1 means we have a binary setting): " << nrOfClasses << std::endl;
    std::cerr << "Effective number of classes (neglecting classes without positive examples): " << classesToUse.size() << std::endl;
  }

  // combine standard model and noise model
  ikmsums.clear();
  Timer t1;
  t1.start();
  // set up the kernel combination
  if ( learnBalanced )
  {
    for ( std::set<int>::const_iterator clIt = classesToUse.begin(); clIt != classesToUse.end(); clIt++ )
    {
      IKMLinearCombination *ikmsum = new IKMLinearCombination ();
      ikmsums.insert ( std::pair<int, IKMLinearCombination*> ( *clIt, ikmsum ) );
    }
  }
  else
  {
    IKMLinearCombination *ikmsum = new IKMLinearCombination ();
    ikmsums.insert ( std::pair<int, IKMLinearCombination*> ( 0, ikmsum ) );
  }
  if ( verbose )
  {
    std::cerr << "ikmsums.size(): " << ikmsums.size() << std::endl;
    std::cerr << "binaryLabels.size(): " << binaryLabels.size() << std::endl;
  }

  // first model: noise
  if ( learnBalanced )
  {
    int cnt ( 0 );
    for ( std::set<int>::const_iterator clIt = classesToUse.begin(); clIt != classesToUse.end(); clIt++, cnt++ )
    {
      ikmsums.find ( *clIt )->second->addModel ( new IKMNoise ( binaryLabels[*clIt], fmk->getNoise(), optimizeNoise ) );
    }
  }
  else
  {
    ikmsums.find ( 0 )->second->addModel ( new IKMNoise ( fmk->get_n(), fmk->getNoise(), optimizeNoise ) );
  }
  // set a pretty low built-in noise, because we explicitly add the noise with the IKMNoise object
  fmk->setNoise ( 0.0 );

  //NOTE The GMHIKernel is always the last model which is added (this is necessary for easy store and restore functionality)
  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
  {
    it->second->addModel ( new GMHIKernel ( fmk, pf, NULL /* no quantization */ ) );
  }
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the ikm-objects: " << t1.getLast() << std::endl;

  std::map<int, GPLikelihoodApprox * > gplikes;
  std::map<int, uint> parameterVectorSizes;
  t1.start();
  this->setupGPLikelihoodApprox ( gplikes, binaryLabels, parameterVectorSizes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the gplike-objects: " << t1.getLast() << std::endl;

  if ( verbose )
  {
    std::cerr << "parameterVectorSizes: " << std::endl;
    for ( std::map<int, uint>::const_iterator pvsIt = parameterVectorSizes.begin(); pvsIt != parameterVectorSizes.end(); pvsIt++ )
    {
      std::cerr << pvsIt->first << " " << pvsIt->second << " ";
    }
    std::cerr << std::endl;
  }

  t1.start();
  this->updateEigenVectors();
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the eigenvectors-objects: " << t1.getLast() << std::endl;

  if ( verbose )
    std::cerr << "resulting eigenvalues for first class: " << eigenMax[0] << std::endl;

  t1.start();
  this->performOptimization ( gplikes, parameterVectorSizes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for performing the optimization: " << t1.getLast() << std::endl;

  if ( verbose )
    cerr << "Preparing classification ..." << endl;

  t1.start();
  this->transformFeaturesWithOptimalParameters ( gplikes, parameterVectorSizes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for transforming features with optimal parameters: " << t1.getLast() << std::endl;

  t1.start();
  this->computeMatricesAndLUTs ( gplikes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the A'nB -objects: " << t1.getLast() << std::endl;

  t.stop();

  ResourceStatistics rs;
  std::cerr << "Time used for learning: " << t.getLast() << std::endl;
  long maxMemory;
  rs.getMaximumMemory ( maxMemory );
  std::cerr << "Maximum memory used: " << maxMemory << " KB" << std::endl;

  // don't waste memory
  if ( learnBalanced )
  {
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      delete gpLikeIt->second;
    }
  }
  else
  {
    delete gplikes.begin()->second;
  }
}
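/**
* @brief Incremental counterpart of optimize(): adds the new example to every ikmsum object,
* extends the stored alpha vectors (if usePreviousAlphas is set) using the same initialization
* as in batch training, optionally re-runs the hyperparameter optimization, and finally
* recomputes A, B and the LUTs, since the whole alpha vector changes and cannot be updated in place.
*/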
void FMKGPHyperparameterOptimization::optimizeAfterSingleIncrement ( const NICE::SparseVector & x, const bool & performOptimizationAfterIncrement )
{
  Timer t;
  t.start();
  if ( fmk == NULL )
    fthrow ( Exception, "FastMinKernel object was not initialized!" );

  map<int, NICE::Vector> binaryLabels;
  set<int> classesToUse;
  prepareBinaryLabels ( binaryLabels, labels, classesToUse );
  if ( verbose )
    std::cerr << "labels.size() after increment: " << labels.size() << std::endl;

  Timer t1;
  t1.start();
  // update the kernel combinations
  std::map<int, NICE::Vector>::const_iterator labelIt = binaryLabels.begin();
  // note that if we only have a single ikmsum-object, then the label vector will not be used at all in the internal objects (only relevant in IKMNoise)
  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
  {
    it->second->addExample ( x, labelIt->second );
    labelIt++;
  }

  // we have to reset the fmk explicitly
  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
  {
    ( ( GMHIKernel* ) it->second->getModel ( it->second->getNumberOfModels() - 1 ) )->setFastMinKernel ( this->fmk );
  }
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the ikm-objects: " << t1.getLast() << std::endl;

  std::map<int, GPLikelihoodApprox * > gplikes;
  std::map<int, uint> parameterVectorSizes;
  t1.start();
  this->setupGPLikelihoodApprox ( gplikes, binaryLabels, parameterVectorSizes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the gplike-objects: " << t1.getLast() << std::endl;

  if ( verbose )
  {
    std::cerr << "parameterVectorSizes: " << std::endl;
    for ( std::map<int, uint>::const_iterator pvsIt = parameterVectorSizes.begin(); pvsIt != parameterVectorSizes.end(); pvsIt++ )
    {
      std::cerr << pvsIt->first << " " << pvsIt->second << " ";
    }
    std::cerr << std::endl;
  }

  t1.start();
  if ( usePreviousAlphas )
  {
    std::map<int, NICE::Vector>::const_iterator binaryLabelsIt = binaryLabels.begin();
    std::vector<NICE::Vector>::const_iterator eigenMaxIt = eigenMax.begin();
    for ( std::map<int, NICE::Vector>::iterator lastAlphaIt = lastAlphas.begin(); lastAlphaIt != lastAlphas.end(); lastAlphaIt++ )
    {
      int oldSize ( lastAlphaIt->second.size() );
      lastAlphaIt->second.resize ( oldSize + 1 );
      // We initialize the new entry with the same values as used in GPLikelihoodApprox in batch training.
      // Default in GPLikelihoodApprox for the first time:
      //   alpha = (binaryLabels[classCnt] * (1.0 / eigenmax[0]) );
      double maxEigenValue ( 1.0 );
      if ( (*eigenMaxIt).size() > 0 )
        maxEigenValue = (*eigenMaxIt)[0];
      double factor ( 1.0 / maxEigenValue );
      if ( binaryLabelsIt->second[oldSize] > 0 ) // we only have +1 and -1, so this might be beneficial in terms of speed
        lastAlphaIt->second[oldSize] = factor;
      else
        lastAlphaIt->second[oldSize] = -factor; // we follow the initialization as done in previous steps
      //lastAlphaIt->second[oldSize] = 0.0; // following the suggestion of Yeh and Darrell

      binaryLabelsIt++;
      if ( learnBalanced )
      {
        eigenMaxIt++;
      }
    }
    for ( std::map<int, GPLikelihoodApprox * >::iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      gpLikeIt->second->setLastAlphas ( &lastAlphas );
    }
  }
  // if we do not use previous alphas, we do not have to set up anything here
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the alpha-objects: " << t1.getLast() << std::endl;

  t1.start();
  this->updateEigenVectors();
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the eigenvectors-objects: " << t1.getLast() << std::endl;

  if ( verbose )
    std::cerr << "resulting eigenvalues for first class: " << eigenMax[0] << std::endl;

  // we can reuse the already given performOptimization-method:
  // OPT_GREEDY
  //   for this strategy we can't reuse any of the previously computed scores,
  //   so come on, let's do the whole thing again...
  // OPT_DOWNHILLSIMPLEX
  //   here we can benefit from previous results, when we use them as initialization for our optimizer
  //   ikmsums.begin()->second->getParameters ( currentParameters ); uses the previously computed optimal parameters
  //   as initialization
  // OPT_NONE
  //   nothing to do, obviously
  //NOTE we could skip this if we do not want to change our parameters given new examples
  if ( performOptimizationAfterIncrement )
  {
    t1.start();
    this->performOptimization ( gplikes, parameterVectorSizes, false /* initialize not with default values but using the last solution */ );
    t1.stop();
    if ( verboseTime )
      std::cerr << "Time used for performing the optimization: " << t1.getLast() << std::endl;

    if ( verbose )
      cerr << "Preparing after retraining for classification ..." << endl;

    t1.start();
    this->transformFeaturesWithOptimalParameters ( gplikes, parameterVectorSizes );
    t1.stop();
    if ( verboseTime )
      std::cerr << "Time used for transforming features with optimal parameters: " << t1.getLast() << std::endl;
  }
  else
  {
    t1.start();
    t1.stop();
    std::cerr << "skip optimization" << std::endl;
    if ( verboseTime )
      std::cerr << "Time used for performing the optimization: " << t1.getLast() << std::endl;
    std::cerr << "skip feature transformation" << std::endl;
    if ( verboseTime )
      std::cerr << "Time used for transforming features with optimal parameters: " << t1.getLast() << std::endl;
  }

  //NOTE unfortunately, the whole vector alpha differs, and not only its last entry.
  // If we knew of any method which could update this efficiently, we could also compute A and B more efficiently by updating them.
  // Since we are not aware of any such method, we have to compute them completely anew. :/
  t1.start();
  this->computeMatricesAndLUTs ( gplikes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the A'nB -objects: " << t1.getLast() << std::endl;

  t.stop();

  ResourceStatistics rs;
  std::cerr << "Time used for re-learning: " << t.getLast() << std::endl;
  long maxMemory;
  rs.getMaximumMemory ( maxMemory );
  std::cerr << "Maximum memory used: " << maxMemory << " KB" << std::endl;

  // don't waste memory
  if ( learnBalanced )
  {
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      delete gpLikeIt->second;
    }
  }
  else
  {
    delete gplikes.begin()->second;
  }
}
void FMKGPHyperparameterOptimization::optimizeAfterMultipleIncrements ( const std::vector<const NICE::SparseVector*> & x, const bool & performOptimizationAfterIncrement )
{
  Timer t;
  t.start();
  if ( fmk == NULL )
    fthrow ( Exception, "FastMinKernel object was not initialized!" );

  map<int, NICE::Vector> binaryLabels;
  set<int> classesToUse;
  prepareBinaryLabels ( binaryLabels, labels, classesToUse );
  if ( verbose )
    std::cerr << "labels.size() after increment: " << labels.size() << std::endl;

  Timer t1;
  t1.start();
  // update the kernel combinations
  std::map<int, NICE::Vector>::const_iterator labelIt = binaryLabels.begin();
  // note that if we only have a single ikmsum-object, then the label vector will not be used at all in the internal objects (only relevant in IKMNoise)
  //TODO
  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
  {
    for ( std::vector<const NICE::SparseVector*>::const_iterator exampleIt = x.begin(); exampleIt != x.end(); exampleIt++ )
    {
      it->second->addExample ( **exampleIt, labelIt->second );
    }
    labelIt++;
  }

  // we have to reset the fmk explicitly
  for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
  {
    ( ( GMHIKernel* ) it->second->getModel ( it->second->getNumberOfModels() - 1 ) )->setFastMinKernel ( this->fmk );
  }
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the ikm-objects: " << t1.getLast() << std::endl;

  std::map<int, GPLikelihoodApprox * > gplikes;
  std::map<int, uint> parameterVectorSizes;
  t1.start();
  this->setupGPLikelihoodApprox ( gplikes, binaryLabels, parameterVectorSizes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the gplike-objects: " << t1.getLast() << std::endl;

  if ( verbose )
  {
    std::cerr << "parameterVectorSizes: " << std::endl;
    for ( std::map<int, uint>::const_iterator pvsIt = parameterVectorSizes.begin(); pvsIt != parameterVectorSizes.end(); pvsIt++ )
    {
      std::cerr << pvsIt->first << " " << pvsIt->second << " ";
    }
    std::cerr << std::endl;
  }

  t1.start();
  if ( usePreviousAlphas )
  {
    std::map<int, NICE::Vector>::const_iterator binaryLabelsIt = binaryLabels.begin();
    std::vector<NICE::Vector>::const_iterator eigenMaxIt = eigenMax.begin();
    for ( std::map<int, NICE::Vector>::iterator lastAlphaIt = lastAlphas.begin(); lastAlphaIt != lastAlphas.end(); lastAlphaIt++ )
    {
      int oldSize ( lastAlphaIt->second.size() );
      lastAlphaIt->second.resize ( oldSize + x.size() );
      // We initialize the new entries with the same values as used in GPLikelihoodApprox in batch training.
      // Default in GPLikelihoodApprox for the first time:
      //   alpha = (binaryLabels[classCnt] * (1.0 / eigenmax[0]) );
      double maxEigenValue ( 1.0 );
      if ( (*eigenMaxIt).size() > 0 )
        maxEigenValue = (*eigenMaxIt)[0];
      double factor ( 1.0 / maxEigenValue );
      for ( uint i = 0; i < x.size(); i++ )
      {
        if ( binaryLabelsIt->second[oldSize+i] > 0 ) // we only have +1 and -1, so this might be beneficial in terms of speed
          lastAlphaIt->second[oldSize+i] = factor;
        else
          lastAlphaIt->second[oldSize+i] = -factor; // we follow the initialization as done in previous steps
        //lastAlphaIt->second[oldSize+i] = 0.0; // following the suggestion of Yeh and Darrell
      }
      binaryLabelsIt++;
      if ( learnBalanced )
      {
        eigenMaxIt++;
      }
    }
    for ( std::map<int, GPLikelihoodApprox * >::iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      gpLikeIt->second->setLastAlphas ( &lastAlphas );
    }
  }
  // if we do not use previous alphas, we do not have to set up anything here
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the alpha-objects: " << t1.getLast() << std::endl;

  t1.start();
  this->updateEigenVectors();
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the eigenvectors-objects: " << t1.getLast() << std::endl;

  if ( verbose )
    std::cerr << "resulting eigenvalues of first class: " << eigenMax[0] << std::endl;

  // we can reuse the already given performOptimization-method:
  // OPT_GREEDY
  //   for this strategy we can't reuse any of the previously computed scores,
  //   so come on, let's do the whole thing again...
  // OPT_DOWNHILLSIMPLEX
  //   here we can benefit from previous results, when we use them as initialization for our optimizer
  //   ikmsums.begin()->second->getParameters ( currentParameters ); uses the previously computed optimal parameters
  //   as initialization
  // OPT_NONE
  //   nothing to do, obviously
  //NOTE we could skip this if we do not want to change our parameters given new examples
  if ( performOptimizationAfterIncrement )
  {
    t1.start();
    this->performOptimization ( gplikes, parameterVectorSizes, false /* initialize not with default values but using the last solution */ );
    t1.stop();
    if ( verboseTime )
      std::cerr << "Time used for performing the optimization: " << t1.getLast() << std::endl;

    t1.start();
    this->transformFeaturesWithOptimalParameters ( gplikes, parameterVectorSizes );
    t1.stop();
    if ( verboseTime )
      std::cerr << "Time used for transforming features with optimal parameters: " << t1.getLast() << std::endl;
  }
  else
  {
    t1.start();
    t1.stop();
    std::cerr << "skip optimization" << std::endl;
    if ( verboseTime )
      std::cerr << "Time used for performing the optimization: " << t1.getLast() << std::endl;
    std::cerr << "skip feature transformation" << std::endl;
    if ( verboseTime )
      std::cerr << "Time used for transforming features with optimal parameters: " << t1.getLast() << std::endl;
    std::cerr << "skip computation of A, B and LUTs" << std::endl;
    if ( verboseTime )
      std::cerr << "Time used for setting up the A'nB -objects: " << t1.getLast() << std::endl;
  }

  if ( verbose )
    cerr << "Preparing after retraining for classification ..." << endl;

  //NOTE unfortunately, the whole vector alpha differs, and not only its last entries.
  // If we knew of any method which could update this efficiently, we could also compute A and B more efficiently by updating them.
  // Since we are not aware of any such method, we have to compute them completely anew. :/
  t1.start();
  this->computeMatricesAndLUTs ( gplikes );
  t1.stop();
  if ( verboseTime )
    std::cerr << "Time used for setting up the A'nB -objects: " << t1.getLast() << std::endl;

  t.stop();

  ResourceStatistics rs;
  std::cerr << "Time used for re-learning: " << t.getLast() << std::endl;
  long maxMemory;
  rs.getMaximumMemory ( maxMemory );
  std::cerr << "Maximum memory used: " << maxMemory << " KB" << std::endl;

  // don't waste memory
  if ( learnBalanced )
  {
    for ( std::map<int, GPLikelihoodApprox*>::const_iterator gpLikeIt = gplikes.begin(); gpLikeIt != gplikes.end(); gpLikeIt++ )
    {
      delete gpLikeIt->second;
    }
  }
  else
  {
    delete gplikes.begin()->second;
  }
}
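/**
* @brief Precomputes the data structures needed for the rough predictive variance
* approximation: the A-like structure AVar and, if quantization is enabled, the
* corresponding lookup table precomputedTForVarEst.
*/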
void FMKGPHyperparameterOptimization::prepareVarianceApproximation()
{
  PrecomputedType AVar;
  fmk->hikPrepareKVNApproximation ( AVar );
  precomputedAForVarEst = AVar;
  precomputedAForVarEst.setIoUntilEndOfFile ( false );

  if ( q != NULL )
  {
    // do we have results from previous runs but called this method nonetheless?
    // then delete them and compute them again (the LUT is an array, so use delete [])
    if ( precomputedTForVarEst != NULL )
      delete [] precomputedTForVarEst;
    double *T = fmk->hikPrepareLookupTableForKVNApproximation ( *q, pf );
    precomputedTForVarEst = T;
  }
}
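/**
* @brief Classifies a sparse example: for every trained class the kernel sum is evaluated
* using the precomputed A/B structures (or the quantized LUT T), written into scores, and
* the winning class index is returned (argmax for multi-class, a 0/1 decision in the
* implicit binary case).
*/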
int FMKGPHyperparameterOptimization::classify ( const NICE::SparseVector & xstar, NICE::SparseVector & scores ) const
{
  // loop through all classes
  if ( precomputedA.size() == 0 )
  {
    fthrow ( Exception, "The precomputation vector is zero...have you trained this classifier?" );
  }

  uint maxClassNo = 0;
  for ( map<int, PrecomputedType>::const_iterator i = precomputedA.begin() ; i != precomputedA.end(); i++ )
  {
    uint classno = i->first;
    maxClassNo = std::max ( maxClassNo, classno );
    double beta;
    if ( q != NULL ) {
      map<int, double *>::const_iterator j = precomputedT.find ( classno );
      double *T = j->second;
      fmk->hik_kernel_sum_fast ( T, *q, xstar, beta );
    } else {
      const PrecomputedType & A = i->second;
      map<int, PrecomputedType>::const_iterator j = precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
      //  fmk->hik_kernel_sum ( A, B, xstar, beta ); if A, B are of type Matrix
      // Giving the transformation pf as an additional argument is necessary for the
      // following reason: FeatureMatrixT is sorted according to the original values,
      // therefore searching for upper and lower bounds ( findFirst... functions ) requires
      // original feature values as inputs. However, for the calculation we need the
      // transformed feature values.
      fmk->hik_kernel_sum ( A, B, xstar, beta, pf );
    }
    scores[ classno ] = beta;
  }
  scores.setDim ( maxClassNo + 1 );

  if ( precomputedA.size() > 1 ) {
    // multi-class classification
    return scores.maxElement();
  } else {
    // binary setting
    // FIXME: not really flexible for every situation
    scores[1] = -scores[0];
    scores[0] = scores[0];
    scores.setDim ( 2 );
    return scores[ 0 ] <= 0.0 ? 0 : 1;
  }
}
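/*
 * Rough predictive variance approximation: the exact GP variance would be
 * k(x*,x*) - k_*^T ( K + sigma^2 I )^{-1} k_*. Below, the quadratic form is approximated by
 * normKStar / lambda_1, i.e. the inverse kernel matrix is replaced by the inverse of its
 * largest eigenvalue lambda_1 (computed in updateEigenVectors) and normKStar is an
 * approximation of the squared norm of the kernel vector k_*, giving
 * kSelf - normKStar / lambda_1 for each binary task.
 */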
void FMKGPHyperparameterOptimization::computePredictiveVarianceApproximateRough ( const NICE::SparseVector & x, NICE::Vector & predVariances ) const
{
  double kSelf ( 0.0 );
  for ( NICE::SparseVector::const_iterator it = x.begin(); it != x.end(); it++ )
  {
    kSelf += pf->f ( 0, it->second );
    // if weighted dimensions:
    //kSelf += pf->f(it->first,it->second);
  }

  double normKStar;
  if ( q != NULL )
  {
    if ( precomputedTForVarEst == NULL )
    {
      fthrow ( Exception, "The precomputed LUT for uncertainty prediction is NULL...have you prepared the uncertainty prediction?" );
    }
    fmk->hikComputeKVNApproximationFast ( precomputedTForVarEst, *q, x, normKStar );
  }
  else
  {
    fmk->hikComputeKVNApproximation ( precomputedAForVarEst, x, normKStar, pf );
  }

  predVariances.clear();
  predVariances.resize ( eigenMax.size() );

  // for balanced setting, we get approximations for every binary task
  int cnt ( 0 );
  for ( std::vector<NICE::Vector>::const_iterator eigenMaxIt = eigenMax.begin(); eigenMaxIt != eigenMax.end(); eigenMaxIt++, cnt++ )
  {
    predVariances[cnt] = kSelf - ( 1.0 / (*eigenMaxIt)[0] ) * normKStar;
  }
}

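// Per binary task, the rough approximation above evaluates
//   sigma^2(x*) ~= k(x*,x*) - ( 1 / lambda_max ) * ||k_*||^2 ,
// where lambda_max is the largest eigenvalue stored in eigenMax and ||k_*||^2 is the squared norm
// of the kernel vector k_*, approximated here via the precomputed KVN structures (A or the LUT T).
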
void FMKGPHyperparameterOptimization::computePredictiveVarianceApproximateFine ( const NICE::SparseVector & x, NICE::Vector & predVariances ) const
{
  // ---------------- compute the first term --------------------
//   Timer t;
//   t.start();
  double kSelf ( 0.0 );
  for ( NICE::SparseVector::const_iterator it = x.begin(); it != x.end(); it++ )
  {
    kSelf += pf->f ( 0, it->second );
    // if weighted dimensions:
    //kSelf += pf->f(it->first,it->second);
  }

  // ---------------- compute the approximation of the second term --------------------
//   t.stop();
//   std::cerr << "ApproxFine -- time for first term: " << t.getLast() << std::endl;
//   t.start();
  NICE::Vector kStar;
  fmk->hikComputeKernelVector ( x, kStar );
//   t.stop();
//   std::cerr << "ApproxFine -- time for kernel vector: " << t.getLast() << std::endl;

  std::vector<NICE::Vector>::const_iterator eigenMaxIt = eigenMax.begin();

  predVariances.clear();
  predVariances.resize ( eigenMax.size() );

  int classIdx ( 0 );
  // for balanced setting, we get approximations for every binary task
  for ( std::vector<NICE::Matrix>::const_iterator eigenMaxVectorIt = eigenMaxVectors.begin(); eigenMaxVectorIt != eigenMaxVectors.end(); eigenMaxVectorIt++, eigenMaxIt++, classIdx++ )
  {
    double currentSecondTerm ( 0.0 );
    double sumOfProjectionLengths ( 0.0 );

    if ( ( kStar.size() != (*eigenMaxVectorIt).rows() ) || ( kStar.size() <= 0 ) )
    {
      //NOTE output?
    }

    // will contain nrOfEigenvaluesToConsiderForVarApprox many entries
    NICE::Vector multiplicationResults ( nrOfEigenvaluesToConsiderForVarApprox, 0.0 );
    // multiplicationResults.multiply ( *eigenMaxVectorIt, kStar, true /* transpose */ ) turned out
    // to be extremely slow here, so we compute the projections v_i^T k_* by hand instead
    for ( uint tmpI = 0; tmpI < kStar.size(); tmpI++ )
    {
      double kStarI ( kStar[tmpI] );
      for ( int tmpJ = 0; tmpJ < nrOfEigenvaluesToConsiderForVarApprox; tmpJ++ )
      {
        multiplicationResults[tmpJ] += kStarI * (*eigenMaxVectorIt) ( tmpI, tmpJ );
      }
    }

    double projectionLength ( 0.0 );
    int cnt ( 0 );
    NICE::Vector::const_iterator it = multiplicationResults.begin();
    while ( cnt < ( nrOfEigenvaluesToConsiderForVarApprox - 1 ) )
    {
      projectionLength = ( *it );
      currentSecondTerm += ( 1.0 / (*eigenMaxIt)[cnt] ) * pow ( projectionLength, 2 );
      sumOfProjectionLengths += pow ( projectionLength, 2 );
      it++;
      cnt++;
    }

    double normKStar ( pow ( kStar.normL2 (), 2 ) );
    currentSecondTerm += ( 1.0 / (*eigenMaxIt)[nrOfEigenvaluesToConsiderForVarApprox - 1] ) * ( normKStar - sumOfProjectionLengths );

    if ( ( normKStar - sumOfProjectionLengths ) < 0 )
    {
//       std::cerr << "Attention: normKStar - sumOfProjectionLengths is smaller than zero -- strange!" << std::endl;
    }
    predVariances[classIdx] = kSelf - currentSecondTerm;
  }
}

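// Per binary task, the loop above evaluates the fine approximation
//   sigma^2(x*) ~= k(x*,x*) - sum_{i=1}^{m-1} ( 1 / lambda_i ) * ( v_i^T k_* )^2
//                           - ( 1 / lambda_m ) * ( ||k_*||^2 - sum_{i=1}^{m-1} ( v_i^T k_* )^2 ),
// with m = nrOfEigenvaluesToConsiderForVarApprox and ( lambda_i, v_i ) the eigenpairs stored in
// eigenMax / eigenMaxVectors; the part of k_* not captured by the first m-1 eigenvectors is
// weighted with the m-th eigenvalue.
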
void FMKGPHyperparameterOptimization::computePredictiveVarianceExact ( const NICE::SparseVector & x, NICE::Vector & predVariances ) const
{
  Timer t;
//   t.start();
  // ---------------- compute the first term --------------------
  double kSelf ( 0.0 );
  for ( NICE::SparseVector::const_iterator it = x.begin(); it != x.end(); it++ )
  {
    kSelf += pf->f ( 0, it->second );
    // if weighted dimensions:
    //kSelf += pf->f(it->first,it->second);
  }

  // ---------------- compute the second term --------------------
//   t.stop();
//   std::cerr << "ApproxExact -- time for first term: " << t.getLast() << std::endl;
//   t.start();
  NICE::Vector kStar;
  fmk->hikComputeKernelVector ( x, kStar );
//   t.stop();
//   std::cerr << "ApproxExact -- time for kernel vector: " << t.getLast() << std::endl;

  // for balanced setting, we get uncertainties for every binary task
  std::vector<NICE::Vector>::const_iterator eigenMaxIt = eigenMax.begin();

  predVariances.clear();
  predVariances.resize ( eigenMax.size() );

  int cnt ( 0 );
  for ( std::map<int, IKMLinearCombination * >::const_iterator ikmSumIt = ikmsums.begin(); ikmSumIt != ikmsums.end(); ikmSumIt++, eigenMaxIt++, cnt++ )
  {
    //now run the ILS method
    NICE::Vector diagonalElements;
    ikmSumIt->second->getDiagonalElements ( diagonalElements );

//     t.start();
    // init simple Jacobi pre-conditioning
    ILSConjugateGradients *linsolver_cg = dynamic_cast<ILSConjugateGradients *> ( linsolver );

    //perform pre-conditioning
    if ( linsolver_cg != NULL )
      linsolver_cg->setJacobiPreconditioner ( diagonalElements );

    Vector beta;

    /** About finding a good initial solution (see also GPLikelihoodApproximation)
      * K~ = K + sigma^2 I
      *
      * K~ \approx lambda_max v v^T
      * \lambda_max v v^T * alpha = k_*     | multiply with v^T from left
      * => \lambda_max v^T alpha = v^T k_*
      * => alpha = k_* / lambda_max could be a good initial start
      * If we put everything back into the first equation, this gives us v = k_* (up to scaling).
      * This reduces the number of iterations by 5 or 8.
      */
    beta = ( kStar * ( 1.0 / (*eigenMaxIt)[0] ) );

/*    t.stop();
    std::cerr << "ApproxExact -- time for preconditioning etc: " << t.getLast() << std::endl;
    t.start();*/
    linsolver->solveLin ( * ( ikmSumIt->second ), kStar, beta );
//     t.stop();
//     std::cerr << "ApproxExact -- time for lin solve: " << t.getLast() << std::endl;

    beta *= kStar;
    double currentSecondTerm ( beta.Sum() );
    predVariances[cnt] = kSelf - currentSecondTerm;
  }
}

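// Per binary task, this yields the exact GP predictive variance
//   sigma^2(x*) = k(x*,x*) - k_*^T ( K + sigma^2 I )^{-1} k_* ,
// where the linear system ( K + sigma^2 I ) beta = k_* is solved iteratively with the configured
// linsolver instead of inverting the regularized kernel matrix explicitly.
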
// ---------------------- STORE AND RESTORE FUNCTIONS ----------------------

void FMKGPHyperparameterOptimization::restore ( std::istream & is, int format )
{
  if ( is.good() )
  {
    //load the underlying data
    if ( fmk != NULL )
      delete fmk;
    fmk = new FastMinKernel;
    fmk->restore ( is, format );

    //now set up the GHIK-things in ikmsums
    for ( std::map<int, IKMLinearCombination * >::iterator it = ikmsums.begin(); it != ikmsums.end(); it++ )
    {
      it->second->addModel ( new GMHIKernel ( fmk, this->pf, this->q ) );
    }

    is.precision ( numeric_limits<double>::digits10 + 1 );

    string tmp;
    is >> tmp; //class name
    is >> tmp; //learnBalanced:
    is >> learnBalanced;

    is >> tmp; //precomputedA:
    is >> tmp; //size:
    int preCompSize ( 0 );
    is >> preCompSize;
    precomputedA.clear();
    std::cerr << "precomputedA.size(): " << preCompSize << std::endl;
    for ( int i = 0; i < preCompSize; i++ )
    {
      int nr;
      is >> nr;
      PrecomputedType pct;
      pct.setIoUntilEndOfFile ( false );
      pct.restore ( is, format );
      precomputedA.insert ( std::pair<int, PrecomputedType> ( nr, pct ) );
    }

    is >> tmp; //precomputedB:
    is >> tmp; //size:
    is >> preCompSize;
    precomputedB.clear();
    for ( int i = 0; i < preCompSize; i++ )
    {
      int nr;
      is >> nr;
      PrecomputedType pct;
      pct.setIoUntilEndOfFile ( false );
      pct.restore ( is, format );
      precomputedB.insert ( std::pair<int, PrecomputedType> ( nr, pct ) );
    }

    is >> tmp; //precomputedT.size():
    int precomputedTSize;
    is >> precomputedTSize;
    precomputedT.clear();
    if ( precomputedTSize > 0 )
    {
      is >> tmp; //SizeOfLUTs:
      int sizeOfLUT;
      is >> sizeOfLUT;
      for ( int i = 0; i < precomputedTSize; i++ )
      {
        is >> tmp; //index:
        int index;
        is >> index;
        double * array = new double [ sizeOfLUT ];
        for ( int j = 0; j < sizeOfLUT; j++ )
        {
          is >> array[j];
        }
        precomputedT.insert ( std::pair<int, double*> ( index, array ) );
      }
    }

    //now restore the things we need for the variance computation
    is >> tmp; //precomputedAForVarEst.size():
    int sizeOfAForVarEst;
    is >> sizeOfAForVarEst;
    if ( sizeOfAForVarEst > 0 )
    {
      precomputedAForVarEst.setIoUntilEndOfFile ( false );
      precomputedAForVarEst.restore ( is, format );
    }

    is >> tmp; //precomputedTForVarEst
    is >> tmp; //NOTNULL or NULL
    if ( tmp.compare ( "NOTNULL" ) == 0 )
    {
      int sizeOfLUT;
      is >> sizeOfLUT;
      precomputedTForVarEst = new double [ sizeOfLUT ];
      for ( int i = 0; i < sizeOfLUT; i++ )
      {
        is >> precomputedTForVarEst[i];
      }
    }
    else
    {
      if ( precomputedTForVarEst != NULL )
      {
        delete [] precomputedTForVarEst;
        precomputedTForVarEst = NULL;
      }
    }

    //restore eigenvalues and eigenvectors
    is >> tmp; //eigenMax.size():
    int eigenMaxSize;
    is >> eigenMaxSize;
    eigenMax.clear();
    for ( int i = 0; i < eigenMaxSize; i++ )
    {
      NICE::Vector eigenMaxEntry;
      is >> eigenMaxEntry;
      eigenMax.push_back ( eigenMaxEntry );
    }

    is >> tmp; //eigenMaxVectors.size():
    int eigenMaxVectorsSize;
    is >> eigenMaxVectorsSize;
    eigenMaxVectors.clear();
    for ( int i = 0; i < eigenMaxVectorsSize; i++ )
    {
      NICE::Matrix eigenMaxVectorsEntry;
      is >> eigenMaxVectorsEntry;
      eigenMaxVectors.push_back ( eigenMaxVectorsEntry );
    }

    is >> tmp; //ikmsums:
    is >> tmp; //size:
    int ikmSumsSize ( 0 );
    is >> ikmSumsSize;
    ikmsums.clear();
    for ( int i = 0; i < ikmSumsSize; i++ )
    {
      int clNr ( 0 );
      is >> clNr;
      IKMLinearCombination *ikmsum = new IKMLinearCombination ();
      int nrOfModels ( 0 );
      is >> tmp; //numberOfModels:
      is >> nrOfModels;
      //the first one is always our noise-model
      IKMNoise * ikmnoise = new IKMNoise ();
      ikmnoise->restore ( is, format );
      ikmsum->addModel ( ikmnoise );
      //NOTE did you add any further models? then restore them here in the same order
      ikmsums.insert ( std::pair<int, IKMLinearCombination*> ( clNr, ikmsum ) );
      //the last one is the GHIK - we do not have to restore it, but simply reset it later on
    }
  }
  else
  {
    std::cerr << "InStream not initialized - restoring not possible!" << std::endl;
  }
}

void FMKGPHyperparameterOptimization::store ( std::ostream & os, int format ) const
{
  if ( os.good() )
  {
    fmk->store ( os, format );

    os.precision ( numeric_limits<double>::digits10 + 1 );

    os << "FMKGPHyperparameterOptimization" << std::endl;
    os << "learnBalanced: " << learnBalanced << std::endl;

    //we only have to store what we computed, since the remaining settings come with the config file afterwards
    os << "precomputedA: size: " << precomputedA.size() << std::endl;
    std::map< int, PrecomputedType >::const_iterator preCompIt = precomputedA.begin();
    for ( uint i = 0; i < precomputedA.size(); i++ )
    {
      os << preCompIt->first << std::endl;
      ( preCompIt->second ).store ( os, format );
      preCompIt++;
    }

    os << "precomputedB: size: " << precomputedB.size() << std::endl;
    preCompIt = precomputedB.begin();
    for ( uint i = 0; i < precomputedB.size(); i++ )
    {
      os << preCompIt->first << std::endl;
      ( preCompIt->second ).store ( os, format );
      preCompIt++;
    }

    os << "precomputedT.size(): " << precomputedT.size() << std::endl;
    if ( precomputedT.size() > 0 )
    {
      int sizeOfLUT ( 0 );
      if ( q != NULL )
        sizeOfLUT = q->size() * this->fmk->get_d();
      os << "SizeOfLUTs: " << sizeOfLUT << std::endl;
      for ( std::map< int, double * >::const_iterator it = precomputedT.begin(); it != precomputedT.end(); it++ )
      {
        os << "index: " << it->first << std::endl;
        for ( int i = 0; i < sizeOfLUT; i++ )
        {
          os << ( it->second ) [i] << " ";
        }
        os << std::endl;
      }
    }

    //now store the things needed for the variance estimation
    os << "precomputedAForVarEst.size(): " << precomputedAForVarEst.size() << std::endl;
    if ( precomputedAForVarEst.size() > 0 )
    {
      precomputedAForVarEst.store ( os, format );
      os << std::endl;
    }

    if ( precomputedTForVarEst != NULL )
    {
      os << "precomputedTForVarEst NOTNULL" << std::endl;
      int sizeOfLUT ( 0 );
      if ( q != NULL )
        sizeOfLUT = q->size() * this->fmk->get_d();
      os << sizeOfLUT << std::endl;
      for ( int i = 0; i < sizeOfLUT; i++ )
      {
        os << precomputedTForVarEst[i] << " ";
      }
      os << std::endl;
    }
    else
    {
      os << "precomputedTForVarEst NULL" << std::endl;
    }

    //store the eigenvalues and eigenvectors
    os << "eigenMax.size(): " << std::endl;
    os << eigenMax.size() << std::endl;
    for ( std::vector<NICE::Vector>::const_iterator it = this->eigenMax.begin(); it != this->eigenMax.end(); it++ )
    {
      os << *it << std::endl;
    }
    os << "eigenMaxVectors.size(): " << std::endl;
    os << eigenMaxVectors.size() << std::endl;
    for ( std::vector<NICE::Matrix>::const_iterator it = eigenMaxVectors.begin(); it != eigenMaxVectors.end(); it++ )
    {
      os << *it << std::endl;
    }

    os << "ikmsums: size: " << ikmsums.size() << std::endl;
    std::map<int, IKMLinearCombination * >::const_iterator ikmSumIt = ikmsums.begin();
    for ( uint i = 0; i < ikmsums.size(); i++ )
    {
      os << ikmSumIt->first << std::endl;
      os << "numberOfModels: " << ( ikmSumIt->second )->getNumberOfModels() << std::endl;
      //the last model is always the GHIK, which we do not have to store
      for ( int j = 0; j < ( ikmSumIt->second )->getNumberOfModels() - 1; j++ )
      {
        ( ( ikmSumIt->second )->getModel ( j ) )->store ( os, format );
      }
      ikmSumIt++;
    }
  }
  else
  {
    std::cerr << "OutStream not initialized - storing not possible!" << std::endl;
  }
}

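// Hedged round-trip sketch (stream setup and file name are placeholders; format 0 is assumed to be
// the default persistence format):
//
//   std::ofstream ofs ( "fmkgp.model" );
//   fmkgp.store ( ofs, 0 );
//   ofs.close();
//
//   std::ifstream ifs ( "fmkgp.model" );
//   fmkgpRestored.restore ( ifs, 0 );
//   ifs.close();
//
// Note that restore expects the FastMinKernel data to precede the sections written above, exactly
// as store writes them.
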
void FMKGPHyperparameterOptimization::clear ( ) {}

void FMKGPHyperparameterOptimization::addExample ( const NICE::SparseVector & x, const double & label, const bool & performOptimizationAfterIncrement )
{
  this->labels.append ( label );

  // add the new example to our data structure
  // It is necessary to do this already here and not later on, for internal reasons (see GMHIKernel for more details)
  Timer t;
  t.start();
  fmk->addExample ( x, pf );
  t.stop();
  if ( verboseTime )
    std::cerr << "Time used for adding the data to the fmk object: " << t.getLast() << std::endl;

  // re-run the optimization using the previously known solutions as initialization
  // and update the corresponding matrices A, B and lookup tables T
  optimizeAfterSingleIncrement ( x, performOptimizationAfterIncrement );
}

void FMKGPHyperparameterOptimization::addMultipleExamples ( const std::vector<const NICE::SparseVector*> & newExamples, const NICE::Vector & _labels, const bool & performOptimizationAfterIncrement )
{
  int oldSize ( this->labels.size() );
  this->labels.resize ( this->labels.size() + _labels.size() );
  for ( uint i = 0; i < _labels.size(); i++ )
  {
    this->labels[i + oldSize] = _labels[i];
  }

  // add the new examples to our data structure
  // It is necessary to do this already here and not later on, for internal reasons (see GMHIKernel for more details)
  Timer t;
  t.start();
  for ( std::vector<const NICE::SparseVector*>::const_iterator exampleIt = newExamples.begin(); exampleIt != newExamples.end(); exampleIt++ )
  {
    fmk->addExample ( **exampleIt, pf );
  }
  t.stop();
  if ( verboseTime )
    std::cerr << "Time used for adding the data to the fmk object: " << t.getLast() << std::endl;

  Timer tVar;
  tVar.start();
  //do we need to update our matrices?
  if ( precomputedAForVarEst.size() != 0 )
  {
    //this recomputes everything from scratch
    this->prepareVarianceApproximation();
    //a more sophisticated update of the existing structures would be preferable,
    //but unfortunately there is still a bug somewhere
    //TODO fixme!
//     std::cerr << "update the LUTs needed for variance computation" << std::endl;
//     for ( std::vector<const NICE::SparseVector*>::const_iterator exampleIt = newExamples.begin(); exampleIt != newExamples.end(); exampleIt++ )
//     {
//       std::cerr << "new example: " << std::endl;
//       (**exampleIt).store(std::cerr);
//       std::cerr << "now update the LUT for var est" << std::endl;
//       fmk->updatePreparationForKVNApproximation( **exampleIt, precomputedAForVarEst, pf );
//       if ( q != NULL )
//       {
//         fmk->updateLookupTableForKVNApproximation( **exampleIt, precomputedTForVarEst, *q, pf );
//       }
//     }
//     std::cerr << "update of LUTs for variance computation done" << std::endl;
  }
  tVar.stop();
  if ( verboseTime )
    std::cerr << "Time used for computing the Variance Matrix and LUT: " << tVar.getLast() << std::endl;

  // re-run the optimization using the previously known solutions as initialization
  // and update the corresponding matrices A, B and lookup tables T
  optimizeAfterMultipleIncrements ( newExamples, performOptimizationAfterIncrement );
}

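// Hedged usage sketch for incremental learning (all variable names are placeholders):
//
//   std::vector<const NICE::SparseVector*> newExamples;   // newly arrived training data
//   NICE::Vector newLabels;                                // labels, same length as newExamples
//   // ... fill newExamples and newLabels ...
//   fmkgp.addMultipleExamples ( newExamples, newLabels, true );
//
// The last argument is meant to control whether the hyperparameters are re-optimized after the
// increment (see optimizeAfterMultipleIncrements); the new examples themselves are always added.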