// GPHIKRawClassifier.cpp
  1. /**
  2. * @file GPHIKRawClassifier.cpp
  3. * @brief Main interface for our GP HIK classifier (similar to the feature pool classifier interface in vislearning) (Implementation)
  4. * @author Erik Rodner, Alexander Freytag
  5. * @date 02/01/2012
  6. */
  7. // STL includes
  8. #include <iostream>
  9. // NICE-core includes
  10. #include <core/basics/numerictools.h>
  11. #include <core/basics/Timer.h>
  12. #include <core/algebra/ILSConjugateGradients.h>
  13. #include <core/algebra/EigValues.h>
  14. // gp-hik-core includes
  15. #include "gp-hik-core/GPHIKRawClassifier.h"
  16. #include "gp-hik-core/GMHIKernelRaw.h"
  17. //
  18. #include "gp-hik-core/quantization/Quantization1DAequiDist0To1.h"
  19. #include "gp-hik-core/quantization/Quantization1DAequiDist0ToMax.h"
  20. #include "gp-hik-core/quantization/QuantizationNDAequiDist0ToMax.h"
  21. using namespace std;
  22. using namespace NICE;
  23. /////////////////////////////////////////////////////
  24. /////////////////////////////////////////////////////
  25. // PROTECTED METHODS
  26. /////////////////////////////////////////////////////
  27. /////////////////////////////////////////////////////
  28. void GPHIKRawClassifier::clearSetsOfTablesAandB( )
  29. {
  30. // delete all LUTs A which are needed when no quantization is activated
  31. for ( std::map< uint,PrecomputedType >::iterator itA = this->precomputedA.begin();
  32. itA != this->precomputedA.end();
  33. itA++
  34. )
  35. {
  36. for ( uint idxDim = 0 ; idxDim < this->num_dimension; idxDim++ )
  37. {
  38. if ( (itA->second)[idxDim] != NULL )
  39. delete [] (itA->second)[idxDim];
  40. }
  41. delete [] itA->second;
  42. }
  43. this->precomputedA.clear();
  44. // delete all LUTs B which are needed when no quantization is activated
  45. for ( std::map< uint,PrecomputedType >::iterator itB = this->precomputedB.begin();
  46. itB != this->precomputedB.end();
  47. itB++
  48. )
  49. {
  50. for ( uint idxDim = 0 ; idxDim < this->num_dimension; idxDim++ )
  51. {
  52. if ( (itB->second)[idxDim] != NULL )
  53. delete [] (itB->second)[idxDim];
  54. }
  55. delete [] itB->second;
  56. }
  57. this->precomputedB.clear();
  58. }
  59. void GPHIKRawClassifier::clearSetsOfTablesT( )
  60. {
  61. // delete all LUTs used for quantization
  62. for ( std::map< uint, double * >::iterator itT = this->precomputedT.begin();
  63. itT != this->precomputedT.end();
  64. itT++
  65. )
  66. {
  67. delete [] itT->second;
  68. }
  69. this->precomputedT.clear();
  70. }
  71. /////////////////////////////////////////////////////
  72. /////////////////////////////////////////////////////
  73. // PUBLIC METHODS
  74. /////////////////////////////////////////////////////
  75. /////////////////////////////////////////////////////
  76. GPHIKRawClassifier::GPHIKRawClassifier( )
  77. {
  78. this->b_isTrained = false;
  79. this->confSection = "";
  80. this->nnz_per_dimension = NULL;
  81. this->num_examples = 0;
  82. this->num_dimension = 0;
  83. this->solver = NULL;
  84. this->q = NULL;
  85. this->gm = NULL;
  86. // in order to be sure about all necessary variables be setup with default values, we
  87. // run initFromConfig with an empty config
  88. NICE::Config tmpConfEmpty ;
  89. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  90. }
  91. GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
  92. const string & _confSection
  93. )
  94. {
  95. ///////////
  96. // same code as in empty constructor - duplication can be avoided with C++11 allowing for constructor delegation
  97. ///////////
  98. this->b_isTrained = false;
  99. this->confSection = "";
  100. this->nnz_per_dimension = NULL;
  101. this->num_examples = 0;
  102. this->num_dimension = 0;
  103. this->solver = NULL;
  104. this->q = NULL;
  105. this->gm = NULL;
  106. ///////////
  107. // here comes the new code part different from the empty constructor
  108. ///////////
  109. this->confSection = _confSection;
  110. // if no config file was given, we either restore the classifier from an external file, or run ::init with
  111. // an emtpy config (using default values thereby) when calling the train-method
  112. if ( _conf != NULL )
  113. {
  114. this->initFromConfig( _conf, _confSection );
  115. }
  116. else
  117. {
  118. // if no config was given, we create an empty one
  119. NICE::Config tmpConfEmpty ;
  120. this->initFromConfig ( &tmpConfEmpty, this->confSection );
  121. }
  122. }
  123. GPHIKRawClassifier::~GPHIKRawClassifier()
  124. {
  125. if ( this->solver != NULL )
  126. {
  127. delete this->solver;
  128. this->solver = NULL;
  129. }
  130. if ( this->gm != NULL)
  131. {
  132. delete this->gm;
  133. this->gm = NULL;
  134. }
  135. this->clearSetsOfTablesAandB();
  136. this->clearSetsOfTablesT();
  137. if ( this->q != NULL )
  138. {
  139. delete this->q;
  140. this->q = NULL;
  141. }
  142. }
  143. void GPHIKRawClassifier::initFromConfig(const Config *_conf,
  144. const string & _confSection
  145. )
  146. {
  147. this->d_noise = _conf->gD( _confSection, "noise", 0.01);
  148. this->confSection = _confSection;
  149. this->b_verbose = _conf->gB( _confSection, "verbose", false);
  150. this->b_debug = _conf->gB( _confSection, "debug", false);
  151. this->f_tolerance = _conf->gD( _confSection, "f_tolerance", 1e-10);
  152. //FIXME this is not used in that way for the standard GPHIKClassifier
  153. //string ilssection = "FMKGPHyperparameterOptimization";
  154. string ilssection = _confSection;
  155. uint ils_max_iterations = _conf->gI( ilssection, "ils_max_iterations", 1000 );
  156. double ils_min_delta = _conf->gD( ilssection, "ils_min_delta", 1e-7 );
  157. double ils_min_residual = _conf->gD( ilssection, "ils_min_residual", 1e-7 );
  158. bool ils_verbose = _conf->gB( ilssection, "ils_verbose", false );
  159. this->solver = new ILSConjugateGradients( ils_verbose,
  160. ils_max_iterations,
  161. ils_min_delta,
  162. ils_min_residual
  163. );
  164. if ( this->b_verbose )
  165. {
  166. std::cerr << "GPHIKRawClassifier::initFromConfig " <<std::endl;
  167. std::cerr << " confSection " << confSection << std::endl;
  168. std::cerr << " d_noise " << d_noise << std::endl;
  169. std::cerr << " f_tolerance " << f_tolerance << std::endl;
  170. std::cerr << " ils_max_iterations " << ils_max_iterations << std::endl;
  171. std::cerr << " ils_min_delta " << ils_min_delta << std::endl;
  172. std::cerr << " ils_min_residual " << ils_min_residual << std::endl;
  173. std::cerr << " ils_verbose " << ils_verbose << std::endl;
  174. }
  175. //quantization during classification?
  176. bool useQuantization = _conf->gB ( _confSection, "use_quantization", false );
  177. if ( this->b_verbose )
  178. {
  179. std::cerr << "_confSection: " << _confSection << std::endl;
  180. std::cerr << "use_quantization: " << useQuantization << std::endl;
  181. }
  182. if ( _conf->gB ( _confSection, "use_quantization", false ) )
  183. {
  184. int numBins = _conf->gI ( _confSection, "num_bins", 100 );
  185. if ( this->b_verbose )
  186. std::cerr << "FMKGPHyperparameterOptimization: quantization initialized with " << numBins << " bins." << std::endl;
  187. std::string s_quantType = _conf->gS( _confSection, "s_quantType", "1d-aequi-0-1" );
  188. if ( s_quantType == "1d-aequi-0-1" )
  189. {
  190. this->q = new NICE::Quantization1DAequiDist0To1 ( numBins );
  191. }
  192. else if ( s_quantType == "1d-aequi-0-max" )
  193. {
  194. this->q = new NICE::Quantization1DAequiDist0ToMax ( numBins );
  195. }
  196. else if ( s_quantType == "nd-aequi-0-max" )
  197. {
  198. this->q = new NICE::QuantizationNDAequiDist0ToMax ( numBins );
  199. }
  200. else
  201. {
  202. fthrow(Exception, "Quantization type is unknown " << s_quantType);
  203. }
  204. }
  205. else
  206. {
  207. this->q = NULL;
  208. }
  209. }
  210. ///////////////////// ///////////////////// /////////////////////
  211. // GET / SET
  212. ///////////////////// ///////////////////// /////////////////////
  213. std::set<uint> GPHIKRawClassifier::getKnownClassNumbers ( ) const
  214. {
  215. if ( ! this->b_isTrained )
  216. fthrow(Exception, "Classifier not trained yet -- aborting!" );
  217. return this->knownClasses;
  218. }
  219. ///////////////////// ///////////////////// /////////////////////
  220. // CLASSIFIER STUFF
  221. ///////////////////// ///////////////////// /////////////////////
/**
 * Classify a single sparse test example.
 *
 * Two code paths exist: if a quantization object is present, scores are read
 * from the precomputed tables T; otherwise the exact HIK score is assembled
 * from the LUTs A and B via a binary search over the sorted training values.
 *
 * @param _xstar  sparse test input (dimension index -> feature value)
 * @param _result class number receiving the highest score
 * @param _scores one score per trained 1-vs-all model (cleared here first)
 * @throws Exception if the classifier has not been trained yet
 */
void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                    uint & _result,
                                    SparseVector & _scores
                                  ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  _scores.clear();

  // classification with quantization of test inputs
  if ( this->q != NULL )
  {
    // NOTE(review): maxClassNo is accumulated but never used afterwards
    uint maxClassNo = 0;
    for ( std::map< uint, double * >::const_iterator itT = this->precomputedT.begin() ;
          itT != this->precomputedT.end();
          itT++
        )
    {
      uint classno = itT->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      double *T = itT->second;
      // T is laid out as one row of getNumberOfBins() entries per dimension
      for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++ )
      {
        uint dim = i->first;
        double v = i->second;
        uint qBin = this->q->quantize( v, dim );
        beta += T[dim * this->q->getNumberOfBins() + qBin];
      }//for-loop over dimensions of test input
      _scores[ classno ] = beta;
    }//for-loop over 1-vs-all models
  }
  // classification with exact test inputs, i.e., no quantization involved
  else
  {
    // NOTE(review): maxClassNo is accumulated but never used afterwards
    uint maxClassNo = 0;
    for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
    {
      uint classno = i->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();
      const PrecomputedType & A = i->second;
      // a matching B table is expected for every class that has an A table
      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
      // NOTE(review): the inner loop variable i shadows the outer class
      // iterator i -- correct here, but easy to misread
      for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++)
      {
        uint dim = i->first;
        double fval = i->second;
        uint nnz = this->nnz_per_dimension[dim];
        uint nz = this->num_examples - nnz;
        // dimensions without any non-zero training value contribute nothing
        if ( nnz == 0 ) continue;
        // useful
        //if ( fval < this->f_tolerance ) continue;
        uint position = 0;
        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
        GMHIKernelRaw::sparseVectorElement fval_element;
        fval_element.value = fval;
        //std::cerr << "value to search for " << fval << endl;
        //std::cerr << "data matrix in dimension " << dim << endl;
        //for (int j = 0; j < nnz; j++)
        //    std::cerr << dataMatrix[dim][j].value << std::endl;
        // binary search: number of sorted training values in this dimension
        // that are <= fval
        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
        position = distance ( dataMatrix[dim], it );
        // /*// add zero elements
        // if ( fval_element.value > 0.0 )
        //    position += nz;*/

        // NOTE(review): posIsZero is a leftover of the commented-out variant
        // below and is never read
        bool posIsZero ( position == 0 );

        // special case 1:
        // new example is smaller than all known examples
        // -> resulting value = fval * sum_l=1^n alpha_l
        if ( position == 0 )
        {
          beta += fval * B[ dim ][ nnz - 1 ];
        }
        // special case 2:
        // new example is equal to or larger than the largest training example in this dimension
        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the remaining prototypes!
        else if ( position == nnz )
        {
          beta += A[ dim ][ nnz - 1 ];
        }
        // standard case: new example is larger then the smallest element, but smaller then the largest one in the corrent dimension
        else
        {
          beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
        }

        //  // correct upper bound to correct position, only possible if new example is not the smallest value in this dimension
        //  if ( !posIsZero )
        //      position--;
        //
        //
        //  double firstPart = 0.0;
        //  if ( !posIsZero )
        //      firstPart = ( A[ dim ][ position ] );
        //
        //  double secondPart( B[ dim ][ this->num_examples-1-nz ]);
        //  if ( !posIsZero && (position >= nz) )
        //      secondPart -= B[dim][ position ];
        //
        //  // but apply using the transformed one
        //  beta += firstPart + secondPart* fval;
      }//for-loop over dimensions of test input
      _scores[ classno ] = beta;
    }//for-loop over 1-vs-all models
  } // if-condition wrt quantization

  _scores.setDim ( *this->knownClasses.rbegin() + 1 );

  if ( this->knownClasses.size() > 2 )
  { // multi-class classification
    _result = _scores.maxElement();
  }
  else if ( this->knownClasses.size() == 2 ) // binary setting
  {
    // in the binary case only one 1-vs-all model was trained; derive the
    // score of the other class by negation
    uint class1 = *(this->knownClasses.begin());
    uint class2 = *(this->knownClasses.rbegin());
    uint class_for_which_we_have_a_score = _scores.begin()->first;
    uint class_for_which_we_dont_have_a_score = (class1 == class_for_which_we_have_a_score ? class2 : class1);
    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];
    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
  }
}
/**
 * Classify a single sparse test example, writing scores into a dense vector.
 *
 * Mirrors the SparseVector overload; see there for the algorithmic details.
 *
 * @param _xstar  sparse test input (dimension index -> feature value)
 * @param _result class number receiving the highest score
 * @param _scores dense score vector, indexed by class number;
 *                NOTE(review): not cleared or resized here -- presumably the
 *                caller must size it so that every class label is a valid
 *                index; verify against callers
 * @throws Exception if the classifier has not been trained yet
 */
void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                    uint & _result,
                                    Vector & _scores
                                  ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  // classification with quantization of test inputs
  if ( this->q != NULL )
  {
    // NOTE(review): maxClassNo is accumulated but never used afterwards
    uint maxClassNo = 0;
    for ( std::map< uint, double * >::const_iterator itT = this->precomputedT.begin() ;
          itT != this->precomputedT.end();
          itT++
        )
    {
      uint classno = itT->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      double *T = itT->second;
      // T is laid out as one row of getNumberOfBins() entries per dimension
      for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++ )
      {
        uint dim = i->first;
        double v = i->second;
        uint qBin = this->q->quantize( v, dim );
        beta += T[dim * this->q->getNumberOfBins() + qBin];
      }//for-loop over dimensions of test input
      _scores[ classno ] = beta;
    }//for-loop over 1-vs-all models
  }
  // classification with exact test inputs, i.e., no quantization involved
  else
  {
    uint maxClassNo = 0;
    for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
    {
      uint classno = i->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();
      const PrecomputedType & A = i->second;
      // a matching B table is expected for every class that has an A table
      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
      // NOTE(review): the inner loop variable i shadows the outer class iterator i
      for (SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++)
      {
        uint dim = i->first;
        double fval = i->second;
        uint nnz = this->nnz_per_dimension[dim];
        uint nz = this->num_examples - nnz;
        // dimensions without any non-zero training value contribute nothing
        if ( nnz == 0 ) continue;
        // useful
        //if ( fval < this->f_tolerance ) continue;
        uint position = 0;
        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
        GMHIKernelRaw::sparseVectorElement fval_element;
        fval_element.value = fval;
        //std::cerr << "value to search for " << fval << endl;
        //std::cerr << "data matrix in dimension " << dim << endl;
        //for (int j = 0; j < nnz; j++)
        //    std::cerr << dataMatrix[dim][j].value << std::endl;
        // binary search: number of sorted training values in this dimension
        // that are <= fval
        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
        position = distance ( dataMatrix[dim], it );
        // NOTE(review): posIsZero is never read
        bool posIsZero ( position == 0 );
        // special case 1:
        // new example is smaller than all known examples
        // -> resulting value = fval * sum_l=1^n alpha_l
        if ( position == 0 )
        {
          beta += fval * B[ dim ][ nnz - 1 ];
        }
        // special case 2:
        // new example is equal to or larger than the largest training example in this dimension
        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the remaining prototypes!
        else if ( position == nnz )
        {
          beta += A[ dim ][ nnz - 1 ];
        }
        // standard case: new example is larger then the smallest element, but smaller then the largest one in the corrent dimension
        else
        {
          beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
        }
      }//for-loop over dimensions of test input
      _scores[ classno ] = beta;
    }//for-loop over 1-vs-all models
  } // if-condition wrt quantization

  if ( this->knownClasses.size() > 2 )
  { // multi-class classification
    _result = _scores.MaxIndex();
  }
  else if ( this->knownClasses.size() == 2 ) // binary setting
  {
    uint class1 = *(this->knownClasses.begin());
    uint class2 = *(this->knownClasses.rbegin());
    // NOTE(review): _scores.begin()->first looks copied from the SparseVector
    // overload -- for a dense NICE::Vector the iterator dereferences to a
    // double, not a (key, value) pair; verify this compiles and picks the
    // intended class
    uint class_for_which_we_have_a_score = _scores.begin()->first;
    uint class_for_which_we_dont_have_a_score = (class1 == class_for_which_we_have_a_score ? class2 : class1);
    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];
    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
  }
}
  442. void GPHIKRawClassifier::classify ( const std::vector< const NICE::SparseVector *> _examples,
  443. NICE::Vector & _results,
  444. NICE::Matrix & _scores
  445. ) const
  446. {
  447. _scores.clear();
  448. _scores.resize( _examples.size(), this->knownClasses.size() );
  449. _scores.set( 0.0 );
  450. _results.clear();
  451. _results.resize( _examples.size() );
  452. _results.set( 0.0 );
  453. NICE::Vector::iterator resultsIt = _results.begin();
  454. NICE::Vector scoresSingle( this->knownClasses.size(), 0.0);
  455. uint exCnt ( 0 );
  456. for ( std::vector< const NICE::SparseVector *> exIt = _examples.begin();
  457. exIt != _examples.end();
  458. exIt++, resultsIt++
  459. )
  460. {
  461. this->classify ( exIt,
  462. *resultsIt,
  463. scoresSingle,
  464. exCnt++
  465. );
  466. _scores.setRow( exCnt, scoresSingle );
  467. scoresSingle.set( 0.0 );
  468. }
  469. }
  470. /** training process */
  471. void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
  472. const NICE::Vector & _labels
  473. )
  474. {
  475. // security-check: examples and labels have to be of same size
  476. if ( _examples.size() != _labels.size() )
  477. {
  478. fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  479. }
  480. this->num_examples = _examples.size();
  481. this->knownClasses.clear();
  482. for ( uint i = 0; i < _labels.size(); i++ )
  483. this->knownClasses.insert((uint)_labels[i]);
  484. std::map<uint, NICE::Vector> binLabels;
  485. for ( set<uint>::const_iterator j = knownClasses.begin(); j != knownClasses.end(); j++ )
  486. {
  487. uint current_class = *j;
  488. Vector labels_binary ( _labels.size() );
  489. for ( uint i = 0; i < _labels.size(); i++ )
  490. {
  491. labels_binary[i] = ( _labels[i] == current_class ) ? 1.0 : -1.0;
  492. }
  493. binLabels.insert ( std::pair<uint, NICE::Vector>( current_class, labels_binary) );
  494. }
  495. // handle special binary case
  496. if ( knownClasses.size() == 2 )
  497. {
  498. std::map<uint, NICE::Vector>::iterator it = binLabels.begin();
  499. it++;
  500. binLabels.erase( binLabels.begin(), it );
  501. }
  502. this->train ( _examples, binLabels );
  503. }
/**
 * Train one GP model per binary problem.
 *
 * Sets up the raw HIK kernel matrix wrapper, estimates its largest eigenvalue
 * (shared by all classes, since the kernel matrix is shared), then solves the
 * linear system K~ alpha = y per class with conjugate gradients and stores the
 * resulting lookup tables (A/B, or T when quantization is enabled).
 *
 * @param _examples  training inputs, one sparse vector per example
 * @param _binLabels one binary (+1/-1) label vector per class to train
 * @throws Exception if any label vector does not match _examples in size
 */
void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
                                 std::map<uint, NICE::Vector> & _binLabels
                               )
{
  // security-check: examples and labels have to be of same size
  for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
        binLabIt != _binLabels.end();
        binLabIt++
      )
  {
    if ( _examples.size() != binLabIt->second.size() )
    {
      fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
    }
  }

  if ( this->b_verbose )
    std::cerr << "GPHIKRawClassifier::train" << std::endl;

  Timer t;
  t.start();

  // discard tables from any previous training run
  this->clearSetsOfTablesAandB();
  this->clearSetsOfTablesT();

  // sort examples in each dimension and "transpose" the feature matrix
  // set up the GenericMatrix interface
  if ( this->gm != NULL )
    delete this->gm;

  this->gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
  this->nnz_per_dimension = this->gm->getNNZPerDimension();
  this->num_dimension = this->gm->getNumberOfDimensions();

  // compute largest eigenvalue of our kernel matrix
  // note: this guy is shared among all categories,
  //       since the kernel matrix is shared as well
  NICE::Vector eigenMax;
  NICE::Matrix eigenMaxV;
  // for reproducibility during debuggin
  //FIXME
  srand ( 0 );
  srand48 ( 0 );
  NICE::EigValues * eig = new EVArnoldi ( false /* verbose flag */,
                                          10 /*_maxiterations*/
                                        );
  eig->getEigenvalues( *gm, eigenMax, eigenMaxV, 1 /*rank*/ );
  delete eig;

  // set simple jacobi pre-conditioning
  NICE::Vector diagonalElements;
  this->gm->getDiagonalElements ( diagonalElements );
  this->solver->setJacobiPreconditioner ( diagonalElements );

  // solve linear equations for each class
  // be careful when parallising this!
  for ( std::map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
        i != _binLabels.end();
        i++
      )
  {
    uint classno = i->first;
    if (b_verbose)
      std::cerr << "Training for class " << classno << endl;
    const NICE::Vector & y = i->second;
    NICE::Vector alpha;

    /** About finding a good initial solution (see also GPLikelihoodApproximation)
      * K~ = K + sigma^2 I
      *
      * K~ \approx lambda_max v v^T
      * \lambda_max v v^T * alpha = k_*     | multiply with v^T from left
      * => \lambda_max v^T alpha = v^T k_*
      * => alpha = k_* / lambda_max could be a good initial start
      * If we put everything in the first equation this gives us
      * v = k_*
      * This reduces the number of iterations by 5 or 8
      */
    alpha = (y * (1.0 / eigenMax[0]) );
    this->solver->solveLin( *gm, y, alpha );

    // //debug
    // std::cerr << "alpha: " << alpha << std::endl;

    // get lookup tables, A, B, etc. and store them
    // NOTE(review): ownership of the arrays returned by getTableA/getTableB/
    // getTableT is presumably transferred to the maps here (they are freed in
    // clearSetsOfTablesAandB/T) -- confirm gm allocates fresh tables per call
    this->gm->updateTablesAandB( alpha );
    double **A = this->gm->getTableA();
    double **B = this->gm->getTableB();

    this->precomputedA.insert ( std::pair<uint, PrecomputedType> ( classno, A ) );
    this->precomputedB.insert ( std::pair<uint, PrecomputedType> ( classno, B ) );

    // Quantization for classification?
    if ( this->q != NULL )
    {
      this->gm->updateTableT( alpha );
      double *T = this->gm->getTableT ( );
      this->precomputedT.insert( std::pair<uint, double * > ( classno, T ) );
    }
  }

  // NOTE if quantization is turned on, we do not need LUTs A and B anymore
  if ( this->q != NULL )
  {
    this->clearSetsOfTablesAandB();
  }

  t.stop();
  if ( this->b_verbose )
    std::cerr << "Time used for setting up the fmk object: " << t.getLast() << std::endl;

  //indicate that we finished training successfully
  this->b_isTrained = true;

  // clean up all examples ??
  if ( this->b_verbose )
    std::cerr << "Learning finished" << std::endl;
}