
/**
* @file GPHIKRawClassifier.cpp
* @brief Main interface for our GP HIK classifier (similar to the feature pool classifier interface in vislearning) (Implementation)
* @author Erik Rodner, Alexander Freytag
* @date 02/01/2012
*/

// STL includes
#include <iostream>
#include <limits>
#include <unistd.h>

// NICE-core includes
#include <core/basics/numerictools.h>
#include <core/basics/Timer.h>
#include <core/algebra/ILSConjugateGradients.h>
#include <core/algebra/EigValues.h>

// gp-hik-core includes
#include "gp-hik-core/GPHIKRawClassifier.h"
#include "gp-hik-core/GMHIKernelRaw.h"
//
#include "gp-hik-core/quantization/Quantization1DAequiDist0To1.h"
#include "gp-hik-core/quantization/Quantization1DAequiDist0ToMax.h"
#include "gp-hik-core/quantization/QuantizationNDAequiDist0ToMax.h"

using namespace std;
using namespace NICE;
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
// PROTECTED METHODS
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////

void GPHIKRawClassifier::clearSetsOfTablesAandB( )
{
  // delete all LUTs A which are needed when no quantization is activated
  for ( std::map< uint, PrecomputedType >::iterator itA = this->precomputedA.begin();
        itA != this->precomputedA.end();
        itA++
      )
  {
    for ( uint idxDim = 0 ; idxDim < this->num_dimension; idxDim++ )
    {
      if ( (itA->second)[idxDim] != NULL )
        delete [] (itA->second)[idxDim];
    }
    delete [] itA->second;
  }
  this->precomputedA.clear();

  // delete all LUTs B which are needed when no quantization is activated
  for ( std::map< uint, PrecomputedType >::iterator itB = this->precomputedB.begin();
        itB != this->precomputedB.end();
        itB++
      )
  {
    for ( uint idxDim = 0 ; idxDim < this->num_dimension; idxDim++ )
    {
      if ( (itB->second)[idxDim] != NULL )
        delete [] (itB->second)[idxDim];
    }
    delete [] itB->second;
  }
  this->precomputedB.clear();
}

void GPHIKRawClassifier::clearSetsOfTablesT( )
{
  // delete all LUTs used for quantization
  for ( std::map< uint, double * >::iterator itT = this->precomputedT.begin();
        itT != this->precomputedT.end();
        itT++
      )
  {
    delete [] itT->second;
  }
  this->precomputedT.clear();
}

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
// PUBLIC METHODS
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
GPHIKRawClassifier::GPHIKRawClassifier( )
{
  this->b_isTrained = false;
  this->confSection = "";
  this->nnz_per_dimension = NULL;
  this->num_examples = 0;
  this->num_dimension = 0;
  this->solver = NULL;
  this->q = NULL;
  this->gm = NULL;

  // to ensure that all necessary variables are set up with default values, we
  // run initFromConfig with an empty config
  NICE::Config tmpConfEmpty;
  this->initFromConfig ( &tmpConfEmpty, this->confSection );
}

GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
                                        const string & _confSection
                                      )
{
  ///////////
  // same code as in the empty constructor -- the duplication could be avoided
  // with C++11, which allows for constructor delegation
  ///////////
  this->b_isTrained = false;
  this->confSection = "";
  this->nnz_per_dimension = NULL;
  this->num_examples = 0;
  this->num_dimension = 0;
  this->solver = NULL;
  this->q = NULL;
  this->gm = NULL;

  ///////////
  // here comes the new code part different from the empty constructor
  ///////////
  this->confSection = _confSection;

  // if no config was given, we either restore the classifier from an external
  // file, or run ::init with an empty config (thereby using default values)
  // when calling the train method
  if ( _conf != NULL )
  {
    this->initFromConfig( _conf, _confSection );
  }
  else
  {
    // if no config was given, we create an empty one
    NICE::Config tmpConfEmpty;
    this->initFromConfig ( &tmpConfEmpty, this->confSection );
  }
}
GPHIKRawClassifier::~GPHIKRawClassifier()
{
  if ( this->solver != NULL )
  {
    delete this->solver;
    this->solver = NULL;
  }

  if ( this->gm != NULL )
  {
    delete this->gm;
    this->gm = NULL;
  }

  this->clearSetsOfTablesAandB();
  this->clearSetsOfTablesT();

  if ( this->q != NULL )
  {
    delete this->q;
    this->q = NULL;
  }
}
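
// A minimal config sketch (hypothetical values; the key names below are exactly
// the ones read in initFromConfig, assuming the usual INI-like NICE::Config
// text format):
//
//   [GPHIKRawClassifier]
//   noise              = 0.01
//   verbose            = false
//   ils_max_iterations = 1000
//   use_quantization   = true
//   num_bins           = 100
//   s_quantType        = 1d-aequi-0-1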
void GPHIKRawClassifier::initFromConfig(const Config *_conf,
                                        const string & _confSection
                                       )
{
  this->d_noise = _conf->gD( _confSection, "noise", 0.01);

  this->confSection = _confSection;
  this->b_verbose   = _conf->gB( _confSection, "verbose", false);
  this->b_debug     = _conf->gB( _confSection, "debug", false);
  this->f_tolerance = _conf->gD( _confSection, "f_tolerance", 1e-10);

  //FIXME this is not used in that way for the standard GPHIKClassifier
  //string ilssection = "FMKGPHyperparameterOptimization";
  string ilssection       = _confSection;
  uint ils_max_iterations = _conf->gI( ilssection, "ils_max_iterations", 1000 );
  double ils_min_delta    = _conf->gD( ilssection, "ils_min_delta", 1e-7 );
  double ils_min_residual = _conf->gD( ilssection, "ils_min_residual", 1e-7 );
  bool ils_verbose        = _conf->gB( ilssection, "ils_verbose", false );

  this->solver = new ILSConjugateGradients( ils_verbose,
                                            ils_max_iterations,
                                            ils_min_delta,
                                            ils_min_residual
                                          );

  // variables for the eigenvalue decomposition technique
  this->b_eig_verbose              = _conf->gB ( _confSection, "eig_verbose", false );
  this->i_eig_value_max_iterations = _conf->gI ( _confSection, "eig_value_max_iterations", 10 );

  if ( this->b_verbose )
  {
    std::cerr << "GPHIKRawClassifier::initFromConfig " << std::endl;
    std::cerr << " confSection " << confSection << std::endl;
    std::cerr << " d_noise " << d_noise << std::endl;
    std::cerr << " f_tolerance " << f_tolerance << std::endl;
    std::cerr << " ils_max_iterations " << ils_max_iterations << std::endl;
    std::cerr << " ils_min_delta " << ils_min_delta << std::endl;
    std::cerr << " ils_min_residual " << ils_min_residual << std::endl;
    std::cerr << " ils_verbose " << ils_verbose << std::endl;
    std::cerr << " b_eig_verbose " << b_eig_verbose << std::endl;
    std::cerr << " i_eig_value_max_iterations " << i_eig_value_max_iterations << std::endl;
  }

  // quantization during classification?
  bool useQuantization = _conf->gB ( _confSection, "use_quantization", false );

  if ( this->b_verbose )
  {
    std::cerr << "_confSection: " << _confSection << std::endl;
    std::cerr << "use_quantization: " << useQuantization << std::endl;
  }

  if ( useQuantization )
  {
    int numBins = _conf->gI ( _confSection, "num_bins", 100 );
    if ( this->b_verbose )
      std::cerr << "GPHIKRawClassifier: quantization initialized with " << numBins << " bins." << std::endl;
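
    // choose the quantization scheme; judging by the class names:
    //   "1d-aequi-0-1"   -- equidistant bins in [0,1], shared across dimensions
    //   "1d-aequi-0-max" -- equidistant bins in [0,max], shared across dimensions
    //   "nd-aequi-0-max" -- equidistant bins in [0,max_d], separate per dimension d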
    std::string s_quantType = _conf->gS( _confSection, "s_quantType", "1d-aequi-0-1" );

    if ( s_quantType == "1d-aequi-0-1" )
    {
      this->q = new NICE::Quantization1DAequiDist0To1 ( numBins );
    }
    else if ( s_quantType == "1d-aequi-0-max" )
    {
      this->q = new NICE::Quantization1DAequiDist0ToMax ( numBins );
    }
    else if ( s_quantType == "nd-aequi-0-max" )
    {
      this->q = new NICE::QuantizationNDAequiDist0ToMax ( numBins );
    }
    else
    {
      fthrow(Exception, "Quantization type is unknown " << s_quantType);
    }
  }
  else
  {
    this->q = NULL;
  }
}
///////////////////// ///////////////////// /////////////////////
// GET / SET
///////////////////// ///////////////////// /////////////////////

std::set<uint> GPHIKRawClassifier::getKnownClassNumbers ( ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  return this->knownClasses;
}

///////////////////// ///////////////////// /////////////////////
// CLASSIFIER STUFF
///////////////////// ///////////////////// /////////////////////
void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                    uint & _result,
                                    SparseVector & _scores
                                  ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  _scores.clear();

  // classification with quantization of test inputs
  if ( this->q != NULL )
  {
    uint maxClassNo = 0;
    for ( std::map< uint, double * >::const_iterator itT = this->precomputedT.begin() ;
          itT != this->precomputedT.end();
          itT++
        )
    {
      uint classno = itT->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      double *T = itT->second;
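
      // T is laid out as one row of getNumberOfBins() precomputed scores per
      // dimension, so scoring a test input reduces to a sum of table lookups:
      //   beta = sum_d T[ d * numBins + bin(v_d) ]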
      for ( SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++ )
      {
        uint dim = i->first;
        double v = i->second;
        uint qBin = this->q->quantize( v, dim );

        beta += T[dim * this->q->getNumberOfBins() + qBin];
      } //for-loop over dimensions of test input

      _scores[ classno ] = beta;
    } //for-loop over 1-vs-all models
  }
  // classification with exact test inputs, i.e., no quantization involved
  else
  {
    uint maxClassNo = 0;
    for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
    {
      uint classno = i->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();

      const PrecomputedType & A = i->second;
      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
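
      // Fast HIK evaluation via the LUTs A and B: with the training values of each
      // dimension sorted in ascending order, A[dim][r] holds the cumulative sum of
      // alpha_i * x_{i,dim} over the r+1 smallest values, and B[dim][r] the
      // corresponding cumulative sum of the alpha_i. With pos denoting the number
      // of training values not larger than fval (found via upper_bound below), the
      // per-dimension contribution to f(x*) = sum_i alpha_i k(x*, x_i) is
      //   A[dim][pos-1] + fval * ( B[dim][nnz-1] - B[dim][pos-1] )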
      for ( SparseVector::const_iterator itX = _xstar->begin(); itX != _xstar->end(); itX++ )
      {
        uint dim = itX->first;
        double fval = itX->second;

        uint nnz = this->nnz_per_dimension[dim];
        uint nz = this->num_examples - nnz;

        if ( nnz == 0 ) continue;
        // useful
        //if ( fval < this->f_tolerance ) continue;

        uint position = 0;

        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
        GMHIKernelRaw::sparseVectorElement fval_element;
        fval_element.value = fval;

        //std::cerr << "value to search for " << fval << endl;
        //std::cerr << "data matrix in dimension " << dim << endl;
        //for (int j = 0; j < nnz; j++)
        //  std::cerr << dataMatrix[dim][j].value << std::endl;

        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
        position = distance ( dataMatrix[dim], it );

        // /*// add zero elements
        // if ( fval_element.value > 0.0 )
        //   position += nz;*/

        bool posIsZero ( position == 0 );

        // special case 1:
        // new example is smaller than all known examples
        // -> resulting value = fval * sum_l=1^n alpha_l
        if ( position == 0 )
        {
          beta += fval * B[ dim ][ nnz - 1 ];
        }
        // special case 2:
        // new example is equal to or larger than the largest training example in this dimension
        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes,
        //    which is consistent, since no training value in this dimension exceeds the new example
        else if ( position == nnz )
        {
          beta += A[ dim ][ nnz - 1 ];
        }
        // standard case: new example is larger than the smallest element,
        // but smaller than the largest one in the current dimension
        else
        {
          beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
        }

        // // correct upper bound to correct position, only possible if new example is not the smallest value in this dimension
        // if ( !posIsZero )
        //   position--;
        //
        //
        // double firstPart = 0.0;
        // if ( !posIsZero )
        //   firstPart = ( A[ dim ][ position ] );
        //
        // double secondPart( B[ dim ][ this->num_examples-1-nz ]);
        // if ( !posIsZero && (position >= nz) )
        //   secondPart -= B[dim][ position ];
        //
        // // but apply using the transformed one
        // beta += firstPart + secondPart * fval;
      } //for-loop over dimensions of test input

      _scores[ classno ] = beta;
    } //for-loop over 1-vs-all models
  } // if-condition wrt quantization
  _scores.setDim ( *this->knownClasses.rbegin() + 1 );

  if ( this->knownClasses.size() > 2 )
  { // multi-class classification
    _result = _scores.maxElement();
  }
  else if ( this->knownClasses.size() == 2 ) // binary setting
  {
    uint class1 = *(this->knownClasses.begin());
    uint class2 = *(this->knownClasses.rbegin());

    // since we erased the binary label vector corresponding to the smaller class number,
    // we only have scores for the larger class number
    uint class_for_which_we_have_a_score = class2;
    uint class_for_which_we_dont_have_a_score = class1;

    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];

    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
  }
}
void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                    uint & _result,
                                    Vector & _scores
                                  ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );
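
  // the following mirrors the SparseVector-scores variant above; the only
  // difference is that scores are written into a dense NICE::Vector, which the
  // caller has to allocate large enough (see the batch classify method below)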
  // classification with quantization of test inputs
  if ( this->q != NULL )
  {
    uint maxClassNo = 0;
    for ( std::map< uint, double * >::const_iterator itT = this->precomputedT.begin() ;
          itT != this->precomputedT.end();
          itT++
        )
    {
      uint classno = itT->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      double *T = itT->second;

      for ( SparseVector::const_iterator i = _xstar->begin(); i != _xstar->end(); i++ )
      {
        uint dim = i->first;
        double v = i->second;
        uint qBin = this->q->quantize( v, dim );

        beta += T[dim * this->q->getNumberOfBins() + qBin];
      } //for-loop over dimensions of test input

      _scores[ classno ] = beta;
    } //for-loop over 1-vs-all models
  }
  // classification with exact test inputs, i.e., no quantization involved
  else
  {
    uint maxClassNo = 0;
    for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
    {
      uint classno = i->first;
      maxClassNo = std::max ( maxClassNo, classno );
      double beta = 0;
      GMHIKernelRaw::sparseVectorElement **dataMatrix = this->gm->getDataMatrix();

      const PrecomputedType & A = i->second;
      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
      for ( SparseVector::const_iterator itX = _xstar->begin(); itX != _xstar->end(); itX++ )
      {
        uint dim = itX->first;
        double fval = itX->second;

        uint nnz = this->nnz_per_dimension[dim];
        uint nz = this->num_examples - nnz;

        if ( nnz == 0 ) continue;
        // useful
        //if ( fval < this->f_tolerance ) continue;

        uint position = 0;

        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
        GMHIKernelRaw::sparseVectorElement fval_element;
        fval_element.value = fval;

        //std::cerr << "value to search for " << fval << endl;
        //std::cerr << "data matrix in dimension " << dim << endl;
        //for (int j = 0; j < nnz; j++)
        //  std::cerr << dataMatrix[dim][j].value << std::endl;

        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
        position = distance ( dataMatrix[dim], it );

        bool posIsZero ( position == 0 );

        // special case 1:
        // new example is smaller than all known examples
        // -> resulting value = fval * sum_l=1^n alpha_l
        if ( position == 0 )
        {
          beta += fval * B[ dim ][ nnz - 1 ];
        }
        // special case 2:
        // new example is equal to or larger than the largest training example in this dimension
        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes,
        //    which is consistent, since no training value in this dimension exceeds the new example
        else if ( position == nnz )
        {
          beta += A[ dim ][ nnz - 1 ];
        }
        // standard case: new example is larger than the smallest element,
        // but smaller than the largest one in the current dimension
        else
        {
          beta += A[ dim ][ position - 1 ] + fval * ( B[ dim ][ nnz - 1 ] - B[ dim ][ position - 1 ] );
        }
      } //for-loop over dimensions of test input

      _scores[ classno ] = beta;
    } //for-loop over 1-vs-all models
  } // if-condition wrt quantization
  if ( this->knownClasses.size() > 2 )
  { // multi-class classification
    _result = _scores.MaxIndex();
  }
  else if ( this->knownClasses.size() == 2 ) // binary setting
  {
    uint class1 = *(this->knownClasses.begin());
    uint class2 = *(this->knownClasses.rbegin());

    // since we erased the binary label vector corresponding to the smaller class number,
    // we only have scores for the larger class number
    uint class_for_which_we_have_a_score = class2;
    uint class_for_which_we_dont_have_a_score = class1;

    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];

    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
  }
}
void GPHIKRawClassifier::classify ( const std::vector< const NICE::SparseVector *> _examples,
                                    NICE::Vector & _results,
                                    NICE::Matrix & _scores
                                  ) const
{
  _scores.resize( _examples.size(), *(this->knownClasses.rbegin()) + 1 );
  _scores.set( 0.0 );

  _results.resize( _examples.size() );
  _results.set( 0.0 );

  NICE::Vector::iterator resultsIt = _results.begin();

  uint exCnt ( 0 );
  uint resUI ( 0 );
  NICE::Vector scoresSingle( *(this->knownClasses.rbegin()) + 1, -std::numeric_limits<double>::max() );

  for ( std::vector< const NICE::SparseVector *>::const_iterator exIt = _examples.begin();
        exIt != _examples.end();
        exIt++, resultsIt++, exCnt++
      )
  {
    this->classify ( *exIt,
                     resUI,
                     scoresSingle
                   );

    *resultsIt = resUI;
    _scores.setRow( exCnt, scoresSingle );
    scoresSingle.set( -std::numeric_limits<double>::max() );
  }
}
/** training process */
void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
                                 const NICE::Vector & _labels
                               )
{
  // sanity check: examples and labels have to be of same size
  if ( _examples.size() != _labels.size() )
  {
    fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  }

  this->num_examples = _examples.size();

  this->knownClasses.clear();
  for ( uint i = 0; i < _labels.size(); i++ )
    this->knownClasses.insert( (uint)_labels[i] );

  std::map<uint, NICE::Vector> binLabels;
  for ( set<uint>::const_iterator j = knownClasses.begin(); j != knownClasses.end(); j++ )
  {
    uint current_class = *j;
    Vector labels_binary ( _labels.size() );

    for ( uint i = 0; i < _labels.size(); i++ )
    {
      labels_binary[i] = ( _labels[i] == current_class ) ? 1.0 : -1.0;
    }

    binLabels.insert ( std::pair<uint, NICE::Vector>( current_class, labels_binary) );
  }
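
  // e.g., labels (1,2,1) over the classes {1,2} yield
  //   binLabels[1] = ( +1, -1, +1 ) and binLabels[2] = ( -1, +1, -1 )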
  // handle the special binary case
  if ( knownClasses.size() == 2 )
  {
    // we erase the binary label vector which corresponds to the smaller class number as positive class
    uint clNoSmall = *(this->knownClasses.begin());
    std::map<uint, NICE::Vector>::iterator it = binLabels.begin();
    it++;

    if ( binLabels.begin()->first == clNoSmall )
    {
      binLabels.erase( binLabels.begin(), it );
    }
    else
    {
      binLabels.erase( it, binLabels.end() );
    }
  }

  this->train ( _examples, binLabels );
}
void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
                                 std::map<uint, NICE::Vector> & _binLabels
                               )
{
  // sanity check: examples and labels have to be of same size
  for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
        binLabIt != _binLabels.end();
        binLabIt++
      )
  {
    if ( _examples.size() != binLabIt->second.size() )
    {
      fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
    }
  }

  if ( this->b_verbose )
    std::cerr << "GPHIKRawClassifier::train" << std::endl;

  Timer t;
  t.start();

  this->clearSetsOfTablesAandB();
  this->clearSetsOfTablesT();
  // sort examples in each dimension and "transpose" the feature matrix
  // set up the GenericMatrix interface
  if ( this->gm != NULL )
    delete this->gm;

  this->gm = new GMHIKernelRaw ( _examples, this->d_noise, this->q );
  this->nnz_per_dimension = this->gm->getNNZPerDimension();
  this->num_dimension = this->gm->getNumberOfDimensions();

  // compute the largest eigenvalue of our kernel matrix
  // note: this guy is shared among all categories,
  //       since the kernel matrix is shared as well
  NICE::Vector eigenMax;
  NICE::Matrix eigenMaxV;

  // for reproducibility during debugging
  //FIXME
  srand ( 0 );
  srand48 ( 0 );

  NICE::EigValues * eig = new EVArnoldi ( this->b_eig_verbose ,
                                          this->i_eig_value_max_iterations
                                        );
  eig->getEigenvalues( *gm, eigenMax, eigenMaxV, 1 /*rank*/ );
  delete eig;

  // set simple Jacobi preconditioning
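  // (the diagonal of K + sigma^2*I is cheap to obtain and usually improves the
  // convergence of the conjugate gradients solver)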
  NICE::Vector diagonalElements;
  this->gm->getDiagonalElements ( diagonalElements );
  this->solver->setJacobiPreconditioner ( diagonalElements );
  // solve the linear equation system for each class
  // be careful when parallelising this!
  for ( std::map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
        i != _binLabels.end();
        i++
      )
  {
    uint classno = i->first;
    if ( b_verbose )
      std::cerr << "Training for class " << classno << std::endl;

    const NICE::Vector & y = i->second;
    NICE::Vector alpha;

    /** About finding a good initial solution (see also GPLikelihoodApproximation)
      * K~ = K + sigma^2 I
      *
      * K~ \approx lambda_max v v^T
      * \lambda_max v v^T * alpha = k_*     | multiply with v^T from left
      * => \lambda_max v^T alpha = v^T k_*
      * => alpha = k_* / lambda_max could be a good initial start
      * If we put everything in the first equation this gives us
      * v = k_*
      * This reduces the number of iterations by about 5 to 8
      */
    alpha = ( y * (1.0 / eigenMax[0]) );

    this->solver->solveLin( *gm, y, alpha );

    // //debug
    // std::cerr << "alpha: " << alpha << std::endl;

    // get the lookup tables A, B, etc. and store them
    this->gm->updateTablesAandB( alpha );
    double **A = this->gm->getTableA();
    double **B = this->gm->getTableB();

    this->precomputedA.insert ( std::pair<uint, PrecomputedType> ( classno, A ) );
    this->precomputedB.insert ( std::pair<uint, PrecomputedType> ( classno, B ) );

    // quantization for classification?
    if ( this->q != NULL )
    {
      this->gm->updateTableT( alpha );
      double *T = this->gm->getTableT ( );
      this->precomputedT.insert( std::pair<uint, double * > ( classno, T ) );
    }
  }

  // NOTE if quantization is turned on, we do not need the LUTs A and B anymore
  if ( this->q != NULL )
  {
    this->clearSetsOfTablesAandB();
  }

  t.stop();
  if ( this->b_verbose )
    std::cerr << "Time used for GPHIKRawClassifier::train: " << t.getLast() << std::endl;

  // indicate that we finished training successfully
  this->b_isTrained = true;

  // clean up all examples ??
  if ( this->b_verbose )
    std::cerr << "Learning finished" << std::endl;
}
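
// Usage sketch (hypothetical driver code, not part of this file; it only relies
// on the constructors and methods defined above):
//
//   NICE::Config conf ( "my_settings.conf" );            // or an empty config for defaults
//   GPHIKRawClassifier classifier ( &conf, "GPHIKRawClassifier" );
//
//   std::vector< const NICE::SparseVector * > examples;  // training data, to be filled
//   NICE::Vector labels;                                 // one class number per example
//   classifier.train ( examples, labels );
//
//   uint result;
//   NICE::SparseVector scores;
//   classifier.classify ( examples[0], result, scores ); // scores[c] = 1-vs-all score of class c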