GMHIKernelRaw.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /**
  2. * @file GMHIKernelRaw.cpp
  3. * @brief Fast multiplication with histogram intersection kernel matrices (Implementation)
  4. * @author Erik Rodner, Alexander Freytag
  5. * @date 01/02/2012
  6. */
#include <algorithm>
#include <iostream>

#include <core/vector/VVector.h>
#include <core/basics/Timer.h>

#include "GMHIKernelRaw.h"
  11. using namespace NICE;
  12. using namespace std;
  13. GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples,
  14. const double _d_noise,
  15. NICE::Quantization * _q
  16. )
  17. {
  18. this->examples_raw = NULL;
  19. this->nnz_per_dimension = NULL;
  20. this->table_A = NULL;
  21. this->table_B = NULL;
  22. this->table_T = NULL;
  23. this->d_noise = _d_noise;
  24. this->q = _q;
  25. this->initData(_examples);
  26. }
/** Destructor: releases all internally allocated example data and lookup tables. */
GMHIKernelRaw::~GMHIKernelRaw()
{
    this->cleanupData();
}
  31. void GMHIKernelRaw::cleanupData()
  32. {
  33. // data structure of examples
  34. if ( this->examples_raw != NULL )
  35. {
  36. for ( uint d = 0; d < this->num_dimension; d++ )
  37. if (examples_raw[d] != NULL)
  38. delete [] examples_raw[d];
  39. delete [] this->examples_raw;
  40. this->examples_raw = NULL;
  41. }
  42. // counter of non-zero examples in each dimension
  43. if ( this->nnz_per_dimension != NULL )
  44. {
  45. delete [] this->nnz_per_dimension;
  46. this->nnz_per_dimension = NULL;
  47. }
  48. // LUT A for classification without quantization
  49. if ( this->table_A != NULL )
  50. {
  51. for ( uint d = 0; d < this->num_dimension; d++ )
  52. if (table_A[d] != NULL)
  53. delete [] table_A[d];
  54. delete [] this->table_A;
  55. this->table_A = NULL;
  56. }
  57. // LUT B for classification without quantization
  58. if ( this->table_B != NULL )
  59. {
  60. for ( uint d = 0; d < this->num_dimension; d++ )
  61. if (table_B[d] != NULL)
  62. delete [] table_B[d];
  63. delete [] this->table_B;
  64. this->table_B = NULL;
  65. }
  66. // LUT T for classification with quantization
  67. if ( this->table_T != NULL )
  68. {
  69. delete [] this->table_T;
  70. this->table_T = NULL;
  71. }
  72. }
/**
 * @brief Build the internal sparse data structures from the training examples.
 *
 * For every dimension d, examples_raw[d] afterwards holds the
 * (value, example_index) pairs of all examples with a non-zero entry in d,
 * sorted ascending by value; nnz_per_dimension[d] counts these entries.
 * Additionally the diagonal kernel entries (L1 norm of each example + noise)
 * are computed, and -- if a quantization object was supplied -- its parameters
 * and the table T are set up.
 *
 * @param _examples training examples; must be non-empty
 * @throws Exception if no examples are given
 */
void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_examples )
{
    if (_examples.size() == 0 )
        fthrow(Exception, "No examples given for learning");

    // release previously allocated structures (no-op on the first call,
    // where all table pointers are still NULL)
    cleanupData();

    // NOTE(review): the dimensionality is taken from the first example only;
    // all examples are assumed to share this dimension -- confirm with callers
    this->num_dimension = _examples[0]->getDim();
    this->examples_raw = new sparseVectorElement *[num_dimension];
    this->nnz_per_dimension = new uint [num_dimension];
    this->num_examples = _examples.size();

    // waste memory and allocate a non-sparse data block;
    // examples_raw_increment[d] acts as a write cursor into examples_raw[d]
    sparseVectorElement **examples_raw_increment = new sparseVectorElement *[num_dimension];
    for (uint d = 0; d < this->num_dimension; d++)
    {
        this->examples_raw[d] = new sparseVectorElement [ this->num_examples ];
        examples_raw_increment[d] = this->examples_raw[d];
        this->nnz_per_dimension[d] = 0;
    }

    // additionally allocate a Vector with as many entries as examples.
    // this vector will contain the L1 norm values of all examples + noise;
    // thereby, it represents the diagonal entries of our kernel matrix for
    // the special case of the minimum kernel
    this->diagonalElements.resize ( this->num_examples );
    this->diagonalElements.set ( this->d_noise );

    uint example_index = 0;
    NICE::Vector::iterator itDiagEl = this->diagonalElements.begin();

    // minor pre-allocation of the loop variables
    uint i_dimNonZero;
    double value;
    double l1norm;

    // iterate over all provided training examples to process their data
    for ( std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
          i != _examples.end();
          i++, example_index++, itDiagEl++
        )
    {
        l1norm = 0.0;
        const NICE::SparseVector *x = *i;
        // loop over all non-zero dimensions, copy dimension and value into our
        // data structure, and compute the L1 norm
        for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
        {
            i_dimNonZero = j->first;
            value = j->second;
            examples_raw_increment[i_dimNonZero]->value = value;
            examples_raw_increment[i_dimNonZero]->example_index = example_index;
            // move the write cursor to the next free slot in this dimension
            examples_raw_increment[i_dimNonZero]++;
            this->nnz_per_dimension[i_dimNonZero]++;
            l1norm = l1norm + value;
        }
        *itDiagEl = *itDiagEl + l1norm;
    }
    // only the cursor array is freed here -- the data it pointed into
    // stays alive in examples_raw
    delete [] examples_raw_increment;

    // sort the entries of each dimension ascending by feature value
    for (uint d = 0; d < this->num_dimension; d++)
    {
        uint nnz = this->nnz_per_dimension[d];
        if ( nnz > 1 )
            std::sort( this->examples_raw[d], this->examples_raw[d] + nnz );
    }

    // pre-allocate the A and B matrices
    this->table_A = allocateTableAorB();
    this->table_B = allocateTableAorB();

    // Quantization for classification?
    if ( this->q != NULL )
    {
        // (1) if yes, setup the parameters of the quantization object
        NICE::Vector _maxValuesPerDimension = this->getLargestValuePerDimension();
        this->q->computeParametersFromData ( _maxValuesPerDimension );
        this->table_T = this->allocateTableT();
    }
}
  144. double **GMHIKernelRaw::allocateTableAorB() const
  145. {
  146. double **table;
  147. table = new double *[this->num_dimension];
  148. for (uint i = 0; i < this->num_dimension; i++)
  149. {
  150. uint nnz = this->nnz_per_dimension[i];
  151. if (nnz>0) {
  152. table[i] = new double [ nnz ];
  153. } else {
  154. table[i] = NULL;
  155. }
  156. }
  157. return table;
  158. }
  159. double *GMHIKernelRaw::allocateTableT() const
  160. {
  161. double *table;
  162. table = new double [this->num_dimension * this->q->getNumberOfBins()];
  163. return table;
  164. }
  165. void GMHIKernelRaw::copyTableAorB(double **src, double **dst) const
  166. {
  167. for (uint i = 0; i < this->num_dimension; i++)
  168. {
  169. uint nnz = this->nnz_per_dimension[i];
  170. if (nnz>0)
  171. {
  172. for (uint j = 0; j < nnz; j++)
  173. dst[i][j] = src[i][j];
  174. }
  175. else
  176. {
  177. dst[i] = NULL;
  178. }
  179. }
  180. }
  181. void GMHIKernelRaw::copyTableT(double *_src, double *_dst) const
  182. {
  183. double * p_src = _src;
  184. double * p_dst = _dst;
  185. for ( int i = 0;
  186. i < this->num_dimension * this->q->getNumberOfBins();
  187. i++, p_src++, p_dst++
  188. )
  189. {
  190. *p_dst = *p_src;
  191. }
  192. }
  193. void GMHIKernelRaw::updateTablesAandB ( const NICE::Vector _x ) const
  194. {
  195. // start the actual computations of A, B, and optionally T
  196. for (uint dim = 0; dim < this->num_dimension; dim++)
  197. {
  198. double alpha_sum = 0.0;
  199. double alpha_times_x_sum = 0.0;
  200. uint nnz = nnz_per_dimension[dim];
  201. //////////
  202. // loop through all elements in sorted order
  203. sparseVectorElement *training_values_in_dim = examples_raw[dim];
  204. for ( uint cntNonzeroFeat = 0;
  205. cntNonzeroFeat < nnz;
  206. cntNonzeroFeat++, training_values_in_dim++
  207. )
  208. {
  209. // index of the feature
  210. int index = training_values_in_dim->example_index;
  211. // element of the feature
  212. double elem = training_values_in_dim->value;
  213. alpha_times_x_sum += _x[index] * elem;
  214. alpha_sum += _x[index];
  215. this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
  216. this->table_B[dim][cntNonzeroFeat] = alpha_sum;
  217. }
  218. }
  219. }
/**
 * @brief Fill the lookup table T used for fast classification with quantization.
 *
 * For every dimension and every quantization prototype v, T stores the value
 * of sum_l alpha_l * min(v, x_l) over all training examples, derived from the
 * cumulative tables A and B.
 *
 * NOTE(review): the parameter _x is never read here -- the result depends on
 * table_A / table_B only, so updateTablesAandB must have been called before.
 */
void GMHIKernelRaw::updateTableT ( const NICE::Vector _x ) const
{
    // sanity check: without a quantization object there is no table T to fill
    if ( this->q == NULL)
    {
        return;
    }

    // number of quantization bins
    uint hmax = this->q->getNumberOfBins();

    double * prototypes;
    prototypes = new double [ hmax * this->num_dimension ];
    double * p_prototypes;
    p_prototypes = prototypes;

    // compute all prototypes to compare against lateron; they are laid out
    // dimension-wise, i.e. hmax consecutive values per dimension
    for (uint dim = 0; dim < this->num_dimension; dim++)
    {
        for ( uint i = 0 ; i < hmax ; i++ )
        {
            *p_prototypes = this->q->getPrototype( i, dim );
            p_prototypes++;
        }
    }

    // start the actual computation of T
    for (uint dim = 0; dim < this->num_dimension; dim++)
    {
        uint nnz = nnz_per_dimension[dim];
        if ( nnz == 0 )
        {
            // no non-zero feature values in this dimension -> the whole row is zero
            double * itT = this->table_T + dim*hmax;
            for ( uint idxProto = 0; idxProto < hmax; idxProto++, itT++ )
            {
                *itT = 0;
            }
            continue;
        }

        uint idxProtoElem; // denotes the bin number in dim i of a quantized example, previously termed qBin
        sparseVectorElement * i = examples_raw[dim];
        sparseVectorElement * iPredecessor = examples_raw[dim];

        // index of the element, which is always bigger than the current value fval
        int indexElem = 0;
        // element of the feature
        double elem = i->value;
        idxProtoElem = this->q->quantize ( elem, dim );

        uint idxProto;
        double * itProtoVal = prototypes + dim*hmax;
        double * itT = this->table_T + dim*hmax;

        // special case 1:
        // loop over all prototypes smaller then the smallest quantized example in this dimension
        for ( idxProto = 0; idxProto < idxProtoElem; idxProto++, itProtoVal++, itT++) // idxProto previously j
        {
            // current prototype is smaller than all known examples
            // -> resulting value = fval * sum_l=1^n alpha_l
            (*itT) = (*itProtoVal) * ( this->table_B[ dim ][ nnz-1 ] );
        }//for-loop over prototypes -- special case 1

        // standard case: prototypes larger then the smallest element, but smaller then the largest one in the corrent dimension
        for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
        {
            //move to next example, which is smaller then the current prototype after quantization
            // pay attentation to not loop over the number of non-zero elements
            while ( (idxProto >= idxProtoElem) && ( indexElem < ( nnz - 1 ) ) ) //(this->ui_n-1-nrZeroIndices)) )
            {
                indexElem++;
                iPredecessor = i;
                i++;
                // only quantize if the value changed (quantize calls are costly)
                if ( i->value != iPredecessor->value )
                {
                    idxProtoElem = this->q->quantize ( i->value, dim );
                }
            }
            // did we loop over the largest element in this dimension?
            if ( indexElem==( nnz-1 ) )
            {
                break;
            }
            // A part: examples smaller than the prototype contribute their own value;
            // B part: all larger examples contribute prototype * alpha each
            (*itT) = table_A[ dim ][ indexElem-1 ] + (*itProtoVal)*( table_B[ dim ][ nnz-1 ] - table_B[ dim ][ indexElem-1 ] );
        }//for-loop over prototypes -- standard case

        // special case 2:
        // the current prototype is equal to or larger than the largest training example in this dimension
        // -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ] is equal to zero and vanishes, which is logical, since all elements are smaller than the remaining prototypes!
        for ( ; idxProto < hmax; idxProto++, itProtoVal++, itT++)
        {
            (*itT) = table_A[ dim ][ indexElem ];
        }//for-loop over prototypes -- special case 2
    }//for-loop over dimensions

    // clean-up prototypes
    // NOTE(review): q is guaranteed non-NULL here (early return above),
    // so this guard is redundant but harmless
    if ( this->q != NULL)
    {
        delete [] prototypes;
    }
}
  318. /** multiply with a vector: A*x = y */
  319. void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
  320. {
  321. // STEP 1: initialize tables A and B
  322. this->updateTablesAandB(_x);
  323. _y.resize( this->num_examples );
  324. _y.set(0.0);
  325. for (uint dim = 0; dim < this->num_dimension; dim++)
  326. {
  327. uint nnz = this->nnz_per_dimension[dim];
  328. uint nz = this->num_examples - nnz;
  329. if ( nnz == 0 ) {
  330. // all values are zero in this dimension :) and we can simply ignore the feature
  331. continue;
  332. }
  333. sparseVectorElement *training_values_in_dim = examples_raw[dim];
  334. for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
  335. {
  336. uint feat = training_values_in_dim->example_index;
  337. uint inversePosition = cntNonzeroFeat;
  338. double fval = training_values_in_dim->value;
  339. double firstPart = this->table_A[dim][inversePosition];
  340. double secondPart = this->table_B[dim][nnz-1] - this->table_B[dim][inversePosition];
  341. _y[feat] += firstPart + fval * secondPart;
  342. }
  343. }
  344. for (uint feat = 0; feat < this->num_examples; feat++)
  345. _y[feat] += this->d_noise * _x[feat];
  346. }
  347. /** get the number of rows in A */
  348. uint GMHIKernelRaw::rows () const
  349. {
  350. // return the number of examples
  351. return num_examples;
  352. }
  353. /** get the number of columns in A */
  354. uint GMHIKernelRaw::cols () const
  355. {
  356. // return the number of examples
  357. return num_examples;
  358. }
  359. double **GMHIKernelRaw::getTableA() const
  360. {
  361. double **t = allocateTableAorB();
  362. copyTableAorB(this->table_A, t);
  363. return t;
  364. }
  365. double **GMHIKernelRaw::getTableB() const
  366. {
  367. double **t = allocateTableAorB();
  368. copyTableAorB(this->table_B, t);
  369. return t;
  370. }
  371. double * GMHIKernelRaw::getTableT() const
  372. {
  373. double * T = this->allocateTableT();
  374. copyTableT(this->table_T, T);
  375. return T;
  376. }
  377. uint *GMHIKernelRaw::getNNZPerDimension() const
  378. {
  379. uint *v = new uint[this->num_dimension];
  380. for (uint i = 0; i < this->num_dimension; i++)
  381. v[i] = this->nnz_per_dimension[i];
  382. return v;
  383. }
  384. uint NICE::GMHIKernelRaw::getNumberOfDimensions() const
  385. {
  386. return this->num_dimension;
  387. }
  388. void NICE::GMHIKernelRaw::getDiagonalElements( NICE::Vector & _diagonalElements) const
  389. {
  390. _diagonalElements = this->diagonalElements;
  391. }
  392. NICE::Vector NICE::GMHIKernelRaw::getLargestValuePerDimension ( ) const
  393. {
  394. NICE::Vector vmax ( this->num_dimension );
  395. NICE::Vector::iterator vmaxIt = vmax.begin();
  396. for (uint d = 0; d < this->num_dimension; d++, vmaxIt++)
  397. {
  398. uint nnz = this->nnz_per_dimension[d];
  399. if ( nnz > 0 )
  400. {
  401. *vmaxIt = this->examples_raw[ d ][ nnz-1 ].value;
  402. }
  403. else
  404. {
  405. *vmaxIt = 0.0;
  406. }
  407. }
  408. return vmax;
  409. }