/**
* @file GMHIKernelRaw.cpp
* @brief Fast multiplication with histogram intersection kernel matrices (Implementation)
* @author Erik Rodner, Alexander Freytag
* @date 01/02/2012
*/

#include <iostream>

#include <core/vector/VVector.h>
#include <core/basics/Timer.h>

#include "GMHIKernelRaw.h"

using namespace NICE;
using namespace std;
GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> & _examples,
                              const double _d_noise,
                              const NICE::Quantization * _q
                            )
{
    this->examples_raw      = NULL;
    this->nnz_per_dimension = NULL;
    this->table_A           = NULL;
    this->table_B           = NULL;
    this->table_T           = NULL;
    this->d_noise           = _d_noise;
    this->q                 = _q;

    initData(_examples);
}
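// Illustrative usage sketch (assumption: training data is already available as
// NICE::SparseVector pointers and NICE::Vector offers a (size, value) constructor;
// this snippet is not part of the library itself):
//
//   std::vector< const NICE::SparseVector * > examples;   // training examples, filled elsewhere
//   GMHIKernelRaw gm ( examples, 0.1 /* noise */, NULL /* no quantization */ );
//   NICE::Vector alpha ( examples.size(), 1.0 );          // coefficient vector
//   NICE::Vector y;
//   gm.multiply ( y, alpha );                             // y = ( K_HIK + noise * I ) * alpha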
GMHIKernelRaw::~GMHIKernelRaw()
{
    cleanupData();
}

void GMHIKernelRaw::cleanupData()
{
    if ( this->examples_raw != NULL ) {
        for ( uint d = 0; d < this->num_dimension; d++ )
            if ( examples_raw[d] != NULL )
                delete [] examples_raw[d];
        delete [] this->examples_raw;
        this->examples_raw = NULL;
    }

    if ( this->nnz_per_dimension != NULL ) {
        delete [] this->nnz_per_dimension;
        this->nnz_per_dimension = NULL;
    }

    if ( this->table_A != NULL ) {
        for ( uint d = 0; d < this->num_dimension; d++ )
            if ( table_A[d] != NULL )
                delete [] table_A[d];
        delete [] this->table_A;
        this->table_A = NULL;
    }

    if ( this->table_B != NULL ) {
        for ( uint d = 0; d < this->num_dimension; d++ )
            if ( table_B[d] != NULL )
                delete [] table_B[d];
        delete [] this->table_B;
        this->table_B = NULL;
    }
}
void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> & _examples )
{
    if ( _examples.size() == 0 )
        fthrow(Exception, "No examples given for learning");

    cleanupData();

    this->num_dimension     = _examples[0]->getDim();
    this->examples_raw      = new sparseVectorElement *[num_dimension];
    this->nnz_per_dimension = new uint [num_dimension];
    this->num_examples      = _examples.size();

    // waste memory and allocate a non-sparse data block
    sparseVectorElement **examples_raw_increment = new sparseVectorElement *[num_dimension];
    for (uint d = 0; d < this->num_dimension; d++)
    {
        this->examples_raw[d]      = new sparseVectorElement [ this->num_examples ];
        examples_raw_increment[d]  = this->examples_raw[d];
        this->nnz_per_dimension[d] = 0;
    }

    // additionally allocate a Vector with as many entries as examples
    // this vector will contain the L1 norm values of all examples + noise
    // thereby, it represents the diagonal entries of our kernel matrix for
    // the special case of minimum kernel
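    // ( K_ii = sum_d min( x_i[d], x_i[d] ) + noise = || x_i ||_1 + noise,
    //   assuming all feature values are non-negative, as required by the HIK )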
    this->diagonalElements.resize ( this->num_examples );
    this->diagonalElements.set ( this->d_noise );

    uint example_index = 0;
    NICE::Vector::iterator itDiagEl = this->diagonalElements.begin();

    // minor pre-allocation
    uint index;
    double value;
    double l1norm;

    for ( std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
          i != _examples.end();
          i++, example_index++, itDiagEl++
        )
    {
        l1norm = 0.0;
        const NICE::SparseVector *x = *i;
        for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
        {
            index = j->first;
            value = j->second;
            examples_raw_increment[index]->value         = value;
            examples_raw_increment[index]->example_index = example_index;

            // move to the next element
            examples_raw_increment[index]++;
            this->nnz_per_dimension[index]++;

            l1norm = l1norm + value;
        }
        *itDiagEl = *itDiagEl + l1norm;
    }

    delete [] examples_raw_increment;

    // sort along each dimension
    for (uint d = 0; d < this->num_dimension; d++)
    {
        uint nnz = this->nnz_per_dimension[d];
        if ( nnz > 1 )
            std::sort( this->examples_raw[d], this->examples_raw[d] + nnz );
    }
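    // examples_raw[d] now stores the non-zero entries of dimension d as
    // (value, example_index) pairs in ascending order of value. This sorted,
    // column-wise layout is what allows the cumulative-sum tables A and B
    // (and the optional lookup table T) to be built in a single pass.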
    // pre-allocate the A and B matrices
    this->table_A = allocateTableAorB();
    this->table_B = allocateTableAorB();
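    // note: the tables are only allocated here -- they are filled in updateTables()
    // for the coefficient vector that is currently being multiplied with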
    // Quantization for classification?
    if ( this->q != NULL )
    {
        // (1) if yes, setup the parameters of the quantization object
        this->q->computeParametersFromData ( this );
        this->table_T = this->allocateTableT();
    }
}
double **GMHIKernelRaw::allocateTableAorB() const
{
    double **table;
    table = new double *[this->num_dimension];

    for (uint i = 0; i < this->num_dimension; i++)
    {
        uint nnz = this->nnz_per_dimension[i];
        if (nnz > 0) {
            table[i] = new double [ nnz ];
        } else {
            table[i] = NULL;
        }
    }
    return table;
}

double *GMHIKernelRaw::allocateTableT() const
{
    double *table;
    table = new double [ this->num_dimension * this->q->getNumberOfBins() ];
    return table;
}
void GMHIKernelRaw::copyTableAorB(double **src, double **dst) const
{
    for (uint i = 0; i < this->num_dimension; i++)
    {
        uint nnz = this->nnz_per_dimension[i];
        if (nnz > 0) {
            for (uint j = 0; j < nnz; j++)
                dst[i][j] = src[i][j];
        } else {
            dst[i] = NULL;
        }
    }
}
void GMHIKernelRaw::copyTableT(double *_src, double *_dst) const
{
    double *p_src = _src;
    double *p_dst = _dst;
    for ( uint i = 0; i < this->num_dimension * this->q->getNumberOfBins(); i++ )
    {
        *p_dst = *p_src;
        p_src++;
        p_dst++;
    }
}
void GMHIKernelRaw::updateTables ( const NICE::Vector _x ) const
{
    // pre-computations if quantization is activated
    double *prototypes = NULL;
    uint hmax = 0;

    // store prototypes
    if ( this->q != NULL )
    {
        // number of quantization bins
        hmax = this->q->getNumberOfBins();

        // assign to the outer pointer here -- re-declaring prototypes inside this
        // block would leave the outer pointer uninitialized for the lookup and
        // clean-up below
        prototypes = new double [ hmax * this->num_dimension ];
        double *p_prototypes = prototypes;

        for (uint dim = 0; dim < this->num_dimension; dim++)
        {
            for ( uint i = 0 ; i < hmax ; i++ )
            {
                *p_prototypes = this->q->getPrototype( i, dim );
                p_prototypes++;
            }
        }
    }

    // start the actual computations of A, B, and optionally T
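    // For every dimension the tables hold cumulative sums over that dimension's
    // non-zero training values, processed in ascending order (0-based index k):
    //   table_A[dim][k] = sum_{j=0..k} alpha_{pi(j)} * x_{pi(j)}[dim]
    //   table_B[dim][k] = sum_{j=0..k} alpha_{pi(j)}
    // where pi(j) is the example index of the j-th smallest non-zero value in this
    // dimension and alpha = _x is the current coefficient vector.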
    for (uint dim = 0; dim < this->num_dimension; dim++)
    {
        double alpha_sum         = 0.0;
        double alpha_times_x_sum = 0.0;

        uint nnz = nnz_per_dimension[dim];

        // dimensions without any non-zero training value contribute nothing:
        // their A/B tables were never allocated, and (for non-negative features)
        // all corresponding kernel values vanish
        if ( nnz == 0 )
        {
            if ( this->q != NULL )
            {
                for ( uint idxProto = 0; idxProto < hmax; idxProto++ )
                    this->table_T[ dim*hmax + idxProto ] = 0.0;
            }
            continue;
        }

        //////////
        // loop through all elements in sorted order
        sparseVectorElement *training_values_in_dim = examples_raw[dim];
        for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
        {
            // index of the feature
            int index = training_values_in_dim->example_index;
            // element of the feature
            double elem = training_values_in_dim->value;

            alpha_times_x_sum += _x[index] * elem;
            this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;

            alpha_sum += _x[index];
            this->table_B[dim][cntNonzeroFeat] = alpha_sum;
        }

        if ( this->q != NULL )
        {
            //////////
            // variables which are needed for computing T
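            // The lookup table T linearizes the quantized kernel evaluation:
            //   table_T[ dim*hmax + p ] = sum_j alpha_j * min( prototype_p, x_j[dim] )
            // For each prototype p we need (i) the weighted sum (table A) over all
            // training values smaller than prototype_p and (ii) the remaining weight
            // mass (table B), which gets multiplied by prototype_p itself.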
            uint idxProtoElem; // previously qBin

            sparseVectorElement *i            = examples_raw[dim];
            sparseVectorElement *iPredecessor = examples_raw[dim];

            // index of the element, which is always bigger than the current value fval
            uint indexElem = 0;
            // quantization bin of the first (smallest) non-zero element in this dimension
            idxProtoElem = this->q->quantize ( i->value, dim );

            for ( uint idxProto = 0; idxProto < hmax; idxProto++ ) // previously j
            {
                double fvalProto = prototypes[ dim*hmax + idxProto ];
                double t;

                if ( (indexElem == 0) && (idxProto < idxProtoElem) )
                {
                    // current prototype is smaller than everything else
                    // resulting value = fval * sum_l=1^n alpha_l
                    t = fvalProto*( this->table_B[ dim ][ nnz-1 ] );
                }
                else
                {
                    // move to the next example, which is smaller than the current prototype (if necessary)
                    // pay attention to not loop over the number of non-zero elements
                    while ( (idxProto >= idxProtoElem) && ( indexElem < ( nnz - 1 ) ) )
                    {
                        indexElem++;
                        iPredecessor = i;
                        i++;

                        if ( i->value != iPredecessor->value )
                        {
                            idxProtoElem = this->q->quantize ( i->value, dim );
                        }
                    }

                    // compute current element in the lookup table and keep in mind that
                    // indexElem is the next element and not the previous one
                    if ( (idxProto >= idxProtoElem) && ( indexElem == ( nnz-1 ) ) )
                    {
                        // the current prototype is equal to or bigger than the largest training
                        // example in this dimension -> the term B[ dim ][ nnz-1 ] - B[ dim ][ indexElem ]
                        // is zero and vanishes, since all elements are smaller than the prototype
                        t = table_A[ dim ][ indexElem ];
                    }
                    else
                    {
                        // standard case
                        t = table_A[ dim ][ indexElem-1 ] + fvalProto*( table_B[ dim ][ nnz-1 ] - table_B[ dim ][ indexElem-1 ] );
                    }
                }

                this->table_T[ dim*hmax + idxProto ] = t;
            }
        }
    }

    // clean-up prototypes
    if ( this->q != NULL )
    {
        delete [] prototypes;
    }
}
/** multiply with a vector: A*x = y */
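// The HIK matrix-vector product decomposes per dimension: for example i,
//   ( K*alpha )_i = sum_dim [ A[dim][r_i] + x_i[dim] * ( B[dim][nnz-1] - B[dim][r_i] ) ]
// where r_i is the rank of x_i[dim] among the sorted non-zero training values of that
// dimension, i.e. smaller training values contribute alpha_j * x_j[dim] directly and
// larger ones contribute alpha_j * x_i[dim]. The noise term adds d_noise * alpha at the end.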
void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
{
    // STEP 1: initialize tables A and B
    updateTables(_x);

    _y.resize( this->num_examples );
    _y.set(0.0);

    for (uint dim = 0; dim < this->num_dimension; dim++)
    {
        uint nnz = this->nnz_per_dimension[dim];
        uint nz  = this->num_examples - nnz;

        if ( nnz == 0 ) {
            // all values are zero in this dimension :) and we can simply ignore the feature
            continue;
        }

        sparseVectorElement *training_values_in_dim = examples_raw[dim];
        for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
        {
            uint feat = training_values_in_dim->example_index;
            uint inversePosition = cntNonzeroFeat;
            double fval = training_values_in_dim->value;
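            // firstPart:  accumulated alpha_j * x_j[dim] of all training values <= fval
            // secondPart: remaining alpha mass of larger training values, scaled by fval below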
            double firstPart  = this->table_A[dim][inversePosition];
            double secondPart = this->table_B[dim][nnz-1] - this->table_B[dim][inversePosition];

            _y[feat] += firstPart + fval * secondPart;
        }
    }

    for (uint feat = 0; feat < this->num_examples; feat++)
        _y[feat] += this->d_noise * _x[feat];
}

/** get the number of rows in A */
uint GMHIKernelRaw::rows () const
{
    // return the number of examples
    return num_examples;
}

/** get the number of columns in A */
uint GMHIKernelRaw::cols () const
{
    // return the number of examples
    return num_examples;
}
double **GMHIKernelRaw::getTableA() const
{
    double **t = allocateTableAorB();
    copyTableAorB(this->table_A, t);
    return t;
}

double **GMHIKernelRaw::getTableB() const
{
    double **t = allocateTableAorB();
    copyTableAorB(this->table_B, t);
    return t;
}
double *GMHIKernelRaw::getTableT() const
{
    double *t = allocateTableT();
    copyTableT(this->table_T, t);
    return t;
}
uint *GMHIKernelRaw::getNNZPerDimension() const
{
    uint *v = new uint[this->num_dimension];
    for (uint i = 0; i < this->num_dimension; i++)
        v[i] = this->nnz_per_dimension[i];
    return v;
}

uint NICE::GMHIKernelRaw::getNumberOfDimensions() const
{
    return this->num_dimension;
}

void NICE::GMHIKernelRaw::getDiagonalElements( NICE::Vector & _diagonalElements ) const
{
    _diagonalElements = this->diagonalElements;
}
double NICE::GMHIKernelRaw::getLargestValue ( ) const
{
    double vmax (0.0);
    double vtmp (0.0);

    // compare the largest elements of all dimensions; since examples_raw[d] is
    // sorted in ascending order, the largest value of dimension d is its last entry
    for (uint d = 0; d < this->num_dimension; d++)
    {
        uint nnz = this->nnz_per_dimension[d];
        if ( nnz > 0 )
        {
            vtmp = this->examples_raw[d][ nnz-1 ].value;
            if ( vtmp > vmax )
            {
                vmax = vtmp;
            }
        }
    }
    return vmax;
}
NICE::Vector NICE::GMHIKernelRaw::getLargestValuePerDimension ( ) const
{
    NICE::Vector vmax ( this->num_dimension );
    NICE::Vector::iterator vmaxIt = vmax.begin();

    for (uint d = 0; d < this->num_dimension; d++, vmaxIt++)
    {
        uint nnz = this->nnz_per_dimension[d];
        if ( nnz > 0 )
        {
            // sorted in ascending order -> the last entry is the maximum of this dimension
            *vmaxIt = this->examples_raw[d][ nnz-1 ].value;
        }
        else
        {
            *vmaxIt = 0.0;
        }
    }
    return vmax;
}