FeatureMatrixT.tcc 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. this->ui_n = 0;
  20. this->ui_d = 0;
  21. this->features.clear();
  22. this->b_verbose = false;
  23. this->b_debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features,
  28. const uint & _dim
  29. )
  30. {
  31. // resize our data structure
  32. if (_dim == 0)
  33. this->set_d( (*_features.begin()).size() );
  34. else
  35. this->set_d( _dim );
  36. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  37. {
  38. add_feature(*it);
  39. }
  40. this->b_verbose = false;
  41. this->b_debug = false;
  42. }
  43. //Constructor reading data from a vector of sparse vector pointers
  44. template <typename T>
  45. FeatureMatrixT<T>::
  46. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & _features,
  47. const bool _dimensionsOverExamples,
  48. const uint & _dim
  49. )
  50. {
  51. this->features.clear();
  52. // resize our data structure
  53. if (_dim == 0)
  54. this->set_d( (*_features.begin())->getDim() );
  55. else
  56. this->set_d( _dim );
  57. // set number of examples n
  58. if ( this->ui_d > 0 )
  59. {
  60. if (_dimensionsOverExamples) //do we have dim x examples ?
  61. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  62. else //we have examples x dimes (as usually done)
  63. this->ui_n = _features.size();
  64. }
  65. // insert all values
  66. if (_dimensionsOverExamples) //do we have dim x examples ?
  67. {
  68. for (uint dim = 0; dim < this->ui_d; dim++)
  69. {
  70. this->features[dim].insert( _features[dim] );
  71. }
  72. }
  73. else //we have examples x dimes (as usually done)
  74. {
  75. //loop over every example to add its content
  76. for (uint nr = 0; nr < this->ui_n; nr++)
  77. {
  78. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  79. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  80. {
  81. //elemIt->first: dim, elemIt->second: value
  82. this->features[elemIt->first].insert( (T) elemIt->second, nr);
  83. }//for non-zero-values of the feature
  84. }//for every new feature
  85. }//if dimOverEx
  86. //set n for the internal data structure SortedVectorSparse
  87. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  88. (*it).setN( this->ui_n );
  89. }
  90. #ifdef NICE_USELIB_MATIO
  91. //Constructor reading data from matlab-files
  92. template <typename T>
  93. FeatureMatrixT<T>::
  94. FeatureMatrixT(const sparse_t & _features,
  95. const uint & _dim
  96. )
  97. {
  98. if (_dim < 0)
  99. set_d( _features.njc -1 );
  100. else
  101. set_d( _dim );
  102. uint nMax(0);
  103. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  104. {
  105. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  106. {
  107. this->features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  108. if ((_features.ir[ j])>nMax)
  109. nMax = _features.ir[ j];
  110. }
  111. }
  112. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  113. {
  114. (*it).setN(nMax+1);
  115. }
  116. this->ui_n = nMax+1;
  117. this->b_verbose = false;
  118. }
  119. //Constructor reading data from matlab-files
  120. template <typename T>
  121. FeatureMatrixT<T>::
  122. FeatureMatrixT(const sparse_t & _features,
  123. const std::map<uint, uint> & _examples,
  124. const uint & _dim)
  125. {
  126. if (_dim < 0)
  127. set_d(_features.njc -1);
  128. else
  129. set_d(_dim);
  130. uint nMax(0);
  131. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  132. {
  133. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  134. {
  135. uint example_index = _features.ir[ j];
  136. std::map<uint, uint>::const_iterator it = examples.find(example_index);
  137. if ( it != examples.end() ) {
  138. this->features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  139. if (it->second > nMax)
  140. nMax = it->second;
  141. }
  142. }
  143. }
  144. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  145. (*it).setN(nMax+1);
  146. this->ui_n = nMax+1;
  147. this->b_verbose = false;
  148. }
  149. #endif
  150. // Default destructor
  151. template <typename T>
  152. FeatureMatrixT<T>::~FeatureMatrixT()
  153. {
  154. }
  155. //------------------------------------------------------
  156. // several get and set methods including access operators
  157. //------------------------------------------------------
  158. // Get number of examples
  159. template <typename T>
  160. uint FeatureMatrixT<T>::get_n() const
  161. {
  162. return this->ui_n;
  163. }
  164. // Get number of dimensions
  165. template <typename T>
  166. uint FeatureMatrixT<T>::get_d() const
  167. {
  168. return this->ui_d;
  169. }
  170. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  171. template <typename T>
  172. void FeatureMatrixT<T>::set_d(const uint & _d)
  173. {
  174. this->ui_d = _d;
  175. this->features.resize( this->ui_d );
  176. }
  177. template <typename T>
  178. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  179. {
  180. this->b_verbose = _verbose;
  181. }
  182. template <typename T>
  183. bool FeatureMatrixT<T>::getVerbose( ) const
  184. {
  185. return this->b_verbose;
  186. }
  187. template <typename T>
  188. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  189. {
  190. this->b_debug = _debug;
  191. }
  192. template <typename T>
  193. bool FeatureMatrixT<T>::getDebug( ) const
  194. {
  195. return this->b_debug;
  196. }
  197. // Matrix-like operator for element access, performs validity check
  198. template <typename T>
  199. inline T FeatureMatrixT<T>::operator()(const uint _row,
  200. const uint _col
  201. ) const
  202. {
  203. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  204. {
  205. fthrow(Exception, "FeatureMatrixT: out of bounds");
  206. }
  207. else
  208. return ( this->features[_row]).access(_col);
  209. }
  210. template<class T>
  211. inline bool
  212. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & _F) const
  213. {
  214. if ( ( this->get_n() != _F.get_n()) || (this->get_d() != _F.get_d()) )
  215. {
  216. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  217. }
  218. else if ((this->ui_n == 0) || (this->ui_d == 0))
  219. {
  220. return true;
  221. }
  222. for (uint i = 0; i < this->ui_d; i++)
  223. {
  224. for (uint j = 0; j < this->ui_n; j++)
  225. {
  226. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  227. if(!((*this)(i,j) == _F(i,j)))
  228. return false;
  229. }
  230. }
  231. return true;
  232. }
  233. template<class T>
  234. inline bool
  235. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & _F) const
  236. {
  237. if ( ( (*this).get_n() != _F.get_n()) ||
  238. ( (*this).get_d() != _F.get_d())
  239. )
  240. {
  241. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  242. }
  243. else if ((this->ui_n == 0) || (this->ui_d == 0))
  244. {
  245. return false;
  246. }
  247. for (uint i = 0; i < this->ui_d; i++)
  248. {
  249. for (uint j = 0; j < this->ui_n; j++)
  250. {
  251. if(!((*this)(i,j) == _F(i,j)))
  252. return true;
  253. }
  254. }
  255. return false;
  256. }
  257. template<typename T>
  258. inline FeatureMatrixT<T>&
  259. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & _F)
  260. {
  261. this->set_d(_F.get_d());
  262. this->ui_n = _F.get_n();
  263. for (uint i = 0; i < (*this).get_d(); i++)
  264. {
  265. // use the operator= of SortedVectorSparse
  266. features[i] = _F[i];
  267. }
  268. return *this;
  269. }
  270. // Element access without validity check
  271. template <typename T>
  272. inline T FeatureMatrixT<T>::getUnsafe(const uint _row,
  273. const uint _col
  274. ) const
  275. {
  276. return (this->features[_row]).access(_col);
  277. }
  278. //! Element access of original values without validity check
  279. template <typename T>
  280. inline T FeatureMatrixT<T>::getOriginal(const uint _row,
  281. const uint _col
  282. ) const
  283. {
  284. return (this->features[_row]).accessOriginal(_col);
  285. }
  286. // Sets a specified element to the given value, performs validity check
  287. template <typename T>
  288. inline void FeatureMatrixT<T>::set (const uint _row,
  289. const uint _col,
  290. const T & _newElement,
  291. bool _setTransformedValue
  292. )
  293. {
  294. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  295. {
  296. return;
  297. }
  298. else
  299. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  300. }
  301. // Sets a specified element to the given value, without validity check
  302. template <typename T>
  303. inline void FeatureMatrixT<T>::setUnsafe (const uint _row,
  304. const uint _col,
  305. const T & _newElement,
  306. bool _setTransformedValue
  307. )
  308. {
  309. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  310. }
  311. // Acceess to all element entries of a specified dimension, including validity check
  312. template <typename T>
  313. void FeatureMatrixT<T>::getDimension(const uint & _dim,
  314. NICE::SortedVectorSparse<T> & _dimension
  315. ) const
  316. {
  317. if ( (_dim < 0) || (_dim > this->ui_d) )
  318. {
  319. return;
  320. }
  321. else
  322. _dimension = this->features[_dim];
  323. }
  324. // Acceess to all element entries of a specified dimension, without validity check
  325. template <typename T>
  326. void FeatureMatrixT<T>::getDimensionUnsafe(const uint & _dim,
  327. NICE::SortedVectorSparse<T> & _dimension
  328. ) const
  329. {
  330. _dimension = this->features[_dim];
  331. }
  332. // Finds the first element in a given dimension, which equals elem
  333. template <typename T>
  334. void FeatureMatrixT<T>::findFirstInDimension(const uint & _dim,
  335. const T & _elem,
  336. uint & _position
  337. ) const
  338. {
  339. _position = 0;
  340. if ( _dim > this->ui_d )
  341. return;
  342. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  343. eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  344. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.first );
  345. if ( _elem > this->features[_dim].getTolerance() )
  346. _position += this->features[_dim].getZeros();
  347. }
  348. // Finds the last element in a given dimension, which equals elem
  349. template <typename T>
  350. void FeatureMatrixT<T>::findLastInDimension(const uint & _dim,
  351. const T & _elem,
  352. uint & _position
  353. ) const
  354. {
  355. _position = 0;
  356. if ( _dim > this->ui_d )
  357. return;
  358. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  359. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.second );
  360. if ( _elem > this->features[_dim].getTolerance() )
  361. _position += this->features[_dim].getZeros();
  362. }
  363. // Finds the first element in a given dimension, which is larger as elem
  364. template <typename T>
  365. void FeatureMatrixT<T>::findFirstLargerInDimension(const uint & _dim,
  366. const T & _elem,
  367. uint & _position
  368. ) const
  369. {
  370. _position = 0;
  371. if ( _dim > this->ui_d )
  372. return;
  373. //no non-zero elements?
  374. if (this->features[_dim].getNonZeros() <= 0)
  375. {
  376. // if element is greater than zero, than is should be added at the last position
  377. if (_elem > this->features[_dim].getTolerance() )
  378. _position = this->ui_n;
  379. //if not, position is 0
  380. return;
  381. }
  382. if (this->features[_dim].getNonZeros() == 1)
  383. {
  384. // if element is greater than the only nonzero element, then it is larger than everything else
  385. if (this->features[_dim].nonzeroElements().begin()->first <= _elem)
  386. _position = this->ui_n;
  387. //if not, but the element is still greater than zero, than
  388. else if (_elem > this->features[_dim].getTolerance() )
  389. _position = this->ui_n -1;
  390. return;
  391. }
  392. // standard case - not everything is zero and not only a single element is zero
  393. // find pointer to last non-zero element
  394. // FIXME no idea why this should be necessary...
  395. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().end(); //this is needed !!!
  396. // find pointer to first element largern than the given value
  397. it = this->features[_dim].nonzeroElements().upper_bound ( _elem ); //if all values are smaller, this does not do anything at all
  398. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  399. if ( _elem > this->features[_dim].getTolerance() )
  400. {
  401. //position += features[dim].getZeros();
  402. _position += this->ui_n - this->features[_dim].getNonZeros();
  403. }
  404. }
  405. // Finds the last element in a given dimension, which is smaller as elem
  406. template <typename T>
  407. void FeatureMatrixT<T>::findLastSmallerInDimension(const uint & _dim,
  408. const T & _elem,
  409. uint & _position
  410. ) const
  411. {
  412. _position = 0;
  413. if ( (_dim < 0) || (_dim > this->ui_d))
  414. return;
  415. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().lower_bound ( _elem );
  416. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  417. if ( it->first > this->features[_dim].getTolerance() )
  418. _position += this->features[_dim].getZeros();
  419. }
  420. template <typename T>
  421. T FeatureMatrixT<T>::getLargestValue ( const bool & _getTransformedValue ) const
  422. {
  423. T vmax = (T) 0;
  424. T vtmp = (T) 0;
  425. uint tmp ( 0 );
  426. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  427. it != this->features.end();
  428. it++, tmp++
  429. )
  430. {
  431. vtmp = it->getLargestValueUnsafe( 1.0 /*quantile, we are interested in the largest value*/, _getTransformedValue );
  432. if ( vtmp > vmax )
  433. {
  434. vmax = vtmp;
  435. }
  436. }
  437. return vmax;
  438. }
  439. template <typename T>
  440. NICE::VectorT<T> FeatureMatrixT<T>::getLargestValuePerDimension ( const double & _quantile,
  441. const bool & _getTransformedValue
  442. ) const
  443. {
  444. NICE::VectorT<T> vmax ( this->get_d() );
  445. uint tmp ( 0 );
  446. typename NICE::VectorT<T>::iterator vmaxIt = vmax.begin();
  447. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  448. it != this->features.end();
  449. it++, vmaxIt++, tmp++
  450. )
  451. {
  452. *vmaxIt = it->getLargestValueUnsafe( _quantile, _getTransformedValue );
  453. }
  454. return vmax;
  455. }
  456. //------------------------------------------------------
  457. // high level methods
  458. //------------------------------------------------------
  459. template <typename T>
  460. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *_pf )
  461. {
  462. if (_pf != NULL)
  463. {
  464. // REMARK: might be inefficient due to virtual calls
  465. if ( !_pf->isOrderPreserving() )
  466. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  467. uint d = this->get_d();
  468. for (uint dim = 0; dim < d; dim++)
  469. {
  470. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  471. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  472. {
  473. SortedVectorSparse<double>::dataelement & de = i->second;
  474. //TODO check, wether the element is "sparse" afterwards
  475. de.second = _pf->f( dim, i->first );
  476. }
  477. }
  478. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  479. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  480. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  481. }
  482. else
  483. {
  484. //no pf given -> nothing to do
  485. }
  486. }
  487. //Computes the ratio of sparsity across the matrix
  488. template <typename T>
  489. double FeatureMatrixT<T>:: computeSparsityRatio() const
  490. {
  491. double ratio(0.0);
  492. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++)
  493. {
  494. ratio += (*it).getZeros() / (double) (*it).getN();
  495. }
  496. if (this->features.size() != 0)
  497. ratio /= double(this->features.size());
  498. return ratio;
  499. }
  500. // add a new feature and insert its elements at the end of each dimension vector
  501. template <typename T>
  502. void FeatureMatrixT<T>::add_feature( const std::vector<T> & _feature,
  503. const NICE::ParameterizedFunction *_pf
  504. )
  505. {
  506. if (this->ui_n == 0)
  507. {
  508. this->set_d( _feature.size() );
  509. }
  510. if ( _feature.size() != this->ui_d)
  511. {
  512. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  513. return;
  514. }
  515. for (uint dimension = 0; dimension < this->features.size(); dimension++)
  516. {
  517. if (_pf != NULL)
  518. this->features[dimension].insert( _feature[dimension], _pf->f( dimension, _feature[dimension]) );
  519. else
  520. this->features[dimension].insert( _feature[dimension] );
  521. }
  522. this->ui_n++;
  523. }
  524. // add a new feature and insert its elements at the end of each dimension vector
  525. template <typename T>
  526. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & _feature,
  527. const ParameterizedFunction *_pf
  528. )
  529. {
  530. if (this->ui_n == 0)
  531. {
  532. this->set_d( _feature.size() );
  533. }
  534. if ( _feature.getDim() > this->ui_d)
  535. {
  536. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  537. return;
  538. }
  539. for (NICE::SparseVector::const_iterator it = _feature.begin(); it != _feature.end(); it++)
  540. {
  541. if (_pf != NULL)
  542. this->features[it->first].insert( (T) it->second, _pf->f( it->first, (T) it->second), true /* _specifyFeatureNumber */, this->ui_n );
  543. else
  544. this->features[it->first].insert( (T) it->second, true /* _specifyFeatureNumber */, this->ui_n );
  545. }
  546. this->ui_n++;
  547. }
  548. // add several new features and insert their elements in the already ordered structure
  549. template <typename T>
  550. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  551. {
  552. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  553. // check this please :)
  554. //TODO assure that every feature has the same dimension
  555. if (this->ui_n == 0)
  556. {
  557. this->set_d(_features.size());
  558. }
  559. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  560. for (uint dim = 0; dim < this->ui_d; dim++)
  561. {
  562. this->features[dim].insert( _features[dim] );
  563. }
  564. //update the number of our features
  565. this->ui_n += _features[0].size();
  566. }
  567. template <typename T>
  568. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  569. std::vector<std::vector<uint> > & _permutations,
  570. const uint & _dim
  571. )
  572. {
  573. this->features.clear();
  574. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  575. if ( this->ui_d > 0 )
  576. this->ui_n = _features[0].size();
  577. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  578. for (uint dim = 0; dim < this->ui_d; dim++)
  579. {
  580. this->features[dim].insert( _features[dim] );
  581. }
  582. this->getPermutations( _permutations );
  583. }
  584. template <typename T>
  585. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  586. std::vector<std::map<uint,uint> > & _permutations,
  587. const uint & _dim
  588. )
  589. {
  590. this->features.clear();
  591. this->set_d( std::max ( _dim, _features.size() ) );
  592. if ( this->ui_d > 0 )
  593. this->ui_n = _features[0].size();
  594. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  595. for (uint dim = 0; dim < this->ui_d; dim++)
  596. {
  597. this->features[dim].insert( _features[dim] );
  598. }
  599. this->getPermutations( _permutations );
  600. }
  601. template <typename T>
  602. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  603. const uint & _dim
  604. )
  605. {
  606. this->features.clear();
  607. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  608. if ( this->ui_d > 0 )
  609. this->ui_n = _features[0].size();
  610. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  611. for (uint dim = 0; dim < this->ui_d; dim++)
  612. {
  613. if ( this->b_debug )
  614. {
  615. std::cerr << " dim: " << dim << " add " << _features[dim].size() << " examples " << std::endl;
  616. }
  617. this->features[dim].insert( _features[dim] );
  618. }
  619. }
  620. template <typename T>
  621. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features,
  622. const bool _dimensionsOverExamples,
  623. const uint & _dim
  624. )
  625. {
  626. this->features.clear();
  627. if (_features.size() == 0)
  628. {
  629. std::cerr << "set_features without features" << std::endl;
  630. }
  631. // resize our data structure
  632. //therefore, let's first of all figure out if the user specified a dimension or not.
  633. uint dimTmp ( _dim );
  634. if (_dimensionsOverExamples) //do we have dim x examples ?
  635. {
  636. if ( _features.size() > dimTmp )
  637. {
  638. dimTmp = _features.size();
  639. }
  640. }
  641. else //we have examples x dimes (as usually done)
  642. {
  643. if (_features.size() > 0) //and have at least one example
  644. {
  645. try{
  646. if ( _features[0]->getDim() > dimTmp )
  647. {
  648. dimTmp = _features[0]->getDim();
  649. }
  650. }
  651. catch(...)
  652. {
  653. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  654. }
  655. }
  656. }
  657. this->set_d( dimTmp );
  658. // set number of examples n
  659. if ( this->ui_d > 0 )
  660. {
  661. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  662. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  663. else //we have examples x dimes (as usually done)
  664. this->ui_n = _features.size();
  665. }
  666. // insert all values
  667. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  668. {
  669. for (uint dim = 0; dim < this->ui_d; dim++)
  670. {
  671. this->features[dim].insert( _features[dim] );
  672. }
  673. }
  674. else //we have examples x dimes (as usually done)
  675. {
  676. if ( this->b_debug )
  677. std::cerr << "FeatureMatrixT<T>::set_features " << this->ui_n << " new examples" << std::endl;
  678. //loop over every example to add its content
  679. for (uint nr = 0; nr < this->ui_n; nr++)
  680. {
  681. if ( this->b_debug )
  682. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  683. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  684. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  685. {
  686. if ( this->b_debug )
  687. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  688. //elemIt->first: dim, elemIt->second: value
  689. this->features[elemIt->first].insert( (T) elemIt->second, true /* _specifyFeatureNumber */, nr);
  690. }//for non-zero-values of the feature
  691. if ( this->b_debug )
  692. std::cerr << std::endl;
  693. }//for every new feature
  694. if ( this->b_debug )
  695. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  696. }//if dimOverEx
  697. //set n for the internal data structure SortedVectorSparse
  698. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  699. (*it).setN( this->ui_n );
  700. }
  701. template <typename T>
  702. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<uint> > & _permutations) const
  703. {
  704. for (uint dim = 0; dim < this->ui_d; dim++)
  705. {
  706. std::vector<uint> perm ( (this->features[dim]).getPermutation() );
  707. _permutations.push_back(perm);
  708. }
  709. }
  710. template <typename T>
  711. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<uint,uint> > & _permutations) const
  712. {
  713. for (uint dim = 0; dim < this->ui_d; dim++)
  714. {
  715. std::map<uint,uint> perm ( (this->features[dim]).getPermutationNonZeroReal() );
  716. _permutations.push_back(perm);
  717. }
  718. }
  719. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  720. template <typename T>
  721. void FeatureMatrixT<T>::print(std::ostream & _os) const
  722. {
  723. if (_os.good())
  724. {
  725. for (uint dim = 0; dim < this->ui_d; dim++)
  726. {
  727. this->features[dim].print(_os);
  728. }
  729. }
  730. }
  731. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  732. template <typename T>
  733. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & _matrix,
  734. bool _transpose
  735. ) const
  736. {
  737. if ( _transpose )
  738. _matrix.resize(this->get_n(),this->get_d());
  739. else
  740. _matrix.resize(this->get_d(),this->get_n());
  741. _matrix.set((T)0.0);
  742. uint dimIdx(0);
  743. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  744. {
  745. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  746. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  747. {
  748. uint featIndex = ((*inIt).second)->second.first;
  749. if ( _transpose )
  750. _matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  751. else
  752. _matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  753. }
  754. }
  755. }
  756. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  757. template <typename T>
  758. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & _matrix,
  759. bool _transpose
  760. ) const
  761. {
  762. if ( _transpose )
  763. _matrix.resize(this->get_n());
  764. else
  765. _matrix.resize(this->get_d());
  766. // resizing the matrix
  767. for ( uint i = 0 ; i < _matrix.size(); i++ )
  768. if ( _transpose )
  769. _matrix[i] = std::vector<T>(this->get_d(), 0.0);
  770. else
  771. _matrix[i] = std::vector<T>(this->get_n(), 0.0);
  772. uint dimIdx(0);
  773. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  774. {
  775. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  776. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  777. {
  778. uint featIndex = ((*inIt).second)->second.first;
  779. if ( _transpose )
  780. _matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  781. else
  782. _matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  783. }
  784. }
  785. }
  786. // Swaps to specified elements, performing a validity check
  787. template <typename T>
  788. void FeatureMatrixT<T>::swap(const uint & _row1,
  789. const uint & _col1,
  790. const uint & _row2,
  791. const uint & _col2
  792. )
  793. {
  794. if ( (_row1 < 0) || (_col1 < 0) || (_row1 > this->ui_d) || (_col1 > this->ui_n) ||
  795. (_row2 < 0) || (_col2 < 0) || (_row2 > this->ui_d) || (_col2 > this->ui_n)
  796. )
  797. {
  798. return;
  799. }
  800. else
  801. {
  802. //swap
  803. T tmp = (*this)(_row1, _col1);
  804. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  805. (*this).set(_row2, _col2, tmp);
  806. }
  807. }
  808. // Swaps to specified elements, without performing a validity check
  809. template <typename T>
  810. void FeatureMatrixT<T>::swapUnsafe(const uint & _row1,
  811. const uint & _col1,
  812. const uint & _row2,
  813. const uint & _col2
  814. )
  815. {
  816. //swap
  817. T tmp = (*this)(_row1, _col1);
  818. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  819. (*this).set(_row2, _col2, tmp);
  820. }
  821. template <typename T>
  822. void FeatureMatrixT<T>::hikDiagonalElements( Vector & _diagonalElements ) const
  823. {
  824. uint dimIdx = 0;
  825. // the function calculates the diagonal elements of a HIK kernel matrix
  826. _diagonalElements.resize(this->ui_n);
  827. _diagonalElements.set(0.0);
  828. // loop through all dimensions
  829. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  830. {
  831. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  832. // loop through all features
  833. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  834. {
  835. uint index = inIt->first;
  836. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  837. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  838. _diagonalElements[index] += de.second;
  839. }
  840. }
  841. }
  842. template <typename T>
  843. double FeatureMatrixT<T>::hikTrace() const
  844. {
  845. Vector diagonalElements;
  846. this->hikDiagonalElements( diagonalElements );
  847. return diagonalElements.Sum();
  848. }
  849. template <typename T>
  850. uint FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const uint & dim) const
  851. {
  852. //FIXME we could return a boolean indicating success and return the actual number via call-by-reference
  853. if ( (dim < 0) || (dim > this->ui_d))
  854. return 0;
  855. return this->features[dim].getNonZeros();
  856. }
  857. template <typename T>
  858. uint FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const uint & dim) const
  859. {
  860. if ( (dim < 0) || (dim > this->ui_d))
  861. return 0;
  862. return this->ui_n - this-> features[dim].getNonZeros();
  863. }
  864. template <typename T>
  865. void FeatureMatrixT<T>::restore ( std::istream & _is,
  866. int _format
  867. )
  868. {
  869. bool b_restoreVerbose ( false );
  870. if ( _is.good() )
  871. {
  872. if ( b_restoreVerbose )
  873. std::cerr << " restore FeatureMatrixT" << std::endl;
  874. std::string tmp;
  875. _is >> tmp; //class name
  876. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  877. {
  878. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  879. throw;
  880. }
  881. _is.precision ( std::numeric_limits<double>::digits10 + 1);
  882. bool b_endOfBlock ( false ) ;
  883. while ( !b_endOfBlock )
  884. {
  885. _is >> tmp; // start of block
  886. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  887. {
  888. b_endOfBlock = true;
  889. continue;
  890. }
  891. tmp = this->removeStartTag ( tmp );
  892. if ( b_restoreVerbose )
  893. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  894. if ( tmp.compare("ui_n") == 0 )
  895. {
  896. _is >> this->ui_n;
  897. _is >> tmp; // end of block
  898. tmp = this->removeEndTag ( tmp );
  899. }
  900. else if ( tmp.compare("ui_d") == 0 )
  901. {
  902. _is >> this->ui_d;
  903. _is >> tmp; // end of block
  904. tmp = this->removeEndTag ( tmp );
  905. }
  906. else if ( tmp.compare("features") == 0 )
  907. {
  908. //NOTE assumes d to be read first!
  909. this->features.resize( this->ui_d);
  910. //now read features for every dimension
  911. for (uint dim = 0; dim < this->ui_d; dim++)
  912. {
  913. NICE::SortedVectorSparse<T> svs;
  914. this->features[dim] = svs;
  915. this->features[dim].restore(_is, _format);
  916. }
  917. _is >> tmp; // end of block
  918. tmp = this->removeEndTag ( tmp );
  919. }
  920. else
  921. {
  922. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  923. throw;
  924. }
  925. }
  926. }
  927. else
  928. {
  929. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  930. throw;
  931. }
  932. }
  933. template <typename T>
  934. void FeatureMatrixT<T>::store ( std::ostream & _os,
  935. int _format
  936. ) const
  937. {
  938. if (_os.good())
  939. {
  940. // show starting point
  941. _os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  942. _os.precision (std::numeric_limits<double>::digits10 + 1);
  943. _os << this->createStartTag( "ui_n" ) << std::endl;
  944. _os << this->ui_n << std::endl;
  945. _os << this->createEndTag( "ui_n" ) << std::endl;
  946. _os << this->createStartTag( "ui_d" ) << std::endl;
  947. _os << this->ui_d << std::endl;
  948. _os << this->createEndTag( "ui_d" ) << std::endl;
  949. //now write features for every dimension
  950. _os << this->createStartTag( "features" ) << std::endl;
  951. for (uint dim = 0; dim < this->ui_d; dim++)
  952. {
  953. this->features[dim].store(_os,_format);
  954. }
  955. _os << this->createEndTag( "features" ) << std::endl;
  956. // done
  957. _os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  958. }
  959. else
  960. {
  961. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  962. }
  963. }
  964. template <typename T>
  965. void FeatureMatrixT<T>::clear ()
  966. {}
  967. } // namespace
  968. // #endif