FeatureMatrixT.tcc 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. this->ui_n = 0;
  20. this->ui_d = 0;
  21. this->features.clear();
  22. this->b_verbose = false;
  23. this->b_debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features,
  28. const uint & _dim
  29. )
  30. {
  31. this->ui_n = 0;
  32. if (_dim < 0)
  33. this->ui_d = (*_features.begin()).size();
  34. else
  35. this->ui_d = _dim;
  36. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  37. {
  38. add_feature(*it);
  39. }
  40. this->b_verbose = false;
  41. this->b_debug = false;
  42. }
  43. //Constructor reading data from a vector of sparse vector pointers
  44. template <typename T>
  45. FeatureMatrixT<T>::
  46. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & _X,
  47. const bool _dimensionsOverExamples,
  48. const uint & _dim
  49. )
  50. {
  51. this->features.clear();
  52. // resize our data structure
  53. set_d( _dim );
  54. // set number of examples n
  55. if ( this->ui_d > 0 )
  56. {
  57. if (_dimensionsOverExamples) //do we have dim x examples ?
  58. this->ui_n = _X[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  59. else //we have examples x dimes (as usually done)
  60. this->ui_n = _X.size();
  61. }
  62. // insert all values
  63. if (_dimensionsOverExamples) //do we have dim x examples ?
  64. {
  65. for (uint dim = 0; dim < this->ui_d; dim++)
  66. {
  67. this->features[dim].insert( _X[dim] );
  68. }
  69. }
  70. else //we have examples x dimes (as usually done)
  71. {
  72. //loop over every example to add its content
  73. for (uint nr = 0; nr < this->ui_n; nr++)
  74. {
  75. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  76. for (NICE::SparseVector::const_iterator elemIt = _X[nr]->begin(); elemIt != _X[nr]->end(); elemIt++)
  77. {
  78. //elemIt->first: dim, elemIt->second: value
  79. this->features[elemIt->first].insert( (T) elemIt->second, nr);
  80. }//for non-zero-values of the feature
  81. }//for every new feature
  82. }//if dimOverEx
  83. //set n for the internal data structure SortedVectorSparse
  84. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  85. (*it).setN( this->ui_n );
  86. }
  87. #ifdef NICE_USELIB_MATIO
  88. //Constructor reading data from matlab-files
  89. template <typename T>
  90. FeatureMatrixT<T>::
  91. FeatureMatrixT(const sparse_t & _features,
  92. const uint & _dim
  93. )
  94. {
  95. if (_dim < 0)
  96. set_d( _features.njc -1 );
  97. else
  98. set_d( _dim );
  99. uint nMax(0);
  100. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  101. {
  102. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  103. {
  104. this->features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  105. if ((_features.ir[ j])>nMax)
  106. nMax = _features.ir[ j];
  107. }
  108. }
  109. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  110. {
  111. (*it).setN(nMax+1);
  112. }
  113. this->ui_n = nMax+1;
  114. this->b_verbose = false;
  115. }
  116. //Constructor reading data from matlab-files
  117. template <typename T>
  118. FeatureMatrixT<T>::
  119. FeatureMatrixT(const sparse_t & _features,
  120. const std::map<uint, uint> & _examples,
  121. const uint & _dim)
  122. {
  123. if (_dim < 0)
  124. set_d(_features.njc -1);
  125. else
  126. set_d(_dim);
  127. uint nMax(0);
  128. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  129. {
  130. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  131. {
  132. uint example_index = _features.ir[ j];
  133. std::map<uint, uint>::const_iterator it = examples.find(example_index);
  134. if ( it != examples.end() ) {
  135. this->features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  136. if (it->second > nMax)
  137. nMax = it->second;
  138. }
  139. }
  140. }
  141. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  142. (*it).setN(nMax+1);
  143. this->ui_n = nMax+1;
  144. this->b_verbose = false;
  145. }
  146. #endif
  147. // Default destructor
  148. template <typename T>
  149. FeatureMatrixT<T>::~FeatureMatrixT()
  150. {
  151. }
  152. //------------------------------------------------------
  153. // several get and set methods including access operators
  154. //------------------------------------------------------
  155. // Get number of examples
  156. template <typename T>
  157. uint FeatureMatrixT<T>::get_n() const
  158. {
  159. return this->ui_n;
  160. }
  161. // Get number of dimensions
  162. template <typename T>
  163. uint FeatureMatrixT<T>::get_d() const
  164. {
  165. return this->ui_d;
  166. }
  167. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  168. template <typename T>
  169. void FeatureMatrixT<T>::set_d(const uint & _d)
  170. {
  171. this->ui_d = _d;
  172. this->features.resize( this->ui_d );
  173. }
  174. template <typename T>
  175. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  176. {
  177. this->b_verbose = _verbose;
  178. }
  179. template <typename T>
  180. bool FeatureMatrixT<T>::getVerbose( ) const
  181. {
  182. return this->b_verbose;
  183. }
  184. template <typename T>
  185. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  186. {
  187. this->b_debug = _debug;
  188. }
  189. template <typename T>
  190. bool FeatureMatrixT<T>::getDebug( ) const
  191. {
  192. return this->b_debug;
  193. }
  194. // Matrix-like operator for element access, performs validity check
  195. template <typename T>
  196. inline T FeatureMatrixT<T>::operator()(const uint _row,
  197. const uint _col
  198. ) const
  199. {
  200. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  201. {
  202. fthrow(Exception, "FeatureMatrixT: out of bounds");
  203. }
  204. else
  205. return ( this->features[_row]).access(_col);
  206. }
  207. template<class T>
  208. inline bool
  209. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & _F) const
  210. {
  211. if ( ( this->get_n() != _F.get_n()) || (this->get_d() != _F.get_d()) )
  212. {
  213. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  214. }
  215. else if ((this->ui_n == 0) || (this->ui_d == 0))
  216. {
  217. return true;
  218. }
  219. for (uint i = 0; i < this->ui_d; i++)
  220. {
  221. for (uint j = 0; j < this->ui_n; j++)
  222. {
  223. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  224. if(!((*this)(i,j) == _F(i,j)))
  225. return false;
  226. }
  227. }
  228. return true;
  229. }
  230. template<class T>
  231. inline bool
  232. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & _F) const
  233. {
  234. if ( ( (*this).get_n() != _F.get_n()) ||
  235. ( (*this).get_d() != _F.get_d())
  236. )
  237. {
  238. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  239. }
  240. else if ((this->ui_n == 0) || (this->ui_d == 0))
  241. {
  242. return false;
  243. }
  244. for (uint i = 0; i < this->ui_d; i++)
  245. {
  246. for (uint j = 0; j < this->ui_n; j++)
  247. {
  248. if(!((*this)(i,j) == _F(i,j)))
  249. return true;
  250. }
  251. }
  252. return false;
  253. }
  254. template<typename T>
  255. inline FeatureMatrixT<T>&
  256. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & _F)
  257. {
  258. this->set_d(_F.get_d());
  259. this->ui_n = _F.get_n();
  260. for (uint i = 0; i < (*this).get_d(); i++)
  261. {
  262. // use the operator= of SortedVectorSparse
  263. features[i] = _F[i];
  264. }
  265. return *this;
  266. }
  267. // Element access without validity check
  268. template <typename T>
  269. inline T FeatureMatrixT<T>::getUnsafe(const uint _row,
  270. const uint _col
  271. ) const
  272. {
  273. return (this->features[_row]).access(_col);
  274. }
  275. //! Element access of original values without validity check
  276. template <typename T>
  277. inline T FeatureMatrixT<T>::getOriginal(const uint _row,
  278. const uint _col
  279. ) const
  280. {
  281. return (this->features[_row]).accessOriginal(_col);
  282. }
  283. // Sets a specified element to the given value, performs validity check
  284. template <typename T>
  285. inline void FeatureMatrixT<T>::set (const uint _row,
  286. const uint _col,
  287. const T & _newElement,
  288. bool _setTransformedValue
  289. )
  290. {
  291. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  292. {
  293. return;
  294. }
  295. else
  296. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  297. }
  298. // Sets a specified element to the given value, without validity check
  299. template <typename T>
  300. inline void FeatureMatrixT<T>::setUnsafe (const uint _row,
  301. const uint _col,
  302. const T & _newElement,
  303. bool _setTransformedValue
  304. )
  305. {
  306. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  307. }
  308. // Acceess to all element entries of a specified dimension, including validity check
  309. template <typename T>
  310. void FeatureMatrixT<T>::getDimension(const uint & _dim,
  311. NICE::SortedVectorSparse<T> & _dimension
  312. ) const
  313. {
  314. if ( (_dim < 0) || (_dim > this->ui_d) )
  315. {
  316. return;
  317. }
  318. else
  319. _dimension = this->features[_dim];
  320. }
  321. // Acceess to all element entries of a specified dimension, without validity check
  322. template <typename T>
  323. void FeatureMatrixT<T>::getDimensionUnsafe(const uint & _dim,
  324. NICE::SortedVectorSparse<T> & _dimension
  325. ) const
  326. {
  327. _dimension = this->features[_dim];
  328. }
  329. // Finds the first element in a given dimension, which equals elem
  330. template <typename T>
  331. void FeatureMatrixT<T>::findFirstInDimension(const uint & _dim,
  332. const T & _elem,
  333. uint & _position
  334. ) const
  335. {
  336. _position = 0;
  337. if ( _dim > this->ui_d )
  338. return;
  339. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  340. eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  341. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.first );
  342. if ( _elem > this->features[_dim].getTolerance() )
  343. _position += this->features[_dim].getZeros();
  344. }
  345. // Finds the last element in a given dimension, which equals elem
  346. template <typename T>
  347. void FeatureMatrixT<T>::findLastInDimension(const uint & _dim,
  348. const T & _elem,
  349. uint & _position
  350. ) const
  351. {
  352. _position = 0;
  353. if ( _dim > this->ui_d )
  354. return;
  355. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  356. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.second );
  357. if ( _elem > this->features[_dim].getTolerance() )
  358. _position += this->features[_dim].getZeros();
  359. }
  360. // Finds the first element in a given dimension, which is larger as elem
  361. template <typename T>
  362. void FeatureMatrixT<T>::findFirstLargerInDimension(const uint & _dim,
  363. const T & _elem,
  364. uint & _position
  365. ) const
  366. {
  367. _position = 0;
  368. if ( _dim > this->ui_d )
  369. return;
  370. //no non-zero elements?
  371. if (this->features[_dim].getNonZeros() <= 0)
  372. {
  373. // if element is greater than zero, than is should be added at the last position
  374. if (_elem > this->features[_dim].getTolerance() )
  375. _position = this->ui_n;
  376. //if not, position is 0
  377. return;
  378. }
  379. if (this->features[_dim].getNonZeros() == 1)
  380. {
  381. // if element is greater than the only nonzero element, then it is larger than everything else
  382. if (this->features[_dim].nonzeroElements().begin()->first <= _elem)
  383. _position = this->ui_n;
  384. //if not, but the element is still greater than zero, than
  385. else if (_elem > this->features[_dim].getTolerance() )
  386. _position = this->ui_n -1;
  387. return;
  388. }
  389. // standard case - not everything is zero and not only a single element is zero
  390. // find pointer to last non-zero element
  391. // FIXME no idea why this should be necessary...
  392. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().end(); //this is needed !!!
  393. // find pointer to first element largern than the given value
  394. it = this->features[_dim].nonzeroElements().upper_bound ( _elem ); //if all values are smaller, this does not do anything at all
  395. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  396. if ( _elem > this->features[_dim].getTolerance() )
  397. {
  398. //position += features[dim].getZeros();
  399. _position += this->ui_n - this->features[_dim].getNonZeros();
  400. }
  401. }
  402. // Finds the last element in a given dimension, which is smaller as elem
  403. template <typename T>
  404. void FeatureMatrixT<T>::findLastSmallerInDimension(const uint & _dim,
  405. const T & _elem,
  406. uint & _position
  407. ) const
  408. {
  409. _position = 0;
  410. if ( (_dim < 0) || (_dim > this->ui_d))
  411. return;
  412. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().lower_bound ( _elem );
  413. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  414. if ( it->first > this->features[_dim].getTolerance() )
  415. _position += this->features[_dim].getZeros();
  416. }
  417. //------------------------------------------------------
  418. // high level methods
  419. //------------------------------------------------------
  420. template <typename T>
  421. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *_pf )
  422. {
  423. if (_pf != NULL)
  424. {
  425. // REMARK: might be inefficient due to virtual calls
  426. if ( !_pf->isOrderPreserving() )
  427. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  428. uint d = this->get_d();
  429. for (uint dim = 0; dim < d; dim++)
  430. {
  431. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  432. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  433. {
  434. SortedVectorSparse<double>::dataelement & de = i->second;
  435. //TODO check, wether the element is "sparse" afterwards
  436. de.second = _pf->f( dim, i->first );
  437. }
  438. }
  439. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  440. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  441. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  442. }
  443. else
  444. {
  445. //no pf given -> nothing to do
  446. }
  447. }
  448. //Computes the ratio of sparsity across the matrix
  449. template <typename T>
  450. double FeatureMatrixT<T>:: computeSparsityRatio() const
  451. {
  452. double ratio(0.0);
  453. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++)
  454. {
  455. ratio += (*it).getZeros() / (double) (*it).getN();
  456. }
  457. if (this->features.size() != 0)
  458. ratio /= double(this->features.size());
  459. return ratio;
  460. }
  461. // add a new feature and insert its elements at the end of each dimension vector
  462. template <typename T>
  463. void FeatureMatrixT<T>::add_feature( const std::vector<T> & _feature,
  464. const NICE::ParameterizedFunction *_pf
  465. )
  466. {
  467. if (this->ui_n == 0)
  468. {
  469. this->set_d( _feature.size() );
  470. }
  471. if ( _feature.size() != this->ui_d)
  472. {
  473. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  474. return;
  475. }
  476. for (uint dimension = 0; dimension < this->features.size(); dimension++)
  477. {
  478. if (_pf != NULL)
  479. this->features[dimension].insert( _feature[dimension], _pf->f( dimension, _feature[dimension]) );
  480. else
  481. this->features[dimension].insert( _feature[dimension] );
  482. }
  483. this->ui_n++;
  484. }
  485. // add a new feature and insert its elements at the end of each dimension vector
  486. template <typename T>
  487. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & _feature,
  488. const ParameterizedFunction *_pf
  489. )
  490. {
  491. if (this->ui_n == 0)
  492. {
  493. this->set_d( _feature.size() );
  494. }
  495. if ( _feature.getDim() > this->ui_d)
  496. {
  497. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  498. return;
  499. }
  500. for (NICE::SparseVector::const_iterator it = _feature.begin(); it != _feature.end(); it++)
  501. {
  502. if (_pf != NULL)
  503. this->features[it->first].insert( (T) it->second, _pf->f( it->first, (T) it->second), true /* _specifyFeatureNumber */, this->ui_n );
  504. else
  505. this->features[it->first].insert( (T) it->second, true /* _specifyFeatureNumber */, this->ui_n );
  506. }
  507. this->ui_n++;
  508. }
  509. // add several new features and insert their elements in the already ordered structure
  510. template <typename T>
  511. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  512. {
  513. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  514. // check this please :)
  515. //TODO assure that every feature has the same dimension
  516. if (this->ui_n == 0)
  517. {
  518. this->set_d(_features.size());
  519. }
  520. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  521. for (uint dim = 0; dim < this->ui_d; dim++)
  522. {
  523. this->features[dim].insert( _features[dim] );
  524. }
  525. //update the number of our features
  526. this->ui_n += _features[0].size();
  527. }
  528. template <typename T>
  529. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  530. std::vector<std::vector<uint> > & _permutations,
  531. const uint & _dim
  532. )
  533. {
  534. this->features.clear();
  535. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  536. if ( this->ui_d > 0 )
  537. this->ui_n = _features[0].size();
  538. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  539. for (uint dim = 0; dim < this->ui_d; dim++)
  540. {
  541. this->features[dim].insert( _features[dim] );
  542. }
  543. this->getPermutations( _permutations );
  544. }
  545. template <typename T>
  546. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  547. std::vector<std::map<uint,uint> > & _permutations,
  548. const uint & _dim
  549. )
  550. {
  551. this->features.clear();
  552. this->set_d( std::max ( _dim, _features.size() ) );
  553. if ( this->ui_d > 0 )
  554. this->ui_n = _features[0].size();
  555. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  556. for (uint dim = 0; dim < this->ui_d; dim++)
  557. {
  558. this->features[dim].insert( _features[dim] );
  559. }
  560. this->getPermutations( _permutations );
  561. }
  562. template <typename T>
  563. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  564. const uint & _dim
  565. )
  566. {
  567. this->features.clear();
  568. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  569. if ( this->ui_d > 0 )
  570. this->ui_n = _features[0].size();
  571. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  572. for (uint dim = 0; dim < this->ui_d; dim++)
  573. {
  574. if ( this->b_debug )
  575. {
  576. std::cerr << " dim: " << dim << " add " << _features[dim].size() << " examples " << std::endl;
  577. }
  578. this->features[dim].insert( _features[dim] );
  579. }
  580. }
  581. template <typename T>
  582. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features,
  583. const bool _dimensionsOverExamples,
  584. const uint & _dim
  585. )
  586. {
  587. this->features.clear();
  588. if (_features.size() == 0)
  589. {
  590. std::cerr << "set_features without features" << std::endl;
  591. }
  592. // resize our data structure
  593. //therefore, let's first of all figure out if the user specified a dimension or not.
  594. uint dimTmp ( _dim );
  595. if (_dimensionsOverExamples) //do we have dim x examples ?
  596. {
  597. if ( _features.size() > dimTmp )
  598. {
  599. dimTmp = _features.size();
  600. }
  601. }
  602. else //we have examples x dimes (as usually done)
  603. {
  604. if (_features.size() > 0) //and have at least one example
  605. {
  606. try{
  607. if ( _features[0]->getDim() > dimTmp )
  608. {
  609. dimTmp = _features[0]->getDim();
  610. }
  611. }
  612. catch(...)
  613. {
  614. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  615. }
  616. }
  617. }
  618. this->set_d( dimTmp );
  619. // set number of examples n
  620. if ( this->ui_d > 0 )
  621. {
  622. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  623. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  624. else //we have examples x dimes (as usually done)
  625. this->ui_n = _features.size();
  626. }
  627. // insert all values
  628. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  629. {
  630. for (uint dim = 0; dim < this->ui_d; dim++)
  631. {
  632. this->features[dim].insert( _features[dim] );
  633. }
  634. }
  635. else //we have examples x dimes (as usually done)
  636. {
  637. if ( this->b_debug )
  638. std::cerr << "FeatureMatrixT<T>::set_features " << this->ui_n << " new examples" << std::endl;
  639. //loop over every example to add its content
  640. for (uint nr = 0; nr < this->ui_n; nr++)
  641. {
  642. if ( this->b_debug )
  643. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  644. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  645. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  646. {
  647. if ( this->b_debug )
  648. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  649. //elemIt->first: dim, elemIt->second: value
  650. this->features[elemIt->first].insert( (T) elemIt->second, true /* _specifyFeatureNumber */, nr);
  651. }//for non-zero-values of the feature
  652. if ( this->b_debug )
  653. std::cerr << std::endl;
  654. }//for every new feature
  655. if ( this->b_debug )
  656. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  657. }//if dimOverEx
  658. //set n for the internal data structure SortedVectorSparse
  659. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  660. (*it).setN( this->ui_n );
  661. }
  662. template <typename T>
  663. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<uint> > & _permutations) const
  664. {
  665. for (uint dim = 0; dim < this->ui_d; dim++)
  666. {
  667. std::vector<uint> perm ( (this->features[dim]).getPermutation() );
  668. _permutations.push_back(perm);
  669. }
  670. }
  671. template <typename T>
  672. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<uint,uint> > & _permutations) const
  673. {
  674. for (uint dim = 0; dim < this->ui_d; dim++)
  675. {
  676. std::map<uint,uint> perm ( (this->features[dim]).getPermutationNonZeroReal() );
  677. _permutations.push_back(perm);
  678. }
  679. }
  680. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  681. template <typename T>
  682. void FeatureMatrixT<T>::print(std::ostream & _os) const
  683. {
  684. if (_os.good())
  685. {
  686. for (uint dim = 0; dim < this->ui_d; dim++)
  687. {
  688. this->features[dim].print(_os);
  689. }
  690. }
  691. }
  692. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  693. template <typename T>
  694. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & _matrix,
  695. bool _transpose
  696. ) const
  697. {
  698. if ( _transpose )
  699. _matrix.resize(this->get_n(),this->get_d());
  700. else
  701. _matrix.resize(this->get_d(),this->get_n());
  702. _matrix.set((T)0.0);
  703. uint dimIdx(0);
  704. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  705. {
  706. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  707. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  708. {
  709. uint featIndex = ((*inIt).second)->second.first;
  710. if ( _transpose )
  711. _matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  712. else
  713. _matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  714. }
  715. }
  716. }
  717. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  718. template <typename T>
  719. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & _matrix,
  720. bool _transpose
  721. ) const
  722. {
  723. if ( _transpose )
  724. _matrix.resize(this->get_n());
  725. else
  726. _matrix.resize(this->get_d());
  727. // resizing the matrix
  728. for ( uint i = 0 ; i < _matrix.size(); i++ )
  729. if ( _transpose )
  730. _matrix[i] = std::vector<T>(this->get_d(), 0.0);
  731. else
  732. _matrix[i] = std::vector<T>(this->get_n(), 0.0);
  733. uint dimIdx(0);
  734. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  735. {
  736. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  737. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  738. {
  739. uint featIndex = ((*inIt).second)->second.first;
  740. if ( _transpose )
  741. _matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  742. else
  743. _matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  744. }
  745. }
  746. }
  747. // Swaps to specified elements, performing a validity check
  748. template <typename T>
  749. void FeatureMatrixT<T>::swap(const uint & _row1,
  750. const uint & _col1,
  751. const uint & _row2,
  752. const uint & _col2
  753. )
  754. {
  755. if ( (_row1 < 0) || (_col1 < 0) || (_row1 > this->ui_d) || (_col1 > this->ui_n) ||
  756. (_row2 < 0) || (_col2 < 0) || (_row2 > this->ui_d) || (_col2 > this->ui_n)
  757. )
  758. {
  759. return;
  760. }
  761. else
  762. {
  763. //swap
  764. T tmp = (*this)(_row1, _col1);
  765. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  766. (*this).set(_row2, _col2, tmp);
  767. }
  768. }
  769. // Swaps to specified elements, without performing a validity check
  770. template <typename T>
  771. void FeatureMatrixT<T>::swapUnsafe(const uint & _row1,
  772. const uint & _col1,
  773. const uint & _row2,
  774. const uint & _col2
  775. )
  776. {
  777. //swap
  778. T tmp = (*this)(_row1, _col1);
  779. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  780. (*this).set(_row2, _col2, tmp);
  781. }
  782. template <typename T>
  783. void FeatureMatrixT<T>::hikDiagonalElements( Vector & _diagonalElements ) const
  784. {
  785. uint dimIdx = 0;
  786. // the function calculates the diagonal elements of a HIK kernel matrix
  787. _diagonalElements.resize(this->ui_n);
  788. _diagonalElements.set(0.0);
  789. // loop through all dimensions
  790. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  791. {
  792. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  793. // loop through all features
  794. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  795. {
  796. uint index = inIt->first;
  797. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  798. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  799. _diagonalElements[index] += de.second;
  800. }
  801. }
  802. }
  803. template <typename T>
  804. double FeatureMatrixT<T>::hikTrace() const
  805. {
  806. Vector diagonalElements;
  807. this->hikDiagonalElements( diagonalElements );
  808. return diagonalElements.Sum();
  809. }
  810. template <typename T>
  811. uint FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const uint & dim) const
  812. {
  813. //FIXME we could return a boolean indicating success and return the actual number via call-by-reference
  814. if ( (dim < 0) || (dim > this->ui_d))
  815. return 0;
  816. return this->features[dim].getNonZeros();
  817. }
  818. template <typename T>
  819. uint FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const uint & dim) const
  820. {
  821. if ( (dim < 0) || (dim > this->ui_d))
  822. return 0;
  823. return this->ui_n - this-> features[dim].getNonZeros();
  824. }
  825. template <typename T>
  826. void FeatureMatrixT<T>::restore ( std::istream & _is,
  827. int _format
  828. )
  829. {
  830. bool b_restoreVerbose ( false );
  831. if ( _is.good() )
  832. {
  833. if ( b_restoreVerbose )
  834. std::cerr << " restore FeatureMatrixT" << std::endl;
  835. std::string tmp;
  836. _is >> tmp; //class name
  837. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  838. {
  839. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  840. throw;
  841. }
  842. _is.precision ( std::numeric_limits<double>::digits10 + 1);
  843. bool b_endOfBlock ( false ) ;
  844. while ( !b_endOfBlock )
  845. {
  846. _is >> tmp; // start of block
  847. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  848. {
  849. b_endOfBlock = true;
  850. continue;
  851. }
  852. tmp = this->removeStartTag ( tmp );
  853. if ( b_restoreVerbose )
  854. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  855. if ( tmp.compare("ui_n") == 0 )
  856. {
  857. _is >> this->ui_n;
  858. _is >> tmp; // end of block
  859. tmp = this->removeEndTag ( tmp );
  860. }
  861. else if ( tmp.compare("ui_d") == 0 )
  862. {
  863. _is >> this->ui_d;
  864. _is >> tmp; // end of block
  865. tmp = this->removeEndTag ( tmp );
  866. }
  867. else if ( tmp.compare("features") == 0 )
  868. {
  869. //NOTE assumes d to be read first!
  870. this->features.resize( this->ui_d);
  871. //now read features for every dimension
  872. for (uint dim = 0; dim < this->ui_d; dim++)
  873. {
  874. NICE::SortedVectorSparse<T> svs;
  875. this->features[dim] = svs;
  876. this->features[dim].restore(_is, _format);
  877. }
  878. _is >> tmp; // end of block
  879. tmp = this->removeEndTag ( tmp );
  880. }
  881. else
  882. {
  883. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  884. throw;
  885. }
  886. }
  887. }
  888. else
  889. {
  890. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  891. throw;
  892. }
  893. }
  894. template <typename T>
  895. void FeatureMatrixT<T>::store ( std::ostream & _os,
  896. int _format
  897. ) const
  898. {
  899. if (_os.good())
  900. {
  901. // show starting point
  902. _os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  903. _os.precision (std::numeric_limits<double>::digits10 + 1);
  904. _os << this->createStartTag( "ui_n" ) << std::endl;
  905. _os << this->ui_n << std::endl;
  906. _os << this->createEndTag( "ui_n" ) << std::endl;
  907. _os << this->createStartTag( "ui_d" ) << std::endl;
  908. _os << this->ui_d << std::endl;
  909. _os << this->createEndTag( "ui_d" ) << std::endl;
  910. //now write features for every dimension
  911. _os << this->createStartTag( "features" ) << std::endl;
  912. for (uint dim = 0; dim < this->ui_d; dim++)
  913. {
  914. this->features[dim].store(_os,_format);
  915. }
  916. _os << this->createEndTag( "features" ) << std::endl;
  917. // done
  918. _os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  919. }
  920. else
  921. {
  922. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  923. }
  924. }
  925. template <typename T>
  926. void FeatureMatrixT<T>::clear ()
  927. {}
  928. } // namespace
  929. // #endif