FeatureMatrixT.tcc 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. this->ui_n = 0;
  20. this->ui_d = 0;
  21. this->features.clear();
  22. this->b_verbose = false;
  23. this->b_debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features,
  28. const uint & _dim
  29. )
  30. {
  31. this->ui_n = 0;
  32. if (_dim < 0)
  33. this->ui_d = (*_features.begin()).size();
  34. else
  35. this->ui_d = _dim;
  36. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  37. {
  38. add_feature(*it);
  39. }
  40. this->b_verbose = false;
  41. this->b_debug = false;
  42. }
  43. //Constructor reading data from a vector of sparse vector pointers
  44. template <typename T>
  45. FeatureMatrixT<T>::
  46. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & _X,
  47. const bool _dimensionsOverExamples,
  48. const uint & _dim
  49. )
  50. {
  51. this->features.clear();
  52. // resize our data structure
  53. set_d( _dim );
  54. // set number of examples n
  55. if ( this->ui_d > 0 )
  56. {
  57. if (_dimensionsOverExamples) //do we have dim x examples ?
  58. this->ui_n = _X[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  59. else //we have examples x dimes (as usually done)
  60. this->ui_n = _X.size();
  61. }
  62. // insert all values
  63. if (_dimensionsOverExamples) //do we have dim x examples ?
  64. {
  65. for (uint dim = 0; dim < this->ui_d; dim++)
  66. {
  67. this->features[dim].insert( _X[dim] );
  68. }
  69. }
  70. else //we have examples x dimes (as usually done)
  71. {
  72. //loop over every example to add its content
  73. for (uint nr = 0; nr < this->ui_n; nr++)
  74. {
  75. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  76. for (NICE::SparseVector::const_iterator elemIt = _X[nr]->begin(); elemIt != _X[nr]->end(); elemIt++)
  77. {
  78. //elemIt->first: dim, elemIt->second: value
  79. this->features[elemIt->first].insert( (T) elemIt->second, nr);
  80. }//for non-zero-values of the feature
  81. }//for every new feature
  82. }//if dimOverEx
  83. //set n for the internal data structure SortedVectorSparse
  84. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  85. (*it).setN( this->ui_n );
  86. }
  87. #ifdef NICE_USELIB_MATIO
  88. //Constructor reading data from matlab-files
  89. template <typename T>
  90. FeatureMatrixT<T>::
  91. FeatureMatrixT(const sparse_t & _features,
  92. const uint & _dim
  93. )
  94. {
  95. if (_dim < 0)
  96. set_d( _features.njc -1 );
  97. else
  98. set_d( _dim );
  99. uint nMax(0);
  100. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  101. {
  102. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  103. {
  104. this->features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  105. if ((_features.ir[ j])>nMax)
  106. nMax = _features.ir[ j];
  107. }
  108. }
  109. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  110. {
  111. (*it).setN(nMax+1);
  112. }
  113. this->ui_n = nMax+1;
  114. this->b_verbose = false;
  115. }
  116. //Constructor reading data from matlab-files
  117. template <typename T>
  118. FeatureMatrixT<T>::
  119. FeatureMatrixT(const sparse_t & _features,
  120. const std::map<uint, uint> & _examples,
  121. const uint & _dim)
  122. {
  123. if (_dim < 0)
  124. set_d(_features.njc -1);
  125. else
  126. set_d(_dim);
  127. uint nMax(0);
  128. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  129. {
  130. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  131. {
  132. uint example_index = _features.ir[ j];
  133. std::map<uint, uint>::const_iterator it = examples.find(example_index);
  134. if ( it != examples.end() ) {
  135. this->features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  136. if (it->second > nMax)
  137. nMax = it->second;
  138. }
  139. }
  140. }
  141. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  142. (*it).setN(nMax+1);
  143. this->ui_n = nMax+1;
  144. this->b_verbose = false;
  145. }
  146. #endif
  147. // Default destructor
  148. template <typename T>
  149. FeatureMatrixT<T>::~FeatureMatrixT()
  150. {
  151. }
  152. //------------------------------------------------------
  153. // several get and set methods including access operators
  154. //------------------------------------------------------
  155. // Get number of examples
  156. template <typename T>
  157. uint FeatureMatrixT<T>::get_n() const
  158. {
  159. return this->ui_n;
  160. }
  161. // Get number of dimensions
  162. template <typename T>
  163. uint FeatureMatrixT<T>::get_d() const
  164. {
  165. return this->ui_d;
  166. }
  167. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  168. template <typename T>
  169. void FeatureMatrixT<T>::set_d(const uint & _d)
  170. {
  171. this->ui_d = _d;
  172. this->features.resize( this->ui_d );
  173. }
  174. template <typename T>
  175. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  176. {
  177. this->b_verbose = _verbose;
  178. }
  179. template <typename T>
  180. bool FeatureMatrixT<T>::getVerbose( ) const
  181. {
  182. return this->b_verbose;
  183. }
  184. template <typename T>
  185. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  186. {
  187. this->b_debug = _debug;
  188. }
  189. template <typename T>
  190. bool FeatureMatrixT<T>::getDebug( ) const
  191. {
  192. return this->b_debug;
  193. }
  194. // Matrix-like operator for element access, performs validity check
  195. template <typename T>
  196. inline T FeatureMatrixT<T>::operator()(const uint _row,
  197. const uint _col
  198. ) const
  199. {
  200. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  201. {
  202. fthrow(Exception, "FeatureMatrixT: out of bounds");
  203. }
  204. else
  205. return ( this->features[_row]).access(_col);
  206. }
  207. template<class T>
  208. inline bool
  209. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & _F) const
  210. {
  211. if ( ( this->get_n() != _F.get_n()) || (this->get_d() != _F.get_d()) )
  212. {
  213. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  214. }
  215. else if ((this->ui_n == 0) || (this->ui_d == 0))
  216. {
  217. return true;
  218. }
  219. for (uint i = 0; i < this->ui_d; i++)
  220. {
  221. for (uint j = 0; j < this->ui_n; j++)
  222. {
  223. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  224. if(!((*this)(i,j) == _F(i,j)))
  225. return false;
  226. }
  227. }
  228. return true;
  229. }
  230. template<class T>
  231. inline bool
  232. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & _F) const
  233. {
  234. if ( ( (*this).get_n() != _F.get_n()) ||
  235. ( (*this).get_d() != _F.get_d())
  236. )
  237. {
  238. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  239. }
  240. else if ((this->ui_n == 0) || (this->ui_d == 0))
  241. {
  242. return false;
  243. }
  244. for (uint i = 0; i < this->ui_d; i++)
  245. {
  246. for (uint j = 0; j < this->ui_n; j++)
  247. {
  248. if(!((*this)(i,j) == _F(i,j)))
  249. return true;
  250. }
  251. }
  252. return false;
  253. }
  254. template<typename T>
  255. inline FeatureMatrixT<T>&
  256. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & _F)
  257. {
  258. this->set_d(_F.get_d());
  259. this->ui_n = _F.get_n();
  260. for (uint i = 0; i < (*this).get_d(); i++)
  261. {
  262. // use the operator= of SortedVectorSparse
  263. features[i] = _F[i];
  264. }
  265. return *this;
  266. }
  267. // Element access without validity check
  268. template <typename T>
  269. inline T FeatureMatrixT<T>::getUnsafe(const uint _row,
  270. const uint _col
  271. ) const
  272. {
  273. return (this->features[_row]).access(_col);
  274. }
  275. //! Element access of original values without validity check
  276. template <typename T>
  277. inline T FeatureMatrixT<T>::getOriginal(const uint _row,
  278. const uint _col
  279. ) const
  280. {
  281. return (this->features[_row]).accessOriginal(_col);
  282. }
  283. // Sets a specified element to the given value, performs validity check
  284. template <typename T>
  285. inline void FeatureMatrixT<T>::set (const uint _row,
  286. const uint _col,
  287. const T & _newElement,
  288. bool _setTransformedValue
  289. )
  290. {
  291. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  292. {
  293. return;
  294. }
  295. else
  296. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  297. }
  298. // Sets a specified element to the given value, without validity check
  299. template <typename T>
  300. inline void FeatureMatrixT<T>::setUnsafe (const uint _row,
  301. const uint _col,
  302. const T & _newElement,
  303. bool _setTransformedValue
  304. )
  305. {
  306. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  307. }
  308. // Acceess to all element entries of a specified dimension, including validity check
  309. template <typename T>
  310. void FeatureMatrixT<T>::getDimension(const uint & _dim,
  311. NICE::SortedVectorSparse<T> & _dimension
  312. ) const
  313. {
  314. if ( (_dim < 0) || (_dim > this->ui_d) )
  315. {
  316. return;
  317. }
  318. else
  319. _dimension = this->features[_dim];
  320. }
  321. // Acceess to all element entries of a specified dimension, without validity check
  322. template <typename T>
  323. void FeatureMatrixT<T>::getDimensionUnsafe(const uint & _dim,
  324. NICE::SortedVectorSparse<T> & _dimension
  325. ) const
  326. {
  327. _dimension = this->features[_dim];
  328. }
  329. // Finds the first element in a given dimension, which equals elem
  330. template <typename T>
  331. void FeatureMatrixT<T>::findFirstInDimension(const uint & _dim,
  332. const T & _elem,
  333. uint & _position
  334. ) const
  335. {
  336. _position = 0;
  337. if ( _dim > this->ui_d )
  338. return;
  339. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  340. eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  341. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.first );
  342. if ( _elem > this->features[_dim].getTolerance() )
  343. _position += this->features[_dim].getZeros();
  344. }
  345. // Finds the last element in a given dimension, which equals elem
  346. template <typename T>
  347. void FeatureMatrixT<T>::findLastInDimension(const uint & _dim,
  348. const T & _elem,
  349. uint & _position
  350. ) const
  351. {
  352. _position = 0;
  353. if ( _dim > this->ui_d )
  354. return;
  355. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  356. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.second );
  357. if ( _elem > this->features[_dim].getTolerance() )
  358. _position += this->features[_dim].getZeros();
  359. }
  360. // Finds the first element in a given dimension, which is larger as elem
  361. template <typename T>
  362. void FeatureMatrixT<T>::findFirstLargerInDimension(const uint & _dim,
  363. const T & _elem,
  364. uint & _position
  365. ) const
  366. {
  367. _position = 0;
  368. if ( _dim > this->ui_d )
  369. return;
  370. //no non-zero elements?
  371. if (this->features[_dim].getNonZeros() <= 0)
  372. {
  373. // if element is greater than zero, than is should be added at the last position
  374. if (_elem > this->features[_dim].getTolerance() )
  375. _position = this->ui_n;
  376. //if not, position is 0
  377. return;
  378. }
  379. if (this->features[_dim].getNonZeros() == 1)
  380. {
  381. // if element is greater than the only nonzero element, then it is larger than everything else
  382. if (this->features[_dim].nonzeroElements().begin()->first <= _elem)
  383. _position = this->ui_n;
  384. //if not, but the element is still greater than zero, than
  385. else if (_elem > this->features[_dim].getTolerance() )
  386. _position = this->ui_n -1;
  387. return;
  388. }
  389. // standard case - not everything is zero and not only a single element is zero
  390. // find pointer to last non-zero element
  391. // FIXME no idea why this should be necessary...
  392. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().end(); //this is needed !!!
  393. // find pointer to first element largern than the given value
  394. it = this->features[_dim].nonzeroElements().upper_bound ( _elem ); //if all values are smaller, this does not do anything at all
  395. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  396. if ( _elem > this->features[_dim].getTolerance() )
  397. {
  398. //position += features[dim].getZeros();
  399. _position += this->ui_n - this->features[_dim].getNonZeros();
  400. }
  401. }
  402. // Finds the last element in a given dimension, which is smaller as elem
  403. template <typename T>
  404. void FeatureMatrixT<T>::findLastSmallerInDimension(const uint & _dim,
  405. const T & _elem,
  406. uint & _position
  407. ) const
  408. {
  409. _position = 0;
  410. if ( (_dim < 0) || (_dim > this->ui_d))
  411. return;
  412. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().lower_bound ( _elem );
  413. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  414. if ( it->first > this->features[_dim].getTolerance() )
  415. _position += this->features[_dim].getZeros();
  416. }
  417. template <typename T>
  418. T FeatureMatrixT<T>::getLargestValue ( const bool & _getTransformedValue ) const
  419. {
  420. T vmax = (T) 0;
  421. T vtmp = (T) 0;
  422. uint tmp ( 0 );
  423. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  424. it != this->features.end();
  425. it++, tmp++
  426. )
  427. {
  428. vtmp = it->getLargestValueUnsafe( 1.0 /*quantile, we are interested in the largest value*/, _getTransformedValue );
  429. if ( vtmp > vmax )
  430. {
  431. vmax = vtmp;
  432. }
  433. }
  434. return vmax;
  435. }
  436. template <typename T>
  437. NICE::VectorT<T> FeatureMatrixT<T>::getLargestValuePerDimension ( const double & _quantile,
  438. const bool & _getTransformedValue
  439. ) const
  440. {
  441. NICE::VectorT<T> vmax ( this->get_d() );
  442. uint tmp ( 0 );
  443. typename NICE::VectorT<T>::iterator vmaxIt = vmax.begin();
  444. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  445. it != this->features.end();
  446. it++, vmaxIt++, tmp++
  447. )
  448. {
  449. *vmaxIt = it->getLargestValueUnsafe( _quantile, _getTransformedValue );
  450. }
  451. return vmax;
  452. }
  453. //------------------------------------------------------
  454. // high level methods
  455. //------------------------------------------------------
  456. template <typename T>
  457. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *_pf )
  458. {
  459. if (_pf != NULL)
  460. {
  461. // REMARK: might be inefficient due to virtual calls
  462. if ( !_pf->isOrderPreserving() )
  463. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  464. uint d = this->get_d();
  465. for (uint dim = 0; dim < d; dim++)
  466. {
  467. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  468. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  469. {
  470. SortedVectorSparse<double>::dataelement & de = i->second;
  471. //TODO check, wether the element is "sparse" afterwards
  472. de.second = _pf->f( dim, i->first );
  473. }
  474. }
  475. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  476. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  477. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  478. }
  479. else
  480. {
  481. //no pf given -> nothing to do
  482. }
  483. }
  484. //Computes the ratio of sparsity across the matrix
  485. template <typename T>
  486. double FeatureMatrixT<T>:: computeSparsityRatio() const
  487. {
  488. double ratio(0.0);
  489. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++)
  490. {
  491. ratio += (*it).getZeros() / (double) (*it).getN();
  492. }
  493. if (this->features.size() != 0)
  494. ratio /= double(this->features.size());
  495. return ratio;
  496. }
  497. // add a new feature and insert its elements at the end of each dimension vector
  498. template <typename T>
  499. void FeatureMatrixT<T>::add_feature( const std::vector<T> & _feature,
  500. const NICE::ParameterizedFunction *_pf
  501. )
  502. {
  503. if (this->ui_n == 0)
  504. {
  505. this->set_d( _feature.size() );
  506. }
  507. if ( _feature.size() != this->ui_d)
  508. {
  509. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  510. return;
  511. }
  512. for (uint dimension = 0; dimension < this->features.size(); dimension++)
  513. {
  514. if (_pf != NULL)
  515. this->features[dimension].insert( _feature[dimension], _pf->f( dimension, _feature[dimension]) );
  516. else
  517. this->features[dimension].insert( _feature[dimension] );
  518. }
  519. this->ui_n++;
  520. }
  521. // add a new feature and insert its elements at the end of each dimension vector
  522. template <typename T>
  523. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & _feature,
  524. const ParameterizedFunction *_pf
  525. )
  526. {
  527. if (this->ui_n == 0)
  528. {
  529. this->set_d( _feature.size() );
  530. }
  531. if ( _feature.getDim() > this->ui_d)
  532. {
  533. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  534. return;
  535. }
  536. for (NICE::SparseVector::const_iterator it = _feature.begin(); it != _feature.end(); it++)
  537. {
  538. if (_pf != NULL)
  539. this->features[it->first].insert( (T) it->second, _pf->f( it->first, (T) it->second), true /* _specifyFeatureNumber */, this->ui_n );
  540. else
  541. this->features[it->first].insert( (T) it->second, true /* _specifyFeatureNumber */, this->ui_n );
  542. }
  543. this->ui_n++;
  544. }
  545. // add several new features and insert their elements in the already ordered structure
  546. template <typename T>
  547. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  548. {
  549. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  550. // check this please :)
  551. //TODO assure that every feature has the same dimension
  552. if (this->ui_n == 0)
  553. {
  554. this->set_d(_features.size());
  555. }
  556. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  557. for (uint dim = 0; dim < this->ui_d; dim++)
  558. {
  559. this->features[dim].insert( _features[dim] );
  560. }
  561. //update the number of our features
  562. this->ui_n += _features[0].size();
  563. }
  564. template <typename T>
  565. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  566. std::vector<std::vector<uint> > & _permutations,
  567. const uint & _dim
  568. )
  569. {
  570. this->features.clear();
  571. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  572. if ( this->ui_d > 0 )
  573. this->ui_n = _features[0].size();
  574. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  575. for (uint dim = 0; dim < this->ui_d; dim++)
  576. {
  577. this->features[dim].insert( _features[dim] );
  578. }
  579. this->getPermutations( _permutations );
  580. }
  581. template <typename T>
  582. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  583. std::vector<std::map<uint,uint> > & _permutations,
  584. const uint & _dim
  585. )
  586. {
  587. this->features.clear();
  588. this->set_d( std::max ( _dim, _features.size() ) );
  589. if ( this->ui_d > 0 )
  590. this->ui_n = _features[0].size();
  591. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  592. for (uint dim = 0; dim < this->ui_d; dim++)
  593. {
  594. this->features[dim].insert( _features[dim] );
  595. }
  596. this->getPermutations( _permutations );
  597. }
  598. template <typename T>
  599. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  600. const uint & _dim
  601. )
  602. {
  603. this->features.clear();
  604. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  605. if ( this->ui_d > 0 )
  606. this->ui_n = _features[0].size();
  607. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  608. for (uint dim = 0; dim < this->ui_d; dim++)
  609. {
  610. if ( this->b_debug )
  611. {
  612. std::cerr << " dim: " << dim << " add " << _features[dim].size() << " examples " << std::endl;
  613. }
  614. this->features[dim].insert( _features[dim] );
  615. }
  616. }
  617. template <typename T>
  618. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features,
  619. const bool _dimensionsOverExamples,
  620. const uint & _dim
  621. )
  622. {
  623. this->features.clear();
  624. if (_features.size() == 0)
  625. {
  626. std::cerr << "set_features without features" << std::endl;
  627. }
  628. // resize our data structure
  629. //therefore, let's first of all figure out if the user specified a dimension or not.
  630. uint dimTmp ( _dim );
  631. if (_dimensionsOverExamples) //do we have dim x examples ?
  632. {
  633. if ( _features.size() > dimTmp )
  634. {
  635. dimTmp = _features.size();
  636. }
  637. }
  638. else //we have examples x dimes (as usually done)
  639. {
  640. if (_features.size() > 0) //and have at least one example
  641. {
  642. try{
  643. if ( _features[0]->getDim() > dimTmp )
  644. {
  645. dimTmp = _features[0]->getDim();
  646. }
  647. }
  648. catch(...)
  649. {
  650. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  651. }
  652. }
  653. }
  654. this->set_d( dimTmp );
  655. // set number of examples n
  656. if ( this->ui_d > 0 )
  657. {
  658. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  659. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  660. else //we have examples x dimes (as usually done)
  661. this->ui_n = _features.size();
  662. }
  663. // insert all values
  664. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  665. {
  666. for (uint dim = 0; dim < this->ui_d; dim++)
  667. {
  668. this->features[dim].insert( _features[dim] );
  669. }
  670. }
  671. else //we have examples x dimes (as usually done)
  672. {
  673. if ( this->b_debug )
  674. std::cerr << "FeatureMatrixT<T>::set_features " << this->ui_n << " new examples" << std::endl;
  675. //loop over every example to add its content
  676. for (uint nr = 0; nr < this->ui_n; nr++)
  677. {
  678. if ( this->b_debug )
  679. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  680. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  681. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  682. {
  683. if ( this->b_debug )
  684. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  685. //elemIt->first: dim, elemIt->second: value
  686. this->features[elemIt->first].insert( (T) elemIt->second, true /* _specifyFeatureNumber */, nr);
  687. }//for non-zero-values of the feature
  688. if ( this->b_debug )
  689. std::cerr << std::endl;
  690. }//for every new feature
  691. if ( this->b_debug )
  692. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  693. }//if dimOverEx
  694. //set n for the internal data structure SortedVectorSparse
  695. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  696. (*it).setN( this->ui_n );
  697. }
  698. template <typename T>
  699. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<uint> > & _permutations) const
  700. {
  701. for (uint dim = 0; dim < this->ui_d; dim++)
  702. {
  703. std::vector<uint> perm ( (this->features[dim]).getPermutation() );
  704. _permutations.push_back(perm);
  705. }
  706. }
  707. template <typename T>
  708. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<uint,uint> > & _permutations) const
  709. {
  710. for (uint dim = 0; dim < this->ui_d; dim++)
  711. {
  712. std::map<uint,uint> perm ( (this->features[dim]).getPermutationNonZeroReal() );
  713. _permutations.push_back(perm);
  714. }
  715. }
  716. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  717. template <typename T>
  718. void FeatureMatrixT<T>::print(std::ostream & _os) const
  719. {
  720. if (_os.good())
  721. {
  722. for (uint dim = 0; dim < this->ui_d; dim++)
  723. {
  724. this->features[dim].print(_os);
  725. }
  726. }
  727. }
  728. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  729. template <typename T>
  730. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & _matrix,
  731. bool _transpose
  732. ) const
  733. {
  734. if ( _transpose )
  735. _matrix.resize(this->get_n(),this->get_d());
  736. else
  737. _matrix.resize(this->get_d(),this->get_n());
  738. _matrix.set((T)0.0);
  739. uint dimIdx(0);
  740. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  741. {
  742. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  743. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  744. {
  745. uint featIndex = ((*inIt).second)->second.first;
  746. if ( _transpose )
  747. _matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  748. else
  749. _matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  750. }
  751. }
  752. }
  753. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  754. template <typename T>
  755. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & _matrix,
  756. bool _transpose
  757. ) const
  758. {
  759. if ( _transpose )
  760. _matrix.resize(this->get_n());
  761. else
  762. _matrix.resize(this->get_d());
  763. // resizing the matrix
  764. for ( uint i = 0 ; i < _matrix.size(); i++ )
  765. if ( _transpose )
  766. _matrix[i] = std::vector<T>(this->get_d(), 0.0);
  767. else
  768. _matrix[i] = std::vector<T>(this->get_n(), 0.0);
  769. uint dimIdx(0);
  770. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  771. {
  772. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  773. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  774. {
  775. uint featIndex = ((*inIt).second)->second.first;
  776. if ( _transpose )
  777. _matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  778. else
  779. _matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  780. }
  781. }
  782. }
  783. // Swaps to specified elements, performing a validity check
  784. template <typename T>
  785. void FeatureMatrixT<T>::swap(const uint & _row1,
  786. const uint & _col1,
  787. const uint & _row2,
  788. const uint & _col2
  789. )
  790. {
  791. if ( (_row1 < 0) || (_col1 < 0) || (_row1 > this->ui_d) || (_col1 > this->ui_n) ||
  792. (_row2 < 0) || (_col2 < 0) || (_row2 > this->ui_d) || (_col2 > this->ui_n)
  793. )
  794. {
  795. return;
  796. }
  797. else
  798. {
  799. //swap
  800. T tmp = (*this)(_row1, _col1);
  801. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  802. (*this).set(_row2, _col2, tmp);
  803. }
  804. }
  805. // Swaps to specified elements, without performing a validity check
  806. template <typename T>
  807. void FeatureMatrixT<T>::swapUnsafe(const uint & _row1,
  808. const uint & _col1,
  809. const uint & _row2,
  810. const uint & _col2
  811. )
  812. {
  813. //swap
  814. T tmp = (*this)(_row1, _col1);
  815. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  816. (*this).set(_row2, _col2, tmp);
  817. }
  818. template <typename T>
  819. void FeatureMatrixT<T>::hikDiagonalElements( Vector & _diagonalElements ) const
  820. {
  821. uint dimIdx = 0;
  822. // the function calculates the diagonal elements of a HIK kernel matrix
  823. _diagonalElements.resize(this->ui_n);
  824. _diagonalElements.set(0.0);
  825. // loop through all dimensions
  826. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  827. {
  828. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  829. // loop through all features
  830. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  831. {
  832. uint index = inIt->first;
  833. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  834. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  835. _diagonalElements[index] += de.second;
  836. }
  837. }
  838. }
  839. template <typename T>
  840. double FeatureMatrixT<T>::hikTrace() const
  841. {
  842. Vector diagonalElements;
  843. this->hikDiagonalElements( diagonalElements );
  844. return diagonalElements.Sum();
  845. }
  846. template <typename T>
  847. uint FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const uint & dim) const
  848. {
  849. //FIXME we could return a boolean indicating success and return the actual number via call-by-reference
  850. if ( (dim < 0) || (dim > this->ui_d))
  851. return 0;
  852. return this->features[dim].getNonZeros();
  853. }
  854. template <typename T>
  855. uint FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const uint & dim) const
  856. {
  857. if ( (dim < 0) || (dim > this->ui_d))
  858. return 0;
  859. return this->ui_n - this-> features[dim].getNonZeros();
  860. }
  861. template <typename T>
  862. void FeatureMatrixT<T>::restore ( std::istream & _is,
  863. int _format
  864. )
  865. {
  866. bool b_restoreVerbose ( false );
  867. if ( _is.good() )
  868. {
  869. if ( b_restoreVerbose )
  870. std::cerr << " restore FeatureMatrixT" << std::endl;
  871. std::string tmp;
  872. _is >> tmp; //class name
  873. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  874. {
  875. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  876. throw;
  877. }
  878. _is.precision ( std::numeric_limits<double>::digits10 + 1);
  879. bool b_endOfBlock ( false ) ;
  880. while ( !b_endOfBlock )
  881. {
  882. _is >> tmp; // start of block
  883. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  884. {
  885. b_endOfBlock = true;
  886. continue;
  887. }
  888. tmp = this->removeStartTag ( tmp );
  889. if ( b_restoreVerbose )
  890. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  891. if ( tmp.compare("ui_n") == 0 )
  892. {
  893. _is >> this->ui_n;
  894. _is >> tmp; // end of block
  895. tmp = this->removeEndTag ( tmp );
  896. }
  897. else if ( tmp.compare("ui_d") == 0 )
  898. {
  899. _is >> this->ui_d;
  900. _is >> tmp; // end of block
  901. tmp = this->removeEndTag ( tmp );
  902. }
  903. else if ( tmp.compare("features") == 0 )
  904. {
  905. //NOTE assumes d to be read first!
  906. this->features.resize( this->ui_d);
  907. //now read features for every dimension
  908. for (uint dim = 0; dim < this->ui_d; dim++)
  909. {
  910. NICE::SortedVectorSparse<T> svs;
  911. this->features[dim] = svs;
  912. this->features[dim].restore(_is, _format);
  913. }
  914. _is >> tmp; // end of block
  915. tmp = this->removeEndTag ( tmp );
  916. }
  917. else
  918. {
  919. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  920. throw;
  921. }
  922. }
  923. }
  924. else
  925. {
  926. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  927. throw;
  928. }
  929. }
  930. template <typename T>
  931. void FeatureMatrixT<T>::store ( std::ostream & _os,
  932. int _format
  933. ) const
  934. {
  935. if (_os.good())
  936. {
  937. // show starting point
  938. _os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  939. _os.precision (std::numeric_limits<double>::digits10 + 1);
  940. _os << this->createStartTag( "ui_n" ) << std::endl;
  941. _os << this->ui_n << std::endl;
  942. _os << this->createEndTag( "ui_n" ) << std::endl;
  943. _os << this->createStartTag( "ui_d" ) << std::endl;
  944. _os << this->ui_d << std::endl;
  945. _os << this->createEndTag( "ui_d" ) << std::endl;
  946. //now write features for every dimension
  947. _os << this->createStartTag( "features" ) << std::endl;
  948. for (uint dim = 0; dim < this->ui_d; dim++)
  949. {
  950. this->features[dim].store(_os,_format);
  951. }
  952. _os << this->createEndTag( "features" ) << std::endl;
  953. // done
  954. _os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  955. }
  956. else
  957. {
  958. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  959. }
  960. }
  961. template <typename T>
  962. void FeatureMatrixT<T>::clear ()
  963. {}
  964. } // namespace
  965. // #endif