FeatureMatrixT.tcc 40 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. this->ui_n = 0;
  20. this->ui_d = 0;
  21. this->features.clear();
  22. this->b_verbose = false;
  23. this->b_debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features,
  28. const uint & _dim
  29. )
  30. {
  31. this->ui_n = 0;
  32. if (_dim < 0)
  33. this->ui_d = (*_features.begin()).size();
  34. else
  35. this->ui_d = _dim;
  36. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  37. {
  38. add_feature(*it);
  39. }
  40. this->b_verbose = false;
  41. this->b_debug = false;
  42. }
  43. //Constructor reading data from a vector of sparse vector pointers
  44. template <typename T>
  45. FeatureMatrixT<T>::
  46. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & _X,
  47. const bool _dimensionsOverExamples,
  48. const uint & _dim
  49. )
  50. {
  51. this->features.clear();
  52. // resize our data structure
  53. //FIXME adapt these security checks to uint ...
  54. if (_dim >= 0) //did the user specified the number of dimensions?
  55. set_d( _dim );
  56. else //dimensions not specified by users
  57. {
  58. if (_dimensionsOverExamples) //do we have dim x examples ?
  59. {
  60. set_d( _X.size());
  61. }
  62. else //we have examples x dimes (as usually done)
  63. {
  64. if ( _X.size() > 0) //and have at least one example
  65. set_d( _X[0]->getDim() );
  66. else //no example, so set the dim to 0, since we have no idea at all
  67. {
  68. set_d(0);
  69. }
  70. }
  71. }
  72. // set number of examples n
  73. if ( this->ui_d > 0 )
  74. {
  75. if (_dimensionsOverExamples) //do we have dim x examples ?
  76. this->ui_n = _X[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  77. else //we have examples x dimes (as usually done)
  78. this->ui_n = _X.size();
  79. }
  80. // insert all values
  81. if (_dimensionsOverExamples) //do we have dim x examples ?
  82. {
  83. for (uint dim = 0; dim < this->ui_d; dim++)
  84. {
  85. this->features[dim].insert( _X[dim] );
  86. }
  87. }
  88. else //we have examples x dimes (as usually done)
  89. {
  90. //loop over every example to add its content
  91. for (uint nr = 0; nr < this->ui_n; nr++)
  92. {
  93. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  94. for (NICE::SparseVector::const_iterator elemIt = _X[nr]->begin(); elemIt != _X[nr]->end(); elemIt++)
  95. {
  96. //elemIt->first: dim, elemIt->second: value
  97. this->features[elemIt->first].insert( (T) elemIt->second, nr);
  98. }//for non-zero-values of the feature
  99. }//for every new feature
  100. }//if dimOverEx
  101. //set n for the internal data structure SortedVectorSparse
  102. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  103. (*it).setN( this->ui_n );
  104. }
  105. #ifdef NICE_USELIB_MATIO
  106. //Constructor reading data from matlab-files
  107. template <typename T>
  108. FeatureMatrixT<T>::
  109. FeatureMatrixT(const sparse_t & _features,
  110. const uint & _dim
  111. )
  112. {
  113. if (_dim < 0)
  114. set_d( _features.njc -1 );
  115. else
  116. set_d( _dim );
  117. uint nMax(0);
  118. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  119. {
  120. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  121. {
  122. this->features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  123. if ((_features.ir[ j])>nMax)
  124. nMax = _features.ir[ j];
  125. }
  126. }
  127. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  128. {
  129. (*it).setN(nMax+1);
  130. }
  131. this->ui_n = nMax+1;
  132. this->b_verbose = false;
  133. }
  134. //Constructor reading data from matlab-files
  135. template <typename T>
  136. FeatureMatrixT<T>::
  137. FeatureMatrixT(const sparse_t & _features,
  138. const std::map<uint, uint> & _examples,
  139. const uint & _dim)
  140. {
  141. if (_dim < 0)
  142. set_d(_features.njc -1);
  143. else
  144. set_d(_dim);
  145. uint nMax(0);
  146. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  147. {
  148. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  149. {
  150. uint example_index = _features.ir[ j];
  151. std::map<uint, uint>::const_iterator it = examples.find(example_index);
  152. if ( it != examples.end() ) {
  153. this->features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  154. if (it->second > nMax)
  155. nMax = it->second;
  156. }
  157. }
  158. }
  159. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  160. (*it).setN(nMax+1);
  161. this->ui_n = nMax+1;
  162. this->b_verbose = false;
  163. }
  164. #endif
  165. // Default destructor
  166. template <typename T>
  167. FeatureMatrixT<T>::~FeatureMatrixT()
  168. {
  169. }
  170. //------------------------------------------------------
  171. // several get and set methods including access operators
  172. //------------------------------------------------------
  173. // Get number of examples
  174. template <typename T>
  175. uint FeatureMatrixT<T>::get_n() const
  176. {
  177. return this->ui_n;
  178. }
  179. // Get number of dimensions
  180. template <typename T>
  181. uint FeatureMatrixT<T>::get_d() const
  182. {
  183. return this->ui_d;
  184. }
  185. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  186. template <typename T>
  187. void FeatureMatrixT<T>::set_d(const uint & _d)
  188. {
  189. this->ui_d = _d;
  190. this->features.resize( this->ui_d );
  191. }
  192. template <typename T>
  193. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  194. {
  195. this->b_verbose = _verbose;
  196. }
  197. template <typename T>
  198. bool FeatureMatrixT<T>::getVerbose( ) const
  199. {
  200. return this->b_verbose;
  201. }
  202. template <typename T>
  203. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  204. {
  205. this->b_debug = _debug;
  206. }
  207. template <typename T>
  208. bool FeatureMatrixT<T>::getDebug( ) const
  209. {
  210. return this->b_debug;
  211. }
  212. // Matrix-like operator for element access, performs validity check
  213. template <typename T>
  214. inline T FeatureMatrixT<T>::operator()(const uint _row,
  215. const uint _col
  216. ) const
  217. {
  218. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  219. {
  220. fthrow(Exception, "FeatureMatrixT: out of bounds");
  221. }
  222. else
  223. return ( this->features[_row]).access(_col);
  224. }
  225. template<class T>
  226. inline bool
  227. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & _F) const
  228. {
  229. if ( ( this->get_n() != _F.get_n()) || (this->get_d() != _F.get_d()) )
  230. {
  231. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  232. }
  233. else if ((this->ui_n == 0) || (this->ui_d == 0))
  234. {
  235. return true;
  236. }
  237. for (uint i = 0; i < this->ui_d; i++)
  238. {
  239. for (uint j = 0; j < this->ui_n; j++)
  240. {
  241. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  242. if(!((*this)(i,j) == _F(i,j)))
  243. return false;
  244. }
  245. }
  246. return true;
  247. }
  248. template<class T>
  249. inline bool
  250. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & _F) const
  251. {
  252. if ( ( (*this).get_n() != _F.get_n()) ||
  253. ( (*this).get_d() != _F.get_d())
  254. )
  255. {
  256. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  257. }
  258. else if ((this->ui_n == 0) || (this->ui_d == 0))
  259. {
  260. return false;
  261. }
  262. for (uint i = 0; i < this->ui_d; i++)
  263. {
  264. for (uint j = 0; j < this->ui_n; j++)
  265. {
  266. if(!((*this)(i,j) == _F(i,j)))
  267. return true;
  268. }
  269. }
  270. return false;
  271. }
  272. template<typename T>
  273. inline FeatureMatrixT<T>&
  274. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & _F)
  275. {
  276. this->set_d(_F.get_d());
  277. this->ui_n = _F.get_n();
  278. for (uint i = 0; i < (*this).get_d(); i++)
  279. {
  280. // use the operator= of SortedVectorSparse
  281. features[i] = _F[i];
  282. }
  283. return *this;
  284. }
  285. // Element access without validity check
  286. template <typename T>
  287. inline T FeatureMatrixT<T>::getUnsafe(const uint _row,
  288. const uint _col
  289. ) const
  290. {
  291. return (this->features[_row]).access(_col);
  292. }
  293. //! Element access of original values without validity check
  294. template <typename T>
  295. inline T FeatureMatrixT<T>::getOriginal(const uint _row,
  296. const uint _col
  297. ) const
  298. {
  299. return (this->features[_row]).accessOriginal(_col);
  300. }
  301. // Sets a specified element to the given value, performs validity check
  302. template <typename T>
  303. inline void FeatureMatrixT<T>::set (const uint _row,
  304. const uint _col,
  305. const T & _newElement,
  306. bool _setTransformedValue
  307. )
  308. {
  309. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  310. {
  311. return;
  312. }
  313. else
  314. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  315. }
  316. // Sets a specified element to the given value, without validity check
  317. template <typename T>
  318. inline void FeatureMatrixT<T>::setUnsafe (const uint _row,
  319. const uint _col,
  320. const T & _newElement,
  321. bool _setTransformedValue
  322. )
  323. {
  324. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  325. }
  326. // Acceess to all element entries of a specified dimension, including validity check
  327. template <typename T>
  328. void FeatureMatrixT<T>::getDimension(const uint & _dim,
  329. NICE::SortedVectorSparse<T> & _dimension
  330. ) const
  331. {
  332. if ( (_dim < 0) || (_dim > this->ui_d) )
  333. {
  334. return;
  335. }
  336. else
  337. _dimension = this->features[_dim];
  338. }
  339. // Acceess to all element entries of a specified dimension, without validity check
  340. template <typename T>
  341. void FeatureMatrixT<T>::getDimensionUnsafe(const uint & _dim,
  342. NICE::SortedVectorSparse<T> & _dimension
  343. ) const
  344. {
  345. _dimension = this->features[_dim];
  346. }
  347. // Finds the first element in a given dimension, which equals elem
  348. template <typename T>
  349. void FeatureMatrixT<T>::findFirstInDimension(const uint & _dim,
  350. const T & _elem,
  351. uint & _position
  352. ) const
  353. {
  354. _position = 0;
  355. if ( _dim > this->ui_d )
  356. return;
  357. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  358. eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  359. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.first );
  360. if ( _elem > this->features[_dim].getTolerance() )
  361. _position += this->features[_dim].getZeros();
  362. }
  363. // Finds the last element in a given dimension, which equals elem
  364. template <typename T>
  365. void FeatureMatrixT<T>::findLastInDimension(const uint & _dim,
  366. const T & _elem,
  367. uint & _position
  368. ) const
  369. {
  370. _position = 0;
  371. if ( _dim > this->ui_d )
  372. return;
  373. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  374. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.second );
  375. if ( _elem > this->features[_dim].getTolerance() )
  376. _position += this->features[_dim].getZeros();
  377. }
  378. // Finds the first element in a given dimension, which is larger as elem
  379. template <typename T>
  380. void FeatureMatrixT<T>::findFirstLargerInDimension(const uint & _dim,
  381. const T & _elem,
  382. uint & _position
  383. ) const
  384. {
  385. _position = 0;
  386. if ( _dim > this->ui_d )
  387. return;
  388. //no non-zero elements?
  389. if (this->features[_dim].getNonZeros() <= 0)
  390. {
  391. // if element is greater than zero, than is should be added at the last position
  392. if (_elem > this->features[_dim].getTolerance() )
  393. _position = this->ui_n;
  394. //if not, position is 0
  395. return;
  396. }
  397. if (this->features[_dim].getNonZeros() == 1)
  398. {
  399. // if element is greater than the only nonzero element, then it is larger than everything else
  400. if (this->features[_dim].nonzeroElements().begin()->first <= _elem)
  401. _position = this->ui_n;
  402. //if not, but the element is still greater than zero, than
  403. else if (_elem > this->features[_dim].getTolerance() )
  404. _position = this->ui_n -1;
  405. return;
  406. }
  407. // standard case - not everything is zero and not only a single element is zero
  408. // find pointer to last non-zero element
  409. // FIXME no idea why this should be necessary...
  410. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().end(); //this is needed !!!
  411. // find pointer to first element largern than the given value
  412. it = this->features[_dim].nonzeroElements().upper_bound ( _elem ); //if all values are smaller, this does not do anything at all
  413. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  414. if ( _elem > this->features[_dim].getTolerance() )
  415. {
  416. //position += features[dim].getZeros();
  417. _position += this->ui_n - this->features[_dim].getNonZeros();
  418. }
  419. }
  420. // Finds the last element in a given dimension, which is smaller as elem
  421. template <typename T>
  422. void FeatureMatrixT<T>::findLastSmallerInDimension(const uint & _dim,
  423. const T & _elem,
  424. uint & _position
  425. ) const
  426. {
  427. _position = 0;
  428. if ( (_dim < 0) || (_dim > this->ui_d))
  429. return;
  430. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().lower_bound ( _elem );
  431. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  432. if ( it->first > this->features[_dim].getTolerance() )
  433. _position += this->features[_dim].getZeros();
  434. }
  435. template <typename T>
  436. T FeatureMatrixT<T>::getLargestValue ( const bool & _getTransformedValue ) const
  437. {
  438. T vmax = (T) 0;
  439. T vtmp = (T) 0;
  440. std::cerr << " FeatureMatrixT<T>::getLargestValue begin of for loop " << std::endl;
  441. uint tmp ( 0 );
  442. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  443. it != this->features.end();
  444. it++, tmp++
  445. )
  446. {
  447. std::cerr << " dim: " << tmp << std::endl;
  448. vtmp = it->getLargestValueUnsafe( 1.0 /*quantile, we are interested in the largest value*/, _getTransformedValue );
  449. std:: cerr << "largest value: " << vtmp << std::endl;
  450. if ( vtmp > vmax )
  451. {
  452. vmax = vtmp;
  453. }
  454. }
  455. std::cerr << " FeatureMatrixT<T>::getLargestValue end of for loop -- largest value: " << vmax << std::endl;
  456. return vmax;
  457. }
  458. template <typename T>
  459. NICE::VectorT<T> FeatureMatrixT<T>::getLargestValuePerDimension ( const double & _quantile,
  460. const bool & _getTransformedValue
  461. ) const
  462. {
  463. NICE::VectorT<T> vmax ( this->get_d() );
  464. std::cerr << " FeatureMatrixT<T>::getLargestValuePerDimension begin of for loop " << std::endl;
  465. uint tmp ( 0 );
  466. typename NICE::VectorT<T>::iterator vmaxIt = vmax.begin();
  467. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  468. it != this->features.end();
  469. it++, vmaxIt++, tmp++
  470. )
  471. {
  472. std::cerr << " dim: " << tmp << std::endl;
  473. *vmaxIt = it->getLargestValueUnsafe( _quantile, _getTransformedValue );
  474. std:: cerr << "largest value: " << *vmaxIt << std::endl;
  475. }
  476. std::cerr << " FeatureMatrixT<T>::getLargestValuePerDimension end of for loop " << std::endl;
  477. return vmax;
  478. }
  479. //------------------------------------------------------
  480. // high level methods
  481. //------------------------------------------------------
  482. template <typename T>
  483. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *_pf )
  484. {
  485. if (_pf != NULL)
  486. {
  487. // REMARK: might be inefficient due to virtual calls
  488. if ( !_pf->isOrderPreserving() )
  489. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  490. uint d = this->get_d();
  491. for (uint dim = 0; dim < d; dim++)
  492. {
  493. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  494. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  495. {
  496. SortedVectorSparse<double>::dataelement & de = i->second;
  497. //TODO check, wether the element is "sparse" afterwards
  498. de.second = _pf->f( dim, i->first );
  499. }
  500. }
  501. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  502. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  503. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  504. }
  505. else
  506. {
  507. //no pf given -> nothing to do
  508. }
  509. }
  510. //Computes the ratio of sparsity across the matrix
  511. template <typename T>
  512. double FeatureMatrixT<T>:: computeSparsityRatio() const
  513. {
  514. double ratio(0.0);
  515. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++)
  516. {
  517. ratio += (*it).getZeros() / (double) (*it).getN();
  518. }
  519. if (this->features.size() != 0)
  520. ratio /= double(this->features.size());
  521. return ratio;
  522. }
  523. // add a new feature and insert its elements at the end of each dimension vector
  524. template <typename T>
  525. void FeatureMatrixT<T>::add_feature( const std::vector<T> & _feature,
  526. const NICE::ParameterizedFunction *_pf
  527. )
  528. {
  529. if (this->ui_n == 0)
  530. {
  531. this->set_d( _feature.size() );
  532. }
  533. if ( _feature.size() != this->ui_d)
  534. {
  535. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  536. return;
  537. }
  538. for (uint dimension = 0; dimension < this->features.size(); dimension++)
  539. {
  540. if (_pf != NULL)
  541. this->features[dimension].insert( _feature[dimension], _pf->f( dimension, _feature[dimension]) );
  542. else
  543. this->features[dimension].insert( _feature[dimension] );
  544. }
  545. this->ui_n++;
  546. }
  547. // add a new feature and insert its elements at the end of each dimension vector
  548. template <typename T>
  549. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & _feature,
  550. const ParameterizedFunction *_pf
  551. )
  552. {
  553. if (this->ui_n == 0)
  554. {
  555. this->set_d( _feature.size() );
  556. }
  557. if ( _feature.getDim() > this->ui_d)
  558. {
  559. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  560. return;
  561. }
  562. for (NICE::SparseVector::const_iterator it = _feature.begin(); it != _feature.end(); it++)
  563. {
  564. if (_pf != NULL)
  565. this->features[it->first].insert( (T) it->second, _pf->f( it->first, (T) it->second), true /* _specifyFeatureNumber */, this->ui_n );
  566. else
  567. this->features[it->first].insert( (T) it->second, true /* _specifyFeatureNumber */, this->ui_n );
  568. }
  569. this->ui_n++;
  570. }
  571. // add several new features and insert their elements in the already ordered structure
  572. template <typename T>
  573. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  574. {
  575. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  576. // check this please :)
  577. //TODO assure that every feature has the same dimension
  578. if (this->ui_n == 0)
  579. {
  580. this->set_d(_features.size());
  581. }
  582. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  583. for (uint dim = 0; dim < this->ui_d; dim++)
  584. {
  585. this->features[dim].insert( _features[dim] );
  586. }
  587. //update the number of our features
  588. this->ui_n += _features[0].size();
  589. }
  590. template <typename T>
  591. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  592. std::vector<std::vector<uint> > & _permutations,
  593. const uint & _dim
  594. )
  595. {
  596. this->features.clear();
  597. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  598. if ( this->ui_d > 0 )
  599. this->ui_n = _features[0].size();
  600. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  601. for (uint dim = 0; dim < this->ui_d; dim++)
  602. {
  603. this->features[dim].insert( _features[dim] );
  604. }
  605. this->getPermutations( _permutations );
  606. }
  607. template <typename T>
  608. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  609. std::vector<std::map<uint,uint> > & _permutations,
  610. const uint & _dim
  611. )
  612. {
  613. this->features.clear();
  614. this->set_d( std::max ( _dim, _features.size() ) );
  615. if ( this->ui_d > 0 )
  616. this->ui_n = _features[0].size();
  617. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  618. for (uint dim = 0; dim < this->ui_d; dim++)
  619. {
  620. this->features[dim].insert( _features[dim] );
  621. }
  622. this->getPermutations( _permutations );
  623. }
  624. template <typename T>
  625. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  626. const uint & _dim
  627. )
  628. {
  629. this->features.clear();
  630. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  631. if ( this->ui_d > 0 )
  632. this->ui_n = _features[0].size();
  633. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  634. for (uint dim = 0; dim < this->ui_d; dim++)
  635. {
  636. if ( this->b_debug )
  637. {
  638. std::cerr << " dim: " << dim << " add " << _features[dim].size() << " examples " << std::endl;
  639. }
  640. this->features[dim].insert( _features[dim] );
  641. }
  642. }
  643. template <typename T>
  644. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features,
  645. const bool _dimensionsOverExamples,
  646. const uint & _dim
  647. )
  648. {
  649. this->features.clear();
  650. if (_features.size() == 0)
  651. {
  652. std::cerr << "set_features without features" << std::endl;
  653. }
  654. // resize our data structure
  655. //therefore, let's first of all figure out if the user specified a dimension or not.
  656. uint dimTmp ( _dim );
  657. if (_dimensionsOverExamples) //do we have dim x examples ?
  658. {
  659. if ( _features.size() > dimTmp )
  660. {
  661. dimTmp = _features.size();
  662. }
  663. }
  664. else //we have examples x dimes (as usually done)
  665. {
  666. if (_features.size() > 0) //and have at least one example
  667. {
  668. try{
  669. if ( _features[0]->getDim() > dimTmp )
  670. {
  671. dimTmp = _features[0]->getDim();
  672. }
  673. }
  674. catch(...)
  675. {
  676. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  677. }
  678. }
  679. }
  680. this->set_d( dimTmp );
  681. // set number of examples n
  682. if ( this->ui_d > 0 )
  683. {
  684. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  685. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  686. else //we have examples x dimes (as usually done)
  687. this->ui_n = _features.size();
  688. }
  689. // insert all values
  690. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  691. {
  692. for (uint dim = 0; dim < this->ui_d; dim++)
  693. {
  694. this->features[dim].insert( _features[dim] );
  695. }
  696. }
  697. else //we have examples x dimes (as usually done)
  698. {
  699. if ( this->b_debug )
  700. std::cerr << "FeatureMatrixT<T>::set_features " << this->ui_n << " new examples" << std::endl;
  701. //loop over every example to add its content
  702. for (uint nr = 0; nr < this->ui_n; nr++)
  703. {
  704. if ( this->b_debug )
  705. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  706. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  707. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  708. {
  709. if ( this->b_debug )
  710. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  711. //elemIt->first: dim, elemIt->second: value
  712. this->features[elemIt->first].insert( (T) elemIt->second, true /* _specifyFeatureNumber */, nr);
  713. }//for non-zero-values of the feature
  714. if ( this->b_debug )
  715. std::cerr << std::endl;
  716. }//for every new feature
  717. if ( this->b_debug )
  718. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  719. }//if dimOverEx
  720. //set n for the internal data structure SortedVectorSparse
  721. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  722. (*it).setN( this->ui_n );
  723. }
  724. template <typename T>
  725. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<uint> > & _permutations) const
  726. {
  727. for (uint dim = 0; dim < this->ui_d; dim++)
  728. {
  729. std::vector<uint> perm ( (this->features[dim]).getPermutation() );
  730. _permutations.push_back(perm);
  731. }
  732. }
  733. template <typename T>
  734. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<uint,uint> > & _permutations) const
  735. {
  736. for (uint dim = 0; dim < this->ui_d; dim++)
  737. {
  738. std::map<uint,uint> perm ( (this->features[dim]).getPermutationNonZeroReal() );
  739. _permutations.push_back(perm);
  740. }
  741. }
  742. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  743. template <typename T>
  744. void FeatureMatrixT<T>::print(std::ostream & _os) const
  745. {
  746. if (_os.good())
  747. {
  748. for (uint dim = 0; dim < this->ui_d; dim++)
  749. {
  750. this->features[dim].print(_os);
  751. }
  752. }
  753. }
  754. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  755. template <typename T>
  756. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & _matrix,
  757. bool _transpose
  758. ) const
  759. {
  760. if ( _transpose )
  761. _matrix.resize(this->get_n(),this->get_d());
  762. else
  763. _matrix.resize(this->get_d(),this->get_n());
  764. _matrix.set((T)0.0);
  765. uint dimIdx(0);
  766. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  767. {
  768. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  769. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  770. {
  771. uint featIndex = ((*inIt).second)->second.first;
  772. if ( _transpose )
  773. _matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  774. else
  775. _matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  776. }
  777. }
  778. }
  779. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  780. template <typename T>
  781. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & _matrix,
  782. bool _transpose
  783. ) const
  784. {
  785. if ( _transpose )
  786. _matrix.resize(this->get_n());
  787. else
  788. _matrix.resize(this->get_d());
  789. // resizing the matrix
  790. for ( uint i = 0 ; i < _matrix.size(); i++ )
  791. if ( _transpose )
  792. _matrix[i] = std::vector<T>(this->get_d(), 0.0);
  793. else
  794. _matrix[i] = std::vector<T>(this->get_n(), 0.0);
  795. uint dimIdx(0);
  796. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  797. {
  798. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  799. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  800. {
  801. uint featIndex = ((*inIt).second)->second.first;
  802. if ( _transpose )
  803. _matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  804. else
  805. _matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  806. }
  807. }
  808. }
  809. // Swaps to specified elements, performing a validity check
  810. template <typename T>
  811. void FeatureMatrixT<T>::swap(const uint & _row1,
  812. const uint & _col1,
  813. const uint & _row2,
  814. const uint & _col2
  815. )
  816. {
  817. if ( (_row1 < 0) || (_col1 < 0) || (_row1 > this->ui_d) || (_col1 > this->ui_n) ||
  818. (_row2 < 0) || (_col2 < 0) || (_row2 > this->ui_d) || (_col2 > this->ui_n)
  819. )
  820. {
  821. return;
  822. }
  823. else
  824. {
  825. //swap
  826. T tmp = (*this)(_row1, _col1);
  827. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  828. (*this).set(_row2, _col2, tmp);
  829. }
  830. }
  831. // Swaps to specified elements, without performing a validity check
  832. template <typename T>
  833. void FeatureMatrixT<T>::swapUnsafe(const uint & _row1,
  834. const uint & _col1,
  835. const uint & _row2,
  836. const uint & _col2
  837. )
  838. {
  839. //swap
  840. T tmp = (*this)(_row1, _col1);
  841. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  842. (*this).set(_row2, _col2, tmp);
  843. }
  844. template <typename T>
  845. void FeatureMatrixT<T>::hikDiagonalElements( Vector & _diagonalElements ) const
  846. {
  847. uint dimIdx = 0;
  848. // the function calculates the diagonal elements of a HIK kernel matrix
  849. _diagonalElements.resize(this->ui_n);
  850. _diagonalElements.set(0.0);
  851. // loop through all dimensions
  852. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  853. {
  854. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  855. // loop through all features
  856. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  857. {
  858. uint index = inIt->first;
  859. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  860. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  861. _diagonalElements[index] += de.second;
  862. }
  863. }
  864. }
  865. template <typename T>
  866. double FeatureMatrixT<T>::hikTrace() const
  867. {
  868. Vector diagonalElements;
  869. this->hikDiagonalElements( diagonalElements );
  870. return diagonalElements.Sum();
  871. }
  872. template <typename T>
  873. uint FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const uint & dim) const
  874. {
  875. //FIXME we could return a boolean indicating success and return the actual number via call-by-reference
  876. if ( (dim < 0) || (dim > this->ui_d))
  877. return 0;
  878. return this->features[dim].getNonZeros();
  879. }
  880. template <typename T>
  881. uint FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const uint & dim) const
  882. {
  883. if ( (dim < 0) || (dim > this->ui_d))
  884. return 0;
  885. return this->ui_n - this-> features[dim].getNonZeros();
  886. }
  887. template <typename T>
  888. void FeatureMatrixT<T>::restore ( std::istream & _is,
  889. int _format
  890. )
  891. {
  892. bool b_restoreVerbose ( false );
  893. if ( _is.good() )
  894. {
  895. if ( b_restoreVerbose )
  896. std::cerr << " restore FeatureMatrixT" << std::endl;
  897. std::string tmp;
  898. _is >> tmp; //class name
  899. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  900. {
  901. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  902. throw;
  903. }
  904. _is.precision ( std::numeric_limits<double>::digits10 + 1);
  905. bool b_endOfBlock ( false ) ;
  906. while ( !b_endOfBlock )
  907. {
  908. _is >> tmp; // start of block
  909. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  910. {
  911. b_endOfBlock = true;
  912. continue;
  913. }
  914. tmp = this->removeStartTag ( tmp );
  915. if ( b_restoreVerbose )
  916. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  917. if ( tmp.compare("ui_n") == 0 )
  918. {
  919. _is >> this->ui_n;
  920. _is >> tmp; // end of block
  921. tmp = this->removeEndTag ( tmp );
  922. }
  923. else if ( tmp.compare("ui_d") == 0 )
  924. {
  925. _is >> this->ui_d;
  926. _is >> tmp; // end of block
  927. tmp = this->removeEndTag ( tmp );
  928. }
  929. else if ( tmp.compare("features") == 0 )
  930. {
  931. //NOTE assumes d to be read first!
  932. this->features.resize( this->ui_d);
  933. //now read features for every dimension
  934. for (uint dim = 0; dim < this->ui_d; dim++)
  935. {
  936. NICE::SortedVectorSparse<T> svs;
  937. this->features[dim] = svs;
  938. this->features[dim].restore(_is, _format);
  939. }
  940. _is >> tmp; // end of block
  941. tmp = this->removeEndTag ( tmp );
  942. }
  943. else
  944. {
  945. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  946. throw;
  947. }
  948. }
  949. }
  950. else
  951. {
  952. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  953. throw;
  954. }
  955. }
  956. template <typename T>
  957. void FeatureMatrixT<T>::store ( std::ostream & _os,
  958. int _format
  959. ) const
  960. {
  961. if (_os.good())
  962. {
  963. // show starting point
  964. _os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  965. _os.precision (std::numeric_limits<double>::digits10 + 1);
  966. _os << this->createStartTag( "ui_n" ) << std::endl;
  967. _os << this->ui_n << std::endl;
  968. _os << this->createEndTag( "ui_n" ) << std::endl;
  969. _os << this->createStartTag( "ui_d" ) << std::endl;
  970. _os << this->ui_d << std::endl;
  971. _os << this->createEndTag( "ui_d" ) << std::endl;
  972. //now write features for every dimension
  973. _os << this->createStartTag( "features" ) << std::endl;
  974. for (uint dim = 0; dim < this->ui_d; dim++)
  975. {
  976. this->features[dim].store(_os,_format);
  977. }
  978. _os << this->createEndTag( "features" ) << std::endl;
  979. // done
  980. _os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  981. }
  982. else
  983. {
  984. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  985. }
  986. }
  987. template <typename T>
  988. void FeatureMatrixT<T>::clear ()
  989. {}
  990. } // namespace
  991. // #endif