FeatureMatrixT.tcc 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. this->ui_n = 0;
  20. this->ui_d = 0;
  21. this->features.clear();
  22. this->b_verbose = false;
  23. this->b_debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features,
  28. const uint & _dim
  29. )
  30. {
  31. // resize our data structure
  32. if (_dim == 0)
  33. this->set_d( (*_features.begin()).size() );
  34. else
  35. this->set_d( _dim );
  36. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  37. {
  38. add_feature(*it);
  39. }
  40. this->b_verbose = false;
  41. this->b_debug = false;
  42. }
  43. //Constructor reading data from a vector of sparse vector pointers
  44. template <typename T>
  45. FeatureMatrixT<T>::
  46. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & _features,
  47. const bool _dimensionsOverExamples,
  48. const uint & _dim
  49. )
  50. {
  51. this->features.clear();
  52. // resize our data structure
  53. if (_dim == 0)
  54. this->set_d( (*_features.begin())->getDim() );
  55. else
  56. this->set_d( _dim );
  57. // set number of examples n
  58. if ( this->ui_d > 0 )
  59. {
  60. if (_dimensionsOverExamples) //do we have dim x examples ?
  61. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  62. else //we have examples x dimes (as usually done)
  63. this->ui_n = _features.size();
  64. }
  65. // insert all values
  66. if (_dimensionsOverExamples) //do we have dim x examples ?
  67. {
  68. for (uint dim = 0; dim < this->ui_d; dim++)
  69. {
  70. this->features[dim].insert( _features[dim] );
  71. }
  72. }
  73. else //we have examples x dimes (as usually done)
  74. {
  75. //loop over every example to add its content
  76. for (uint nr = 0; nr < this->ui_n; nr++)
  77. {
  78. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  79. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  80. {
  81. //elemIt->first: dim, elemIt->second: value
  82. this->features[elemIt->first].insert( (T) elemIt->second, nr);
  83. }//for non-zero-values of the feature
  84. }//for every new feature
  85. }//if dimOverEx
  86. //set n for the internal data structure SortedVectorSparse
  87. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  88. (*it).setN( this->ui_n );
  89. }
  90. #ifdef NICE_USELIB_MATIO
  91. //Constructor reading data from matlab-files
  92. template <typename T>
  93. FeatureMatrixT<T>::
  94. FeatureMatrixT(const sparse_t & _features,
  95. const uint & _dim
  96. )
  97. {
  98. if (_dim < 0)
  99. set_d( _features.njc -1 );
  100. else
  101. set_d( _dim );
  102. uint nMax(0);
  103. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  104. {
  105. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  106. {
  107. this->features[i].insert(((T*)_features.data)[j], true /* specify feature number */, _features.ir[ j]);
  108. if ((_features.ir[ j])>nMax)
  109. nMax = _features.ir[ j];
  110. }
  111. }
  112. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  113. {
  114. (*it).setN(nMax+1);
  115. }
  116. this->ui_n = nMax+1;
  117. this->b_verbose = false;
  118. }
  119. //Constructor reading data from matlab-files
  120. template <typename T>
  121. FeatureMatrixT<T>::
  122. FeatureMatrixT(const sparse_t & _features,
  123. const std::map<uint, uint> & _examples,
  124. const uint & _dim)
  125. {
  126. if (_dim < 0)
  127. set_d(_features.njc -1);
  128. else
  129. set_d(_dim);
  130. uint nMax(0);
  131. for ( uint i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  132. {
  133. for ( uint j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  134. {
  135. uint example_index = _features.ir[ j];
  136. std::map<uint, uint>::const_iterator it = _examples.find(example_index);
  137. if ( it != _examples.end() ) {
  138. this->features[i].insert(((T*)_features.data)[j], true /* specify feature number */, it->second /* new index */);
  139. if (it->second > nMax)
  140. nMax = it->second;
  141. }
  142. }
  143. }
  144. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  145. (*it).setN(nMax+1);
  146. this->ui_n = nMax+1;
  147. this->b_verbose = false;
  148. }
  149. #endif
  150. // Default destructor
  151. template <typename T>
  152. FeatureMatrixT<T>::~FeatureMatrixT()
  153. {
  154. }
  155. //------------------------------------------------------
  156. // several get and set methods including access operators
  157. //------------------------------------------------------
  158. // Get number of examples
  159. template <typename T>
  160. uint FeatureMatrixT<T>::get_n() const
  161. {
  162. return this->ui_n;
  163. }
  164. // Get number of dimensions
  165. template <typename T>
  166. uint FeatureMatrixT<T>::get_d() const
  167. {
  168. return this->ui_d;
  169. }
  170. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  171. template <typename T>
  172. void FeatureMatrixT<T>::set_d(const uint & _d)
  173. {
  174. this->ui_d = _d;
  175. this->features.resize( this->ui_d );
  176. }
  177. template <typename T>
  178. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  179. {
  180. this->b_verbose = _verbose;
  181. }
  182. template <typename T>
  183. bool FeatureMatrixT<T>::getVerbose( ) const
  184. {
  185. return this->b_verbose;
  186. }
  187. template <typename T>
  188. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  189. {
  190. this->b_debug = _debug;
  191. }
  192. template <typename T>
  193. bool FeatureMatrixT<T>::getDebug( ) const
  194. {
  195. return this->b_debug;
  196. }
  197. // Matrix-like operator for element access, performs validity check
  198. template <typename T>
  199. inline T FeatureMatrixT<T>::operator()(const uint _row,
  200. const uint _col
  201. ) const
  202. {
  203. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  204. {
  205. fthrow(Exception, "FeatureMatrixT: out of bounds");
  206. }
  207. else
  208. return ( this->features[_row]).access(_col);
  209. }
  210. template<class T>
  211. inline bool
  212. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & _F) const
  213. {
  214. if ( ( this->get_n() != _F.get_n()) || (this->get_d() != _F.get_d()) )
  215. {
  216. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  217. }
  218. else if ((this->ui_n == 0) || (this->ui_d == 0))
  219. {
  220. return true;
  221. }
  222. for (uint i = 0; i < this->ui_d; i++)
  223. {
  224. for (uint j = 0; j < this->ui_n; j++)
  225. {
  226. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  227. if(!((*this)(i,j) == _F(i,j)))
  228. return false;
  229. }
  230. }
  231. return true;
  232. }
  233. template<class T>
  234. inline bool
  235. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & _F) const
  236. {
  237. if ( ( (*this).get_n() != _F.get_n()) ||
  238. ( (*this).get_d() != _F.get_d())
  239. )
  240. {
  241. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  242. }
  243. else if ((this->ui_n == 0) || (this->ui_d == 0))
  244. {
  245. return false;
  246. }
  247. for (uint i = 0; i < this->ui_d; i++)
  248. {
  249. for (uint j = 0; j < this->ui_n; j++)
  250. {
  251. if(!((*this)(i,j) == _F(i,j)))
  252. return true;
  253. }
  254. }
  255. return false;
  256. }
  257. template<typename T>
  258. inline FeatureMatrixT<T>&
  259. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & _F)
  260. {
  261. this->set_d(_F.get_d());
  262. this->ui_n = _F.get_n();
  263. for (uint i = 0; i < (*this).get_d(); i++)
  264. {
  265. // use the operator= of SortedVectorSparse
  266. features[i] = _F[i];
  267. }
  268. return *this;
  269. }
  270. // Element access without validity check
  271. template <typename T>
  272. inline T FeatureMatrixT<T>::getUnsafe(const uint _row,
  273. const uint _col
  274. ) const
  275. {
  276. return (this->features[_row]).access(_col);
  277. }
  278. //! Element access of original values without validity check
  279. template <typename T>
  280. inline T FeatureMatrixT<T>::getOriginal(const uint _row,
  281. const uint _col
  282. ) const
  283. {
  284. return (this->features[_row]).accessOriginal(_col);
  285. }
  286. // Sets a specified element to the given value, performs validity check
  287. template <typename T>
  288. inline void FeatureMatrixT<T>::set (const uint _row,
  289. const uint _col,
  290. const T & _newElement,
  291. bool _setTransformedValue
  292. )
  293. {
  294. if ( (_row < 0) || (_col < 0) || (_row > this->ui_d) || (_col > this->ui_n) )
  295. {
  296. return;
  297. }
  298. else
  299. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  300. }
  301. // Sets a specified element to the given value, without validity check
  302. template <typename T>
  303. inline void FeatureMatrixT<T>::setUnsafe (const uint _row,
  304. const uint _col,
  305. const T & _newElement,
  306. bool _setTransformedValue
  307. )
  308. {
  309. (this->features[_row]).set ( _col, _newElement, _setTransformedValue );
  310. }
  311. // Acceess to all element entries of a specified dimension, including validity check
  312. template <typename T>
  313. void FeatureMatrixT<T>::getDimension(const uint & _dim,
  314. NICE::SortedVectorSparse<T> & _dimension
  315. ) const
  316. {
  317. if ( (_dim < 0) || (_dim > this->ui_d) )
  318. {
  319. return;
  320. }
  321. else
  322. _dimension = this->features[_dim];
  323. }
  324. // Acceess to all element entries of a specified dimension, without validity check
  325. template <typename T>
  326. void FeatureMatrixT<T>::getDimensionUnsafe(const uint & _dim,
  327. NICE::SortedVectorSparse<T> & _dimension
  328. ) const
  329. {
  330. _dimension = this->features[_dim];
  331. }
  332. // Finds the first element in a given dimension, which equals elem
  333. template <typename T>
  334. void FeatureMatrixT<T>::findFirstInDimension(const uint & _dim,
  335. const T & _elem,
  336. uint & _position
  337. ) const
  338. {
  339. _position = 0;
  340. if ( _dim > this->ui_d )
  341. return;
  342. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  343. eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  344. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.first );
  345. if ( _elem > this->features[_dim].getTolerance() )
  346. _position += this->features[_dim].getZeros();
  347. }
  348. // Finds the last element in a given dimension, which equals elem
  349. template <typename T>
  350. void FeatureMatrixT<T>::findLastInDimension(const uint & _dim,
  351. const T & _elem,
  352. uint & _position
  353. ) const
  354. {
  355. _position = 0;
  356. if ( _dim > this->ui_d )
  357. return;
  358. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = this->features[_dim].nonzeroElements().equal_range ( _elem );
  359. _position = distance( this->features[_dim].nonzeroElements().begin(), eit.second );
  360. if ( _elem > this->features[_dim].getTolerance() )
  361. _position += this->features[_dim].getZeros();
  362. }
  363. // Finds the first element in a given dimension, which is larger as elem
  364. template <typename T>
  365. void FeatureMatrixT<T>::findFirstLargerInDimension(const uint & _dim,
  366. const T & _elem,
  367. uint & _position
  368. ) const
  369. {
  370. // number of zero elements
  371. uint nz = this->ui_n - this->features[_dim].getNonZeros();
  372. // number of non-zero elements
  373. uint nnz = this->features[_dim].getNonZeros();
  374. _position = 0; // this is the index of the first non-zero element.
  375. // standard case
  376. // find pointer to last non-zero element
  377. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().end();
  378. // find pointer to first element largern than the given value
  379. it = this->features[_dim].nonzeroElements().upper_bound ( _elem );
  380. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  381. // special case 1: element is (almost) zero -> every other value is considered to be larger
  382. if ( _elem >= this->features[_dim].getTolerance() )
  383. {
  384. _position += nz;
  385. }
  386. }
  387. // Finds the last element in a given dimension, which is smaller as elem
  388. template <typename T>
  389. void FeatureMatrixT<T>::findLastSmallerInDimension(const uint & _dim,
  390. const T & _elem,
  391. uint & _position
  392. ) const
  393. {
  394. _position = 0;
  395. if ( (_dim < 0) || (_dim > this->ui_d))
  396. return;
  397. typename SortedVectorSparse<T>::const_elementpointer it = this->features[_dim].nonzeroElements().lower_bound ( _elem );
  398. _position = distance( this->features[_dim].nonzeroElements().begin(), it );
  399. if ( it->first > this->features[_dim].getTolerance() )
  400. _position += this->features[_dim].getZeros();
  401. }
  402. template <typename T>
  403. T FeatureMatrixT<T>::getLargestValue ( const bool & _getTransformedValue ) const
  404. {
  405. T vmax = (T) 0;
  406. T vtmp = (T) 0;
  407. uint tmp ( 0 );
  408. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  409. it != this->features.end();
  410. it++, tmp++
  411. )
  412. {
  413. vtmp = it->getLargestValueUnsafe( 1.0 /*quantile, we are interested in the largest value*/, _getTransformedValue );
  414. if ( vtmp > vmax )
  415. {
  416. vmax = vtmp;
  417. }
  418. }
  419. return vmax;
  420. }
  421. template <typename T>
  422. NICE::VectorT<T> FeatureMatrixT<T>::getLargestValuePerDimension ( const double & _quantile,
  423. const bool & _getTransformedValue
  424. ) const
  425. {
  426. NICE::VectorT<T> vmax ( this->get_d() );
  427. uint tmp ( 0 );
  428. typename NICE::VectorT<T>::iterator vmaxIt = vmax.begin();
  429. for ( typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin();
  430. it != this->features.end();
  431. it++, vmaxIt++, tmp++
  432. )
  433. {
  434. *vmaxIt = it->getLargestValueUnsafe( _quantile, _getTransformedValue );
  435. }
  436. return vmax;
  437. }
  438. //------------------------------------------------------
  439. // high level methods
  440. //------------------------------------------------------
  441. template <typename T>
  442. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *_pf )
  443. {
  444. if (_pf != NULL)
  445. {
  446. // REMARK: might be inefficient due to virtual calls
  447. if ( !_pf->isOrderPreserving() )
  448. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  449. uint d = this->get_d();
  450. for (uint dim = 0; dim < d; dim++)
  451. {
  452. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  453. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  454. {
  455. SortedVectorSparse<double>::dataelement & de = i->second;
  456. //TODO check, wether the element is "sparse" afterwards
  457. de.second = _pf->f( dim, i->first );
  458. }
  459. }
  460. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  461. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  462. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  463. }
  464. else
  465. {
  466. //no pf given -> nothing to do
  467. }
  468. }
  469. //Computes the ratio of sparsity across the matrix
  470. template <typename T>
  471. double FeatureMatrixT<T>:: computeSparsityRatio() const
  472. {
  473. double ratio(0.0);
  474. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++)
  475. {
  476. ratio += (*it).getZeros() / (double) (*it).getN();
  477. }
  478. if (this->features.size() != 0)
  479. ratio /= double(this->features.size());
  480. return ratio;
  481. }
  482. // add a new feature and insert its elements at the end of each dimension vector
  483. template <typename T>
  484. void FeatureMatrixT<T>::add_feature( const std::vector<T> & _feature,
  485. const NICE::ParameterizedFunction *_pf
  486. )
  487. {
  488. if (this->ui_n == 0)
  489. {
  490. this->set_d( _feature.size() );
  491. }
  492. if ( _feature.size() != this->ui_d)
  493. {
  494. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  495. return;
  496. }
  497. for (uint dimension = 0; dimension < this->features.size(); dimension++)
  498. {
  499. if (_pf != NULL)
  500. this->features[dimension].insert( _feature[dimension], _pf->f( dimension, _feature[dimension]) );
  501. else
  502. this->features[dimension].insert( _feature[dimension] );
  503. }
  504. this->ui_n++;
  505. }
  506. // add a new feature and insert its elements at the end of each dimension vector
  507. template <typename T>
  508. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & _feature,
  509. const ParameterizedFunction *_pf
  510. )
  511. {
  512. if (this->ui_n == 0)
  513. {
  514. this->set_d( _feature.size() );
  515. }
  516. if ( _feature.getDim() > this->ui_d)
  517. {
  518. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  519. return;
  520. }
  521. for (NICE::SparseVector::const_iterator it = _feature.begin(); it != _feature.end(); it++)
  522. {
  523. if (_pf != NULL)
  524. this->features[it->first].insert( (T) it->second, _pf->f( it->first, (T) it->second), true /* _specifyFeatureNumber */, this->ui_n );
  525. else
  526. this->features[it->first].insert( (T) it->second, true /* _specifyFeatureNumber */, this->ui_n );
  527. }
  528. this->ui_n++;
  529. }
  530. // add several new features and insert their elements in the already ordered structure
  531. template <typename T>
  532. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  533. {
  534. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  535. // check this please :)
  536. //TODO assure that every feature has the same dimension
  537. if (this->ui_n == 0)
  538. {
  539. this->set_d(_features.size());
  540. }
  541. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  542. for (uint dim = 0; dim < this->ui_d; dim++)
  543. {
  544. this->features[dim].insert( _features[dim] );
  545. }
  546. //update the number of our features
  547. this->ui_n += _features[0].size();
  548. }
  549. template <typename T>
  550. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  551. std::vector<std::vector<uint> > & _permutations,
  552. const uint & _dim
  553. )
  554. {
  555. this->features.clear();
  556. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  557. if ( this->ui_d > 0 )
  558. this->ui_n = _features[0].size();
  559. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  560. for (uint dim = 0; dim < this->ui_d; dim++)
  561. {
  562. this->features[dim].insert( _features[dim] );
  563. }
  564. this->getPermutations( _permutations );
  565. }
  566. template <typename T>
  567. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  568. std::vector<std::map<uint,uint> > & _permutations,
  569. const uint & _dim
  570. )
  571. {
  572. this->features.clear();
  573. this->set_d( std::max ( _dim, _features.size() ) );
  574. if ( this->ui_d > 0 )
  575. this->ui_n = _features[0].size();
  576. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  577. for (uint dim = 0; dim < this->ui_d; dim++)
  578. {
  579. this->features[dim].insert( _features[dim] );
  580. }
  581. this->getPermutations( _permutations );
  582. }
  583. template <typename T>
  584. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features,
  585. const uint & _dim
  586. )
  587. {
  588. this->features.clear();
  589. this->set_d( std::max ( _dim, (const uint) _features.size() ) );
  590. if ( this->ui_d > 0 )
  591. this->ui_n = _features[0].size();
  592. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  593. for (uint dim = 0; dim < this->ui_d; dim++)
  594. {
  595. if ( this->b_debug )
  596. {
  597. std::cerr << " dim: " << dim << " add " << _features[dim].size() << " examples " << std::endl;
  598. }
  599. this->features[dim].insert( _features[dim] );
  600. }
  601. }
  602. template <typename T>
  603. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features,
  604. const bool _dimensionsOverExamples,
  605. const uint & _dim
  606. )
  607. {
  608. this->features.clear();
  609. if (_features.size() == 0)
  610. {
  611. std::cerr << "set_features without features" << std::endl;
  612. }
  613. // resize our data structure
  614. //therefore, let's first of all figure out if the user specified a dimension or not.
  615. uint dimTmp ( _dim );
  616. if (_dimensionsOverExamples) //do we have dim x examples ?
  617. {
  618. if ( _features.size() > dimTmp )
  619. {
  620. dimTmp = _features.size();
  621. }
  622. }
  623. else //we have examples x dimes (as usually done)
  624. {
  625. if (_features.size() > 0) //and have at least one example
  626. {
  627. try{
  628. if ( _features[0]->getDim() > dimTmp )
  629. {
  630. dimTmp = _features[0]->getDim();
  631. }
  632. }
  633. catch(...)
  634. {
  635. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  636. }
  637. }
  638. }
  639. this->set_d( dimTmp );
  640. // set number of examples n
  641. if ( this->ui_d > 0 )
  642. {
  643. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  644. this->ui_n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  645. else //we have examples x dimes (as usually done)
  646. this->ui_n = _features.size();
  647. }
  648. // insert all values
  649. if ( _dimensionsOverExamples ) //do we have dim x examples ?
  650. {
  651. for (uint dim = 0; dim < this->ui_d; dim++)
  652. {
  653. this->features[dim].insert( _features[dim] );
  654. }
  655. }
  656. else //we have examples x dimes (as usually done)
  657. {
  658. if ( this->b_debug )
  659. std::cerr << "FeatureMatrixT<T>::set_features " << this->ui_n << " new examples" << std::endl;
  660. //loop over every example to add its content
  661. for (uint nr = 0; nr < this->ui_n; nr++)
  662. {
  663. if ( this->b_debug )
  664. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  665. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  666. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  667. {
  668. if ( this->b_debug )
  669. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  670. //elemIt->first: dim, elemIt->second: value
  671. this->features[elemIt->first].insert( (T) elemIt->second, true /* _specifyFeatureNumber */, nr);
  672. }//for non-zero-values of the feature
  673. if ( this->b_debug )
  674. std::cerr << std::endl;
  675. }//for every new feature
  676. if ( this->b_debug )
  677. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  678. }//if dimOverEx
  679. //set n for the internal data structure SortedVectorSparse
  680. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = this->features.begin(); it != this->features.end(); it++)
  681. (*it).setN( this->ui_n );
  682. }
  683. template <typename T>
  684. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<uint> > & _permutations) const
  685. {
  686. for (uint dim = 0; dim < this->ui_d; dim++)
  687. {
  688. std::vector<uint> perm ( (this->features[dim]).getPermutation() );
  689. _permutations.push_back(perm);
  690. }
  691. }
  692. template <typename T>
  693. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<uint,uint> > & _permutations) const
  694. {
  695. for (uint dim = 0; dim < this->ui_d; dim++)
  696. {
  697. std::map<uint,uint> perm ( (this->features[dim]).getPermutationNonZeroReal() );
  698. _permutations.push_back(perm);
  699. }
  700. }
  701. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  702. template <typename T>
  703. void FeatureMatrixT<T>::print(std::ostream & _os) const
  704. {
  705. if (_os.good())
  706. {
  707. for (uint dim = 0; dim < this->ui_d; dim++)
  708. {
  709. this->features[dim].print(_os);
  710. }
  711. }
  712. }
  713. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  714. template <typename T>
  715. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & _matrix,
  716. bool _transpose
  717. ) const
  718. {
  719. if ( _transpose )
  720. _matrix.resize(this->get_n(),this->get_d());
  721. else
  722. _matrix.resize(this->get_d(),this->get_n());
  723. _matrix.set((T)0.0);
  724. uint dimIdx(0);
  725. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  726. {
  727. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  728. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  729. {
  730. uint featIndex = ((*inIt).second)->second.first;
  731. if ( _transpose )
  732. _matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  733. else
  734. _matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  735. }
  736. }
  737. }
  738. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  739. template <typename T>
  740. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & _matrix,
  741. bool _transpose
  742. ) const
  743. {
  744. if ( _transpose )
  745. _matrix.resize(this->get_n());
  746. else
  747. _matrix.resize(this->get_d());
  748. // resizing the matrix
  749. for ( uint i = 0 ; i < _matrix.size(); i++ )
  750. if ( _transpose )
  751. _matrix[i] = std::vector<T>(this->get_d(), 0.0);
  752. else
  753. _matrix[i] = std::vector<T>(this->get_n(), 0.0);
  754. uint dimIdx(0);
  755. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  756. {
  757. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  758. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  759. {
  760. uint featIndex = ((*inIt).second)->second.first;
  761. if ( _transpose )
  762. _matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  763. else
  764. _matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  765. }
  766. }
  767. }
  768. // Swaps to specified elements, performing a validity check
  769. template <typename T>
  770. void FeatureMatrixT<T>::swap(const uint & _row1,
  771. const uint & _col1,
  772. const uint & _row2,
  773. const uint & _col2
  774. )
  775. {
  776. if ( (_row1 < 0) || (_col1 < 0) || (_row1 > this->ui_d) || (_col1 > this->ui_n) ||
  777. (_row2 < 0) || (_col2 < 0) || (_row2 > this->ui_d) || (_col2 > this->ui_n)
  778. )
  779. {
  780. return;
  781. }
  782. else
  783. {
  784. //swap
  785. T tmp = (*this)(_row1, _col1);
  786. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  787. (*this).set(_row2, _col2, tmp);
  788. }
  789. }
  790. // Swaps to specified elements, without performing a validity check
  791. template <typename T>
  792. void FeatureMatrixT<T>::swapUnsafe(const uint & _row1,
  793. const uint & _col1,
  794. const uint & _row2,
  795. const uint & _col2
  796. )
  797. {
  798. //swap
  799. T tmp = (*this)(_row1, _col1);
  800. (*this).set(_row1, _col1, (*this)(_row2,_col2));
  801. (*this).set(_row2, _col2, tmp);
  802. }
  803. template <typename T>
  804. void FeatureMatrixT<T>::hikDiagonalElements( Vector & _diagonalElements ) const
  805. {
  806. uint dimIdx = 0;
  807. // the function calculates the diagonal elements of a HIK kernel matrix
  808. _diagonalElements.resize(this->ui_n);
  809. _diagonalElements.set(0.0);
  810. // loop through all dimensions
  811. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = this->features.begin(); it != this->features.end(); it++, dimIdx++)
  812. {
  813. std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  814. // loop through all features
  815. for (typename std::map< uint, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  816. {
  817. uint index = inIt->first;
  818. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  819. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  820. _diagonalElements[index] += de.second;
  821. }
  822. }
  823. }
  824. template <typename T>
  825. double FeatureMatrixT<T>::hikTrace() const
  826. {
  827. Vector diagonalElements;
  828. this->hikDiagonalElements( diagonalElements );
  829. return diagonalElements.Sum();
  830. }
  831. template <typename T>
  832. uint FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const uint & dim) const
  833. {
  834. //FIXME we could return a boolean indicating success and return the actual number via call-by-reference
  835. if ( (dim < 0) || (dim > this->ui_d))
  836. return 0;
  837. return this->features[dim].getNonZeros();
  838. }
  839. template <typename T>
  840. uint FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const uint & dim) const
  841. {
  842. if ( (dim < 0) || (dim > this->ui_d))
  843. return 0;
  844. return this->ui_n - this-> features[dim].getNonZeros();
  845. }
  846. template <typename T>
  847. void FeatureMatrixT<T>::restore ( std::istream & _is,
  848. int _format
  849. )
  850. {
  851. bool b_restoreVerbose ( false );
  852. if ( _is.good() )
  853. {
  854. if ( b_restoreVerbose )
  855. std::cerr << " restore FeatureMatrixT" << std::endl;
  856. std::string tmp;
  857. _is >> tmp; //class name
  858. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  859. {
  860. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  861. throw;
  862. }
  863. _is.precision ( std::numeric_limits<double>::digits10 + 1);
  864. bool b_endOfBlock ( false ) ;
  865. while ( !b_endOfBlock )
  866. {
  867. _is >> tmp; // start of block
  868. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  869. {
  870. b_endOfBlock = true;
  871. continue;
  872. }
  873. tmp = this->removeStartTag ( tmp );
  874. if ( b_restoreVerbose )
  875. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  876. if ( tmp.compare("ui_n") == 0 )
  877. {
  878. _is >> this->ui_n;
  879. _is >> tmp; // end of block
  880. tmp = this->removeEndTag ( tmp );
  881. }
  882. else if ( tmp.compare("ui_d") == 0 )
  883. {
  884. _is >> this->ui_d;
  885. _is >> tmp; // end of block
  886. tmp = this->removeEndTag ( tmp );
  887. }
  888. else if ( tmp.compare("features") == 0 )
  889. {
  890. //NOTE assumes d to be read first!
  891. this->features.resize( this->ui_d);
  892. //now read features for every dimension
  893. for (uint dim = 0; dim < this->ui_d; dim++)
  894. {
  895. NICE::SortedVectorSparse<T> svs;
  896. this->features[dim] = svs;
  897. this->features[dim].restore(_is, _format);
  898. }
  899. _is >> tmp; // end of block
  900. tmp = this->removeEndTag ( tmp );
  901. }
  902. else
  903. {
  904. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  905. throw;
  906. }
  907. }
  908. }
  909. else
  910. {
  911. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  912. throw;
  913. }
  914. }
  915. template <typename T>
  916. void FeatureMatrixT<T>::store ( std::ostream & _os,
  917. int _format
  918. ) const
  919. {
  920. if (_os.good())
  921. {
  922. // show starting point
  923. _os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  924. _os.precision (std::numeric_limits<double>::digits10 + 1);
  925. _os << this->createStartTag( "ui_n" ) << std::endl;
  926. _os << this->ui_n << std::endl;
  927. _os << this->createEndTag( "ui_n" ) << std::endl;
  928. _os << this->createStartTag( "ui_d" ) << std::endl;
  929. _os << this->ui_d << std::endl;
  930. _os << this->createEndTag( "ui_d" ) << std::endl;
  931. //now write features for every dimension
  932. _os << this->createStartTag( "features" ) << std::endl;
  933. for (uint dim = 0; dim < this->ui_d; dim++)
  934. {
  935. this->features[dim].store(_os,_format);
  936. }
  937. _os << this->createEndTag( "features" ) << std::endl;
  938. // done
  939. _os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  940. }
  941. else
  942. {
  943. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  944. }
  945. }
  946. template <typename T>
  947. void FeatureMatrixT<T>::clear ()
  948. {}
  949. } // namespace
  950. // #endif