FeatureMatrixT.tcc 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. // gp-hik-core includes
  10. #include "FeatureMatrixT.h"
  11. namespace NICE {
  12. //------------------------------------------------------
  13. // several constructors and destructors
  14. //------------------------------------------------------
  15. // Default constructor
  16. template <typename T>
  17. FeatureMatrixT<T>::FeatureMatrixT()
  18. {
  19. n = 0;
  20. d = 0;
  21. features.clear();
  22. verbose = false;
  23. debug = false;
  24. }
  25. // Recommended constructor
  26. template <typename T>
  27. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features, const int & _dim)
  28. {
  29. n = 0;
  30. if (_dim < 0)
  31. d = (*_features.begin()).size();
  32. else
  33. d = _dim;
  34. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  35. {
  36. add_feature(*it);
  37. }
  38. verbose = false;
  39. debug = false;
  40. }
  41. //Constructor reading data from a vector of sparse vector pointers
  42. template <typename T>
  43. FeatureMatrixT<T>::
  44. FeatureMatrixT(const std::vector< const NICE::SparseVector * > & X, const bool dimensionsOverExamples, const int & _dim)
  45. {
  46. features.clear();
  47. // resize our data structure
  48. if (_dim >= 0) //did the user specified the number of dimensions?
  49. set_d(_dim);
  50. else //dimensions not specified by users
  51. {
  52. if (dimensionsOverExamples) //do we have dim x examples ?
  53. {
  54. set_d(X.size());
  55. }
  56. else //we have examples x dimes (as usually done)
  57. {
  58. if (X.size() > 0) //and have at least one example
  59. set_d(X[0]->getDim());
  60. else //no example, so set the dim to 0, since we have no idea at all
  61. {
  62. set_d(0);
  63. }
  64. }
  65. }
  66. // set number of examples n
  67. if (d>0)
  68. {
  69. if (dimensionsOverExamples) //do we have dim x examples ?
  70. n = X[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  71. else //we have examples x dimes (as usually done)
  72. n = X.size();
  73. }
  74. // insert all values
  75. if (dimensionsOverExamples) //do we have dim x examples ?
  76. {
  77. for (int dim = 0; dim < d; dim++)
  78. {
  79. features[dim].insert( X[dim] );
  80. }
  81. }
  82. else //we have examples x dimes (as usually done)
  83. {
  84. //loop over every example to add its content
  85. for (int nr = 0; nr < n; nr++)
  86. {
  87. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  88. for (NICE::SparseVector::const_iterator elemIt = X[nr]->begin(); elemIt != X[nr]->end(); elemIt++)
  89. {
  90. //elemIt->first: dim, elemIt->second: value
  91. features[elemIt->first].insert( (T) elemIt->second, nr);
  92. }//for non-zero-values of the feature
  93. }//for every new feature
  94. }//if dimOverEx
  95. //set n for the internal data structure SortedVectorSparse
  96. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  97. (*it).setN(n);
  98. }
  99. #ifdef NICE_USELIB_MATIO
  100. //Constructor reading data from matlab-files
  101. template <typename T>
  102. FeatureMatrixT<T>::
  103. FeatureMatrixT(const sparse_t & _features, const int & _dim)
  104. {
  105. if (_dim < 0)
  106. set_d(_features.njc -1);
  107. else
  108. set_d(_dim);
  109. int nMax(0);
  110. for ( int i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  111. {
  112. for ( int j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  113. {
  114. features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  115. if ((_features.ir[ j])>nMax) nMax = _features.ir[ j];
  116. }
  117. }
  118. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  119. {
  120. (*it).setN(nMax+1);
  121. }
  122. n = nMax+1;
  123. verbose = false;
  124. }
  125. //Constructor reading data from matlab-files
  126. template <typename T>
  127. FeatureMatrixT<T>::
  128. FeatureMatrixT(const sparse_t & _features, const std::map<int, int> & examples, const int & _dim)
  129. {
  130. if (_dim < 0)
  131. set_d(_features.njc -1);
  132. else
  133. set_d(_dim);
  134. int nMax(0);
  135. for ( int i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  136. {
  137. for ( int j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  138. {
  139. int example_index = _features.ir[ j];
  140. std::map<int, int>::const_iterator it = examples.find(example_index);
  141. if ( it != examples.end() ) {
  142. features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  143. if (it->second > nMax) nMax = it->second;
  144. }
  145. }
  146. }
  147. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  148. (*it).setN(nMax+1);
  149. n = nMax+1;
  150. verbose = false;
  151. }
  152. #endif
  153. // Default destructor
  154. template <typename T>
  155. FeatureMatrixT<T>::~FeatureMatrixT()
  156. {
  157. }
  158. //------------------------------------------------------
  159. // several get and set methods including access operators
  160. //------------------------------------------------------
  161. // Get number of examples
  162. template <typename T>
  163. int FeatureMatrixT<T>::get_n() const
  164. {
  165. return n;
  166. }
  167. // Get number of dimensions
  168. template <typename T>
  169. int FeatureMatrixT<T>::get_d() const
  170. {
  171. return d;
  172. }
  173. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  174. template <typename T>
  175. void FeatureMatrixT<T>::set_d(const int & _d)
  176. {
  177. d = _d; features.resize(d);
  178. }
  179. template <typename T>
  180. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  181. {
  182. verbose = _verbose;
  183. }
  184. template <typename T>
  185. bool FeatureMatrixT<T>::getVerbose( ) const
  186. {
  187. return verbose;
  188. }
  189. template <typename T>
  190. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  191. {
  192. debug = _debug;
  193. }
  194. template <typename T>
  195. bool FeatureMatrixT<T>::getDebug( ) const
  196. {
  197. return debug;
  198. }
  199. // Matrix-like operator for element access, performs validity check
  200. template <typename T>
  201. inline T FeatureMatrixT<T>::operator()(const int row, const int col) const
  202. {
  203. if ( (row < 0) || (col < 0) || (row > d) || (col > n) )
  204. {
  205. fthrow(Exception, "FeatureMatrixT: out of bounds");
  206. }
  207. else
  208. return (features[row]).access(col);
  209. }
  210. template<class T>
  211. inline bool
  212. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & F) const
  213. {
  214. if ( ( (*this).get_n() != F.get_n()) || ((*this).get_d() != F.get_d()) )
  215. {
  216. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  217. }
  218. else if ((n == 0) || (d == 0))
  219. {
  220. return true;
  221. }
  222. for (int i = 0; i < d; i++)
  223. {
  224. for (int j = 0; j < n; j++)
  225. {
  226. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  227. if(!((*this)(i,j) == F(i,j)))
  228. return false;
  229. }
  230. }
  231. return true;
  232. }
  233. template<class T>
  234. inline bool
  235. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & F) const
  236. {
  237. if ( ( (*this).get_n() != F.get_n()) || ((*this).get_d() != F.get_d()) )
  238. {
  239. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  240. }
  241. else if ((n == 0) || (d == 0))
  242. {
  243. return false;
  244. }
  245. for (int i = 0; i < d; i++)
  246. {
  247. for (int j = 0; j < n; j++)
  248. {
  249. if(!((*this)(i,j) == F(i,j)))
  250. return true;
  251. }
  252. }
  253. return false;
  254. }
  255. template<typename T>
  256. inline FeatureMatrixT<T>&
  257. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & F)
  258. {
  259. (*this).set_d(F.get_d());
  260. (*this).n = F.get_n();
  261. for (int i = 0; i < (*this).get_d(); i++)
  262. {
  263. // use the operator= of SortedVectorSparse
  264. features[i] = F[i];
  265. }
  266. return *this;
  267. }
  268. // Element access without validity check
  269. template <typename T>
  270. inline T FeatureMatrixT<T>::getUnsafe(const int row, const int col) const
  271. {
  272. return (features[row]).access(col);
  273. }
  274. //! Element access of original values without validity check
  275. template <typename T>
  276. inline T FeatureMatrixT<T>::getOriginal(const int row, const int col) const
  277. {
  278. return (features[row]).accessOriginal(col);
  279. }
  280. // Sets a specified element to the given value, performs validity check
  281. template <typename T>
  282. inline void FeatureMatrixT<T>::set (const int row, const int col, const T & newElement, bool setTransformedValue)
  283. {
  284. if ( (row < 0) || (col < 0) || (row > d) || (col > n) )
  285. {
  286. return;
  287. }
  288. else
  289. (features[row]).set ( col, newElement, setTransformedValue );
  290. }
  291. // Sets a specified element to the given value, without validity check
  292. template <typename T>
  293. inline void FeatureMatrixT<T>::setUnsafe (const int row, const int col, const T & newElement, bool setTransformedValue)
  294. {
  295. (features[row]).set ( col, newElement, setTransformedValue );
  296. }
  297. // Acceess to all element entries of a specified dimension, including validity check
  298. template <typename T>
  299. void FeatureMatrixT<T>::getDimension(const int & dim, NICE::SortedVectorSparse<T> & dimension) const
  300. {
  301. if ( (dim < 0) || (dim > d) )
  302. {
  303. return;
  304. }
  305. else
  306. dimension = features[dim];
  307. }
  308. // Acceess to all element entries of a specified dimension, without validity check
  309. template <typename T>
  310. void FeatureMatrixT<T>::getDimensionUnsafe(const int & dim, NICE::SortedVectorSparse<T> & dimension) const
  311. {
  312. dimension = features[dim];
  313. }
  314. // Finds the first element in a given dimension, which equals elem
  315. template <typename T>
  316. void FeatureMatrixT<T>::findFirstInDimension(const int & dim, const T & elem, int & position) const
  317. {
  318. position = -1;
  319. if ( (dim < 0) || (dim > d))
  320. return;
  321. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  322. eit = features[dim].nonzeroElements().equal_range ( elem );
  323. position = distance( features[dim].nonzeroElements().begin(), eit.first );
  324. if ( elem > features[dim].getTolerance() )
  325. position += features[dim].getZeros();
  326. }
  327. // Finds the last element in a given dimension, which equals elem
  328. template <typename T>
  329. void FeatureMatrixT<T>::findLastInDimension(const int & dim, const T & elem, int & position) const
  330. {
  331. position = -1;
  332. if ( (dim < 0) || (dim > d))
  333. return;
  334. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = features[dim].nonzeroElements().equal_range ( elem );
  335. position = distance( features[dim].nonzeroElements().begin(), eit.second );
  336. if ( elem > features[dim].getTolerance() )
  337. position += features[dim].getZeros();
  338. }
  339. // Finds the first element in a given dimension, which is larger as elem
  340. template <typename T>
  341. void FeatureMatrixT<T>::findFirstLargerInDimension(const int & dim, const T & elem, int & position) const
  342. {
  343. position = -1;
  344. if ( (dim < 0) || (dim > d))
  345. return;
  346. //no non-zero elements?
  347. if (features[dim].getNonZeros() <= 0)
  348. {
  349. // if element is greater than zero, than is should be added at the last position
  350. if (elem > features[dim].getTolerance() )
  351. position = this->n;
  352. //if not, position is -1
  353. return;
  354. }
  355. if (features[dim].getNonZeros() == 1)
  356. {
  357. // if element is greater than the only nonzero element, than it is larger as everything else
  358. if (features[dim].nonzeroElements().begin()->first <= elem)
  359. position = this->n;
  360. //if not, but the element is still greater than zero, than
  361. else if (elem > features[dim].getTolerance() )
  362. position = this->n -1;
  363. return;
  364. }
  365. typename SortedVectorSparse<T>::const_elementpointer it = features[dim].nonzeroElements().end(); //this is needed !!!
  366. it = features[dim].nonzeroElements().upper_bound ( elem ); //if all values are smaller, this does not do anything at all
  367. position = distance( features[dim].nonzeroElements().begin(), it );
  368. if ( elem > features[dim].getTolerance() )
  369. {
  370. //position += features[dim].getZeros();
  371. position += n - features[dim].getNonZeros();
  372. }
  373. }
  374. // Finds the last element in a given dimension, which is smaller as elem
  375. template <typename T>
  376. void FeatureMatrixT<T>::findLastSmallerInDimension(const int & dim, const T & elem, int & position) const
  377. {
  378. position = -1;
  379. if ( (dim < 0) || (dim > d))
  380. return;
  381. typename SortedVectorSparse<T>::const_elementpointer it = features[dim].nonzeroElements().lower_bound ( elem );
  382. position = distance( features[dim].nonzeroElements().begin(), it );
  383. if ( it->first > features[dim].getTolerance() )
  384. position += features[dim].getZeros();
  385. }
  386. //------------------------------------------------------
  387. // high level methods
  388. //------------------------------------------------------
  389. template <typename T>
  390. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *pf )
  391. {
  392. if (pf != NULL)
  393. {
  394. // REMARK: might be inefficient due to virtual calls
  395. if ( !pf->isOrderPreserving() )
  396. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  397. int d = this->get_d();
  398. for (int dim = 0; dim < d; dim++)
  399. {
  400. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  401. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  402. {
  403. SortedVectorSparse<double>::dataelement & de = i->second;
  404. //TODO check, wether the element is "sparse" afterwards
  405. de.second = pf->f( dim, i->first );
  406. }
  407. }
  408. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  409. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  410. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  411. }
  412. else
  413. {
  414. //no pf given -> nothing to do
  415. }
  416. }
  417. //Computes the ratio of sparsity across the matrix
  418. template <typename T>
  419. double FeatureMatrixT<T>:: computeSparsityRatio()
  420. {
  421. double ratio(0.0);
  422. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++)
  423. {
  424. ratio += (*it).getZeros() / (double) (*it).getN();
  425. }
  426. if (features.size() != 0)
  427. ratio /= features.size();
  428. return ratio;
  429. }
  430. // add a new feature and insert its elements at the end of each dimension vector
  431. template <typename T>
  432. void FeatureMatrixT<T>::add_feature( const std::vector<T> & feature, const NICE::ParameterizedFunction *pf )
  433. {
  434. if (n == 0)
  435. {
  436. set_d(feature.size());
  437. }
  438. if ( (int)feature.size() != d)
  439. {
  440. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  441. return;
  442. }
  443. for (int dimension = 0; dimension < (int) features.size(); dimension++)
  444. {
  445. if (pf != NULL)
  446. features[dimension].insert( feature[dimension], pf->f( dimension, feature[dimension]) );
  447. else
  448. features[dimension].insert( feature[dimension] );
  449. }
  450. n++;
  451. }
  452. // add a new feature and insert its elements at the end of each dimension vector
  453. template <typename T>
  454. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & feature, const ParameterizedFunction *pf )
  455. {
  456. if (n == 0)
  457. {
  458. set_d(feature.size());
  459. }
  460. if ( (int)feature.getDim() > d)
  461. {
  462. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  463. return;
  464. }
  465. for (NICE::SparseVector::const_iterator it = feature.begin(); it != feature.end(); it++)
  466. {
  467. if (pf != NULL)
  468. features[it->first].insert( (T) it->second, pf->f( it->first, (T) it->second), n );
  469. else
  470. features[it->first].insert( (T) it->second, n );
  471. }
  472. n++;
  473. }
  474. // add several new features and insert their elements in the already ordered structure
  475. template <typename T>
  476. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  477. {
  478. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  479. // check this please :)
  480. //TODO assure that every feature has the same dimension
  481. if (n == 0)
  482. {
  483. set_d(_features.size());
  484. }
  485. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  486. for (int dim = 0; dim < d; dim++)
  487. {
  488. features[dim].insert( _features[dim] );
  489. }
  490. //update the number of our features
  491. n += (int) _features[0].size();
  492. }
  493. template <typename T>
  494. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, std::vector<std::vector<int> > & permutations, const int & _dim )
  495. {
  496. features.clear();
  497. if (_dim < 0)
  498. set_d(_features.size());
  499. else
  500. set_d(_dim);
  501. if (d>0)
  502. n = _features[0].size();
  503. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  504. for (int dim = 0; dim < d; dim++)
  505. {
  506. features[dim].insert( _features[dim] );
  507. }
  508. getPermutations( permutations );
  509. }
  510. template <typename T>
  511. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, std::vector<std::map<int,int> > & permutations, const int & _dim)
  512. {
  513. features.clear();
  514. if (_dim < 0)
  515. set_d(_features.size());
  516. else
  517. set_d(_dim);
  518. if (d>0)
  519. n = _features[0].size();
  520. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  521. for (int dim = 0; dim < d; dim++)
  522. {
  523. features[dim].insert( _features[dim] );
  524. }
  525. getPermutations( permutations );
  526. }
  527. template <typename T>
  528. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, const int & _dim)
  529. {
  530. features.clear();
  531. if (_dim < 0)
  532. set_d(_features.size());
  533. else
  534. set_d(_dim);
  535. if (d>0)
  536. n = _features[0].size();
  537. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  538. for (int dim = 0; dim < d; dim++)
  539. {
  540. features[dim].insert( _features[dim] );
  541. }
  542. }
  543. template <typename T>
  544. void FeatureMatrixT<T>::set_features(const std::vector< const NICE::SparseVector * > & _features, const bool dimensionsOverExamples, const int & _dim)
  545. {
  546. features.clear();
  547. if (_features.size() == 0)
  548. {
  549. std::cerr << "set_features without features" << std::endl;
  550. }
  551. // resize our data structure
  552. if (_dim >= 0) //did the user specified the number of dimensions?
  553. set_d(_dim);
  554. else //dimensions not specified by users
  555. {
  556. if (dimensionsOverExamples) //do we have dim x examples ?
  557. {
  558. set_d(_features.size());
  559. }
  560. else //we have examples x dimes (as usually done)
  561. {
  562. if (_features.size() > 0) //and have at least one example
  563. {
  564. try{
  565. set_d(_features[0]->getDim());
  566. }
  567. catch(...)
  568. {
  569. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  570. }
  571. }
  572. else //no example, so set the dim to 0, since we have no idea at all
  573. {
  574. set_d(0);
  575. }
  576. }
  577. }
  578. // set number of examples n
  579. if (d>0)
  580. {
  581. if (dimensionsOverExamples) //do we have dim x examples ?
  582. n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  583. else //we have examples x dimes (as usually done)
  584. n = _features.size();
  585. }
  586. // insert all values
  587. if (dimensionsOverExamples) //do we have dim x examples ?
  588. {
  589. for (int dim = 0; dim < d; dim++)
  590. {
  591. features[dim].insert( _features[dim] );
  592. }
  593. }
  594. else //we have examples x dimes (as usually done)
  595. {
  596. if ( debug )
  597. std::cerr << "FeatureMatrixT<T>::set_features " << n << " new examples" << std::endl;
  598. //loop over every example to add its content
  599. for (int nr = 0; nr < n; nr++)
  600. {
  601. if ( debug )
  602. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  603. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  604. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  605. {
  606. if ( debug )
  607. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  608. //elemIt->first: dim, elemIt->second: value
  609. features[elemIt->first].insert( (T) elemIt->second, nr);
  610. }//for non-zero-values of the feature
  611. if ( debug )
  612. std::cerr << std::endl;
  613. }//for every new feature
  614. if ( debug )
  615. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  616. }//if dimOverEx
  617. //set n for the internal data structure SortedVectorSparse
  618. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  619. (*it).setN(n);
  620. }
  621. template <typename T>
  622. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<int> > & permutations) const
  623. {
  624. for (int dim = 0; dim < d; dim++)
  625. {
  626. std::vector<int> perm ( (features[dim]).getPermutation() );
  627. permutations.push_back(perm);
  628. }
  629. }
  630. template <typename T>
  631. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<int,int> > & permutations) const
  632. {
  633. for (int dim = 0; dim < d; dim++)
  634. {
  635. std::map<int,int> perm ( (features[dim]).getPermutationNonZeroReal() );
  636. permutations.push_back(perm);
  637. }
  638. }
  639. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  640. template <typename T>
  641. void FeatureMatrixT<T>::print(std::ostream & os) const
  642. {
  643. if (os.good())
  644. {
  645. for (int dim = 0; dim < d; dim++)
  646. {
  647. features[dim].print(os);
  648. }
  649. }
  650. }
  651. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  652. template <typename T>
  653. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & matrix, bool transpose) const
  654. {
  655. if ( transpose )
  656. matrix.resize(this->get_n(),this->get_d());
  657. else
  658. matrix.resize(this->get_d(),this->get_n());
  659. matrix.set((T)0.0);
  660. int dimIdx(0);
  661. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  662. {
  663. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  664. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  665. {
  666. int featIndex = ((*inIt).second)->second.first;
  667. if ( transpose )
  668. matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  669. else
  670. matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  671. }
  672. }
  673. }
  674. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  675. template <typename T>
  676. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & matrix, bool transpose) const
  677. {
  678. if ( transpose )
  679. matrix.resize(this->get_n());
  680. else
  681. matrix.resize(this->get_d());
  682. // resizing the matrix
  683. for ( uint i = 0 ; i < matrix.size(); i++ )
  684. if ( transpose )
  685. matrix[i] = std::vector<T>(this->get_d(), 0.0);
  686. else
  687. matrix[i] = std::vector<T>(this->get_n(), 0.0);
  688. int dimIdx(0);
  689. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  690. {
  691. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  692. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  693. {
  694. int featIndex = ((*inIt).second)->second.first;
  695. if ( transpose )
  696. matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  697. else
  698. matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  699. }
  700. }
  701. }
  702. // Swaps to specified elements, performing a validity check
  703. template <typename T>
  704. void FeatureMatrixT<T>::swap(const int & row1, const int & col1, const int & row2, const int & col2)
  705. {
  706. if ( (row1 < 0) || (col1 < 0) || (row1 > d) || (col1 > n) || (row2 < 0) || (col2 < 0) || (row2 > d) || (col2 > n))
  707. {
  708. return;
  709. }
  710. else
  711. {
  712. //swap
  713. T tmp = (*this)(row1, col1);
  714. (*this).set(row1, col1, (*this)(row2,col2));
  715. (*this).set(row2, col2, tmp);
  716. }
  717. }
  718. // Swaps to specified elements, without performing a validity check
  719. template <typename T>
  720. void FeatureMatrixT<T>::swapUnsafe(const int & row1, const int & col1, const int & row2, const int & col2)
  721. {
  722. //swap
  723. T tmp = (*this)(row1, col1);
  724. (*this).set(row1, col1, (*this)(row2,col2));
  725. (*this).set(row2, col2, tmp);
  726. }
  727. template <typename T>
  728. void FeatureMatrixT<T>::hikDiagonalElements( Vector & diagonalElements ) const
  729. {
  730. int dimIdx = 0;
  731. // the function calculates the diagonal elements of a HIK kernel matrix
  732. diagonalElements.resize(n);
  733. diagonalElements.set(0.0);
  734. // loop through all dimensions
  735. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  736. {
  737. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  738. // loop through all features
  739. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  740. {
  741. int index = inIt->first;
  742. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  743. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  744. diagonalElements[index] += de.second;
  745. }
  746. }
  747. }
  748. template <typename T>
  749. double FeatureMatrixT<T>::hikTrace() const
  750. {
  751. Vector diagonalElements;
  752. hikDiagonalElements( diagonalElements );
  753. return diagonalElements.Sum();
  754. }
  755. template <typename T>
  756. int FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const int & dim) const
  757. {
  758. if ( (dim < 0) || (dim > d))
  759. return -1;
  760. return features[dim].getNonZeros();
  761. }
  762. template <typename T>
  763. int FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const int & dim) const
  764. {
  765. if ( (dim < 0) || (dim > d))
  766. return -1;
  767. return n - features[dim].getNonZeros();
  768. }
  769. template <typename T>
  770. void FeatureMatrixT<T>::restore ( std::istream & is, int format )
  771. {
  772. bool b_restoreVerbose ( false );
  773. if ( is.good() )
  774. {
  775. if ( b_restoreVerbose )
  776. std::cerr << " restore FeatureMatrixT" << std::endl;
  777. std::string tmp;
  778. is >> tmp; //class name
  779. if ( ! this->isStartTag( tmp, "FeatureMatrixT" ) )
  780. {
  781. std::cerr << " WARNING - attempt to restore FeatureMatrixT, but start flag " << tmp << " does not match! Aborting... " << std::endl;
  782. throw;
  783. }
  784. is.precision ( std::numeric_limits<double>::digits10 + 1);
  785. bool b_endOfBlock ( false ) ;
  786. while ( !b_endOfBlock )
  787. {
  788. is >> tmp; // start of block
  789. if ( this->isEndTag( tmp, "FeatureMatrixT" ) )
  790. {
  791. b_endOfBlock = true;
  792. continue;
  793. }
  794. tmp = this->removeStartTag ( tmp );
  795. if ( b_restoreVerbose )
  796. std::cerr << " currently restore section " << tmp << " in FeatureMatrixT" << std::endl;
  797. if ( tmp.compare("n") == 0 )
  798. {
  799. is >> n;
  800. is >> tmp; // end of block
  801. tmp = this->removeEndTag ( tmp );
  802. }
  803. else if ( tmp.compare("d") == 0 )
  804. {
  805. is >> d;
  806. is >> tmp; // end of block
  807. tmp = this->removeEndTag ( tmp );
  808. }
  809. else if ( tmp.compare("features") == 0 )
  810. {
  811. //NOTE assumes d to be read first!
  812. features.resize(d);
  813. //now read features for every dimension
  814. for (int dim = 0; dim < d; dim++)
  815. {
  816. NICE::SortedVectorSparse<T> svs;
  817. features[dim] = svs;
  818. features[dim].restore(is,format);
  819. }
  820. is >> tmp; // end of block
  821. tmp = this->removeEndTag ( tmp );
  822. }
  823. else
  824. {
  825. std::cerr << "WARNING -- unexpected FeatureMatrixT object -- " << tmp << " -- for restoration... aborting" << std::endl;
  826. throw;
  827. }
  828. }
  829. }
  830. else
  831. {
  832. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  833. throw;
  834. }
  835. }
  836. template <typename T>
  837. void FeatureMatrixT<T>::store ( std::ostream & os, int format ) const
  838. {
  839. if (os.good())
  840. {
  841. // show starting point
  842. os << this->createStartTag( "FeatureMatrixT" ) << std::endl;
  843. os.precision (std::numeric_limits<double>::digits10 + 1);
  844. os << this->createStartTag( "n" ) << std::endl;
  845. os << n << std::endl;
  846. os << this->createEndTag( "n" ) << std::endl;
  847. os << this->createStartTag( "d" ) << std::endl;
  848. os << d << std::endl;
  849. os << this->createEndTag( "d" ) << std::endl;
  850. //now write features for every dimension
  851. os << this->createStartTag( "features" ) << std::endl;
  852. for (int dim = 0; dim < d; dim++)
  853. {
  854. features[dim].store(os,format);
  855. }
  856. os << this->createEndTag( "features" ) << std::endl;
  857. // done
  858. os << this->createEndTag( "FeatureMatrixT" ) << std::endl;
  859. }
  860. else
  861. {
  862. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  863. }
  864. }
  865. template <typename T>
  866. void FeatureMatrixT<T>::clear ()
  867. {}
  868. } // namespace
  869. // #endif