FeatureMatrixT.tcc 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. /**
  2. * @file FeatureMatrixT.tcc
  3. * @brief A feature matrix, storing (sparse) features sorted per dimension (Implementation)
  4. * @author Alexander Freytag
  5. * @date 07-12-2011 (dd-mm-yyyy)
  6. */
  7. // #ifndef FEATUREMATRIX_TCC
  8. // #define FEATUREMATRIX_TCC
  9. #include "FeatureMatrixT.h"
  10. namespace NICE {
  11. //------------------------------------------------------
  12. // several constructors and destructors
  13. //------------------------------------------------------
  14. // Default constructor
  15. template <typename T>
  16. FeatureMatrixT<T>::FeatureMatrixT()
  17. {
  18. n = 0;
  19. d = 0;
  20. features.clear();
  21. verbose = false;
  22. debug = false;
  23. }
  24. // Recommended constructor
  25. template <typename T>
  26. FeatureMatrixT<T>::FeatureMatrixT(const std::vector<std::vector<T> > & _features, const int & _dim)
  27. {
  28. n = 0;
  29. if (_dim < 0)
  30. d = (*_features.begin()).size();
  31. else
  32. d = _dim;
  33. for (typename std::vector<std::vector<T> >::const_iterator it = _features.begin(); it != _features.end(); it++)
  34. {
  35. add_feature(*it);
  36. }
  37. verbose = false;
  38. debug = false;
  39. }
  40. //Constructor reading data from a vector of sparse vector pointers
  41. template <typename T>
  42. FeatureMatrixT<T>::
  43. FeatureMatrixT(const std::vector< SparseVector * > & X, const bool dimensionsOverExamples, const int & _dim)
  44. {
  45. features.clear();
  46. // resize our data structure
  47. if (_dim >= 0) //did the user specified the number of dimensions?
  48. set_d(_dim);
  49. else //dimensions not specified by users
  50. {
  51. if (dimensionsOverExamples) //do we have dim x examples ?
  52. {
  53. set_d(X.size());
  54. }
  55. else //we have examples x dimes (as usually done)
  56. {
  57. if (X.size() > 0) //and have at least one example
  58. set_d(X[0]->getDim());
  59. else //no example, so set the dim to 0, since we have no idea at all
  60. {
  61. set_d(0);
  62. }
  63. }
  64. }
  65. // set number of examples n
  66. if (d>0)
  67. {
  68. if (dimensionsOverExamples) //do we have dim x examples ?
  69. n = X[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  70. else //we have examples x dimes (as usually done)
  71. n = X.size();
  72. }
  73. // insert all values
  74. if (dimensionsOverExamples) //do we have dim x examples ?
  75. {
  76. for (int dim = 0; dim < d; dim++)
  77. {
  78. features[dim].insert( X[dim] );
  79. }
  80. }
  81. else //we have examples x dimes (as usually done)
  82. {
  83. //loop over every example to add its content
  84. for (int nr = 0; nr < n; nr++)
  85. {
  86. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  87. for (NICE::SparseVector::const_iterator elemIt = X[nr]->begin(); elemIt != X[nr]->end(); elemIt++)
  88. {
  89. //elemIt->first: dim, elemIt->second: value
  90. features[elemIt->first].insert( (T) elemIt->second, nr);
  91. }//for non-zero-values of the feature
  92. }//for every new feature
  93. }//if dimOverEx
  94. //set n for the internal data structure SortedVectorSparse
  95. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  96. (*it).setN(n);
  97. }
  98. #ifdef NICE_USELIB_MATIO
  99. //Constructor reading data from matlab-files
  100. template <typename T>
  101. FeatureMatrixT<T>::
  102. FeatureMatrixT(const sparse_t & _features, const int & _dim)
  103. {
  104. if (_dim < 0)
  105. set_d(_features.njc -1);
  106. else
  107. set_d(_dim);
  108. int nMax(0);
  109. for ( int i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  110. {
  111. for ( int j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  112. {
  113. features[i].insert(((T*)_features.data)[j], _features.ir[ j]);
  114. if ((_features.ir[ j])>nMax) nMax = _features.ir[ j];
  115. }
  116. }
  117. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  118. {
  119. (*it).setN(nMax+1);
  120. }
  121. n = nMax+1;
  122. verbose = false;
  123. }
  124. //Constructor reading data from matlab-files
  125. template <typename T>
  126. FeatureMatrixT<T>::
  127. FeatureMatrixT(const sparse_t & _features, const std::map<int, int> & examples, const int & _dim)
  128. {
  129. if (_dim < 0)
  130. set_d(_features.njc -1);
  131. else
  132. set_d(_dim);
  133. int nMax(0);
  134. for ( int i = 0; i < _features.njc-1; i++ ) //walk over dimensions
  135. {
  136. for ( int j = _features.jc[i]; j < _features.jc[i+1] && j < _features.ndata; j++ ) //walk over single features, which are sparsely represented
  137. {
  138. int example_index = _features.ir[ j];
  139. std::map<int, int>::const_iterator it = examples.find(example_index);
  140. if ( it != examples.end() ) {
  141. features[i].insert(((T*)_features.data)[j], it->second /* new index */);
  142. if (it->second > nMax) nMax = it->second;
  143. }
  144. }
  145. }
  146. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  147. (*it).setN(nMax+1);
  148. n = nMax+1;
  149. verbose = false;
  150. }
  151. #endif
  152. // Default destructor
  153. template <typename T>
  154. FeatureMatrixT<T>::~FeatureMatrixT()
  155. {
  156. }
  157. //------------------------------------------------------
  158. // several get and set methods including access operators
  159. //------------------------------------------------------
  160. // Get number of examples
  161. template <typename T>
  162. int FeatureMatrixT<T>::get_n() const
  163. {
  164. return n;
  165. }
  166. // Get number of dimensions
  167. template <typename T>
  168. int FeatureMatrixT<T>::get_d() const
  169. {
  170. return d;
  171. }
  172. // Sets the given dimension and re-sizes internal data structure. WARNING: this will completely remove your current data!
  173. template <typename T>
  174. void FeatureMatrixT<T>::set_d(const int & _d)
  175. {
  176. d = _d; features.resize(d);
  177. }
  178. template <typename T>
  179. void FeatureMatrixT<T>::setVerbose( const bool & _verbose)
  180. {
  181. verbose = _verbose;
  182. }
  183. template <typename T>
  184. bool FeatureMatrixT<T>::getVerbose( ) const
  185. {
  186. return verbose;
  187. }
  188. template <typename T>
  189. void FeatureMatrixT<T>::setDebug( const bool & _debug)
  190. {
  191. debug = _debug;
  192. }
  193. template <typename T>
  194. bool FeatureMatrixT<T>::getDebug( ) const
  195. {
  196. return debug;
  197. }
  198. // Matrix-like operator for element access, performs validity check
  199. template <typename T>
  200. inline T FeatureMatrixT<T>::operator()(const int row, const int col) const
  201. {
  202. if ( (row < 0) || (col < 0) || (row > d) || (col > n) )
  203. {
  204. fthrow(Exception, "FeatureMatrixT: out of bounds");
  205. }
  206. else
  207. return (features[row]).access(col);
  208. }
  209. template<class T>
  210. inline bool
  211. FeatureMatrixT<T>::operator==(const FeatureMatrixT<T> & F) const
  212. {
  213. if ( ( (*this).get_n() != F.get_n()) || ((*this).get_d() != F.get_d()) )
  214. {
  215. fthrow(Exception, "FeatureMatrixT<T>::operator== : (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  216. }
  217. else if ((n == 0) || (d == 0))
  218. {
  219. return true;
  220. }
  221. for (int i = 0; i < d; i++)
  222. {
  223. for (int j = 0; j < n; j++)
  224. {
  225. // FIXME: it would be more efficient if we compare SortedVectorSparse objects here
  226. if(!((*this)(i,j) == F(i,j)))
  227. return false;
  228. }
  229. }
  230. return true;
  231. }
  232. template<class T>
  233. inline bool
  234. FeatureMatrixT<T>::operator!=(const FeatureMatrixT<T> & F) const
  235. {
  236. if ( ( (*this).get_n() != F.get_n()) || ((*this).get_d() != F.get_d()) )
  237. {
  238. fthrow(Exception, "FeatureMatrixT::operator!=(): (n != F.get_n()) || (d != F.get_d()) -- number of dimensions does not fit");
  239. }
  240. else if ((n == 0) || (d == 0))
  241. {
  242. return false;
  243. }
  244. for (int i = 0; i < d; i++)
  245. {
  246. for (int j = 0; j < n; j++)
  247. {
  248. if(!((*this)(i,j) == F(i,j)))
  249. return true;
  250. }
  251. }
  252. return false;
  253. }
  254. template<typename T>
  255. inline FeatureMatrixT<T>&
  256. FeatureMatrixT<T>::operator=(const FeatureMatrixT<T> & F)
  257. {
  258. (*this).set_d(F.get_d());
  259. (*this).n = F.get_n();
  260. for (int i = 0; i < (*this).get_d(); i++)
  261. {
  262. // use the operator= of SortedVectorSparse
  263. features[i] = F[i];
  264. }
  265. return *this;
  266. }
  267. // Element access without validity check
  268. template <typename T>
  269. inline T FeatureMatrixT<T>::getUnsafe(const int row, const int col) const
  270. {
  271. return (features[row]).access(col);
  272. }
  273. //! Element access of original values without validity check
  274. template <typename T>
  275. inline T FeatureMatrixT<T>::getOriginal(const int row, const int col) const
  276. {
  277. return (features[row]).accessOriginal(col);
  278. }
  279. // Sets a specified element to the given value, performs validity check
  280. template <typename T>
  281. inline void FeatureMatrixT<T>::set (const int row, const int col, const T & newElement, bool setTransformedValue)
  282. {
  283. if ( (row < 0) || (col < 0) || (row > d) || (col > n) )
  284. {
  285. return;
  286. }
  287. else
  288. (features[row]).set ( col, newElement, setTransformedValue );
  289. }
  290. // Sets a specified element to the given value, without validity check
  291. template <typename T>
  292. inline void FeatureMatrixT<T>::setUnsafe (const int row, const int col, const T & newElement, bool setTransformedValue)
  293. {
  294. (features[row]).set ( col, newElement, setTransformedValue );
  295. }
  296. // Acceess to all element entries of a specified dimension, including validity check
  297. template <typename T>
  298. void FeatureMatrixT<T>::getDimension(const int & dim, NICE::SortedVectorSparse<T> & dimension) const
  299. {
  300. if ( (dim < 0) || (dim > d) )
  301. {
  302. return;
  303. }
  304. else
  305. dimension = features[dim];
  306. }
  307. // Acceess to all element entries of a specified dimension, without validity check
  308. template <typename T>
  309. void FeatureMatrixT<T>::getDimensionUnsafe(const int & dim, NICE::SortedVectorSparse<T> & dimension) const
  310. {
  311. dimension = features[dim];
  312. }
  313. // Finds the first element in a given dimension, which equals elem
  314. template <typename T>
  315. void FeatureMatrixT<T>::findFirstInDimension(const int & dim, const T & elem, int & position) const
  316. {
  317. position = -1;
  318. if ( (dim < 0) || (dim > d))
  319. return;
  320. std::pair< typename SortedVectorSparse<T>::elementpointer, typename SortedVectorSparse<T>::elementpointer > eit;
  321. eit = features[dim].nonzeroElements().equal_range ( elem );
  322. position = distance( features[dim].nonzeroElements().begin(), eit.first );
  323. if ( elem > features[dim].getTolerance() )
  324. position += features[dim].getZeros();
  325. }
  326. // Finds the last element in a given dimension, which equals elem
  327. template <typename T>
  328. void FeatureMatrixT<T>::findLastInDimension(const int & dim, const T & elem, int & position) const
  329. {
  330. position = -1;
  331. if ( (dim < 0) || (dim > d))
  332. return;
  333. std::pair< typename SortedVectorSparse<T>::const_elementpointer, typename SortedVectorSparse<T>::const_elementpointer > eit = features[dim].nonzeroElements().equal_range ( elem );
  334. position = distance( features[dim].nonzeroElements().begin(), eit.second );
  335. if ( elem > features[dim].getTolerance() )
  336. position += features[dim].getZeros();
  337. }
  338. // Finds the first element in a given dimension, which is larger as elem
  339. template <typename T>
  340. void FeatureMatrixT<T>::findFirstLargerInDimension(const int & dim, const T & elem, int & position) const
  341. {
  342. position = -1;
  343. if ( (dim < 0) || (dim > d))
  344. return;
  345. //no non-zero elements?
  346. if (features[dim].getNonZeros() <= 0)
  347. {
  348. // if element is greater than zero, than is should be added at the last position
  349. if (elem > features[dim].getTolerance() )
  350. position = this->n;
  351. //if not, position is -1
  352. return;
  353. }
  354. if (features[dim].getNonZeros() == 1)
  355. {
  356. // if element is greater than the only nonzero element, than it is larger as everything else
  357. if (features[dim].nonzeroElements().begin()->first <= elem)
  358. position = this->n;
  359. //if not, but the element is still greater than zero, than
  360. else if (elem > features[dim].getTolerance() )
  361. position = this->n -1;
  362. return;
  363. }
  364. typename SortedVectorSparse<T>::const_elementpointer it = features[dim].nonzeroElements().end(); //this is needed !!!
  365. it = features[dim].nonzeroElements().upper_bound ( elem ); //if all values are smaller, this does not do anything at all
  366. position = distance( features[dim].nonzeroElements().begin(), it );
  367. if ( elem > features[dim].getTolerance() )
  368. {
  369. //position += features[dim].getZeros();
  370. position += n - features[dim].getNonZeros();
  371. }
  372. }
  373. // Finds the last element in a given dimension, which is smaller as elem
  374. template <typename T>
  375. void FeatureMatrixT<T>::findLastSmallerInDimension(const int & dim, const T & elem, int & position) const
  376. {
  377. position = -1;
  378. if ( (dim < 0) || (dim > d))
  379. return;
  380. typename SortedVectorSparse<T>::const_elementpointer it = features[dim].nonzeroElements().lower_bound ( elem );
  381. position = distance( features[dim].nonzeroElements().begin(), it );
  382. if ( it->first > features[dim].getTolerance() )
  383. position += features[dim].getZeros();
  384. }
  385. //------------------------------------------------------
  386. // high level methods
  387. //------------------------------------------------------
  388. template <typename T>
  389. void FeatureMatrixT<T>::applyFunctionToFeatureMatrix ( const NICE::ParameterizedFunction *pf )
  390. {
  391. if (pf != NULL)
  392. {
  393. // REMARK: might be inefficient due to virtual calls
  394. if ( !pf->isOrderPreserving() )
  395. fthrow(Exception, "ParameterizedFunction::applyFunctionToFeatureMatrix: this function is optimized for order preserving transformations");
  396. int d = this->get_d();
  397. for (int dim = 0; dim < d; dim++)
  398. {
  399. std::multimap< double, typename SortedVectorSparse<double>::dataelement> & nonzeroElements = this->getFeatureValues(dim).nonzeroElements();
  400. for ( SortedVectorSparse<double>::elementpointer i = nonzeroElements.begin(); i != nonzeroElements.end(); i++ )
  401. {
  402. SortedVectorSparse<double>::dataelement & de = i->second;
  403. //TODO check, wether the element is "sparse" afterwards
  404. de.second = pf->f( dim, i->first );
  405. }
  406. }
  407. /*for ( int i = 0 ; i < featureMatrix.get_n(); i++ )
  408. for ( int index = 0 ; index < featureMatrix.get_d(); index++ )
  409. featureMatrix.set(index, i, f( (uint)index, featureMatrix.getOriginal(index,i) ), isOrderPreserving() );*/
  410. }
  411. else
  412. {
  413. //no pf given -> nothing to do
  414. }
  415. }
  416. //Computes the ratio of sparsity across the matrix
  417. template <typename T>
  418. double FeatureMatrixT<T>:: computeSparsityRatio()
  419. {
  420. double ratio(0.0);
  421. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++)
  422. {
  423. ratio += (*it).getZeros() / (double) (*it).getN();
  424. }
  425. if (features.size() != 0)
  426. ratio /= features.size();
  427. return ratio;
  428. }
  429. // add a new feature and insert its elements at the end of each dimension vector
  430. template <typename T>
  431. void FeatureMatrixT<T>::add_feature( const std::vector<T> & feature, const NICE::ParameterizedFunction *pf )
  432. {
  433. if (n == 0)
  434. {
  435. set_d(feature.size());
  436. }
  437. if ( (int)feature.size() != d)
  438. {
  439. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  440. return;
  441. }
  442. for (int dimension = 0; dimension < (int) features.size(); dimension++)
  443. {
  444. if (pf != NULL)
  445. features[dimension].insert( feature[dimension], pf->f( dimension, feature[dimension]) );
  446. else
  447. features[dimension].insert( feature[dimension] );
  448. }
  449. n++;
  450. }
  451. // add a new feature and insert its elements at the end of each dimension vector
  452. template <typename T>
  453. void FeatureMatrixT<T>::add_feature(const NICE::SparseVector & feature, const ParameterizedFunction *pf )
  454. {
  455. if (n == 0)
  456. {
  457. set_d(feature.size());
  458. }
  459. if ( (int)feature.getDim() > d)
  460. {
  461. fthrow(Exception, "FeatureMatrixT<T>::add_feature - number of dimensions does not fit");
  462. return;
  463. }
  464. for (NICE::SparseVector::const_iterator it = feature.begin(); it != feature.end(); it++)
  465. {
  466. if (pf != NULL)
  467. features[it->first].insert( (T) it->second, pf->f( it->first, (T) it->second), n );
  468. else
  469. features[it->first].insert( (T) it->second, n );
  470. }
  471. n++;
  472. }
  473. // add several new features and insert their elements in the already ordered structure
  474. template <typename T>
  475. void FeatureMatrixT<T>::add_features(const std::vector<std::vector<T> > & _features )
  476. {
  477. //TODO do we need the parameterized function here as well? usually, we add several features and run applyFunctionToFeatureMatrix afterwards.
  478. // check this please :)
  479. //TODO assure that every feature has the same dimension
  480. if (n == 0)
  481. {
  482. set_d(_features.size());
  483. }
  484. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  485. for (int dim = 0; dim < d; dim++)
  486. {
  487. features[dim].insert( _features[dim] );
  488. }
  489. //update the number of our features
  490. n += (int) _features[0].size();
  491. }
  492. template <typename T>
  493. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, std::vector<std::vector<int> > & permutations, const int & _dim )
  494. {
  495. features.clear();
  496. if (_dim < 0)
  497. set_d(_features.size());
  498. else
  499. set_d(_dim);
  500. if (d>0)
  501. n = _features[0].size();
  502. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  503. for (int dim = 0; dim < d; dim++)
  504. {
  505. features[dim].insert( _features[dim] );
  506. }
  507. getPermutations( permutations );
  508. }
  509. template <typename T>
  510. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, std::vector<std::map<int,int> > & permutations, const int & _dim)
  511. {
  512. features.clear();
  513. if (_dim < 0)
  514. set_d(_features.size());
  515. else
  516. set_d(_dim);
  517. if (d>0)
  518. n = _features[0].size();
  519. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  520. for (int dim = 0; dim < d; dim++)
  521. {
  522. features[dim].insert( _features[dim] );
  523. }
  524. getPermutations( permutations );
  525. }
  526. template <typename T>
  527. void FeatureMatrixT<T>::set_features(const std::vector<std::vector<T> > & _features, const int & _dim)
  528. {
  529. features.clear();
  530. if (_dim < 0)
  531. set_d(_features.size());
  532. else
  533. set_d(_dim);
  534. if (d>0)
  535. n = _features[0].size();
  536. //pay attention: we assume now, that we have a vector (over dimensions) containing vectors over features (examples per dimension) - to be more efficient
  537. for (int dim = 0; dim < d; dim++)
  538. {
  539. features[dim].insert( _features[dim] );
  540. }
  541. }
  542. template <typename T>
  543. void FeatureMatrixT<T>::set_features(const std::vector< NICE::SparseVector * > & _features, const bool dimensionsOverExamples, const int & _dim)
  544. {
  545. features.clear();
  546. if (_features.size() == 0)
  547. {
  548. std::cerr << "set_features without features" << std::endl;
  549. }
  550. // resize our data structure
  551. if (_dim >= 0) //did the user specified the number of dimensions?
  552. set_d(_dim);
  553. else //dimensions not specified by users
  554. {
  555. if (dimensionsOverExamples) //do we have dim x examples ?
  556. {
  557. set_d(_features.size());
  558. }
  559. else //we have examples x dimes (as usually done)
  560. {
  561. if (_features.size() > 0) //and have at least one example
  562. {
  563. try{
  564. set_d(_features[0]->getDim());
  565. }
  566. catch(...)
  567. {
  568. std::cerr << "FeatureMatrixT<T>::set_features -- something went wrong using getDim() of SparseVectors" << std::endl;
  569. }
  570. }
  571. else //no example, so set the dim to 0, since we have no idea at all
  572. {
  573. set_d(0);
  574. }
  575. }
  576. }
  577. // set number of examples n
  578. if (d>0)
  579. {
  580. if (dimensionsOverExamples) //do we have dim x examples ?
  581. n = _features[0]->getDim(); //NOTE Pay attention: we assume, that this number is set!
  582. else //we have examples x dimes (as usually done)
  583. n = _features.size();
  584. }
  585. // insert all values
  586. if (dimensionsOverExamples) //do we have dim x examples ?
  587. {
  588. for (int dim = 0; dim < d; dim++)
  589. {
  590. features[dim].insert( _features[dim] );
  591. }
  592. }
  593. else //we have examples x dimes (as usually done)
  594. {
  595. if ( debug )
  596. std::cerr << "FeatureMatrixT<T>::set_features " << n << " new examples" << std::endl;
  597. //loop over every example to add its content
  598. for (int nr = 0; nr < n; nr++)
  599. {
  600. if ( debug )
  601. std::cerr << "add feature nr. " << nr << " / " << _features.size() << " ";
  602. //loop over every dimension to add the specific value to the corresponding SortedVectorSparse
  603. for (NICE::SparseVector::const_iterator elemIt = _features[nr]->begin(); elemIt != _features[nr]->end(); elemIt++)
  604. {
  605. if ( debug )
  606. std::cerr << elemIt->first << "-" << elemIt->second << " ";
  607. //elemIt->first: dim, elemIt->second: value
  608. features[elemIt->first].insert( (T) elemIt->second, nr);
  609. }//for non-zero-values of the feature
  610. if ( debug )
  611. std::cerr << std::endl;
  612. }//for every new feature
  613. if ( debug )
  614. std::cerr << "FeatureMatrixT<T>::set_features done" << std::endl;
  615. }//if dimOverEx
  616. //set n for the internal data structure SortedVectorSparse
  617. for (typename std::vector<NICE::SortedVectorSparse<T> >::iterator it = features.begin(); it != features.end(); it++)
  618. (*it).setN(n);
  619. }
  620. template <typename T>
  621. void FeatureMatrixT<T>::getPermutations( std::vector<std::vector<int> > & permutations) const
  622. {
  623. for (int dim = 0; dim < d; dim++)
  624. {
  625. std::vector<int> perm ( (features[dim]).getPermutation() );
  626. permutations.push_back(perm);
  627. }
  628. }
  629. template <typename T>
  630. void FeatureMatrixT<T>::getPermutations( std::vector<std::map<int,int> > & permutations) const
  631. {
  632. for (int dim = 0; dim < d; dim++)
  633. {
  634. std::map<int,int> perm ( (features[dim]).getPermutationNonZeroReal() );
  635. permutations.push_back(perm);
  636. }
  637. }
  638. // Prints the whole Matrix (outer loop over dimension, inner loop over features)
  639. template <typename T>
  640. void FeatureMatrixT<T>::print(std::ostream & os) const
  641. {
  642. if (os.good())
  643. {
  644. for (int dim = 0; dim < d; dim++)
  645. {
  646. features[dim].print(os);
  647. }
  648. }
  649. }
  650. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  651. template <typename T>
  652. void FeatureMatrixT<T>::computeNonSparseMatrix(NICE::MatrixT<T> & matrix, bool transpose) const
  653. {
  654. if ( transpose )
  655. matrix.resize(this->get_n(),this->get_d());
  656. else
  657. matrix.resize(this->get_d(),this->get_n());
  658. matrix.set((T)0.0);
  659. int dimIdx(0);
  660. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  661. {
  662. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  663. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  664. {
  665. int featIndex = ((*inIt).second)->second.first;
  666. if ( transpose )
  667. matrix(featIndex,dimIdx) =((*inIt).second)->second.second;
  668. else
  669. matrix(dimIdx,featIndex) =((*inIt).second)->second.second;
  670. }
  671. }
  672. }
  673. // Computes the whole non-sparse matrix. WARNING: this may result in a really memory-consuming data-structure!
  674. template <typename T>
  675. void FeatureMatrixT<T>::computeNonSparseMatrix(std::vector<std::vector<T> > & matrix, bool transpose) const
  676. {
  677. if ( transpose )
  678. matrix.resize(this->get_n());
  679. else
  680. matrix.resize(this->get_d());
  681. // resizing the matrix
  682. for ( uint i = 0 ; i < matrix.size(); i++ )
  683. if ( transpose )
  684. matrix[i] = std::vector<T>(this->get_d(), 0.0);
  685. else
  686. matrix[i] = std::vector<T>(this->get_n(), 0.0);
  687. int dimIdx(0);
  688. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  689. {
  690. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  691. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  692. {
  693. int featIndex = ((*inIt).second)->second.first;
  694. if ( transpose )
  695. matrix[featIndex][dimIdx] =((*inIt).second)->second.second;
  696. else
  697. matrix[dimIdx][featIndex] =((*inIt).second)->second.second;
  698. }
  699. }
  700. }
  701. // Swaps to specified elements, performing a validity check
  702. template <typename T>
  703. void FeatureMatrixT<T>::swap(const int & row1, const int & col1, const int & row2, const int & col2)
  704. {
  705. if ( (row1 < 0) || (col1 < 0) || (row1 > d) || (col1 > n) || (row2 < 0) || (col2 < 0) || (row2 > d) || (col2 > n))
  706. {
  707. return;
  708. }
  709. else
  710. {
  711. //swap
  712. T tmp = (*this)(row1, col1);
  713. (*this).set(row1, col1, (*this)(row2,col2));
  714. (*this).set(row2, col2, tmp);
  715. }
  716. }
  717. // Swaps to specified elements, without performing a validity check
  718. template <typename T>
  719. void FeatureMatrixT<T>::swapUnsafe(const int & row1, const int & col1, const int & row2, const int & col2)
  720. {
  721. //swap
  722. T tmp = (*this)(row1, col1);
  723. (*this).set(row1, col1, (*this)(row2,col2));
  724. (*this).set(row2, col2, tmp);
  725. }
  726. template <typename T>
  727. void FeatureMatrixT<T>::hikDiagonalElements( Vector & diagonalElements ) const
  728. {
  729. int dimIdx = 0;
  730. // the function calculates the diagonal elements of a HIK kernel matrix
  731. diagonalElements.resize(n);
  732. diagonalElements.set(0.0);
  733. // loop through all dimensions
  734. for (typename std::vector<NICE::SortedVectorSparse<T> >::const_iterator it = features.begin(); it != features.end(); it++, dimIdx++)
  735. {
  736. std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer> nonzeroIndices= (*it).nonzeroIndices();
  737. // loop through all features
  738. for (typename std::map< int, typename NICE::SortedVectorSparse<T>::elementpointer>::const_iterator inIt = nonzeroIndices.begin(); inIt != nonzeroIndices.end(); inIt++)
  739. {
  740. int index = inIt->first;
  741. typename NICE::SortedVectorSparse<T>::elementpointer p = inIt->second;
  742. typename NICE::SortedVectorSparse<T>::dataelement de = p->second;
  743. diagonalElements[index] += de.second;
  744. }
  745. }
  746. }
  747. template <typename T>
  748. double FeatureMatrixT<T>::hikTrace() const
  749. {
  750. Vector diagonalElements;
  751. hikDiagonalElements( diagonalElements );
  752. return diagonalElements.Sum();
  753. }
  754. template <typename T>
  755. int FeatureMatrixT<T>::getNumberOfNonZeroElementsPerDimension(const int & dim) const
  756. {
  757. if ( (dim < 0) || (dim > d))
  758. return -1;
  759. return features[dim].getNonZeros();
  760. }
  761. template <typename T>
  762. int FeatureMatrixT<T>::getNumberOfZeroElementsPerDimension(const int & dim) const
  763. {
  764. if ( (dim < 0) || (dim > d))
  765. return -1;
  766. return n - features[dim].getNonZeros();
  767. }
  768. template <typename T>
  769. void FeatureMatrixT<T>::restore ( std::istream & is, int format )
  770. {
  771. if (is.good())
  772. {
  773. is.precision (std::numeric_limits<double>::digits10 + 1);
  774. std::string tmp;
  775. is >> tmp; //classname
  776. is >> tmp;
  777. is >> n;
  778. is >> tmp;
  779. is >> d;
  780. features.resize(d);
  781. //now read features for every dimension
  782. for (int dim = 0; dim < d; dim++)
  783. {
  784. NICE::SortedVectorSparse<T> svs;
  785. features[dim] = svs;
  786. features[dim].restore(is,format);
  787. }
  788. if (verbose)
  789. {
  790. std::cerr << "FeatureMatrixT<T>::restore" << std::endl;
  791. std::cerr << "n: " << n << std::endl;
  792. std::cerr << "d: " << d << std::endl;
  793. this->print(std::cerr);
  794. }
  795. }
  796. else
  797. {
  798. std::cerr << "FeatureMatrixT<T>::restore -- InStream not initialized - restoring not possible!" << std::endl;
  799. }
  800. }
  801. template <typename T>
  802. void FeatureMatrixT<T>::store ( std::ostream & os, int format ) const
  803. {
  804. if (os.good())
  805. {
  806. os.precision (std::numeric_limits<double>::digits10 + 1);
  807. os << "FeatureMatrixT" << std::endl;
  808. os << "n: " << n << std::endl;
  809. os << "d: " << d << std::endl;
  810. //now write features for every dimension
  811. for (int dim = 0; dim < d; dim++)
  812. {
  813. features[dim].store(os,format);
  814. }
  815. }
  816. else
  817. {
  818. std::cerr << "FeatureMatrixT<T>::store -- OutStream not initialized - storing not possible!" << std::endl;
  819. }
  820. }
  821. template <typename T>
  822. void FeatureMatrixT<T>::clear ()
  823. {}
  824. } // namespace
  825. // #endif