SparseVectorT.tcc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. #ifndef SPARSEVECTORT_TCC
  2. #define SPARSEVECTORT_TCC
  3. #include <iostream>
  4. #include <algorithm>
  5. #include <math.h>
  6. #include <assert.h>
  7. #include <limits>
  8. #include <stdio.h>
  9. #include "core/vector/SparseVectorT.h"
  10. namespace NICE {
  11. template<typename I, typename V>
  12. SparseVectorT<I,V>::SparseVectorT ( const std::map<I, V> & mymap ):std::map<I, V> ( mymap )
  13. {
  14. //we assume that the last element specifies the maximal size :)
  15. if ( mymap.size() > 0 )
  16. {
  17. typename std::map<I, V>::const_reverse_iterator lastElem = mymap.rbegin();
  18. dim = lastElem->first;
  19. }
  20. }
  21. template<typename I, typename V>
  22. SparseVectorT<I,V>::SparseVectorT ( I k, V v )
  23. {
  24. this->insert ( std::pair<I, V> ( k, v ) );
  25. this->dim = k;
  26. }
  27. template<typename I, typename V>
  28. SparseVectorT<I,V>::SparseVectorT ( uint _dim ) : dim ( _dim )
  29. {
  30. }
  31. template<typename I, typename V>
  32. SparseVectorT<I,V>::SparseVectorT ( const NICE::Vector &v, double tolerance )
  33. {
  34. this->dim = v.size();
  35. for ( uint i = 0; i < dim; i++ )
  36. {
  37. if ( fabs ( v[i] ) > tolerance )
  38. this->insert ( std::pair<I, V> ( i, v[i] ) );
  39. }
  40. }
  41. template<typename I, typename V>
  42. void SparseVectorT<I,V>::sub ( const SparseVectorT<I,V> & v )
  43. {
  44. for ( typename SparseVectorT<I,V>::const_iterator v_it = v.begin(); v_it != v.end(); v_it++ )
  45. ( *this ) [v_it->first] -= v_it->second;
  46. }
  47. template<typename I, typename V>
  48. void SparseVectorT<I,V>::add ( const SparseVectorT<I,V> & v )
  49. {
  50. this->add ( v, 1.0 );
  51. }
  52. template<typename I, typename V>
  53. void SparseVectorT<I,V>::add ( const SparseVectorT<I,V> & v, double lambda )
  54. {
  55. for ( typename SparseVectorT<I,V>::const_iterator v_it = v.begin();
  56. v_it != v.end();
  57. v_it++ )
  58. ( *this ) [v_it->first] += v_it->second * lambda;
  59. }
  60. template<typename I, typename V>
  61. void SparseVectorT<I,V>::add ( V val )
  62. {
  63. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); it++ )
  64. it->second += val;
  65. }
  66. template<typename I, typename V>
  67. void SparseVectorT<I,V>::divide ( const SparseVectorT<I,V> & v )
  68. {
  69. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); )
  70. {
  71. bool deleteEntry = false;
  72. typename SparseVectorT<I,V>::const_iterator v_it = v.find ( it->first );
  73. if ( v_it == v.end() ) {
  74. fthrow ( Exception, "Division by Zero !" );
  75. } else {
  76. it->second /= v_it->second;
  77. if ( fabs ( it->second ) < 1e-20 )
  78. deleteEntry = true;
  79. }
  80. if ( deleteEntry )
  81. this->erase ( it++ );
  82. else
  83. it++;
  84. }
  85. }
  86. template<typename I, typename V>
  87. void SparseVectorT<I,V>::divide ( V v )
  88. {
  89. if ( fabs ( v ) < 1e-20 )
  90. fthrow ( Exception, "Division by Zero !" );
  91. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); )
  92. {
  93. bool deleteEntry = false;
  94. it->second /= v;
  95. if ( fabs ( it->second ) < 1e-20 )
  96. deleteEntry = true;
  97. if ( deleteEntry )
  98. this->erase ( it++ );
  99. else
  100. it++;
  101. }
  102. }
  103. template<typename I, typename V>
  104. void SparseVectorT<I,V>::multiply ( const SparseVectorT<I,V> & v )
  105. {
  106. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); )
  107. {
  108. bool deleteEntry = false;
  109. typename SparseVectorT<I,V>::const_iterator v_it = v.find ( it->first );
  110. if ( v_it == v.end() )
  111. deleteEntry = true;
  112. else {
  113. it->second *= v_it->second;
  114. if ( fabs ( it->second ) < 1e-20 )
  115. deleteEntry = true;
  116. }
  117. if ( deleteEntry )
  118. this->erase ( it++ );
  119. else
  120. it++;
  121. }
  122. }
  123. template<typename I, typename V>
  124. void SparseVectorT<I,V>::multiply ( V val )
  125. {
  126. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); )
  127. {
  128. it->second *= val;
  129. if ( fabs ( it->second ) < 1e-20 )
  130. {
  131. this->erase ( it++ );
  132. } else {
  133. it++;
  134. }
  135. }
  136. }
  137. template<typename I, typename V>
  138. double SparseVectorT<I,V>::innerProduct ( const SparseVectorT<I,V> & v ) const
  139. {
  140. V prod = 0.0;
  141. typename SparseVectorT<I,V>::const_iterator it1 = this->begin();
  142. typename SparseVectorT<I,V>::const_iterator it2 = v.begin();
  143. while ( ( it1 != this->end() ) && ( it2 != v.end() ) )
  144. {
  145. if ( it1->first == it2->first )
  146. {
  147. prod += it1->second * it2->second;
  148. ++it1;
  149. ++it2;
  150. }
  151. else if ( it1->first < it2->first )
  152. ++it1;
  153. else
  154. ++it2;
  155. }
  156. return prod;
  157. }
  158. template<typename I, typename V>
  159. void SparseVectorT<I,V>::restore ( std::istream & is, int format )
  160. {
  161. I first;
  162. V second;
  163. if ( format == FORMAT_INDEX )
  164. {
  165. std::string tag;
  166. is >> tag;
  167. if ( tag != "SVECTOR" ) {
  168. fthrow ( Exception, "Format error: starting tag is " << tag << " instead of SVECTOR" );
  169. }
  170. is >> dim;
  171. I size;
  172. is >> size;
  173. for ( int i = 0; i < size; i++ )
  174. {
  175. is >> first;
  176. is >> second;
  177. ( *this ) [first] = second;
  178. }
  179. is >> tag;
  180. if ( tag != "END" ) {
  181. fthrow ( Exception, "Format error: end tag is " << tag << " instead of END" );
  182. }
  183. } else if ( format == FORMAT_INDEX_LINE ) {
  184. std::string line;
  185. std::getline ( is, line );
  186. std::istringstream iss ( line, std::istringstream::in);
  187. while ( !iss.eof() )
  188. {
  189. char c;
  190. if ( !( iss >> first) ) break;
  191. if ( !( iss >> c ) ) break;
  192. if ( !( iss >> second ) ) break;
  193. ( *this ) [first] = second;
  194. }
  195. // preserve dimension setting
  196. //dimension = -1;
  197. } else {
  198. fthrow(Exception, "Unknown format! (see SparseVectorT.h for details)");
  199. }
  200. }
  201. template<typename I, typename V>
  202. void SparseVectorT<I,V>::store ( std::ostream & os, int format ) const
  203. {
  204. if ( format == FORMAT_INDEX ) {
  205. os << "SVECTOR ";
  206. os << dim << " " << this->size() << std::endl;
  207. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  208. {
  209. os << it->first << " " << it->second << " ";
  210. }
  211. os << "END" << std::endl;
  212. }
  213. else if ( format == FORMAT_INDEX_LINE )
  214. {
  215. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  216. {
  217. if ( it != this->begin() )
  218. os << " ";
  219. os << it->first << ":" << it->second;
  220. }
  221. os << std::endl;
  222. } else {
  223. fthrow(Exception, "Unknown format! (see SparseVectorT.h for details)");
  224. }
  225. }
  226. template<typename I, typename V>
  227. void SparseVectorT<I,V>::clear ()
  228. {
  229. std::map<I, V>::clear();
  230. }
  231. template<typename I, typename V>
  232. V SparseVectorT<I,V>::sum () const
  233. {
  234. V sumv = 0.0;
  235. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  236. sumv += it->second;
  237. return sumv;
  238. }
  239. template<typename I, typename V>
  240. void SparseVectorT<I,V>::normalize ()
  241. {
  242. V sum = this->sum();
  243. if ( fabs ( sum ) < 1e-20 )
  244. {
  245. fprintf ( stderr, "WARNING: normalization failed !\n" );
  246. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); it++ )
  247. it->second = 0.0;
  248. } else
  249. {
  250. for ( typename SparseVectorT<I,V>::iterator it = this->begin(); it != this->end(); it++ )
  251. it->second /= sum;
  252. }
  253. }
  254. template<typename I, typename V>
  255. void SparseVectorT<I,V>::normalize ( V minv, V maxv )
  256. {
  257. typename SparseVectorT<I,V>::iterator it = this->begin();
  258. V oldmin = it->second;
  259. V oldmax = it->second;
  260. it++;
  261. for ( ; it != this->end(); it++ )
  262. {
  263. oldmin = std::min ( oldmin, it->second );
  264. oldmax = std::max ( oldmax, it->second );
  265. }
  266. V diff = oldmax - oldmin;
  267. if ( diff == 0.0 )
  268. {
  269. for ( it = this->begin(); it != this->end(); it++ )
  270. {
  271. it->second = 0;
  272. }
  273. return;
  274. }
  275. for ( it = this->begin(); it != this->end(); it++ )
  276. {
  277. it->second = ( ( ( it->second - oldmin ) / diff ) * ( maxv - minv ) ) + minv;
  278. }
  279. }
  280. template<typename I, typename V>
  281. void SparseVectorT<I,V>::addMap ( const std::map<uint, int> & v, double lambda )
  282. {
  283. for ( std::map<uint, int>::const_iterator v_it = v.begin(); v_it != v.end(); v_it++ )
  284. ( *this ) [(I)v_it->first] += (V)v_it->second * lambda;
  285. }
  286. template<typename I, typename V>
  287. void SparseVectorT<I,V>::addMap ( const std::map<uint, double> & v, double lambda )
  288. {
  289. for ( std::map<uint, double>::const_iterator v_it = v.begin(); v_it != v.end(); v_it++ )
  290. ( *this ) [(I)v_it->first] += (V)v_it->second * lambda;
  291. }
  292. template<typename I, typename V>
  293. I SparseVectorT<I,V>::maxElement () const
  294. {
  295. I maxindex = 0;
  296. V max = -std::numeric_limits<V>::max();
  297. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  298. {
  299. if ( it->second > max )
  300. {
  301. maxindex = it->first;
  302. max = it->second;
  303. }
  304. }
  305. return maxindex;
  306. }
  307. template<typename I, typename V>
  308. I SparseVectorT<I,V>::maxElementExclusive ( I key ) const
  309. {
  310. I maxindex = ( key == 0 ) ? 1 : 0;
  311. double max = - std::numeric_limits<double>::max();
  312. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  313. {
  314. if ( ( it->first != key ) && ( it->second > max ) )
  315. {
  316. maxindex = it->first;
  317. max = it->second;
  318. }
  319. }
  320. return maxindex;
  321. }
  322. template<typename I, typename V>
  323. double SparseVectorT<I,V>::entropy () const
  324. {
  325. double entropy = 0.0;
  326. double sum = 0.0;
  327. for ( typename SparseVectorT<I,V>::const_iterator j = this->begin(); j != this->end(); j++ )
  328. {
  329. if ( j->second <= 0.0 )
  330. continue;
  331. entropy -= (double)j->second * log ( (double)j->second );
  332. sum += (double)j->second;
  333. }
  334. entropy /= sum;
  335. entropy += log ( sum );
  336. return entropy;
  337. }
  338. template<typename I, typename V>
  339. void SparseVectorT<I,V>::getSortedIndices ( std::vector<I> & indizes ) const
  340. {
  341. typename std::vector< std::pair<V, I> > tmp;
  342. for ( typename SparseVectorT<I,V>::const_iterator i = this->begin(); i != this->end() ; i++ )
  343. tmp.push_back ( std::pair<V, I> ( i->second, i->first ) );
  344. std::sort ( tmp.begin(), tmp.end() );
  345. for (typename std::vector<std::pair<V, I> >::const_iterator j = tmp.begin(); j != tmp.end(); j++ )
  346. indizes.push_back ( j->second );
  347. }
  348. template<typename I, typename V>
  349. V SparseVectorT<I,V>::max () const
  350. {
  351. V max = - std::numeric_limits<V>::max();
  352. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  353. {
  354. if ( it->second > max )
  355. max = it->second;
  356. }
  357. return max;
  358. }
  359. template<typename I, typename V>
  360. V SparseVectorT<I,V>::min () const
  361. {
  362. V min = std::numeric_limits<V>::max();
  363. for ( typename SparseVectorT<I,V>::const_iterator it = this->begin(); it != this->end(); it++ )
  364. {
  365. if ( it->second < min )
  366. min = it->second;
  367. }
  368. return min;
  369. }
  370. template<typename I, typename V>
  371. V SparseVectorT<I,V>::get ( I i ) const
  372. {
  373. typename SparseVectorT<I,V>::const_iterator it = this->find ( i );
  374. if ( it == this->end() )
  375. return 0.0;
  376. else
  377. return it->second;
  378. }
  379. template<typename I, typename V>
  380. bool SparseVectorT<I,V>::set ( I i , V newValue )
  381. {
  382. typename SparseVectorT<I,V>::iterator it = this->find ( i );
  383. if ( it == this->end() )
  384. {
  385. // behavior change (2/2/2012 by erik)
  386. if ( newValue > 1e-20 )
  387. this->insert ( std::pair<I, V> ( i, newValue ) );
  388. //update our dimensions
  389. if ( i > dim ) //changed on 10-02-2012 by alex
  390. dim = i;
  391. return false;
  392. } else
  393. {
  394. it->second = newValue;
  395. return true;
  396. }
  397. }
  398. template<typename I, typename V>
  399. void SparseVectorT<I,V>::setDim ( uint _dim )
  400. {
  401. this->dim = _dim;
  402. }
  403. template<typename I, typename V>
  404. uint SparseVectorT<I,V>::getDim() const
  405. {
  406. return this->dim;
  407. }
  408. template<typename I, typename V>
  409. double SparseVectorT<I,V>::minimumKernel ( const SparseVectorT<I,V> & b ) const
  410. {
  411. V sum = 0.0;
  412. typename SparseVectorT<I,V>::const_iterator i = this->begin();
  413. typename SparseVectorT<I,V>::const_iterator j = b.begin();
  414. while ( ( i != this->end() ) && ( j != b.end() ) )
  415. {
  416. I idx1 = i->first;
  417. I idx2 = j->first;
  418. if ( idx1 == idx2 ) {
  419. sum += std::min ( i->second, j->second );
  420. i++;
  421. j++;
  422. } else if ( idx1 > idx2 ) {
  423. j++;
  424. } else {
  425. i++;
  426. }
  427. }
  428. return sum;
  429. }
  430. template<typename I, typename V>
  431. I SparseVectorT<I,V>::pickRandomSample() const
  432. {
  433. typedef typename std::vector< std::pair<V, I> > CumSumType;
  434. CumSumType cumsum;
  435. // calculate the cumulative sum, also known
  436. // as the distribution function F
  437. typename SparseVectorT<I,V>::const_iterator i = this->begin();
  438. for ( ; i != this->end(); i++ )
  439. {
  440. V value;
  441. if ( i == this->begin() )
  442. value = i->second;
  443. else
  444. value = cumsum.rbegin()->first + i->second;
  445. cumsum.push_back( std::pair<V, I> ( value, i->first ) );
  446. }
  447. V sum = cumsum.rbegin()->first;
  448. // sample between 0 and sum
  449. V t = (V)(randDouble() * sum);
  450. // find the position to calculate F^{-1}
  451. //
  452. std::pair<V, I> searchElement ( (V)t, (I)0 );
  453. typename CumSumType::const_iterator j = std::lower_bound(cumsum.begin(), cumsum.end(), searchElement );
  454. // return element index
  455. return j->second;
  456. }
  457. template<typename I, typename V>
  458. void SparseVectorT<I,V>::convertToVectorT(NICE::VectorT<V> & v ) const
  459. {
  460. uint dimension ( this->getDim() );
  461. if ( dimension == 0 )
  462. {
  463. // if dimension is zero, it could either be the case that the sparse vector is empty, or that the dimension flag was not set properly.
  464. // thus, let's check out the largest dimension
  465. // could be removed later... only needed for backwards compatibility
  466. typename SparseVectorT<I,V>::const_reverse_iterator svIt = this->rbegin();
  467. uint dist (distance(this->begin(), this->end()) );
  468. if ( dist > 0 )
  469. dimension = svIt->first+1; //plus one, since this is the index, but we need the resulting size
  470. //we're not allowed here to set the dimension flag, since we want to have this method to be a const one
  471. }
  472. // is our sparse vector empty?
  473. if ( dimension == 0 )
  474. {
  475. v.clear();
  476. v.resize(0);
  477. return;
  478. }
  479. //resize the new VectorT
  480. v.resize(dimension);
  481. //set default values to zero
  482. v.set( (V) 0.0);
  483. //add the actual content
  484. typename SparseVectorT<I,V>::const_iterator svIt = this->begin();
  485. for ( ; svIt != this->end(); svIt++ )
  486. {
  487. v[svIt->first] = svIt->second;
  488. }
  489. }
  490. } //namepspace NICE
  491. #endif