LabeledSet.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /**
  2. * @file LabeledSet.cpp
  3. * @brief Labeled set of vectors
  4. * @author Erik Rodner
  5. * @date 07.09.2007
  6. */
  7. #ifndef LABELEDSETTCCINCLUDE
  8. #define LABELEDSETTCCINCLUDE
  #include <cstdio>
  #include <cstdlib>
  #include <iostream>
  #include <limits>
  #include <string>

  #include "core/image/ImageT.h"
  #include "core/vector/VectorT.h"
  #include "core/vector/MatrixT.h"
  #include "vislearning/cbaselib/LabeledSet.h"
  #include "core/basics/StringTools.h"
  15. using namespace OBJREC;
  16. using namespace std;
  17. using namespace NICE;
  18. LabeledSet::LabeledSet ( bool _selection ) : selection ( _selection )
  19. {
  20. }
  21. LabeledSet::~LabeledSet ()
  22. {
  23. //This is a big problem when using selections
  24. //clear();
  25. //fprintf (stderr, "LabeledSet: destructor (FIXME: memory leak)\n");
  26. }
  27. int LabeledSet::count ( int classno ) const
  28. {
  29. const_iterator i = find ( classno );
  30. return ( i == end() ) ? 0 : i->second.size();
  31. }
  32. int LabeledSet::count () const
  33. {
  34. int mycount = 0;
  35. for ( const_iterator i = begin() ; i != end() ; i++ )
  36. {
  37. mycount += i->second.size();
  38. }
  39. return mycount;
  40. }
  41. void LabeledSet::clear ()
  42. {
  43. if ( !selection )
  44. {
  45. for ( Permutation::const_iterator i = insertOrder.begin();
  46. i != insertOrder.end();
  47. i++ )
  48. {
  49. const ImageInfo *s = i->second;
  50. delete s;
  51. }
  52. }
  53. std::map< int, vector<ImageInfo *> >::clear();
  54. }
  55. void LabeledSet::add ( int classno, ImageInfo *x )
  56. {
  57. if ( selection ) {
  58. fprintf ( stderr, "Operation not available for selections !\n" );
  59. exit ( -1 );
  60. }
  61. iterator i = find ( classno );
  62. if ( i == end() ) {
  63. operator[] ( classno ) = vector<ImageInfo *>();
  64. i = find ( classno );
  65. }
  66. i->second.push_back ( x );
  67. insertOrder.push_back ( ElementPointer ( classno, x ) );
  68. }
  69. void LabeledSet::getPermutation ( Permutation & permutation ) const
  70. {
  71. permutation = Permutation ( insertOrder );
  72. }
  73. void LabeledSet::add_reference ( int classno, ImageInfo *pointer )
  74. {
  75. iterator i = find ( classno );
  76. if ( i == end() ) {
  77. operator[] ( classno ) = vector<ImageInfo *>();
  78. i = find ( classno );
  79. }
  80. i->second.push_back ( pointer );
  81. insertOrder.push_back ( ElementPointer ( classno, pointer ) );
  82. }
  83. void LabeledSet::getClasses ( std::vector<int> & classes ) const
  84. {
  85. for ( const_iterator i = begin(); i != end(); i++ )
  86. classes.push_back ( i->first );
  87. }
  88. void LabeledSet::printInformation () const
  89. {
  90. for ( const_iterator i = begin(); i != end(); i++ )
  91. {
  92. cerr << "class " << i->first << ": " << i->second.size() << endl;
  93. }
  94. }
  95. /************************************
  96. LabeledSetVector
  97. *************************************/
  98. LabeledSetVector::LabeledSetVector ( bool _selection ) : selection ( _selection )
  99. {}
  100. LabeledSetVector::~LabeledSetVector ()
  101. {
  102. // FIXME: THIS is a big problem with selections !!!
  103. //clear();
  104. }
  105. int LabeledSetVector::dimension () const
  106. {
  107. if ( insertOrder.size() <= 0 ) return -1;
  108. return ( * ( begin()->second.begin() ) )->size();
  109. //insertOrder[0].second->size();
  110. }
  111. void LabeledSetVector::restore ( istream & is, int format )
  112. {
  113. if ( format == FILEFORMAT_RAW )
  114. restoreRAW ( is );
  115. else
  116. restoreASCII ( is, format );
  117. }
  118. void LabeledSetVector::restoreASCII ( istream & is, int format )
  119. {
  120. const int bufsize = 1024 * 1024;
  121. char *buf = new char[bufsize];
  122. std::string buf_s;
  123. vector<string> elements;
  124. vector<string> pair;
  125. // maximal dimension of all feature vectors;
  126. int dataset_dimension = -numeric_limits<int>::max();
  127. while ( ! is.eof() )
  128. {
  129. elements.clear();
  130. int classno;
  131. if ( ! ( is >> classno ) ) {
  132. break;
  133. }
  134. is.get ( buf, bufsize );
  135. buf_s = buf;
  136. if ( buf_s.size() <= 0 )
  137. break;
  138. StringTools::split ( buf_s, ' ', elements );
  139. if ( elements.size() <= 1 )
  140. break;
  141. int dimension = - numeric_limits<int>::max();
  142. if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
  143. {
  144. // in this format we have to determine the maximum index
  145. for ( vector<string>::const_iterator i = elements.begin() + 1;
  146. i != elements.end();
  147. i++ )
  148. {
  149. pair.clear();
  150. StringTools::split ( *i, ':', pair );
  151. if ( pair.size() != 2 ) continue;
  152. int index = atoi ( pair[0].c_str() );
  153. if ( index > dimension )
  154. dimension = index;
  155. }
  156. if ( dimension > dataset_dimension )
  157. dataset_dimension = dimension;
  158. } else {
  159. // skip first element because of white space
  160. dimension = elements.size() - 1;
  161. }
  162. NICE::Vector vec ( dimension, 0.0 );
  163. size_t l = 0;
  164. // skip first element because of white space
  165. for ( vector<string>::const_iterator i = elements.begin() + 1;
  166. i != elements.end();
  167. i++, l++ )
  168. {
  169. if ( format == FILEFORMAT_INDEX )
  170. {
  171. pair.clear();
  172. StringTools::split ( *i, ':', pair );
  173. if ( pair.size() == 2 ) {
  174. double val = atof ( pair[1].c_str() );
  175. vec[l] = val;
  176. }
  177. } else if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
  178. {
  179. pair.clear();
  180. StringTools::split ( *i, ':', pair );
  181. if ( pair.size() == 2 ) {
  182. double val = atof ( pair[1].c_str() );
  183. int index = atoi ( pair[0].c_str() ) - 1;
  184. vec[index] = val;
  185. }
  186. } else {
  187. vec[l] = atof ( i->c_str() );
  188. }
  189. }
  190. add ( classno, vec );
  191. }
  192. delete [] buf;
  193. if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
  194. // we have to resize all feature vectors of the dataset to dataset_dimension
  195. for ( LabeledSetVector::iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++ )
  196. for ( vector<NICE::Vector *>::iterator jLOOP_ALL = iLOOP_ALL->second.begin();
  197. jLOOP_ALL != iLOOP_ALL->second.end();
  198. jLOOP_ALL++ )
  199. {
  200. NICE::Vector *x = ( *jLOOP_ALL );
  201. uint old_dimension = x->size();
  202. // resize the vector to the dataset dimension
  203. x->resize ( dataset_dimension );
  204. // set all elements to zero, which are new after the resize operation
  205. for ( uint k = old_dimension; k < x->size(); k++ )
  206. ( *x ) [k] = 0.0;
  207. }
  208. }
  209. }
  210. void LabeledSetVector::store ( ostream & os, int format ) const
  211. {
  212. for ( Permutation::const_iterator i = insertOrder.begin();
  213. i != insertOrder.end();
  214. i++ )
  215. {
  216. int classno = i->first;
  217. const NICE::Vector & x = * ( i->second );
  218. storeElement ( os, classno, x, format );
  219. }
  220. }
  221. void LabeledSetVector::storeElement ( ostream & os, int classno, const NICE::Vector & x, int format )
  222. {
  223. if ( format != FILEFORMAT_RAW ) {
  224. os << classno << " ";
  225. for ( size_t k = 0 ; k < x.size() ; k++ )
  226. {
  227. if ( format == FILEFORMAT_INDEX )
  228. os << k + 1 << ":" << x[k];
  229. else if ( format == FILEFORMAT_NOINDEX )
  230. os << x[k];
  231. else if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
  232. if ( x[k] != 0.0 )
  233. os << k + 1 << ":" << x[k];
  234. }
  235. if ( k != x.size() )
  236. os << " ";
  237. }
  238. os << endl;
  239. } else {
  240. const double *data = x.getDataPointer();
  241. int dimension = x.size();
  242. os.write ( ( char * ) &classno, sizeof ( int ) );
  243. os.write ( ( char * ) &dimension, sizeof ( int ) );
  244. os.write ( ( char * ) data, sizeof ( double ) *dimension );
  245. }
  246. }
  247. void LabeledSetVector::restoreRAW ( istream & is )
  248. {
  249. while ( ! is.eof() )
  250. {
  251. int classno;
  252. int dimension;
  253. is.read ( ( char * ) &classno, sizeof ( int ) );
  254. if ( is.gcount() != sizeof ( int ) )
  255. return;
  256. is.read ( ( char * ) &dimension, sizeof ( int ) );
  257. if ( is.gcount() != sizeof ( int ) )
  258. return;
  259. NICE::Vector vec;
  260. try {
  261. vec.resize ( dimension );
  262. } catch ( std::bad_alloc ) {
  263. fthrow ( IOException, "Unable to allocate a vector with size " << dimension << "." << endl
  264. << "(debug: class " << classno << " ; " << "sizeof(int) = " << 8*sizeof ( int ) << " Bit ; " << endl
  265. << "elements read = " << count() << " )" << endl );
  266. }
  267. double *data = vec.getDataPointer();
  268. is.read ( ( char * ) data, sizeof ( double ) *dimension );
  269. if ( ( int ) is.gcount() != ( int ) sizeof ( double ) *dimension )
  270. return;
  271. for ( int k = 0 ; k < dimension ; k++ )
  272. if ( NICE::isNaN ( data[k] ) ) {
  273. cerr << "WARNING: nan's found !!" << endl;
  274. data[k] = 0.0;
  275. }
  276. add ( classno, vec );
  277. }
  278. }
  279. LabeledSetVector::ElementPointer LabeledSetVector::pickRandomSample () const
  280. {
  281. if ( insertOrder.size() <= 0 ) {
  282. fprintf ( stderr, "LabeledSet::pickRandomSample: failure !\n" );
  283. exit ( -1 );
  284. }
  285. int selection = rand() % insertOrder.size();
  286. return insertOrder[selection];
  287. }
  288. int LabeledSetVector::count ( int classno ) const
  289. {
  290. const_iterator i = find ( classno );
  291. return ( i == end() ) ? 0 : i->second.size();
  292. }
  293. int LabeledSetVector::count () const
  294. {
  295. int mycount = 0;
  296. for ( const_iterator i = begin() ; i != end() ; i++ )
  297. mycount += i->second.size();
  298. return mycount;
  299. }
  300. int LabeledSetVector::pickRandomSample ( int classno, ElementPointer & i ) const
  301. {
  302. const_iterator j = find ( classno );
  303. if ( j == end() ) return -1;
  304. const vector<Vector *> & l = j->second;
  305. int num = rand() % l.size();
  306. i.first = classno;
  307. i.second = l[num];
  308. return classno;
  309. }
  310. void LabeledSetVector::clear ()
  311. {
  312. if ( ! selection ) {
  313. for ( Permutation::const_iterator i = insertOrder.begin();
  314. i != insertOrder.end();
  315. i++ )
  316. {
  317. const NICE::Vector *s = i->second;
  318. delete s;
  319. }
  320. insertOrder.clear();
  321. }
  322. std::map< int, vector<Vector *> >::clear();
  323. }
  324. void LabeledSetVector::add ( int classno, const NICE::Vector & x )
  325. {
  326. if ( selection ) {
  327. fprintf ( stderr, "Add operation not available for selections !\n" );
  328. exit ( -1 );
  329. }
  330. iterator i = find ( classno );
  331. if ( i == end() ) {
  332. operator[] ( classno ) = vector<Vector *>();
  333. i = find ( classno );
  334. }
  335. NICE::Vector *xp = new Vector ( x );
  336. i->second.push_back ( xp );
  337. insertOrder.push_back ( ElementPointer ( classno, xp ) );
  338. }
  339. void LabeledSetVector::getPermutation ( Permutation & permutation ) const
  340. {
  341. permutation = Permutation ( insertOrder );
  342. }
  343. void LabeledSetVector::add_reference ( int classno, NICE::Vector *pointer )
  344. {
  345. iterator i = find ( classno );
  346. if ( i == end() ) {
  347. operator[] ( classno ) = vector<Vector *>();
  348. i = find ( classno );
  349. }
  350. i->second.push_back ( pointer );
  351. insertOrder.push_back ( ElementPointer ( classno, pointer ) );
  352. }
  353. void LabeledSetVector::getClasses ( std::vector<int> & classes ) const
  354. {
  355. for ( const_iterator i = begin(); i != end(); i++ )
  356. classes.push_back ( i->first );
  357. }
  358. void LabeledSetVector::printInformation () const
  359. {
  360. for ( const_iterator i = begin(); i != end(); i++ )
  361. {
  362. cerr << "class " << i->first << ": " << i->second.size() << endl;
  363. }
  364. }
  365. int LabeledSetVector::getMaxClassno() const
  366. {
  367. int maxclassno = 0;
  368. for ( const_iterator i = begin(); i != end(); i++ )
  369. if ( i->first > maxclassno )
  370. maxclassno = i->first;
  371. return maxclassno;
  372. }
  373. void LabeledSetVector::getFlatRepresentation ( VVector & vecSet, NICE::Vector & vecSetLabels ) const
  374. {
  375. int k = 0;
  376. vecSetLabels.resize ( count() );
  377. for ( LabeledSetVector::const_iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++ )
  378. for ( vector<NICE::Vector *>::const_iterator jLOOP_ALL = iLOOP_ALL->second.begin();
  379. jLOOP_ALL != iLOOP_ALL->second.end();
  380. jLOOP_ALL++, k++ )
  381. {
  382. const NICE::Vector & ( x ) = * ( *jLOOP_ALL );
  383. vecSet.push_back ( x );
  384. vecSetLabels[k] = iLOOP_ALL->first;
  385. }
  386. }
  387. void LabeledSetVector::removePointersToDataWithoutDeletion()
  388. {
  389. //remove pointers in the order-struct if needed
  390. if ( ! this->selection ) {
  391. for ( Permutation::iterator i = this->insertOrder.begin();
  392. i != this->insertOrder.end();
  393. i++ )
  394. {
  395. i->second = NULL;
  396. }
  397. }
  398. //remove pointers in normal map
  399. for ( std::map< int, std::vector<NICE::Vector *> >::iterator iter = this->begin(); iter != this->end(); ++iter )
  400. {
  401. for ( int j = 0; j < (int)iter->second.size(); j++ )
  402. {
  403. iter->second[j] = NULL;
  404. }
  405. }
  406. }
  407. #endif