LabeledSet.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. /**
  2. * @file LabeledSet.cpp
  3. * @brief Labeled set of vectors
  4. * @author Erik Rodner
  5. * @date 07.09.2007
  6. */
  7. #ifndef LABELEDSETTCCINCLUDE
  8. #define LABELEDSETTCCINCLUDE
  9. #include <vislearning/nice_nonvis.h>
  10. #include <iostream>
  11. #include "vislearning/cbaselib/LabeledSet.h"
  12. #include "core/basics/StringTools.h"
  13. using namespace OBJREC;
  14. using namespace std;
  15. using namespace NICE;
  16. LabeledSet::LabeledSet ( bool _selection ) : selection(_selection)
  17. {
  18. }
  19. LabeledSet::~LabeledSet ()
  20. {
  21. //This is a big problem when using selections
  22. //clear();
  23. //fprintf (stderr, "LabeledSet: destructor (FIXME: memory leak)\n");
  24. }
  25. int LabeledSet::count ( int classno ) const
  26. {
  27. const_iterator i = find(classno);
  28. return ( i == end() ) ? 0 : i->second.size();
  29. }
  30. int LabeledSet::count () const
  31. {
  32. int mycount = 0;
  33. for ( const_iterator i = begin() ; i != end() ; i++ )
  34. {
  35. mycount += i->second.size();
  36. }
  37. return mycount;
  38. }
  39. void LabeledSet::clear ()
  40. {
  41. if ( !selection )
  42. {
  43. for ( Permutation::const_iterator i = insertOrder.begin();
  44. i != insertOrder.end();
  45. i++ )
  46. {
  47. const ImageInfo *s = i->second;
  48. delete s;
  49. }
  50. }
  51. std::map< int, vector<ImageInfo *> >::clear();
  52. }
  53. void LabeledSet::add ( int classno, ImageInfo *x )
  54. {
  55. if ( selection ) {
  56. fprintf (stderr, "Operation not available for selections !\n");
  57. exit(-1);
  58. }
  59. iterator i = find(classno);
  60. if ( i == end() ) {
  61. operator[](classno) = vector<ImageInfo *>();
  62. i = find(classno);
  63. }
  64. i->second.push_back ( x );
  65. insertOrder.push_back ( ElementPointer ( classno, x ) );
  66. }
  67. void LabeledSet::getPermutation ( Permutation & permutation ) const
  68. {
  69. permutation = Permutation ( insertOrder );
  70. }
  71. void LabeledSet::add_reference ( int classno, ImageInfo *pointer )
  72. {
  73. iterator i = find(classno);
  74. if ( i == end() ) {
  75. operator[](classno) = vector<ImageInfo *>();
  76. i = find(classno);
  77. }
  78. i->second.push_back ( pointer );
  79. insertOrder.push_back ( ElementPointer ( classno, pointer ) );
  80. }
  81. void LabeledSet::getClasses ( std::vector<int> & classes ) const
  82. {
  83. for ( const_iterator i = begin(); i != end(); i++ )
  84. classes.push_back ( i->first );
  85. }
  86. void LabeledSet::printInformation () const
  87. {
  88. for ( const_iterator i = begin(); i != end(); i++ )
  89. {
  90. cerr << "class " << i->first << ": " << i->second.size() << endl;
  91. }
  92. }
  93. /************************************
  94. LabeledSetVector
  95. *************************************/
  96. LabeledSetVector::LabeledSetVector (bool _selection) : selection(_selection)
  97. {}
  98. LabeledSetVector::~LabeledSetVector ()
  99. {
  100. // FIXME: THIS is a big problem with selections !!!
  101. //clear();
  102. }
  103. int LabeledSetVector::dimension () const
  104. {
  105. if ( insertOrder.size() <= 0 ) return -1;
  106. return (*(begin()->second.begin()))->size();
  107. //insertOrder[0].second->size();
  108. }
  109. void LabeledSetVector::restore (istream & is, int format)
  110. {
  111. if ( format == FILEFORMAT_RAW )
  112. restoreRAW ( is );
  113. else
  114. restoreASCII ( is, format );
  115. }
  116. void LabeledSetVector::restoreASCII (istream & is, int format)
  117. {
  118. const int bufsize = 1024*1024;
  119. char *buf = new char[bufsize];
  120. std::string buf_s;
  121. vector<string> elements;
  122. vector<string> pair;
  123. // maximal dimension of all feature vectors;
  124. int dataset_dimension = -numeric_limits<int>::max();
  125. while (! is.eof())
  126. {
  127. elements.clear();
  128. int classno;
  129. if ( ! (is >> classno) ) {
  130. break;
  131. }
  132. is.get ( buf, bufsize );
  133. buf_s = buf;
  134. if ( buf_s.size() <= 0 )
  135. break;
  136. StringTools::split ( buf_s, ' ', elements );
  137. if ( elements.size() <= 1 )
  138. break;
  139. int dimension = - numeric_limits<int>::max();
  140. if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
  141. {
  142. // in this format we have to determine the maximum index
  143. for ( vector<string>::const_iterator i = elements.begin()+1;
  144. i != elements.end();
  145. i++ )
  146. {
  147. pair.clear();
  148. StringTools::split ( *i, ':', pair );
  149. if ( pair.size() != 2 ) continue;
  150. int index = atoi(pair[0].c_str());
  151. if ( index > dimension )
  152. dimension = index;
  153. }
  154. if ( dimension > dataset_dimension )
  155. dataset_dimension = dimension;
  156. } else {
  157. // skip first element because of white space
  158. dimension = elements.size()-1;
  159. }
  160. NICE::Vector vec ( dimension, 0.0 );
  161. size_t l = 0;
  162. // skip first element because of white space
  163. for ( vector<string>::const_iterator i = elements.begin()+1;
  164. i != elements.end();
  165. i++, l++ )
  166. {
  167. if ( format == FILEFORMAT_INDEX )
  168. {
  169. pair.clear();
  170. StringTools::split ( *i, ':', pair );
  171. if ( pair.size() == 2 ) {
  172. double val = atof ( pair[1].c_str() );
  173. vec[l] = val;
  174. }
  175. } else if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
  176. {
  177. pair.clear();
  178. StringTools::split ( *i, ':', pair );
  179. if ( pair.size() == 2 ) {
  180. double val = atof ( pair[1].c_str() );
  181. int index = atoi ( pair[0].c_str() ) - 1;
  182. vec[index] = val;
  183. }
  184. } else {
  185. vec[l] = atof( i->c_str() );
  186. }
  187. }
  188. add( classno, vec );
  189. }
  190. delete [] buf;
  191. if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
  192. // we have to resize all feature vectors of the dataset to dataset_dimension
  193. for ( LabeledSetVector::iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++)
  194. for ( vector<NICE::Vector *>::iterator jLOOP_ALL = iLOOP_ALL->second.begin();
  195. jLOOP_ALL != iLOOP_ALL->second.end();
  196. jLOOP_ALL++ )
  197. {
  198. NICE::Vector *x = (*jLOOP_ALL);
  199. uint old_dimension = x->size();
  200. // resize the vector to the dataset dimension
  201. x->resize(dataset_dimension);
  202. // set all elements to zero, which are new after the resize operation
  203. for ( uint k = old_dimension; k < x->size(); k++ )
  204. (*x)[k] = 0.0;
  205. }
  206. }
  207. }
  208. void LabeledSetVector::store (ostream & os, int format) const
  209. {
  210. for ( Permutation::const_iterator i = insertOrder.begin();
  211. i != insertOrder.end();
  212. i++ )
  213. {
  214. int classno = i->first;
  215. const NICE::Vector & x = *(i->second);
  216. storeElement ( os, classno, x, format );
  217. }
  218. }
  219. void LabeledSetVector::storeElement ( ostream & os, int classno, const NICE::Vector & x, int format )
  220. {
  221. if ( format != FILEFORMAT_RAW ) {
  222. os << classno << " ";
  223. for ( size_t k = 0 ; k < x.size() ; k++ )
  224. {
  225. if ( format == FILEFORMAT_INDEX )
  226. os << k+1 << ":" << x[k];
  227. else if ( format == FILEFORMAT_NOINDEX )
  228. os << x[k];
  229. else if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
  230. if ( x[k] != 0.0 )
  231. os << k+1 << ":" << x[k];
  232. }
  233. if ( k != x.size() )
  234. os << " ";
  235. }
  236. os << endl;
  237. } else {
  238. const double *data = x.getDataPointer();
  239. int dimension = x.size();
  240. os.write ( (char *)&classno, sizeof(int) );
  241. os.write ( (char *)&dimension, sizeof(int) );
  242. os.write ( (char *)data, sizeof(double)*dimension );
  243. }
  244. }
  245. void LabeledSetVector::restoreRAW (istream & is)
  246. {
  247. while (! is.eof())
  248. {
  249. int classno;
  250. int dimension;
  251. is.read ( (char *)&classno, sizeof(int) );
  252. if ( is.gcount() != sizeof(int) )
  253. return;
  254. is.read ( (char *)&dimension, sizeof(int) );
  255. if ( is.gcount() != sizeof(int) )
  256. return;
  257. NICE::Vector vec;
  258. try {
  259. vec.resize(dimension);
  260. } catch ( std::bad_alloc ) {
  261. fthrow(IOException, "Unable to allocate a vector with size " << dimension << "." << endl
  262. << "(debug: class " << classno << " ; " << "sizeof(int) = " << 8*sizeof(int) << " Bit ; " << endl
  263. << "elements read = " << count() << " )" << endl );
  264. }
  265. double *data = vec.getDataPointer();
  266. is.read ( (char *)data, sizeof(double)*dimension );
  267. if ( (int)is.gcount() != (int)sizeof(double)*dimension )
  268. return;
  269. for ( int k = 0 ; k < dimension ; k++ )
  270. if ( isnan(data[k]) ) {
  271. cerr << "WARNING: nan's found !!" << endl;
  272. data[k] = 0.0;
  273. }
  274. add( classno, vec );
  275. }
  276. }
  277. LabeledSetVector::ElementPointer LabeledSetVector::pickRandomSample () const
  278. {
  279. if ( insertOrder.size() <= 0 ) {
  280. fprintf (stderr, "LabeledSet::pickRandomSample: failure !\n");
  281. exit(-1);
  282. }
  283. int selection = rand() % insertOrder.size();
  284. return insertOrder[selection];
  285. }
  286. int LabeledSetVector::count ( int classno ) const
  287. {
  288. const_iterator i = find(classno);
  289. return ( i == end() ) ? 0 : i->second.size();
  290. }
  291. int LabeledSetVector::count () const
  292. {
  293. int mycount = 0;
  294. for ( const_iterator i = begin() ; i != end() ; i++ )
  295. mycount += i->second.size();
  296. return mycount;
  297. }
  298. int LabeledSetVector::pickRandomSample ( int classno, ElementPointer & i ) const
  299. {
  300. const_iterator j = find(classno);
  301. if ( j == end() ) return -1;
  302. const vector<Vector *> & l = j->second;
  303. int num = rand() % l.size();
  304. i.first = classno;
  305. i.second = l[num];
  306. return classno;
  307. }
  308. void LabeledSetVector::clear ()
  309. {
  310. if ( ! selection ) {
  311. for ( Permutation::const_iterator i = insertOrder.begin();
  312. i != insertOrder.end();
  313. i++ )
  314. {
  315. const NICE::Vector *s = i->second;
  316. delete s;
  317. }
  318. insertOrder.clear();
  319. }
  320. std::map< int, vector<Vector *> >::clear();
  321. }
  322. void LabeledSetVector::add ( int classno, const NICE::Vector & x )
  323. {
  324. if ( selection ) {
  325. fprintf (stderr, "Add operation not available for selections !\n");
  326. exit(-1);
  327. }
  328. iterator i = find(classno);
  329. if ( i == end() ) {
  330. operator[](classno) = vector<Vector *>();
  331. i = find(classno);
  332. }
  333. NICE::Vector *xp = new Vector(x);
  334. i->second.push_back ( xp );
  335. insertOrder.push_back ( ElementPointer ( classno, xp ) );
  336. }
  337. void LabeledSetVector::getPermutation ( Permutation & permutation ) const
  338. {
  339. permutation = Permutation ( insertOrder );
  340. }
  341. void LabeledSetVector::add_reference ( int classno, NICE::Vector *pointer )
  342. {
  343. iterator i = find(classno);
  344. if ( i == end() ) {
  345. operator[](classno) = vector<Vector *>();
  346. i = find(classno);
  347. }
  348. i->second.push_back ( pointer );
  349. insertOrder.push_back ( ElementPointer ( classno, pointer ) );
  350. }
  351. void LabeledSetVector::getClasses ( std::vector<int> & classes ) const
  352. {
  353. for ( const_iterator i = begin(); i != end(); i++ )
  354. classes.push_back ( i->first );
  355. }
  356. void LabeledSetVector::printInformation () const
  357. {
  358. for ( const_iterator i = begin(); i != end(); i++ )
  359. {
  360. cerr << "class " << i->first << ": " << i->second.size() << endl;
  361. }
  362. }
  363. int LabeledSetVector::getMaxClassno() const
  364. {
  365. int maxclassno = 0;
  366. for ( const_iterator i = begin(); i != end(); i++ )
  367. if ( i->first > maxclassno )
  368. maxclassno = i->first;
  369. return maxclassno;
  370. }
  371. void LabeledSetVector::getFlatRepresentation ( VVector & vecSet, NICE::Vector & vecSetLabels ) const
  372. {
  373. int k = 0;
  374. vecSetLabels.resize(count());
  375. for ( LabeledSetVector::const_iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++)
  376. for ( vector<NICE::Vector *>::const_iterator jLOOP_ALL = iLOOP_ALL->second.begin();
  377. jLOOP_ALL != iLOOP_ALL->second.end();
  378. jLOOP_ALL++,k++ )
  379. {
  380. const NICE::Vector & (x) = *(*jLOOP_ALL);
  381. vecSet.push_back ( x );
  382. vecSetLabels[k] = iLOOP_ALL->first;
  383. }
  384. }
  385. #endif