123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493 |
- /**
- * @file LabeledSet.cpp
- * @brief Labeled set of vectors
- * @author Erik Rodner
- * @date 07.09.2007
- */
- #ifndef LABELEDSETTCCINCLUDE
- #define LABELEDSETTCCINCLUDE
- #include "core/image/ImageT.h"
- #include "core/vector/VectorT.h"
- #include "core/vector/MatrixT.h"
- #include <iostream>
- #include "vislearning/cbaselib/LabeledSet.h"
- #include "core/basics/StringTools.h"
- using namespace OBJREC;
- using namespace std;
- using namespace NICE;
- LabeledSet::LabeledSet ( bool _selection ) : selection ( _selection )
- {
- }
- LabeledSet::~LabeledSet ()
- {
- //This is a big problem when using selections
- //clear();
- //fprintf (stderr, "LabeledSet: destructor (FIXME: memory leak)\n");
- }
- int LabeledSet::count ( int classno ) const
- {
- const_iterator i = find ( classno );
- return ( i == end() ) ? 0 : i->second.size();
- }
- int LabeledSet::count () const
- {
- int mycount = 0;
- for ( const_iterator i = begin() ; i != end() ; i++ )
- {
- mycount += i->second.size();
- }
- return mycount;
- }
- void LabeledSet::clear ()
- {
- if ( !selection )
- {
- for ( Permutation::const_iterator i = insertOrder.begin();
- i != insertOrder.end();
- i++ )
- {
- const ImageInfo *s = i->second;
- delete s;
- }
- }
- std::map< int, vector<ImageInfo *> >::clear();
- }
- void LabeledSet::add ( int classno, ImageInfo *x )
- {
- if ( selection ) {
- fprintf ( stderr, "Operation not available for selections !\n" );
- exit ( -1 );
- }
- iterator i = find ( classno );
- if ( i == end() ) {
- operator[] ( classno ) = vector<ImageInfo *>();
- i = find ( classno );
- }
- i->second.push_back ( x );
- insertOrder.push_back ( ElementPointer ( classno, x ) );
- }
- void LabeledSet::getPermutation ( Permutation & permutation ) const
- {
- permutation = Permutation ( insertOrder );
- }
- void LabeledSet::add_reference ( int classno, ImageInfo *pointer )
- {
- iterator i = find ( classno );
- if ( i == end() ) {
- operator[] ( classno ) = vector<ImageInfo *>();
- i = find ( classno );
- }
- i->second.push_back ( pointer );
- insertOrder.push_back ( ElementPointer ( classno, pointer ) );
- }
- void LabeledSet::getClasses ( std::vector<int> & classes ) const
- {
- for ( const_iterator i = begin(); i != end(); i++ )
- classes.push_back ( i->first );
- }
- void LabeledSet::printInformation () const
- {
- for ( const_iterator i = begin(); i != end(); i++ )
- {
- cerr << "class " << i->first << ": " << i->second.size() << endl;
- }
- }
- /************************************
- LabeledSetVector
- *************************************/
- LabeledSetVector::LabeledSetVector ( bool _selection ) : selection ( _selection )
- {}
- LabeledSetVector::~LabeledSetVector ()
- {
- // FIXME: THIS is a big problem with selections !!!
- //clear();
- }
- int LabeledSetVector::dimension () const
- {
- if ( insertOrder.size() <= 0 ) return -1;
- return ( * ( begin()->second.begin() ) )->size();
- //insertOrder[0].second->size();
- }
- void LabeledSetVector::restore ( istream & is, int format )
- {
- if ( format == FILEFORMAT_RAW )
- restoreRAW ( is );
- else
- restoreASCII ( is, format );
- }
- void LabeledSetVector::restoreASCII ( istream & is, int format )
- {
- const int bufsize = 1024 * 1024;
- char *buf = new char[bufsize];
- std::string buf_s;
- vector<string> elements;
- vector<string> pair;
- // maximal dimension of all feature vectors;
- int dataset_dimension = -numeric_limits<int>::max();
- while ( ! is.eof() )
- {
- elements.clear();
- int classno;
- if ( ! ( is >> classno ) ) {
- break;
- }
- is.get ( buf, bufsize );
- buf_s = buf;
- if ( buf_s.size() <= 0 )
- break;
- StringTools::split ( buf_s, ' ', elements );
- if ( elements.size() <= 1 )
- break;
- int dimension = - numeric_limits<int>::max();
- if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
- {
- // in this format we have to determine the maximum index
- for ( vector<string>::const_iterator i = elements.begin() + 1;
- i != elements.end();
- i++ )
- {
- pair.clear();
- StringTools::split ( *i, ':', pair );
- if ( pair.size() != 2 ) continue;
- int index = atoi ( pair[0].c_str() );
- if ( index > dimension )
- dimension = index;
- }
- if ( dimension > dataset_dimension )
- dataset_dimension = dimension;
- } else {
- // skip first element because of white space
- dimension = elements.size() - 1;
- }
- NICE::Vector vec ( dimension, 0.0 );
- size_t l = 0;
- // skip first element because of white space
- for ( vector<string>::const_iterator i = elements.begin() + 1;
- i != elements.end();
- i++, l++ )
- {
- if ( format == FILEFORMAT_INDEX )
- {
- pair.clear();
- StringTools::split ( *i, ':', pair );
- if ( pair.size() == 2 ) {
- double val = atof ( pair[1].c_str() );
- vec[l] = val;
- }
- } else if ( format == FILEFORMAT_INDEX_SPARSE_ONE )
- {
- pair.clear();
- StringTools::split ( *i, ':', pair );
- if ( pair.size() == 2 ) {
- double val = atof ( pair[1].c_str() );
- int index = atoi ( pair[0].c_str() ) - 1;
- vec[index] = val;
- }
- } else {
- vec[l] = atof ( i->c_str() );
- }
- }
- add ( classno, vec );
- }
- delete [] buf;
- if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
- // we have to resize all feature vectors of the dataset to dataset_dimension
- for ( LabeledSetVector::iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++ )
- for ( vector<NICE::Vector *>::iterator jLOOP_ALL = iLOOP_ALL->second.begin();
- jLOOP_ALL != iLOOP_ALL->second.end();
- jLOOP_ALL++ )
- {
- NICE::Vector *x = ( *jLOOP_ALL );
- uint old_dimension = x->size();
- // resize the vector to the dataset dimension
- x->resize ( dataset_dimension );
- // set all elements to zero, which are new after the resize operation
- for ( uint k = old_dimension; k < x->size(); k++ )
- ( *x ) [k] = 0.0;
- }
- }
- }
- void LabeledSetVector::store ( ostream & os, int format ) const
- {
- for ( Permutation::const_iterator i = insertOrder.begin();
- i != insertOrder.end();
- i++ )
- {
- int classno = i->first;
- const NICE::Vector & x = * ( i->second );
- storeElement ( os, classno, x, format );
- }
- }
- void LabeledSetVector::storeElement ( ostream & os, int classno, const NICE::Vector & x, int format )
- {
- if ( format != FILEFORMAT_RAW ) {
- os << classno << " ";
- for ( size_t k = 0 ; k < x.size() ; k++ )
- {
- if ( format == FILEFORMAT_INDEX )
- os << k + 1 << ":" << x[k];
- else if ( format == FILEFORMAT_NOINDEX )
- os << x[k];
- else if ( format == FILEFORMAT_INDEX_SPARSE_ONE ) {
- if ( x[k] != 0.0 )
- os << k + 1 << ":" << x[k];
- }
- if ( k != x.size() )
- os << " ";
- }
- os << endl;
- } else {
- const double *data = x.getDataPointer();
- int dimension = x.size();
- os.write ( ( char * ) &classno, sizeof ( int ) );
- os.write ( ( char * ) &dimension, sizeof ( int ) );
- os.write ( ( char * ) data, sizeof ( double ) *dimension );
- }
- }
- void LabeledSetVector::restoreRAW ( istream & is )
- {
- while ( ! is.eof() )
- {
- int classno;
- int dimension;
- is.read ( ( char * ) &classno, sizeof ( int ) );
- if ( is.gcount() != sizeof ( int ) )
- return;
- is.read ( ( char * ) &dimension, sizeof ( int ) );
- if ( is.gcount() != sizeof ( int ) )
- return;
- NICE::Vector vec;
- try {
- vec.resize ( dimension );
- } catch ( std::bad_alloc ) {
- fthrow ( IOException, "Unable to allocate a vector with size " << dimension << "." << endl
- << "(debug: class " << classno << " ; " << "sizeof(int) = " << 8*sizeof ( int ) << " Bit ; " << endl
- << "elements read = " << count() << " )" << endl );
- }
- double *data = vec.getDataPointer();
- is.read ( ( char * ) data, sizeof ( double ) *dimension );
- if ( ( int ) is.gcount() != ( int ) sizeof ( double ) *dimension )
- return;
- for ( int k = 0 ; k < dimension ; k++ )
- if ( NICE::isNaN ( data[k] ) ) {
- cerr << "WARNING: nan's found !!" << endl;
- data[k] = 0.0;
- }
- add ( classno, vec );
- }
- }
- LabeledSetVector::ElementPointer LabeledSetVector::pickRandomSample () const
- {
- if ( insertOrder.size() <= 0 ) {
- fprintf ( stderr, "LabeledSet::pickRandomSample: failure !\n" );
- exit ( -1 );
- }
- int selection = rand() % insertOrder.size();
- return insertOrder[selection];
- }
- int LabeledSetVector::count ( int classno ) const
- {
- const_iterator i = find ( classno );
- return ( i == end() ) ? 0 : i->second.size();
- }
- int LabeledSetVector::count () const
- {
- int mycount = 0;
- for ( const_iterator i = begin() ; i != end() ; i++ )
- mycount += i->second.size();
- return mycount;
- }
- int LabeledSetVector::pickRandomSample ( int classno, ElementPointer & i ) const
- {
- const_iterator j = find ( classno );
- if ( j == end() ) return -1;
- const vector<Vector *> & l = j->second;
- int num = rand() % l.size();
- i.first = classno;
- i.second = l[num];
- return classno;
- }
- void LabeledSetVector::clear ()
- {
- if ( ! selection ) {
- for ( Permutation::const_iterator i = insertOrder.begin();
- i != insertOrder.end();
- i++ )
- {
- const NICE::Vector *s = i->second;
- delete s;
- }
- insertOrder.clear();
- }
- std::map< int, vector<Vector *> >::clear();
- }
- void LabeledSetVector::add ( int classno, const NICE::Vector & x )
- {
- if ( selection ) {
- fprintf ( stderr, "Add operation not available for selections !\n" );
- exit ( -1 );
- }
- iterator i = find ( classno );
- if ( i == end() ) {
- operator[] ( classno ) = vector<Vector *>();
- i = find ( classno );
- }
- NICE::Vector *xp = new Vector ( x );
- i->second.push_back ( xp );
- insertOrder.push_back ( ElementPointer ( classno, xp ) );
- }
- void LabeledSetVector::getPermutation ( Permutation & permutation ) const
- {
- permutation = Permutation ( insertOrder );
- }
- void LabeledSetVector::add_reference ( int classno, NICE::Vector *pointer )
- {
- iterator i = find ( classno );
- if ( i == end() ) {
- operator[] ( classno ) = vector<Vector *>();
- i = find ( classno );
- }
- i->second.push_back ( pointer );
- insertOrder.push_back ( ElementPointer ( classno, pointer ) );
- }
- void LabeledSetVector::getClasses ( std::vector<int> & classes ) const
- {
- for ( const_iterator i = begin(); i != end(); i++ )
- classes.push_back ( i->first );
- }
- void LabeledSetVector::printInformation () const
- {
- for ( const_iterator i = begin(); i != end(); i++ )
- {
- cerr << "class " << i->first << ": " << i->second.size() << endl;
- }
- }
- int LabeledSetVector::getMaxClassno() const
- {
- int maxclassno = 0;
- for ( const_iterator i = begin(); i != end(); i++ )
- if ( i->first > maxclassno )
- maxclassno = i->first;
- return maxclassno;
- }
- void LabeledSetVector::getFlatRepresentation ( VVector & vecSet, NICE::Vector & vecSetLabels ) const
- {
- int k = 0;
- vecSetLabels.resize ( count() );
- for ( LabeledSetVector::const_iterator iLOOP_ALL = begin() ; iLOOP_ALL != end() ; iLOOP_ALL++ )
- for ( vector<NICE::Vector *>::const_iterator jLOOP_ALL = iLOOP_ALL->second.begin();
- jLOOP_ALL != iLOOP_ALL->second.end();
- jLOOP_ALL++, k++ )
- {
- const NICE::Vector & ( x ) = * ( *jLOOP_ALL );
- vecSet.push_back ( x );
- vecSetLabels[k] = iLOOP_ALL->first;
- }
- }
- void LabeledSetVector::removePointersToDataWithoutDeletion()
- {
- //remove pointers in the order-struct if needed
- if ( ! this->selection ) {
- for ( Permutation::iterator i = this->insertOrder.begin();
- i != this->insertOrder.end();
- i++ )
- {
- i->second = NULL;
- }
- }
-
- //remove pointers in normal map
- for ( std::map< int, std::vector<NICE::Vector *> >::iterator iter = this->begin(); iter != this->end(); ++iter )
- {
- for ( int j = 0; j < (int)iter->second.size(); j++ )
- {
- iter->second[j] = NULL;
- }
- }
- }
- #endif
|