/**
* @file LabeledFileList.cpp
* @brief reads images from directory
* @author Erik Rodner
* @date 17.09.2007
*/
#include "core/image/ImageT.h"
#include "core/vector/VectorT.h"
#include "core/vector/MatrixT.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "core/basics/StringTools.h"
#include "core/basics/FileMgt.h"

#include "vislearning/cbaselib/LabeledFileList.h"

using namespace OBJREC;
using namespace std;
using namespace NICE;

/** Creates a file list reader with debug output switched on. */
LabeledFileList::LabeledFileList()
{
  debug_dataset = true;
}

LabeledFileList::~LabeledFileList()
{
}

/**
 * @brief Looks up the class value stored for an example in a comma-separated file.
 *
 * The file is read line by line; each line is split at ',' into numeric values.
 * The first line whose first value equals @p exampleID determines the result.
 *
 * @param lfile path of the comma-separated label file
 * @param exampleID numeric identifier to search for in the first column
 * @return the second column of the matching line converted to int, or -1 if no
 *         line matched (this includes the case that the file could not be opened)
 */
int LabeledFileList::getClassFromNumber ( const std::string & lfile, const int exampleID ) const
{
  int cvalue = -1;

  std::ifstream file ( lfile.c_str() );
  std::string str;
  while ( std::getline ( file, str ) )
  {
    // NOTE(review): valList is pre-sized to two entries; whether splitVector
    // keeps that size for short or empty lines is not visible here - confirm.
    NICE::Vector valList ( 2, 0 );
    NICE::StringTools::splitVector ( str, ',', valList );

    if ( static_cast<int> ( valList[0] ) == exampleID )
    {
      cvalue = static_cast<int> ( valList[1] );
      break;
    }
  }

  return cvalue;
}

/**
 * @brief Loads the label information according to a given label file format.
 *
 * The name of the label file is derived from the image file name by applying the
 * regular expression substitution given by the tags "localization_pattern" and
 * "localization_subst" (section "main").
 *
 * Supported values of the tag "localization_format" (section "main"):
 * - "image": usage of single channel images containing label regions
 * - "imagergb": usage of multi channel color images containing label regions;
 *   if the label image cannot be read, the image file itself is read and
 *   all of its values are set to zero
 * - "csv": experimental; the label of the whole image is looked up in a
 *   comma-separated file using the number following "ID" in the image file name
 * - "polygon": obtaining bounding boxes from textual label files (used with e.g. PASCAL dataset)
 * - "polygon_siftflow": like "polygon", but read with FILEFORMAT_POLYGON_SIFTFLOW
 * - "imagelabeler": obtaining label information (currently only bounding boxes)
 *   from the separate label files (XML like) created with the ImageLabeler
 *   ( <file name>_labeled.dat ). @see ImageInfo
 *
 * The program is terminated with exit(-1) if the substitution fails; an Exception
 * is thrown for any other format string.
 *
 * @param classnames class containing all potential class names (label categories)
 * @param classno class number of the image; only used by the format "image"
 * @param file file name of the image the label information belongs to
 * @param conf configuration structure containing the information from a loaded
 *        config file; it must contain the tag "localization_format" in section
 *        "main" in order to obtain label information from a file
 * @return newly allocated localization result, or NULL if the format tag is
 *         missing, if the label file does not exist (all formats except
 *         "imagergb"), or if the label image cannot be read (format "image").
 *         NOTE(review): who releases the returned object is not visible here.
 */
LocalizationResult *LabeledFileList::getLocalizationInfo ( const ClassNames & classnames,
    int classno,
    const std::string & file,
    const Config & conf ) const
{
  /*
   localization_pattern = image
   localization_subst = mask
   localization_format = image
  */
  std::string format = conf.gS ( "main", "localization_format", "unknown" );
  if ( format == "unknown" )
    return NULL;

  std::string pattern = conf.gS ( "main", "localization_pattern" );
  std::string subst = conf.gS ( "main", "localization_subst" );

  std::string lfile = file;
  if ( ! StringTools::regexSubstitute ( lfile, pattern, subst ) )
  {
    fprintf ( stderr, "Unable to substitute using pattern #%s# and string #%s#\n",
              pattern.c_str(), lfile.c_str() );
    exit ( -1 );
  }

  // "imagergb" has its own fallback for missing label images further below
  if ( ! FileMgt::fileExists ( lfile ) && format != "imagergb" )
    return NULL;

  if ( debug_dataset )
  {
    fprintf ( stderr, "LabeledFileList: reading localization information %s\n", lfile.c_str() );
  }

  LocalizationResult *lr = NULL;

  if ( format == "image" )
  {
    NICE::Image mask;
    try {
      mask.read ( lfile );
    } catch ( ImageException & ) {
      fprintf ( stderr, "WARNING: unable to open file %s (no localization info provided)\n",
                lfile.c_str() );
      return NULL;
    }
    lr = new LocalizationResult ( &classnames, mask, classno );
  }
  else if ( format == "imagergb" )
  {
    NICE::ColorImage mask;
    try {
      mask.read ( lfile );
    } catch ( ImageException & ) {
      fprintf ( stderr, "WARNING: unable to open file %s (no localization info provided) - creating one with background class only!\n",
                lfile.c_str() );
      // fall back to an all-zero mask that has the size of the image itself
      mask.read ( file );
      mask.set ( 0, 0, 0 );
    }
    lr = new LocalizationResult ( &classnames, mask );
  }
  else if ( format == "csv" )
  {
    // CAUTION! This is for experimental use only and needs a certain configuration of
    // the csv file and scheme for the image file names!
    NICE::ColorImage mask;
    mask.read ( file );

    // The example number is expected directly behind the marker "ID" in the
    // file name. substr() takes a length as second argument, not an end
    // position, so the field width is passed explicitly.
    // NOTE(review): a width of 6 characters is derived from the former call
    // substr(found+2, found+8) - confirm against the file naming scheme.
    const std::size_t idFieldWidth = 6;

    int exampleID = -1;
    const std::size_t found = file.find ( "ID" );
    if ( found != std::string::npos )
    {
      const std::string exampleIDStr = file.substr ( found + 2, idFieldWidth );
      exampleID = std::atoi ( exampleIDStr.c_str() );
    }
    else
    {
      // without this check found + 2 wraps around and arbitrary characters
      // of the path would be parsed as example number
      fprintf ( stderr, "LabeledFileList: WARNING: no \"ID\" marker found in file name %s\n",
                file.c_str() );
    }

    const int g = getClassFromNumber ( lfile, exampleID );
    mask.set ( static_cast<uchar> ( g ), static_cast<uchar> ( g ), static_cast<uchar> ( g ) );
    lr = new LocalizationResult ( &classnames, mask );
  }
  else if ( format == "polygon" )
  {
    lr = new LocalizationResult ( &classnames );
    lr->read ( lfile, LocalizationResult::FILEFORMAT_POLYGON );

    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: object localization %d\n", static_cast<int> ( lr->size() ) );
  }
  else if ( format == "polygon_siftflow" )
  {
    lr = new LocalizationResult ( &classnames );
    lr->read ( lfile, LocalizationResult::FILEFORMAT_POLYGON_SIFTFLOW );

    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: object localization %d\n", static_cast<int> ( lr->size() ) );
  }
  else if ( format == "imagelabeler" )
  {
    lr = new LocalizationResult ( &classnames );
    lr->loadImageInfo ( lfile );
  }
  else
  {
    fthrow ( Exception, "Localization format not yet supported !!\n" );
  }

  if ( debug_dataset && lr != NULL )
    fprintf ( stderr, "%s (%d objects)\n", lfile.c_str(), static_cast<int> ( lr->size() ) );

  return lr;
}

/**
 * @brief Collects all files below a directory whose names match a regular expression.
 *
 * The expression is built from @p dir and the tag "pattern" (section "main").
 * The class code of a file is either the submatch with index "classname_field"
 * (default 1) or, if set, the value of the tag "fixed_classname". Files with a
 * class code unknown to @p classnames are ignored.
 *
 * @param dir directory which is searched recursively
 * @param datasetconf configuration of the dataset
 * @param classnames all known class names
 * @param ls labeled set the matching files are added to
 * @param localizationInfoDisabled if true, no localization information is loaded
 */
void LabeledFileList::getFromPattern (
  const std::string & dir,
  const Config & datasetconf,
  const ClassNames & classnames,
  LabeledSet & ls,
  bool localizationInfoDisabled ) const
{
  // Join directory and pattern with exactly one slash. The emptiness check
  // avoids the wrap-around of dir.length() - 1 for an empty directory string.
  std::string filemask = dir;
  if ( ! filemask.empty() && filemask[filemask.length() - 1] != '/' )
    filemask += "/";
  filemask += datasetconf.gS ( "main", "pattern" );

  const int classnameField = datasetconf.gI ( "main", "classname_field", 1 );
  const std::string fixedClassname = datasetconf.gS ( "main", "fixed_classname", "" );

  std::vector<std::string> files;
  FileMgt::DirectoryRecursive ( files, dir );
  fprintf ( stderr, "LabeledFileList: Files: %d\n", static_cast<int> ( files.size() ) );

  std::sort ( files.begin(), files.end() );

  for ( std::vector<std::string>::const_iterator fileIt = files.begin();
        fileIt != files.end(); ++fileIt )
  {
    const std::string & file = *fileIt;
    std::vector<std::string> submatches;

    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: next file: %s\n", file.c_str() );

    bool match = StringTools::regexMatch ( file, filemask, submatches );

    // the class code is taken from a submatch, so this submatch has to exist
    if ( fixedClassname == "" &&
         ( classnameField < 0 || static_cast<int> ( submatches.size() ) <= classnameField ) )
      match = false;

    if ( ! match )
    {
      if ( debug_dataset )
        fprintf ( stderr, "LabeledFileList: WARNING: %s does not match filemask: %s!!\n",
                  file.c_str(), filemask.c_str() );
    }
    else
    {
      const std::string classcode = ( fixedClassname == "" ) ? submatches[classnameField] : fixedClassname;

      if ( classnames.existsClassCode ( classcode ) )
      {
        const int classno = classnames.classno ( classcode );

        LocalizationResult *lr = NULL;
        if ( ! localizationInfoDisabled )
          lr = getLocalizationInfo ( classnames, classno, file, datasetconf );

        if ( debug_dataset )
          fprintf ( stderr, "LabeledFileList: LabeledSet: add %s (%d)\n", file.c_str(), classno );

        if ( lr == NULL )
        {
          ls.add ( classno, new ImageInfo ( file ) );
        }
        else
        {
          ls.add ( classno, new ImageInfo ( file, lr ) );
          if ( debug_dataset )
            fprintf ( stderr, "LabeledFileList: LocalizationResult added!\n" );
        }
      }
      else if ( debug_dataset )
      {
        for ( std::vector<std::string>::const_iterator subIt = submatches.begin();
              subIt != submatches.end(); ++subIt )
        {
          fprintf ( stderr, "LabeledFileList: submatch: %s\n", subIt->c_str() );
        }
        fprintf ( stderr, "LabeledFileList: WARNING: code %s ignored !\n", classcode.c_str() );
      }
    }

    if ( debug_dataset )
      fprintf ( stderr, "LabeledFileList: filename processed\n" );
  }

  cerr << "directory " << dir << " loaded..." << endl;
  ls.printInformation();
}

/**
 * @brief Reads a labeled set from a text file listing the images.
 *
 * The list consists of whitespace-separated entries "<class code> <file name>".
 * If the tag "fixed_classname" (section "main") is set, the list contains file
 * names only and every file gets this class code. File names are resolved with
 * Config::getAbsoluteFilenameRelativeToThisConfig. Entries with a class code
 * unknown to @p classnames are ignored.
 *
 * @param filelist file name of the list
 * @param datasetconf configuration of the dataset
 * @param classnames all known class names
 * @param ls labeled set the files are added to
 * @param localizationInfoDisabled if true, no localization information is loaded
 * @throws IOException if the list cannot be opened
 */
void LabeledFileList::getFromList (
  const std::string & filelist,
  const Config & datasetconf,
  const ClassNames & classnames,
  LabeledSet & ls,
  bool localizationInfoDisabled ) const
{
  if ( debug_dataset )
    fprintf ( stderr, "Reading file list: %s\n", filelist.c_str() );

  ifstream ifs ( filelist.c_str(), ios::in );
  if ( ! ifs.good() )
    fthrow ( IOException, "File list " << filelist << " not found !" );

  const std::string fixedClassname = datasetconf.gS ( "main", "fixed_classname", "" );

  // the loop ends with the first extraction that fails (usually end of file)
  for ( ;; )
  {
    std::string classcode;
    std::string file;

    if ( fixedClassname == "" )
    {
      if ( ! ( ifs >> classcode ) )
        break;
    }
    else
    {
      classcode = fixedClassname;
    }

    if ( ! ( ifs >> file ) )
      break;

    file = datasetconf.getAbsoluteFilenameRelativeToThisConfig ( file );

    if ( classnames.existsClassCode ( classcode ) )
    {
      const int classno = classnames.classno ( classcode );

      LocalizationResult *lr = NULL;
      if ( ! localizationInfoDisabled )
        lr = getLocalizationInfo ( classnames, classno, file, datasetconf );

      if ( debug_dataset )
        cerr << "Adding file " << file << " with classno " << classno << endl;

      if ( lr == NULL )
        ls.add ( classno, new ImageInfo ( file ) );
      else
        ls.add ( classno, new ImageInfo ( file, lr ) );
    }
    else if ( debug_dataset )
    {
      fprintf ( stderr, "WARNING: code %s ignored !\n", classcode.c_str() );
    }
  }

  if ( debug_dataset )
    ls.printInformation();
}

/**
 * @brief Fills a labeled set using the source selected in the dataset configuration.
 *
 * The tags of section "main" are checked in this order:
 * - "pattern": files below @p dir matching a regular expression (getFromPattern)
 * - "filelist": text file listing the images (getFromList)
 * - "factoryxml": XML file handed to the labeled set factory, only if a factory is set
 *
 * File names given by "filelist" and "factoryxml" are taken relative to @p dir
 * unless they start with '/'. If none of the sources is available, the program
 * is terminated with exit(-1).
 *
 * @param dir base directory of the dataset
 * @param datasetconf configuration of the dataset
 * @param classnames all known class names
 * @param ls labeled set the files are added to
 * @param localizationInfoDisabled if true, no localization information is loaded
 * @param debugDataset new value of the debug flag of this object
 */
void LabeledFileList::get (
  const std::string & dir,
  const Config & datasetconf,
  const ClassNames & classnames,
  LabeledSet & ls,
  bool localizationInfoDisabled,
  bool debugDataset )
{
  const std::string pattern = datasetconf.gS ( "main", "pattern", "" );
  std::string filelist = datasetconf.gS ( "main", "filelist", "" );
  std::string factoryxmlfile = datasetconf.gS ( "main", "factoryxml", "" );

  this->debug_dataset = debugDataset;

  if ( ! pattern.empty() )
  {
    getFromPattern ( dir, datasetconf, classnames, ls, localizationInfoDisabled );
  }
  else if ( ! filelist.empty() )
  {
    if ( filelist[0] != '/' )
      filelist = dir + "/" + filelist;

    getFromList ( filelist, datasetconf, classnames, ls, localizationInfoDisabled );
  }
  else if ( ! factoryxmlfile.empty() && m_pLabeledSetFactory != NULL )
  {
    if ( factoryxmlfile[0] != '/' )
      factoryxmlfile = dir + "/" + factoryxmlfile;

    m_pLabeledSetFactory->createLabeledSetFromXml ( factoryxmlfile, datasetconf, classnames, ls );
  }
  else
  {
    fprintf ( stderr, "LabeledFileList: Unable to obtain labeled file list\n" );
    exit ( -1 );
  }
}