/** * @file compressObjectBankFeatures.cpp * @brief convert ObjectBank features to a sparse histogram representation * @author Erik Rodner * @date 01/23/2012 */ #include #include "core/basics/Config.h" #include "vislearning/cbaselib/MultiDataset.h" #include "vislearning/cbaselib/MutualInformation.h" #include "vislearning/baselib/Globals.h" using namespace std; using namespace NICE; using namespace OBJREC; const bool use_standard = false; // experimental setting Vector transformFeature ( const vector & src ) { Vector dst; if ( use_standard ) { dst = Vector(src); } else { if ( src.size() != 44604 ) fthrow(Exception, "This is not a ObjectBank feature! The size is: " << src.size()); dst.resize ( 177 ); dst.set(0.0); // This was a bad idea: taking the maximum /* for ( uint i = 0 ; i < 177; i++ ) dst[i] = *max_element( src.begin() + i*252, src.begin() + (i+1)*252 ); // even a worse idea: summation for ( uint i = 0 ; i < 177; i++ ) for ( uint j = 0 ; j < 252 ; j++ ) dst[i] += src[j + i*252]; */ } return dst; } void readPlainData ( const Config & conf, const LabeledSet & ls, LabeledSetVector & X, string extension = ".txt" ) { string cacheroot = conf.gS("cache", "root"); X.clear(); LOOP_ALL_S ( ls ) { EACH_S(classno, imgfn); Globals::setCurrentImgFN ( imgfn ); string cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + extension; cerr << "fn: " << imgfn << " cachefn: " << cachefn << endl; vector x; ifstream ifs ( cachefn.c_str(), ios::in ); if ( ! ifs.good() ) fthrow(Exception, "File not found: " << cachefn ); while ( !ifs.eof() ) { double val = 0.0; if ( ifs >> val ) x.push_back(val); } ifs.close(); X.add ( classno, transformFeature( x ) ); } } void saveFeatures ( const Config & conf, const map & features, const Vector & thresholds, const LabeledSet & ls, const string & srcExtension, const string & dstExtension ) { string cacheroot = conf.gS("cache", "root"); LOOP_ALL_S ( ls ) { EACH_S(classno, imgfn); Globals::setCurrentImgFN ( imgfn ); string cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + srcExtension; cerr << "processing " << cachefn << endl; vector x; ifstream ifs ( cachefn.c_str(), ios::in ); if ( ! ifs.good() ) fthrow(Exception, "File not found: " << cachefn ); while ( !ifs.eof() ) { double val = 0.0; if ( ifs >> val ) x.push_back(val); } ifs.close(); Vector xt = transformFeature(x); Vector xnew ( features.size() ); int index = 0; for ( map::const_iterator j = features.begin(); j != features.end(); j++, index++ ) { int srcIndex = j->second; if ( srcIndex >= xt.size() ) fthrow(Exception, "Bad bug in saveFeatures(...)" ); xnew[index] = (xt[srcIndex] > thresholds[srcIndex]) ? 1.0 : 0.0; } // If we do not normalize our features, we pretty much get into // trouble with the minimum kernel...because the vector with only values of "1" is very // much similar to every vector xnew.normalizeL1(); string dst_cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + dstExtension; ofstream ofs ( dst_cachefn.c_str(), ios::out ); if ( ! ofs.good() ) fthrow(Exception, "Unable to write to " << dst_cachefn ); ofs << xnew << endl; ofs.close (); } } /** convert ObjectBank features to a sparse histogram representation */ int main (int argc, char **argv) { std::set_terminate(__gnu_cxx::__verbose_terminate_handler); Config conf ( argc, argv ); MultiDataset md ( &conf ); Vector y; const LabeledSet *train = md["train"]; LabeledSetVector trainData; readPlainData ( conf, *train, trainData, ".jpg.feat" ); // compute optimal thresholds for thresholding MutualInformation mi ( true /*verbose*/ ); Vector thresholds; Vector mis; mi.computeThresholdsOverall ( trainData, thresholds, mis ); cerr << mis << endl; int numFeatures = conf.gI("main", "d", mis.size() ); cerr << "Retaining " << numFeatures << " features ..." << endl; map features; for ( uint i = 0 ; i < mis.size(); i++ ) features.insert ( pair ( - mis[i], i ) ); // all features should be now sorted with features[0] being the most informative one // remove boring features map::iterator j = features.begin(); advance( j, numFeatures ); features.erase ( j, features.end() ); const LabeledSet *test = md["test"]; string dstExtention = conf.gS("main", "dstext", ".txt"); saveFeatures ( conf, features, thresholds, *train, ".jpg.feat", dstExtention ); saveFeatures ( conf, features, thresholds, *test, ".jpg.feat", dstExtention ); return 0; }