123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
/**
 * @file compressObjectBankFeatures.cpp
 * @brief Convert ObjectBank features to a sparse histogram representation.
 * @author Erik Rodner
 * @date 01/23/2012
 */
#include <algorithm>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "core/basics/Config.h"
#include "vislearning/cbaselib/MultiDataset.h"
#include "vislearning/cbaselib/MutualInformation.h"
#include "vislearning/baselib/Globals.h"

using namespace std;
using namespace NICE;
using namespace OBJREC;
- const bool use_standard = false; // experimental setting
- Vector transformFeature ( const vector<double> & src )
- {
- Vector dst;
- if ( use_standard ) {
- dst = Vector(src);
- } else {
- if ( src.size() != 44604 )
- fthrow(Exception, "This is not a ObjectBank feature! The size is: " << src.size());
- dst.resize ( 177 );
- dst.set(0.0);
-
- // This was a bad idea: taking the maximum
- /*
- for ( uint i = 0 ; i < 177; i++ )
- dst[i] = *max_element( src.begin() + i*252, src.begin() + (i+1)*252 );
- // even a worse idea: summation
- for ( uint i = 0 ; i < 177; i++ )
- for ( uint j = 0 ; j < 252 ; j++ )
- dst[i] += src[j + i*252];
- */
- }
- return dst;
- }
- void readPlainData ( const Config & conf, const LabeledSet & ls, LabeledSetVector & X, string extension = ".txt" )
- {
- string cacheroot = conf.gS("cache", "root");
- X.clear();
- LOOP_ALL_S ( ls )
- {
- EACH_S(classno, imgfn);
- Globals::setCurrentImgFN ( imgfn );
- string cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + extension;
-
- cerr << "fn: " << imgfn << " cachefn: " << cachefn << endl;
- vector<double> x;
- ifstream ifs ( cachefn.c_str(), ios::in );
- if ( ! ifs.good() )
- fthrow(Exception, "File not found: " << cachefn );
- while ( !ifs.eof() )
- {
- double val = 0.0;
- if ( ifs >> val )
- x.push_back(val);
- }
- ifs.close();
- X.add ( classno, transformFeature( x ) );
- }
- }
- void saveFeatures ( const Config & conf, const map<double, int> & features, const Vector & thresholds,
- const LabeledSet & ls, const string & srcExtension, const string & dstExtension )
- {
- string cacheroot = conf.gS("cache", "root");
- LOOP_ALL_S ( ls )
- {
- EACH_S(classno, imgfn);
- Globals::setCurrentImgFN ( imgfn );
- string cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + srcExtension;
- cerr << "processing " << cachefn << endl;
- vector<double> x;
- ifstream ifs ( cachefn.c_str(), ios::in );
- if ( ! ifs.good() )
- fthrow(Exception, "File not found: " << cachefn );
- while ( !ifs.eof() ) {
- double val = 0.0;
- if ( ifs >> val )
- x.push_back(val);
- }
- ifs.close();
-
- Vector xt = transformFeature(x);
- Vector xnew ( features.size() );
- int index = 0;
- for ( map<double, int>::const_iterator j = features.begin(); j != features.end(); j++, index++ )
- {
- int srcIndex = j->second;
- if ( srcIndex >= xt.size() )
- fthrow(Exception, "Bad bug in saveFeatures(...)" );
- xnew[index] = (xt[srcIndex] > thresholds[srcIndex]) ? 1.0 : 0.0;
- }
- // If we do not normalize our features, we pretty much get into
- // trouble with the minimum kernel...because the vector with only values of "1" is very
- // much similar to every vector
- xnew.normalizeL1();
- string dst_cachefn = Globals::getCacheFilename ( cacheroot, Globals::SORT_CATEGORIES ) + dstExtension;
- ofstream ofs ( dst_cachefn.c_str(), ios::out );
- if ( ! ofs.good() )
- fthrow(Exception, "Unable to write to " << dst_cachefn );
- ofs << xnew << endl;
- ofs.close ();
- }
-
- }
- /**
-
- convert ObjectBank features to a sparse histogram representation
-
- */
- int main (int argc, char **argv)
- {
- std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
- Config conf ( argc, argv );
-
- MultiDataset md ( &conf );
- Vector y;
- const LabeledSet *train = md["train"];
- LabeledSetVector trainData;
- readPlainData ( conf, *train, trainData, ".jpg.feat" );
-
- // compute optimal thresholds for thresholding
- MutualInformation mi ( true /*verbose*/ );
- Vector thresholds;
- Vector mis;
- mi.computeThresholdsOverall ( trainData, thresholds, mis );
- cerr << mis << endl;
-
- int numFeatures = conf.gI("main", "d", mis.size() );
- cerr << "Retaining " << numFeatures << " features ..." << endl;
-
- map<double, int> features;
- for ( uint i = 0 ; i < mis.size(); i++ )
- features.insert ( pair<double, int> ( - mis[i], i ) );
- // all features should be now sorted with features[0] being the most informative one
- // remove boring features
- map<double, int>::iterator j = features.begin();
- advance( j, numFeatures );
- features.erase ( j, features.end() );
-
- const LabeledSet *test = md["test"];
- string dstExtention = conf.gS("main", "dstext", ".txt");
- saveFeatures ( conf, features, thresholds, *train, ".jpg.feat", dstExtention );
- saveFeatures ( conf, features, thresholds, *test, ".jpg.feat", dstExtention );
-
- return 0;
- }
|