123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- /**
- * @file splitLabeledSetVector.cpp
- * @brief split train.vec/test.vec files
- * @author Erik Rodner
- * @date 03/23/2010
- */
- #include "core/basics/Config.h"
- #include "core/basics/StringTools.h"
- #include "vislearning/cbaselib/LabeledSet.h"
- #include "vislearning/cbaselib/LabeledSetSelection.h"
- #include "core/basics/numerictools.h"
- using namespace std;
- using namespace OBJREC;
- using namespace NICE;
- //#stupid test for git
- void normalizeLabeledSetVector(const LabeledSetVector &teachSet,
- LabeledSetVector &transformedSet)
- {
- transformedSet.clear();
- Vector vector_max, vector_min, vector_span;
- int maxClassNo = teachSet.getMaxClassno();
- int n = teachSet.count();
- int d = teachSet.dimension();
- vector_max.resize(d);
- vector_min.resize(d);
- vector_span.resize(d);
- //get input data
- uint featurecount = 0;
- LOOP_ALL(teachSet)
- {
- EACH(classno,x);
- for (uint k = 0; k < x.size(); ++k)
- {
- double value = x[k];
- if (featurecount == 0)
- {
- vector_max[k] = value;
- vector_min[k] = value;
- }
- else
- {
- if (value > vector_max[k])
- {
- vector_max[k] = value;
- }
- if (value < vector_min[k])
- {
- vector_min[k] = value;
- }
- }
- }
- ++featurecount;
- }
- vector_span = vector_max - vector_min;
- //save transformed Vectors
- LOOP_ALL(teachSet)
- {
- EACH(classno,x);
- NICE::Vector transformed_vector(x.size());
- for (uint k = 0; k < vector_min.size(); ++k)
- {
- if (vector_span[k] > 1e-10)
- {
- transformed_vector[k] = (x[k] - vector_min[k])
- / vector_span[k];
- }
- else
- {
- transformed_vector[k] = 1.0;
- }
- }
- transformedSet.add(classno, transformed_vector);
- }
- }
- /**
- split train.vec/test.vec files
- */
- int main(int argc, char **argv)
- {
- #ifndef __clang__
- #ifndef __llvm__
- std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
- #endif
- #endif
- Config conf(argc, argv);
- int format = conf.gI("main", "format", 2);
- LabeledSetVector all;
- string setfn = conf.gS("main", "set");
- all.read(setfn, format);
- bool normalize = conf.gB("main", "normalize", false);
- if (normalize)
- {
- LabeledSetVector all_tmp;
- normalizeLabeledSetVector(all, all_tmp);
- all = all_tmp;
- }
- bool random = conf.gB("main", "random", false);
- if (random)
- initRand();
- map<int, int> fixedPositiveExamples;
- string exampleList = conf.gS("main", "examples");
- vector<string> list;
- StringTools::split(exampleList, ';', list);
- for (vector<string>::const_iterator i = list.begin(); i != list.end(); i++)
- {
- string e = *i;
- vector<string> f;
- StringTools::split(e, ':', f);
- if (f.size() != 2)
- fthrow(Exception, "Specify -examples classno:number;classno:number;...\n");
- int classno;
- int examples;
- StringTools::convert<int>(f[0], classno);
- StringTools::convert<int>(f[1], examples);
- fixedPositiveExamples.insert(pair<int, int> (classno, examples));
- }
- LabeledSetVector train;
- LabeledSetVector test;
- LabeledSetSelection<LabeledSetVector>::selectRandom(fixedPositiveExamples,
- all, train, test);
- string trainfn = conf.gS("main", "train", "train.vec");
- string testfn = conf.gS("main", "test", "test.vec");
- train.save(trainfn, format);
- test.save(testfn, format);
- return 0;
- }
|