/** 
 * @file splitLabeledSetVector.cpp
 * @brief split a labeled vector set into train.vec/test.vec files (optionally min-max normalized)
 * @author Erik Rodner
 * @date 03/23/2010
 */
#include "core/basics/Config.h"
#include "core/basics/StringTools.h"
#include "vislearning/cbaselib/LabeledSet.h"
#include "vislearning/cbaselib/LabeledSetSelection.h"

#include "core/basics/numerictools.h"

using namespace std;
using namespace OBJREC;
using namespace NICE;

//#stupid test for git

/**
 * @brief Min-max scaling of a labeled set: every feature dimension is mapped to [0,1].
 *
 * In a first pass the per-dimension minimum and maximum over all vectors of
 * @p teachSet are collected. In a second pass every vector is transformed to
 * (x[k] - min[k]) / (max[k] - min[k]). Dimensions whose span (max - min) is not
 * larger than 1e-10 are practically constant; they are set to 1.0 instead of
 * dividing by a (near) zero span.
 *
 * @param teachSet       input set, left unchanged
 * @param transformedSet output set; it is cleared first and then receives one
 *                       scaled vector per input vector, stored under the same
 *                       class number
 * @throws Exception if a vector's size differs from the dimension reported by
 *                   @p teachSet (previously this led to out-of-range accesses)
 */
void normalizeLabeledSetVector(const LabeledSetVector &teachSet,
		LabeledSetVector &transformedSet)
{
	transformedSet.clear();

	// NOTE(review): dimension() might report a negative value for an empty set
	// (not visible from here - confirm against LabeledSetVector); clamp it so
	// that resize() is never called with a negative size.
	const int reported_dim = teachSet.dimension();
	const int d = (reported_dim > 0) ? reported_dim : 0;

	Vector vector_max, vector_min, vector_span;
	vector_max.resize(d);
	vector_min.resize(d);
	vector_span.resize(d);

	// first pass: collect the minimum and maximum of every dimension
	bool first_vector = true;
	LOOP_ALL(teachSet)
		{
			EACH(classno,x);

			// all index accesses below rely on every vector having exactly
			// the size of the min/max vectors
			if (x.size() != vector_min.size())
			{
				fthrow(Exception, "normalizeLabeledSetVector: vector size differs from the dimension of the set\n");
			}

			for (uint k = 0; k < vector_min.size(); ++k)
			{
				const double value = x[k];
				if (first_vector)
				{
					// the first vector initializes both bounds
					vector_max[k] = value;
					vector_min[k] = value;
				}
				else
				{
					if (value > vector_max[k])
					{
						vector_max[k] = value;
					}
					if (value < vector_min[k])
					{
						vector_min[k] = value;
					}
				}
			}

			first_vector = false;
		}
	vector_span = vector_max - vector_min;

	// second pass: store the rescaled vectors
	// (sizes were already validated in the first pass over the same set)
	LOOP_ALL(teachSet)
		{
			EACH(classno,x);
			NICE::Vector transformed_vector(x.size());
			for (uint k = 0; k < vector_min.size(); ++k)
			{
				if (vector_span[k] > 1e-10)
				{
					transformed_vector[k] = (x[k] - vector_min[k])
							/ vector_span[k];
				}
				else
				{
					// (nearly) constant feature: avoid division by ~0
					transformed_vector[k] = 1.0;
				}
			}
			transformedSet.add(classno, transformed_vector);
		}

}

/** 

 split train.vec/test.vec files

 */
int main(int argc, char **argv)
{
#ifndef __clang__
#ifndef __llvm__
	std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
#endif
#endif

	Config conf(argc, argv);

	int format = conf.gI("main", "format", 2);
	LabeledSetVector all;

	string setfn = conf.gS("main", "set");
	all.read(setfn, format);

	bool normalize = conf.gB("main", "normalize", false);
	if (normalize)
	{
		LabeledSetVector all_tmp;
		normalizeLabeledSetVector(all, all_tmp);
		all = all_tmp;
	}

	bool random = conf.gB("main", "random", false);
	if (random)
		initRand();

	map<int, int> fixedPositiveExamples;
	string exampleList = conf.gS("main", "examples");

	vector<string> list;
	StringTools::split(exampleList, ';', list);
	for (vector<string>::const_iterator i = list.begin(); i != list.end(); i++)
	{
		string e = *i;
		vector<string> f;
		StringTools::split(e, ':', f);
		if (f.size() != 2)
			fthrow(Exception, "Specify -examples classno:number;classno:number;...\n");
		int classno;
		int examples;
		StringTools::convert<int>(f[0], classno);
		StringTools::convert<int>(f[1], examples);
		fixedPositiveExamples.insert(pair<int, int> (classno, examples));
	}

	LabeledSetVector train;
	LabeledSetVector test;
	LabeledSetSelection<LabeledSetVector>::selectRandom(fixedPositiveExamples,
			all, train, test);

	string trainfn = conf.gS("main", "train", "train.vec");
	string testfn = conf.gS("main", "test", "test.vec");
	train.save(trainfn, format);
	test.save(testfn, format);

	return 0;
}