#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include "vislearning/classifier/fpclassifier/logisticregression/SLR.h"
#include "vislearning/cbaselib/FeaturePool.h"

#include <iostream>
#include <cmath>    // exp, sqrt, fabs
#include <cstdlib>  // rand, srand, RAND_MAX
#include <ctime>    // time
#include <limits>   // numeric_limits

#define SLRDEBUG
#define featnorm

using namespace OBJREC;
using namespace std;
using namespace NICE;
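
// SLR: sparse logistic regression classifier (target class vs. the rest).
// The weight vector is learned in stepwise_regression() by componentwise
// updates: each feature weight takes a bounded gradient step and is then
// soft-thresholded with the L1 penalty lambda, so many weights stay exactly zero.
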
SLR::SLR ()
{
}
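
// Read the training parameters from the given config section:
//   maxiter         - maximum number of update cycles over all dimensions
//   resamp_decay    - decay applied to the resampling probability of a weight
//                     whenever it is set to zero
//   convergence_tol - stop when the relative weight change falls below this value
//   min_resamp      - lower bound for the resampling probability
//   lambda          - strength of the L1 (sparsity) penalty
//   samplesperclass - number (or, if < 1, fraction) of training examples drawn
//                     per class in stepwise_regression()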
SLR::SLR( const Config *_conf, string section ) : conf(_conf)
{
  maxiter = conf->gI(section, "maxiter", 20000);
  resamp_decay = conf->gD(section, "resamp_decay", 0.5);
  convergence_tol = conf->gD(section, "convergence_tol", 1e-7);
  min_resamp = conf->gD(section, "min_resamp", 0.001);
  lambda = conf->gD(section, "lambda", 0.1);
  samplesperclass = conf->gD(section, "samplesperclass", 200.0);

  weight.setDim(0);
  fdim = 0;
}

SLR::~SLR()
{
  // clean up
}
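
// Return the estimated probability that the example belongs to the trained
// class: 1 / (1 + exp(-w^T x)). Dense input vectors are rescaled with the
// stored per-dimension min/max values (featnorm) and converted to a sparse
// vector first. Returns 0.0 if no model has been trained.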
double SLR::classify ( Example & pce )
{
  double result;
  SparseVector *svec;

  if(weight.getDim() == 0)
    return 0.0;

  bool newvec = false;

  if(pce.svec != NULL)
  {
    svec = pce.svec;
  }
  else
  {
    Vector x;
    x = *(pce.vec);
#ifdef featnorm
    for(int m = 0; m < (int)x.size(); m++)
    {
      x[m] = (x[m] - minval[m]) / (maxval[m] - minval[m]);
    }
#endif
    svec = new SparseVector(x);
    svec->setDim(x.size());
    newvec = true;
  }

  if(weight.size() == 0)
    result = 0.0;
  else
  {
    result = 1.0 / (1.0 + exp(-svec->innerProduct(weight)));
  }

  if(newvec)
    delete svec;

  return result;
}
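
// Train a one-vs-rest model for class classno: the feature dimension is taken
// from the feature vector of the first example, then the weights are estimated
// by stepwise_regression().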
void SLR::train ( FeaturePool & _fp, Examples & examples, int classno )
{
  cout << "start train" << endl;
  fp = FeaturePool(_fp);

  // number of training examples
  int fanz = examples.size();
  assert(fanz >= 2);

  // determine the feature dimension
  Vector x;
  fp.calcFeatureVector(examples[0].second, x);
  fdim = x.size();
  assert(fdim > 0);

  stepwise_regression(examples, classno);
  cout << "end train" << endl;
}
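
// Serialization: the weight vector is stored first, followed by one
// (maxval, minval) pair per feature dimension. Note that store() skips the
// min/max values for format -9999, while restore() always expects them.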
void SLR::restore (istream & is, int format)
{
  weight.restore(is, format);
  fdim = (int)weight.getDim();
  maxval.resize(fdim);
  minval.resize(fdim);
  for(int i = 0; i < fdim; i++)
  {
    is >> maxval[i];
    is >> minval[i];
  }
}

void SLR::store (ostream & os, int format) const
{
  if(format != -9999)
  {
    weight.store(os, format);
    for(int i = 0; i < fdim; i++)
    {
      os << maxval[i] << " " << minval[i] << endl;
    }
  }
  else
  {
    weight.store(os, format);
  }
}

void SLR::clear ()
{
  //TODO: implement
}
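
// Componentwise ("stepwise") logistic regression with an L1 penalty.
// Outline:
//   1. Build the data matrix X (optionally min/max normalized) and a two-column
//      indicator matrix Y (column 0: example belongs to classno, column 1: rest),
//      subsampling each class to at most roughly samplesperclass examples.
//   2. Precompute xY = X^T * Y, the per-example scores Xw = X * w, the row sums
//      S[i] = sum_c exp(Xw[i][c]) and the per-dimension scale factors ac[f]
//      that bound the step size.
//   3. Cycle over the feature dimensions and update each weight with a bounded
//      gradient step followed by soft-thresholding at lambda / (2 * ac[f]);
//      weights that stay zero are revisited only with probability p_resamp.
//   4. Stop after maxiter cycles or when the relative weight change drops below
//      convergence_tol. Returns the number of cycles run.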
int SLR::stepwise_regression(Examples &x, int classno)
{
  // initialize randomization
  srand(time(NULL));
  //srand(1);

  cout << "start regression" << endl;

  // number of training examples
  int fnum = x.size();

  // data matrix (one row per selected example)
  GMSparseVectorMatrix X;

  // class indicator matrix (one row per selected example, two columns)
  GMSparseVectorMatrix Y;

  vector<int> count(2, 0);
  maxval.resize(fdim);
  minval.resize(fdim);

  if(x[0].second.svec == NULL) // input: dense vectors
  {
    Vector feat;

    for(int i = 0; i < fdim; i++)
    {
      // use the most negative double here; numeric_limits<double>::min() would
      // be the smallest positive value and break the maximum computation
      maxval[i] = -numeric_limits<double>::max();
      minval[i] = numeric_limits<double>::max();
    }

    for(int i = 0; i < fnum; i++)
    {
      int pos = 0;
      if(x[i].first != classno)
        pos = 1;

      ++count[pos];
      fp.calcFeatureVector(x[i].second, feat);
#ifdef featnorm
      for(int m = 0; m < (int)feat.size(); m++)
      {
        minval[m] = std::min(minval[m], feat[m]);
        maxval[m] = std::max(maxval[m], feat[m]);
      }
#endif
      fdim = feat.size();
    }
  }
  else // input: sparse vectors
  {
    for(int i = 0; i < fnum; i++)
    {
      int pos = 0;
      if(x[i].first != classno)
        pos = 1;

      ++count[pos];
    }
    fdim = x[0].second.svec->getDim();
  }

  if(count[0] == 0 || count[1] == 0)
  {
    cerr << "not enough samples " << count[0] << " : " << count[1] << endl;
    weight.setDim(0);
    return -1;
  }

  double samples = std::min(count[0], count[1]);
  samples = std::min(samples, samplesperclass);
  //samples = std::max(samples, 200);
  //samples = samplesperclass;

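  // Class-balanced subsampling: accept an example of class c with probability
  // rands[c], chosen so that each class contributes roughly 'samples' examples
  // (all of them if the class is smaller). Accepted examples fill X and the
  // one-hot indicator matrix Y.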
  vector<double> rands(2, 0.0);
  vector<double> allsizes(2, 0.0);
  for(int i = 0; i < 2; i++)
  {
    rands[i] = 1.0; // use all samples (default if samples > count[i])
    if(samples > 0 && samples < 1) rands[i] = samples; // use relative number of samples wrt. class size
    if(samples > 1 && samples <= count[i]) rands[i] = samples / (double)count[i]; // use (approximately) fixed absolute number of samples for each class
    allsizes[i] = count[i];
    count[i] = 0;
  }

  for(int i = 0; i < fnum; i++)
  {
    int pos = 0;
    if(x[i].first != classno)
      pos = 1;

    double r = (double)rand() / (double)RAND_MAX;

    if(r > rands[pos])
      continue;

    ++count[pos];

    if(x[0].second.svec == NULL)
    {
      Vector feat;
      fp.calcFeatureVector(x[i].second, feat);
#ifdef featnorm
      for(int m = 0; m < (int)feat.size(); m++)
      {
        feat[m] = (feat[m] - minval[m]) / (maxval[m] - minval[m]);
      }
#endif
      X.addRow(feat);
    }
    else
    {
      X.addRow(x[i].second.svec);
    }

    SparseVector *v = new SparseVector(2);
    (*v)[pos] = 1.0;
    Y.addRow(v);
  }
  Y.setDel();
  if(x[0].second.svec == NULL)
    X.setDel();

  for(int i = 0; i < 2; i++)
  {
    cerr << "Examples for class " << i << ": " << count[i] << " out of " << allsizes[i] << " with p = " << rands[i] << endl;
  }

#undef NORMALIZATION
#ifdef NORMALIZATION
  GMSparseVectorMatrix Xred;
  Xred.resize(X.rows(), X.cols());

  for(int r = 0; r < (int)Xred.rows(); r++)
  {
    for(int c = 0; c < (int)Xred.cols(); c++)
    {
      double tmp = X[r].get(c);

      if(Y[r].get(0) == 1)
        tmp *= (double)count[0] / fnum;
      else
        tmp *= (double)count[1] / fnum;

      if(fabs(tmp) > 10e-7)
        Xred[r][c] = tmp;
    }
  }
#endif

  fnum = X.rows();

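  // Precompute xY = X^T * Y (one row per feature dimension, two columns):
  // xY[basis].get(0) supplies the constant, data-dependent part of the gradient
  // used in the update loop below.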
  GMSparseVectorMatrix xY;
#ifdef NORMALIZATION
  Xred.mult(Y, xY, true);
#else
  X.mult(Y, xY, true);
#endif

  weight.setDim(fdim);

  // precompute Xw = X * w for faster updates of the per-example scores
  GMSparseVectorMatrix Xw;
  X.mult(weight, Xw, false, true);

  SparseVector S(fnum);
  for(int r = 0; r < fnum; r++)
  {
    S[r] = 0.0;
    for(int c = 0; c < 2; c++)
    {
      S[r] += exp(Xw[r].get(c));
    }
  }

  // precompute ac[f] = (maxClassNo-1)/(2*maxClassNo) * Sum_i (x_i(f))^2,
  // which is 0.25 * Sum_i (x_i(f))^2 for the two-class case used here
  Vector ac(fdim);
  Vector lm_2_ac(fdim);

  for(int f = 0; f < fdim; f++)
  {
    ac[f] = 0.0;
    for(int a = 0; a < fnum; a++)
    {
      ac[f] += X[a].get(f) * X[a].get(f);
    }
    ac[f] *= 0.25;
    lm_2_ac[f] = (lambda / 2.0) / ac[f];
  }
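
  // Each coordinate update below is a bounded gradient step followed by
  // soft-thresholding:
  //   u     = w_old + (xY[f] - sum_i X[i][f] * p_i) / ac[f]
  //   w_new = u - lm_2_ac[f]   if u >  lm_2_ac[f]
  //         = u + lm_2_ac[f]   if u < -lm_2_ac[f]
  //         = 0                otherwise
  // where p_i = exp(Xw[i][0]) / S[i] and lm_2_ac[f] = (lambda/2) / ac[f].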

  // initialize the iterative optimization
  double incr = numeric_limits<double>::max();
  long non_zero = 0;
  long wasted_basis = 0;
  long needed_basis = 0;

  // probability of resampling each weight
  vector<double> p_resamp;
  p_resamp.resize(fdim);

  // loop over cycles
  long cycle = 0;
  for (cycle = 0; cycle < maxiter; cycle++)
  {
#ifdef SLRDEBUG
    cerr << "iteration: " << cycle << " of " << maxiter << endl;
#endif
    // zero out the diffs for assessing change
    double sum2_w_diff = 0.0;
    double sum2_w_old = 0.0;
    wasted_basis = 0;
    if (cycle == 1)
      needed_basis = 0;

    // update each weight
    //#pragma omp parallel for
    for (int basis = 0; basis < fdim; basis++) // over all feature dimensions
    {
      int changed = 0;

      // get the starting weight
      double w_old = weight.get(basis);

      // set the p_resamp if it's the first cycle
      if (cycle == 0)
      {
        p_resamp[basis] = 1.0;
      }

      // decide whether to update this weight
      double rval = (double)rand() / (double)RAND_MAX;

      if ((w_old != 0) || (rval < p_resamp[basis]))
      {
        // calculate the probability term of the gradient
        double XdotP = 0.0;
        for (int i = 0; i < fnum; i++)
        {
#ifdef NORMALIZATION
          double e = Xred[i].get(basis) * exp(Xw[i].get(0)) / S[i];
#else
          double e = X[i].get(basis) * exp(Xw[i].get(0)) / S[i];
#endif
          if (finite(e))
            XdotP += e;
#ifdef SLRDEBUG
          else
            throw "numerical problems";
#endif
        }

        // get the gradient
        double grad = xY[basis].get(0) - XdotP;

        // set the new weight
        double w_new = w_old + grad / ac[basis];

        // test whether we stay within the bounds
        if (w_new > lm_2_ac[basis])
        {
          // moved towards the bound, but keep it
          w_new -= lm_2_ac[basis];
          changed = 1;

          // unmark from being zero if necessary
          if (w_old == 0.0)
          {
            non_zero += 1;

            // reset the p_resamp
            p_resamp[basis] = 1.0;

            // we needed the basis
            needed_basis += 1;
          }
        }
        else if (w_new < -lm_2_ac[basis])
        {
          // moved towards the bound, but keep it
          w_new += lm_2_ac[basis];
          changed = 1;

          // unmark from being zero if necessary
          if (w_old == 0.0)
          {
            non_zero += 1;

            // reset the p_resamp
            p_resamp[basis] = 1.0;

            // we needed the basis
            needed_basis += 1;
          }
        }
        else
        {
          // zero the weight out
          w_new = 0.0;

          // decrease the p_resamp
          p_resamp[basis] -= (p_resamp[basis] - min_resamp) * resamp_decay;

          // update the number of non-zero weights
          if (w_old == 0.0)
          {
            // we didn't change
            changed = 0;

            // and wasted a basis
            wasted_basis += 1;
          }
          else
          {
            // we changed
            changed = 1;

            // must update num non_zero
            non_zero -= 1;
          }
        }

        // process changes if necessary
        if (changed == 1)
        {
          // update the expected values
          double w_diff = w_new - w_old;
          for (int i = 0; i < fnum; i++)
          {
            double E_old = exp(Xw[i].get(0));
            double val = X[i].get(basis) * w_diff;

            if (Xw[i].get(0) == 0.0)
            {
              if (fabs(val) > 10e-7)
                Xw[i][0] = val;
            }
            else
              Xw[i][0] += X[i].get(basis) * w_diff;

            double E_new_m = exp(Xw[i].get(0));
            S[i] += E_new_m - E_old;
          }

          // update the weight
          if (w_new == 0.0)
          {
            if (weight.get(basis) != 0.0)
            {
              weight.erase(basis);
            }
          }
          else
            weight[basis] = w_new;

          // accumulate the squared weight changes
          //#pragma omp critical
          sum2_w_diff += w_diff * w_diff;
        }

        // no matter what, keep track of the old weight
        //#pragma omp critical
        sum2_w_old += w_old * w_old;
      }
    }

    // finished a cycle, assess convergence: relative change of the weight vector
    incr = sqrt(sum2_w_diff) / (sqrt(sum2_w_old) + numeric_limits<double>::epsilon());

#ifdef SLRDEBUG
    cout << "sum2_w_diff: " << sum2_w_diff << " sum2_w_old " << sum2_w_old << endl;
    cout << "convcrit: " << incr << " tol: " << convergence_tol << endl;
    //cout << "sum2_w_wold = " << sum2_w_old << " sum2_w_diff = " << sum2_w_diff << endl;
#endif

    if (incr < convergence_tol)
    {
      // we converged!
      break;
    }
  }

  // finished updating the weights
  cerr << "end regression after " << cycle << " steps" << endl;
  return cycle;
}
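
/*
  Usage sketch (illustrative only, not compiled as part of this file). It
  assumes the same using-directives as above, a config section named "SLR",
  and an Examples container filled with (class label, Example) pairs as
  expected by train(); the config file name, section name and the way the
  feature pool and examples are filled are placeholders.

    Config conf("classifier.conf");
    SLR classifier(&conf, "SLR");

    FeaturePool fp;        // filled with the features to be evaluated
    Examples examples;     // filled with (classno, Example) pairs

    // learn a one-vs-rest model for class 1
    classifier.train(fp, examples, 1);

    // probability that an example belongs to class 1
    double p = classifier.classify(examples[0].second);
*/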