/** 
* @file DTBStandard.cpp
* @brief standard decision tree builder (splits selected by information gain)
* @author Erik Rodner
* @date 05/06/2008

*/
#include <iostream>

#include "vislearning/classifier/fpclassifier/randomforest/DTBStandard.h"

using namespace OBJREC;

using namespace std;
using namespace NICE;

#undef DEBUG_CART
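// (change the line above to "#define DEBUG_CART" to enable verbose per-split diagnostics on stderr)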

DTBStandard::DTBStandard( const Config *_conf, std::string section )
{
    // minimum information gain required to split a node any further
    minimum_information_gain = _conf->gD (section, 
	"minimum_information_gain", 10e-7 );
    // minimum number of examples a node must contain to be considered for a split
    minExamples = _conf->gD (section, "min_examples", 10);
}

DTBStandard::~DTBStandard()
{
}

DecisionNode *DTBStandard::build ( const FeaturePool & fp, const Examples & examples, int maxClassNo )
{
    FeatureStorage featureStorage;
    DecisionNode *root;

    fprintf (stderr, "DecisionTree: precalculating ALL features (this is time and memory consuming!)\n");

    FullVector example_count ( maxClassNo+1 );
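    // accumulate the total example weight per class; this vector becomes the
    // class distribution stored in the root node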

    int example_index = 0;
    for ( vector< pair<int, Example> >::const_iterator example_i = examples.begin();
		example_i != examples.end() ; example_i++, example_index++ )
    {
	int classno = example_i->first;
	
	example_count[classno] += example_i->second.weight;
    }
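
    // precalculate every feature response for every example: featureStorage maps
    // a feature index to a value-sorted set of
    // (feature value, class number, example index, example weight) quadruplets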
    
    long count_features = 0;
    int feature_index = 0;
    for ( FeaturePool::const_iterator feature_i = fp.begin();
	    feature_i != fp.end() ; feature_i++ )
    {
	const Feature *f = feature_i->second;
	    
	int example_index = 0;
	for ( vector< pair<int, Example> >::const_iterator example_i = examples.begin();
		    example_i != examples.end() ; example_i++, example_index++ )
	{
	    int classno = example_i->first;
	    const Example & ce = example_i->second;
	    double value = f->val ( &ce );
	    // treat underflowed / denormalized feature responses as exactly zero
	    if ( value > 0.0 && value < 1e-300 )
		    value = 0.0;
	    featureStorage[feature_index].insert ( quadruplet<double, int, int, double> ( value, classno, example_index, ce.weight ) );	
	    count_features++;
	}
	feature_index++;
    }
    
    fprintf (stderr, "DecisionTree: precalculation done!\n");


    fprintf (stderr, "Feature Value Count : %ld\n", count_features );
    fprintf (stderr, "Examples : %d\n", (int)examples.size() );

    root = buildnode_allfeatures ( fp, featureStorage, example_count );

    return root;
}
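
/* A minimal usage sketch (the file name and variable names below are illustrative
 * assumptions, not part of this library):
 *
 *   Config conf ( "classifier.conf" );
 *   DTBStandard builder ( &conf, "DTBStandard" );
 *   DecisionNode *tree = builder.build ( featurePool, trainingExamples, maxClassNo );
 *
 * where featurePool (FeaturePool), trainingExamples (Examples) and maxClassNo are
 * assumed to be prepared by the caller, e.g. by a random-forest style wrapper.
 */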


DecisionNode *DTBStandard::buildnode_allfeatures ( const FeaturePool & features, 
						    FeatureStorage & featureStorage, 
						    const FullVector & distribution )
{
    DecisionNode *node = NULL;
    assert ( ! featureStorage.empty() );

    node = new DecisionNode();
    node->distribution = distribution;
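    // compute the total example weight and the weight of the most frequent class;
    // if both coincide, the node is pure and can be turned into a leaf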

    double max = 0;
    double sumweight = 0;
    for ( int i = 0 ; i < distribution.size() ; i++ )
    {
	sumweight += distribution[i];
	if ( max < distribution[i] ) max = distribution[i];
    }

    if ( max == sumweight )
    {
	// leaf node reached: the distribution is concentrated on a single class.
	// Collect the indices of the examples in this node; every feature stores
	// the same example indices, so the first feature's value set suffices.
	vector<int> exsel;
	const FeatureValues & featureSet = featureStorage.begin()->second;
	for ( FeatureValues::const_iterator k = featureSet.begin(); k != featureSet.end() ; k++ )
	    exsel.push_back ( k->third );
	node->trainExamplesIndices = exsel;
	return node;
    }

    double optimal_threshold = 0.0;
    int optimal_feature = 0;
    double optimal_ig = 0.0;

    double overall_entropy = 0.0;

    if ( featureStorage.begin()->second.size() < minExamples ) 
    {
	// too few examples left in this node to split it any further -> leaf node
	vector<int> exsel;
	const FeatureValues & featureSet = featureStorage.begin()->second;
	for ( FeatureValues::const_iterator k = featureSet.begin(); k != featureSet.end() ; k++ )
	    exsel.push_back ( k->third );
	node->trainExamplesIndices = exsel;
	return node;
    }
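
    // entropy of the (weighted) class distribution of the current node:
    // H = - sum_k (n_k / N) * log(n_k / N), with n_k the weight of class k and N = sumweight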

    for ( int i = 0 ; i < distribution.size() ; i++ )
    {
	double nk = distribution.data[i];
	if ( nk != 0 ) {
	    overall_entropy -= nk/sumweight * ( log (nk) - log(sumweight) );
	}
    }
    if ( overall_entropy <= 0.0 )
    {
	fprintf (stderr, "DTBStandard: non-positive entropy for an impure node, this should not happen!\n");
	distribution.store(cerr);
	exit(-1);
    }
    int c = 0;
    for ( FeatureStorage::const_iterator ft = featureStorage.begin(); ft != featureStorage.end(); ft++ , c++)
    {
	int feature_index = ft->first; 
	const FeatureValues & featureSet = ft->second;

	double information_gain     = 0.0;
	FullVector lkappa ( distribution.size() );
	
	double best_information_gain = 0.0;
	double best_threshold = 0.0;
	double l = 0;
	
	double weight_old = 0.0;
	double first_value = featureSet.begin()->first;
	FeatureValues::const_iterator lastElement = featureSet.end();
	lastElement--;
	double last_value  = lastElement->first;
	double weight = lastElement->fourth;
	int previous_class = lastElement->second;
	double previous_value = lastElement->first;
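
	// sweep the value-sorted feature set from the largest value towards the
	// smallest: `weight` accumulates the total weight of the examples visited
	// so far and `lkappa` their per-class weights; candidate thresholds are
	// placed halfway between distinct consecutive values and scored by the
	// entropy of the two resulting partitions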
	for ( FeatureValues::const_iterator k  = lastElement;
					     k != featureSet.begin();
					     k--, weight+=k->fourth )
	{
	    int current_class = k->second;
	    double current_value = k->first;
	    assert (! NICE::isNaN(current_value));
	    
	    double m = weight - weight_old;
	    l = weight_old;
	    
	    double r = sumweight - l; 
	   
	    
	    /*if ( (k != featureSet.begin())&& (current_class != previous_class) 
		 // this leads to problems when using really weak classifiers !!
		 && ( (current_value != previous_value) ||
		      ( (current_value != first_value) &&
		        (current_value != last_value)
		      )
		    )
	       )*/
	    if( (current_value != previous_value) && (current_value != last_value) )
	    {
		if ( ( l <= 10e-7 ) || ( r <= 10e-7 ) )
		    continue;

		
		assert ( r > 10e-7 );
		assert ( l > 10e-7 );
		
		double left_entropy = 0.0;
		double right_entropy = 0.0;
	    	for ( int i = 0 ; i < distribution.size() ; i++ )
		{
		    double lk = lkappa[i];
		    double nk = distribution.data[i];
		    double rk = nk - lk;
		    if ( lk > 10e-7 )
		    {
			double ql = lk / (double)l;
			left_entropy -= ql * log(ql);
		    }

		    if ( rk > 10e-7 ) 
		    {
			double qr = rk / (double)r;
			right_entropy -= qr * log(qr);
		    }
		}
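
		// information gain = parent entropy minus the weight-averaged
		// entropies of the two partitions induced by the threshold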
		
		information_gain = overall_entropy - l*left_entropy/sumweight - r*right_entropy/sumweight;	

#ifdef DEBUG_CART
		//fprintf (stderr, "ig %f t %f\n sumweight %f e %f le %f re %f\n", information_gain, 0.5 * (current_value + previous_value ),
		//   sumweight, overall_entropy, left_entropy, right_entropy );
		fprintf (stderr, "f: %i ig %f sumweight %f e %f le %f re %f l %f r %f\n", c, information_gain, sumweight, overall_entropy, left_entropy, right_entropy, l, r );
#endif

		if ( information_gain > best_information_gain )
		{
		    best_information_gain = information_gain;
		    best_threshold = 0.5 * (current_value + previous_value);
		}
	    }
	    weight_old = weight;
	    lkappa[current_class] += m;
	    previous_class = current_class;
	    previous_value = current_value;
	}
	

	
	if ( best_information_gain >= optimal_ig )
	{
	    optimal_threshold = best_threshold;
	    optimal_feature = feature_index;
	    optimal_ig = best_information_gain;
	}
    }

#ifdef DEBUG_CART
    fprintf (stderr, "Feature optimization finished: information gain %f threshold %f feature index %d\n", 
	optimal_ig, optimal_threshold, optimal_feature );
#endif

    if ( optimal_ig == 0.0 ) 
    {
	fprintf (stderr, "WARNING: zero mutual information! have a look at your feature values\n");

	FeatureStorage::const_iterator first = featureStorage.begin();
	if ( first == featureStorage.end() )
	{
	    fprintf (stderr, "ERROR: no features given at all !\n");
	    exit(-1);
	}
	const FeatureValues & featureSet = first->second;

	const int max_debug_values = 50;
	int l = 0;
	for ( FeatureValues::const_iterator kd = featureSet.begin();
	      (kd != featureSet.end()) && (l < max_debug_values) ; kd++,l++ )
	    fprintf (stderr, "(%d,%f) ", kd->second, kd->first );

	fprintf (stderr, "\n[...] reverse:\n" );

	l = 0;
	for ( FeatureValues::const_reverse_iterator kd = featureSet.rbegin();
	      (kd != featureSet.rend()) && (l < max_debug_values) ; kd++,l++ )
	    fprintf (stderr, "(%d,%f) ", kd->second, kd->first );

	fprintf (stderr, "\n" );
    }

    // if the gain is sufficient, split the feature set according to the optimal threshold;
    // otherwise create a leaf node

    if ( optimal_ig <= minimum_information_gain ) 
    {
	// the best achievable information gain is too small -> leaf node
	vector<int> exsel;
	const FeatureValues & featureSet = featureStorage.begin()->second;
	for ( FeatureValues::const_iterator k = featureSet.begin(); k != featureSet.end() ; k++ )
	    exsel.push_back ( k->third );
	node->trainExamplesIndices = exsel;
	return node;
    }

    // could be optimized, since the set is already sorted
    set<int> examples_left;
    FullVector distribution_left ( distribution.size() );
    FullVector distribution_right ( distribution.size() );
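    // first pass: collect the indices of all examples whose value of the optimal
    // feature lies below the threshold and accumulate the class distributions of
    // both branches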

    int sum_left = 0;
    for ( FeatureValues::const_iterator k  = 
	     featureStorage[optimal_feature].begin();
	     k != featureStorage[optimal_feature].end();
	     k++ )
    {
	int example_index = k->third;
	int classno = k->second;
	double val = k->first;
	if ( val < optimal_threshold ) {
	    examples_left.insert ( example_index );
	    sum_left ++;
	    distribution_left[classno]++;
	} else {
	    distribution_right[classno]++;
	}
    }

    if ( sum_left == 0 ) {
	// rare case: all feature values are equal, so no split is possible
	fprintf (stderr, "WARNING: equal feature values, splitting impossible\n");
	return node;
    }

    node->f = features[optimal_feature].second->clone();
    node->threshold = optimal_threshold;

#ifdef DEBUG_CART
    node->f->store (cerr);
    cerr << endl;
#endif
    
    FeatureStorage left; 
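    // `left` receives the feature values of all examples that go to the left
    // branch; the entries remaining in featureStorage then form the right branch
    // and are reused directly in the recursive call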


    for ( FeatureStorage::iterator ft = featureStorage.begin(); 
	    ft != featureStorage.end(); ft++ )
    {
	int feature_index = ft->first; 
	FeatureValues & featureSet = ft->second;

        for ( FeatureValues::iterator k = featureSet.begin();
	     k != featureSet.end() ; )
	{
	    int example_index = k->third;
	    if ( examples_left.find(example_index) != examples_left.end() )
	    {
		left[feature_index].insert ( quadruplet<double, int, int, double> ( *k ) );
		featureSet.erase ( k++ );
	    } else {
		++k;
	    }
	}
    }

#ifdef DEBUG_CART
    for ( int i = 0 ; i < distribution_left.size() ; i++ )
    {
	fprintf (stderr, "DecisionTree: split of class %d (%f <-> %f)\n", i, 
	    distribution_left[i], distribution_right[i] );
    }
#endif

    node->left = buildnode_allfeatures ( features, left, distribution_left );

    node->right = buildnode_allfeatures ( features, featureStorage, distribution_right );


    return node;
}