// Source repository: ComputerVisionJena / NICE_VisLearning
							/**
 * @file DTBOblique.cpp
 * @brief random oblique decision tree
 * @author Sven Sickert
 * @date 10/15/2014

*/
#include <iostream>
#include <time.h>

#include "DTBOblique.h"
#include "vislearning/features/fpfeatures/ConvolutionFeature.h"

#include "core/vector/Algorithms.h"

using namespace OBJREC;

#define DEBUGTREE


using namespace std;
using namespace NICE;

/**
 * @brief Set up the tree builder from a configuration section.
 *
 * Every option is read from @p section of @p conf; the last argument of each
 * getter call is presumably the fallback used when the key is missing
 * (NOTE(review): confirm against the Config class).
 *
 * @param conf    configuration object (dereferenced without a null check)
 * @param section name of the config section holding the options below
 */
DTBOblique::DTBOblique ( const Config *conf, string section )
{
    // --- floating point options ---
    // note: 10e-5 == 1e-4 and 10e-7 == 1e-6
    minimumEntropy         = conf->gD( section, "minimum_entropy", 10e-5 );
    minimumInformationGain = conf->gD( section, "minimum_information_gain", 10e-7 );
    lambdaInit             = conf->gD( section, "lambda_init", 0.5 );

    // --- integer options ---
    splitSteps         = conf->gI( section, "split_steps", 20 );
    maxDepth           = conf->gI( section, "max_depth", 10 );
    minExamples        = conf->gI( section, "min_examples", 50);
    regularizationType = conf->gI( section, "regularization_type", 1 );

    // --- boolean switches ---
    saveIndices       = conf->gB( section, "save_indices", false);
    useShannonEntropy = conf->gB( section, "use_shannon_entropy", false );
    useOneVsOne       = conf->gB( section, "use_one_vs_one", false );
}

/** @brief Destructor; performs no explicit cleanup. */
DTBOblique::~DTBOblique() {}

bool DTBOblique::entropyLeftRight (
        const FeatureValuesUnsorted & values,
        double threshold,
        double* stat_left,
        double* stat_right,
        double & entropy_left,
        double & entropy_right,
        double & count_left,
        double & count_right,
        int maxClassNo )
{
    count_left = 0;
    count_right = 0;
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end();
          i++ )
    {
        int classno = i->second;
        double value = i->first;
        if ( value < threshold ) {
            stat_left[classno] += i->fourth;
            count_left+=i->fourth;
        }
        else
        {
            stat_right[classno] += i->fourth;
            count_right+=i->fourth;
        }
    }

    if ( (count_left == 0) || (count_right == 0) )
        return false;

    entropy_left = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_left[j] != 0 )
            entropy_left -= stat_left[j] * log(stat_left[j]);
    entropy_left /= count_left;
    entropy_left += log(count_left);

    entropy_right = 0.0;
    for ( int j = 0 ; j <= maxClassNo ; j++ )
        if ( stat_right[j] != 0 )
            entropy_right -= stat_right[j] * log(stat_right[j]);
    entropy_right /= count_right;
    entropy_right += log (count_right);

    return true;
}

/** refresh data matrix X and label vector y */
void DTBOblique::getDataAndLabel(
        const FeaturePool &fp,
        const Examples &examples,
        const std::vector<int> &examples_selection,
        NICE::Matrix & matX,
        NICE::Vector & vecY )
{
    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    int amountParams = f->getParameterLength();
    int amountExamples = examples_selection.size();

    NICE::Matrix X(amountExamples, amountParams, 0.0 );
    NICE::Vector y(amountExamples, 0.0);

    int matIndex = 0;
    for ( vector<int>::const_iterator si = examples_selection.begin();
          si != examples_selection.end();
          si++ )
    {
        const pair<int, Example> & p = examples[*si];
        const Example & ce = p.second;

        NICE::Vector pixelRepr = f->getFeatureVector( &ce );

        double label = p.first * ce.weight;
        pixelRepr *= ce.weight;

        y.set( matIndex, label );
        X.setRow(matIndex,pixelRepr);

        matIndex++;
    }

    matX = X;
    vecY = y;
}

/**
 * @brief Compute X^T * X and apply the selected regularization to it.
 *
 * XTXreg is first set to X^T * X and then modified according to @p regOption:
 *  - 0: lambda * I is added (identity matrix)
 *  - 1: lambda * D^T * D is added, D being the (dim-1) x dim difference
 *       operator with rows (1, -1)
 *  - 2: lambda * D^T * D is added, D being the (dim-2) x dim difference
 *       operator with rows (1, -2, 1)
 *  - 3: as in [Chen et al., 2012]: with q = (1, 1-lambda, ..., 1-lambda) and
 *       Q = tensorProduct(q,q), the result of R.multiply(XTXreg,Q) with its
 *       diagonal replaced by q[r] * XTXreg(r,r) becomes the new XTXreg
 *  - anything else: XTXreg stays X^T * X and a warning is written to stderr
 *
 * @param X         data matrix
 * @param XTXreg    [out] regularized matrix
 * @param regOption regularization type (see above)
 * @param lambda    regularization parameter
 */
void DTBOblique::regularizeDataMatrix(
        const NICE::Matrix &X,
        NICE::Matrix &XTXreg,
        const int regOption,
        const double lambda )
{
    XTXreg = X.transpose()*X;
    const int dim = X.cols();
    NICE::Matrix R;

    // as in [Chen et al., 2012]
    if ( regOption == 3 )
    {
        NICE::Vector q ( dim, (1.0-lambda) );
        q[0] = 1;
        NICE::Matrix Q;
        Q.tensorProduct(q,q);
        R.multiply(XTXreg,Q);
        for ( int d = 0; d < dim; ++d )
            R(d,d) = q[d] * XTXreg(d,d);
        XTXreg = R;
        return;
    }

    // no regularization
    if ( regOption < 0 || regOption > 2 )
    {
        std::cerr << "DTBOblique::regularizeDataMatrix: No regularization applied!"
                  << std::endl;
        return;
    }

    if ( regOption == 0 )
    {
        // identity matrix
        R.resize(dim,dim);
        R.setIdentity();
    }
    else
    {
        // difference operator of order k = regOption (1 or 2)
        const int rows = dim - regOption;
        R.resize(rows,dim);
        R.set( 0.0 );
        for ( int row = 0; row < rows; ++row )
        {
            if ( regOption == 1 )
            {
                R(row,row)   =  1.0;
                R(row,row+1) = -1.0;
            }
            else
            {
                R(row,row)   =  1.0;
                R(row,row+1) = -2.0;
                R(row,row+2) =  1.0;
            }
        }
        R = R.transpose()*R;
    }

    // common tail of options 0, 1 and 2: scale the penalty and add it
    R *= lambda;
    XTXreg += R;
}

/**
 * @brief Search the best threshold for the given projected feature values.
 *
 * splitSteps equally spaced thresholds between the minimum (inclusive) and
 * the maximum (exclusive) feature value are tested. For each one the
 * information gain e - pl*el - (1-pl)*er is computed, optionally normalized
 * as 2*ig / (e + esplit) when useShannonEntropy is set. Whenever a threshold
 * beats bestSplitInfo.informationGain, bestSplitInfo is overwritten with the
 * gain, threshold, @p beta, both class histograms and both entropies.
 *
 * If @p values is empty or @p maxClassNo is negative the function returns
 * without touching @p bestSplitInfo.
 *
 * @param values        feature values of the current examples
 * @param bestSplitInfo [in,out] best split found so far; distLeft/distRight
 *                      must point to at least maxClassNo+1 doubles
 * @param beta          parameter vector that produced @p values
 * @param e             entropy of the current node
 * @param maxClassNo    highest class index
 */
void DTBOblique::findBestSplitThreshold (
        FeatureValuesUnsorted &values,
        SplitInfo &bestSplitInfo,
        const NICE::Vector &beta,
        const double &e,
        const int &maxClassNo )
{
    // Nothing to search: min_element/max_element return end() for an empty
    // range, and dereferencing that is undefined behaviour.
    if ( values.begin() == values.end() || maxClassNo < 0 )
        return;

    // scratch histograms; std::vector releases them on every exit path
    const int numClasses = maxClassNo + 1;
    std::vector<double> distribution_left ( numClasses, 0.0 );
    std::vector<double> distribution_right ( numClasses, 0.0 );

    double minValue = (min_element ( values.begin(), values.end() ))->first;
    double maxValue = (max_element ( values.begin(), values.end() ))->first;

    if ( maxValue - minValue < 1e-7 )
        std::cerr << "DTBOblique: Difference between min and max of features values to small!" << std::endl;

    // get best thresholds using complete search
    for ( int i = 0; i < splitSteps; i++ )
    {
        double threshold = (i * (maxValue - minValue ) / (double)splitSteps)
                            + minValue;

        // preparations: entropyLeftRight accumulates into the histograms
        distribution_left.assign ( numClasses, 0.0 );
        distribution_right.assign ( numClasses, 0.0 );

        /** Test the current split */
        // Does another split make sense?
        double el = 0.0;
        double er = 0.0;
        double count_left = 0.0;
        double count_right = 0.0;
        if ( ! entropyLeftRight ( values, threshold,
                                  &distribution_left[0], &distribution_right[0],
                                  el, er, count_left, count_right, maxClassNo ) )
            continue;

        // information gain and entropy
        double pl = (count_left) / (count_left + count_right);
        double ig = e - pl*el - (1-pl)*er;

        if ( useShannonEntropy )
        {
            // both counts are non-zero here, so 0 < pl < 1 and the logs are finite
            double esplit = - ( pl*log(pl) + (1-pl)*log(1-pl) );
            ig = 2*ig / ( e + esplit );
        }

        if ( ig > bestSplitInfo.informationGain )
        {
            bestSplitInfo.informationGain = ig;
            bestSplitInfo.threshold = threshold;
            bestSplitInfo.params = beta;

            for ( int k = 0 ; k <= maxClassNo ; k++ )
            {
                bestSplitInfo.distLeft[k] = distribution_left[k];
                bestSplitInfo.distRight[k] = distribution_right[k];
            }
            bestSplitInfo.entropyLeft = el;
            bestSplitInfo.entropyRight = er;
        }
    }
}

/**
 * @brief Recursive building method: grow the subtree for the selected examples.
 *
 * A new node is allocated and becomes a leaf (storing the example indices)
 * if the entropy is at most minimumEntropy, fewer than minExamples examples
 * remain, depth exceeds maxDepth, or no split reaches
 * minimumInformationGain. Otherwise a regularized least-squares problem is
 * solved per class (one-vs-all) or per ordered class pair (one-vs-one), the
 * resulting parameter vectors are set in the convolution feature, and the
 * best threshold over all of them is chosen. The examples are then split by
 * that threshold and both children are built recursively, with lambda
 * updated by the heuristic of [Laptev/Buhmann, 2014].
 *
 * The temporary per-class histograms of the best split are released on every
 * path that leaves this function after they have been allocated.
 *
 * @param fp                 feature pool; its first entry is used as
 *                           ConvolutionFeature
 * @param examples           all training examples
 * @param examples_selection indices of the examples reaching this node
 * @param distribution       class distribution of this node
 * @param e                  entropy of this node
 * @param maxClassNo         highest class index
 * @param depth              depth of this node
 * @param lambdaCurrent      regularization parameter for this node
 * @return newly allocated node (leaf or inner node with both children)
 */
DecisionNode *DTBOblique::buildRecursive(
        const FeaturePool & fp,
        const Examples & examples,
        std::vector<int> & examples_selection,
        FullVector & distribution,
        double e,
        int maxClassNo,
        int depth,
        double lambdaCurrent )
{

#ifdef DEBUGTREE
    std::cerr << "DTBOblique: Examples: " << (int)examples_selection.size()
              << ", Depth: " << (int)depth << ", Entropy: " << e << std::endl;
#endif

    // initialize new node
    DecisionNode *node = new DecisionNode ();
    node->distribution = distribution;

    // stop criteria: maxDepth, minExamples, min_entropy
    if (    ( e <= minimumEntropy )
         || ( (int)examples_selection.size() < minExamples )
         || ( depth > maxDepth ) )

    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Stopping criteria applied!" << std::endl;
#endif
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    // variables
    FeatureValuesUnsorted values;
    SplitInfo bestSplitInfo;
    bestSplitInfo.threshold = 0.0;
    bestSplitInfo.informationGain = -1.0;
    // raw arrays: freed explicitly below on both remaining exit paths
    bestSplitInfo.distLeft = new double [maxClassNo+1];
    bestSplitInfo.distRight = new double [maxClassNo+1];
    bestSplitInfo.entropyLeft = 0.0;
    bestSplitInfo.entropyRight = 0.0;

    ConvolutionFeature *f = (ConvolutionFeature*)fp.begin()->second;
    bestSplitInfo.params = f->getParameterVector();

    // Creating data matrix X and label vector y
    NICE::Matrix X, XTXr, G, temp;
    NICE::Vector y, beta;
    getDataAndLabel( fp, examples, examples_selection, X, y );

    // Preparing system of linear equations:
    // temp = (regularized X^T X)^-1 * X^T, reused for every label vector below
    regularizeDataMatrix( X, XTXr, regularizationType, lambdaCurrent );
    choleskyDecomp(XTXr, G);
    choleskyInvert(G, XTXr);
    temp = XTXr * X.transpose();


    if ( useOneVsOne )
    {
        // One-vs-one: Transforming into {-1,0,+1} problem
        for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
            for ( int opClass = 0; opClass <= maxClassNo; opClass++ )
            {
                if ( curClass == opClass ) continue;

                NICE::Vector yCur ( y.size(), 0.0 );
                int idx = 0;
                bool curHasExamples = false;
                bool opHasExamples = false;

                for ( vector<int>::const_iterator si = examples_selection.begin();
                      si != examples_selection.end();
                      si++, idx++ )
                {
                    const pair<int, Example> & p = examples[*si];
                    if ( p.first == curClass )
                    {
                        yCur.set( idx, 1.0 );
                        curHasExamples = true;
                    }
                    else if ( p.first == opClass )
                    {
                        yCur.set( idx, -1.0 );
                        opHasExamples = true;
                    }
                }

                // are there positive examples for current and opposition class in current set?
                if ( !curHasExamples || !opHasExamples ) continue;

                // Solve system of linear equations in a least squares manner
                beta.multiply(temp,yCur,false);

                // Updating parameter vector in convolutional feature
                f->setParameterVector( beta );

                // Feature Values
                values.clear();
                f->calcFeatureValues( examples, examples_selection, values);

                // complete search for threshold
                findBestSplitThreshold ( values, bestSplitInfo, beta, e,
                                         maxClassNo );
            }
    }
    else
    {
        // One-vs-all: Transforming into {-1,+1} problem
        for ( int curClass = 0; curClass <= maxClassNo; curClass++ )
        {
            NICE::Vector yCur ( y.size(), -1.0 );
            int idx = 0;
            bool hasExamples = false;
            for ( vector<int>::const_iterator si = examples_selection.begin();
                  si != examples_selection.end();
                  si++, idx++ )
            {
                const pair<int, Example> & p = examples[*si];
                if ( p.first == curClass )
                {
                    yCur.set( idx, 1.0 );
                    hasExamples = true;
                }
            }

            // is there a positive example for current class in current set?
            if (!hasExamples) continue;

            // Solve system of linear equations in a least squares manner
            beta.multiply(temp,yCur,false);

            // Updating parameter vector in convolutional feature
            f->setParameterVector( beta );

            // Feature Values
            values.clear();
            f->calcFeatureValues( examples, examples_selection, values);

            // complete search for threshold
            findBestSplitThreshold ( values, bestSplitInfo, beta, e, maxClassNo );

        }
    }


    // supress strange behaviour for values near zero (8.88178e-16)
    if (bestSplitInfo.entropyLeft < 1.0e-10 ) bestSplitInfo.entropyLeft = 0.0;
    if (bestSplitInfo.entropyRight < 1.0e-10 ) bestSplitInfo.entropyRight = 0.0;

    // stop criteria: minimum information gain
    if ( bestSplitInfo.informationGain < minimumInformationGain )
    {
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Minimum information gain reached!" << std::endl;
#endif
        delete [] bestSplitInfo.distLeft;
        delete [] bestSplitInfo.distRight;
        node->trainExamplesIndices = examples_selection;
        return node;
    }

    /** Save the best split to current node */
    // the feature currently holds the parameters of the last tested problem,
    // so restore the winning ones before cloning and recomputing the values
    f->setParameterVector( bestSplitInfo.params );
    values.clear();
    f->calcFeatureValues( examples, examples_selection, values);
    node->f = f->clone();
    node->threshold = bestSplitInfo.threshold;

    /** Split examples according to best split function */
    vector<int> examples_left;
    vector<int> examples_right;

    examples_left.reserve ( values.size() / 2 );
    examples_right.reserve ( values.size() / 2 );
    for ( FeatureValuesUnsorted::const_iterator i = values.begin();
          i != values.end(); i++ )
    {
        double value = i->first;
        if ( value < bestSplitInfo.threshold )
            examples_left.push_back ( i->third );
        else
            examples_right.push_back ( i->third );
    }

#ifdef DEBUGTREE
    node->f->store( std::cerr );
    std::cerr << std::endl;
    std::cerr << "DTBOblique: Information Gain: " << bestSplitInfo.informationGain
              << ", Left Entropy: " <<  bestSplitInfo.entropyLeft << ", Right Entropy: "
              << bestSplitInfo.entropyRight << std::endl;
#endif

    FullVector distribution_left_sparse ( distribution.size() );
    FullVector distribution_right_sparse ( distribution.size() );
    for ( int k = 0 ; k <= maxClassNo ; k++ )
    {
        double l = bestSplitInfo.distLeft[k];
        double r = bestSplitInfo.distRight[k];
        if ( l != 0 )
            distribution_left_sparse[k] = l;
        if ( r != 0 )
            distribution_right_sparse[k] = r;
#ifdef DEBUGTREE
        std::cerr << "DTBOblique: Split of Class " << k << " ("
                  << l << " <-> " << r << ") " << std::endl;
#endif
    }

    // The histograms have been copied into the child distributions and are
    // not used below; release them here, otherwise both arrays leak on
    // every successful split.
    delete [] bestSplitInfo.distLeft;
    delete [] bestSplitInfo.distRight;
    bestSplitInfo.distLeft = 0;
    bestSplitInfo.distRight = 0;

    // update lambda by heuristic [Laptev/Buhmann, 2014]
    double lambdaLeft = lambdaCurrent *
            pow(((double)examples_selection.size()/(double)examples_left.size()),(2./f->getParameterLength()));
    double lambdaRight = lambdaCurrent *
            pow(((double)examples_selection.size()/(double)examples_right.size()),(2./f->getParameterLength()));

    /** Recursion */
    // left child
    node->left  = buildRecursive ( fp, examples, examples_left,
                                   distribution_left_sparse, bestSplitInfo.entropyLeft,
                                   maxClassNo, depth+1, lambdaLeft );
    // right child
    node->right = buildRecursive ( fp, examples, examples_right,
                                   distribution_right_sparse, bestSplitInfo.entropyRight,
                                   maxClassNo, depth+1, lambdaRight );

    return node;
}

/**
 * @brief Initial building method: entry point for training a tree.
 *
 * Accumulates the example weights per class into the root distribution,
 * selects all examples, computes the root entropy
 * log(sum) - (1/sum) * sum_c w_c * log(w_c) over classes with positive
 * weight, and starts the recursion at depth 0 with lambdaInit.
 *
 * @param fp         feature pool handed on to buildRecursive
 * @param examples   training examples (class number, example) pairs
 * @param maxClassNo highest class index; class numbers of the examples are
 *                   used as indices into a distribution of size maxClassNo+1
 * @return root node returned by buildRecursive
 */
DecisionNode *DTBOblique::build ( const FeaturePool & fp,
                                        const Examples & examples,
                                        int maxClassNo )
{
    FullVector distribution ( maxClassNo+1 );
    vector<int> all;
    all.reserve ( examples.size() );

    // class distribution of the root and the list of all example indices
    int index = 0;
    for ( Examples::const_iterator j = examples.begin();
          j != examples.end(); ++j, ++index )
    {
        int classno = j->first;
        distribution[classno] += j->second.weight;
        all.push_back ( index );
    }

    double entropy = 0.0;
    double sum = 0.0;
    for ( int i = 0 ; i < distribution.size(); i++ )
    {
        double val = distribution[i];
        if ( val <= 0.0 ) continue;
        entropy -= val*log(val);
        sum += val;
    }

    if ( sum > 0.0 )
    {
        entropy /= sum;
        entropy += log(sum);
    }
    else
    {
        // No example carries positive weight: 0/0 and log(0) would turn the
        // entropy into NaN, which none of the comparisons in buildRecursive
        // handles. An empty set is treated as pure instead.
        entropy = 0.0;
    }

    return buildRecursive ( fp, examples, all, distribution,
                            entropy, maxClassNo, 0, lambdaInit );
}