Prechádzať zdrojové kódy

added Matlab wrapper for CodebookRandomForest clusterer

Johannes Ruehle 11 rokov pred
rodič
commit
48cb8906d1

+ 458 - 0
features/simplefeatures/matlab/CodebookRandomForestMex.cpp

@@ -0,0 +1,458 @@
+/** 
+* @file CodebookRandomForestMex.cpp
+* @author Alexander Freytag
+* @date 17-01-2014 (dd-mm-yyyy)
+* @brief Matlab-Interface of the CodebookRandomForest clusterer, allowing for creating, training, and deleting an instance.
+*/
+
+// STL includes
+#include <math.h>
+#include <matrix.h>
+#include <mex.h>
+
+// NICE-core includes
+#include <core/basics/Config.h>
+#include <core/basics/Timer.h>
+#include <core/vector/MatrixT.h>
+#include <core/vector/VectorT.h>
+
+// CodebookRandomForest stuff
+#include "vislearning/features/simplefeatures/CodebookRandomForest.h"
+
+#include "vislearning/features/fpfeatures/VectorFeature.h"
+
+// Interface for conversion between Matlab and C objects
+#include "gp-hik-core/matlab/classHandleMtoC.h"
+#include "gp-hik-core/matlab/ConverterMatlabToNICE.h"
+#include "gp-hik-core/matlab/ConverterNICEToMatlab.h"
+
+#include "HelperDataConversionMex.h"
+
+using namespace std; //C basics
+using namespace NICE;  // nice-core
+
+
+/**
+ * @brief Build a NICE::Config from a MATLAB argument list of name/value pairs.
+ *
+ * If the first argument is the string "conf", the argument following it is
+ * converted to a string and handed to the NICE::Config constructor (per the
+ * original comment: the filename of an existing config file). All remaining
+ * arguments are read as (name, value) pairs.
+ *
+ * Currently only "number_of_trees" (int32) is recognized; it is stored in the
+ * section "FPCRandomForests". Pairs with any other name are skipped. Further
+ * settings of that section (features_per_tree, samples_per_tree,
+ * use_simple_balancing, weight_examples, memory_efficient, builder_section)
+ * are not exposed yet.
+ *
+ * Errors are reported via mexErrMsgIdAndTxt with the id "mexnice:error".
+ *
+ * @param prhs MATLAB arguments, starting at the first parameter name
+ * @param nrhs number of entries in prhs
+ * @return the resulting configuration (default-constructed if nrhs <= 0)
+ */
+NICE::Config parseParametersERC(const mxArray *prhs[], int nrhs)
+{
+  NICE::Config conf;
+
+  // nothing to parse: do not touch prhs at all
+  if ( nrhs <= 0 )
+    return conf;
+
+  // if first argument is "conf", the second one names an existing config file;
+  // read the config accordingly
+  int i_start ( 0 );
+  if ( mxIsChar( prhs[i_start] ) && MatlabConversion::convertMatlabToString( prhs[i_start] ) == "conf" )
+  {
+      if ( nrhs < 2 )
+          mexErrMsgIdAndTxt( "mexnice:error", "Missing filename after \'conf\'." );
+
+      conf = NICE::Config ( MatlabConversion::convertMatlabToString( prhs[i_start+1] )  );
+      i_start = i_start+2;
+  }
+
+  // now run over all given parameter specifications
+  // and add them to the config
+  for( int i=i_start; i < nrhs; i+=2 )
+  {
+    if ( !mxIsChar( prhs[i] ) )
+        mexErrMsgIdAndTxt( "mexnice:error", "Parameter names need to be strings." );
+
+    const std::string variable = MatlabConversion::convertMatlabToString(prhs[i]);
+
+    // every name needs a value; without this check prhs[i+1] would be read out of bounds
+    if ( i+1 >= nrhs )
+    {
+        const std::string errorMsg = "Missing value for parameter \'" +  variable + "\'.";
+        mexErrMsgIdAndTxt( "mexnice:error", "%s", errorMsg.c_str() );
+    }
+
+    if( variable == "number_of_trees")
+    {
+        if ( mxIsInt32( prhs[i+1] ) )
+        {
+            const int value = MatlabConversion::convertMatlabToInt32(prhs[i+1]);
+            conf.sI("FPCRandomForests", variable, value);
+        }
+        else
+        {
+            // the message contains user input, so it must not be used as the format string itself
+            const std::string errorMsg = "Unexpected parameter value for \'" +  variable + "\'. Int32 expected.";
+            mexErrMsgIdAndTxt( "mexnice:error", "%s", errorMsg.c_str() );
+        }
+    }
+  }
+
+  return conf;
+}
+
+/**
+ * @brief Delete the feature vectors referenced by all entries of an Examples container.
+ *
+ * Every vec pointer is reset to NULL afterwards, so calling this twice is harmless.
+ */
+static void releaseExampleVectors( OBJREC::Examples & p_examples )
+{
+    for ( size_t i = 0; i < p_examples.size(); i++ )
+    {
+        delete p_examples[i].second.vec;   // deleting NULL is a no-op
+        p_examples[i].second.vec = NULL;
+    }
+}
+
+// MAIN MATLAB FUNCTION
+/**
+ * @brief MEX entry point wrapping OBJREC::CodebookRandomForest.
+ *
+ * The first input is always the command string. Supported commands:
+ *  - 'new'                                   : creates an instance (max depth fixed to 10) and returns its handle
+ *  - 'delete', handle                        : destroys the instance behind the handle
+ *  - 'train', handle, features, labels, ...  : trains a FPCRandomForests on the given data and
+ *                                              passes it to the instance via setClusterForest.
+ *                                              features: full double matrix (M x N, one sample per column),
+ *                                              labels:   double array with N entries,
+ *                                              optionally followed by name/value pairs understood by
+ *                                              parseParametersERC.
+ *
+ * Any other command ends in a MATLAB error.
+ */
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+    // get the command string specifying what to do
+    if (nrhs < 1)
+        mexErrMsgTxt("No commands and options passed... Aborting!");
+
+    if( !mxIsChar( prhs[0] ) )
+        mexErrMsgTxt("First argument needs to be the command, ie.e, the class method to call... Aborting!");
+
+    const std::string cmd = MatlabConversion::convertMatlabToString( prhs[0] );
+
+
+    // create object
+    if ( cmd == "new" )
+    {
+        // check output variable
+        if (nlhs != 1)
+            mexErrMsgTxt("New: One output expected.");
+
+        // the maximum depth is currently fixed; no config settings are read here
+        const int nMaxDepth = 10;
+        // create class instance
+        OBJREC::CodebookRandomForest *pRandomForest = new OBJREC::CodebookRandomForest(nMaxDepth);
+
+        // handle to the C++ instance
+        plhs[0] = MatlabConversion::convertPtr2Mat<OBJREC::CodebookRandomForest>( pRandomForest );
+        return;
+    }
+
+    // in all other cases, there should be a second input,
+    // which is the class instance handle
+    if (nrhs < 2)
+      mexErrMsgTxt("Second input should be a class instance handle.");
+
+    // delete object
+    if ( cmd == "delete" )
+    {
+        // Destroy the C++ object
+        MatlabConversion::destroyObject<OBJREC::CodebookRandomForest>(prhs[1]);
+        return;
+    }
+
+    // get the class instance pointer from the second input
+    // every following function needs the object
+    OBJREC::CodebookRandomForest *pCodebookClusterer = MatlabConversion::convertMat2Ptr<OBJREC::CodebookRandomForest>(prhs[1]);
+
+
+    ////////////////////////////////////////
+    //  Check which class method to call  //
+    ////////////////////////////////////////
+
+
+    // standard train - assumes initialized object
+    if ( cmd == "train" )
+    {
+        //------------- check the arguments --------------
+        if (nrhs < 4)
+        {
+            mexErrMsgTxt("Train: Unexpected arguments.");
+        }
+
+        // everything behind the labels has to come as name/value pairs
+        if ( (nrhs - 4) % 2 != 0 )
+        {
+            mexErrMsgTxt("Train: optional parameters need to be given as name/value pairs.");
+        }
+
+        const mxArray *t_pArrTrainData   = prhs[2];
+        const mxArray *t_pArrTrainLabels = prhs[3];
+
+        if ( !mxIsDouble( t_pArrTrainData ) || mxIsSparse( t_pArrTrainData ) || !mxIsDouble( t_pArrTrainLabels ) )
+        {
+            mexErrMsgTxt("needs 2 matrix inputs, first the training features, second the sample labels");
+        }
+
+        // one label per sample (= per column of the feature matrix)
+        if ( mxGetNumberOfElements( t_pArrTrainLabels ) != mxGetN( t_pArrTrainData ) )
+        {
+            mexErrMsgTxt("Train: number of labels does not match the number of samples (columns of the feature matrix).");
+        }
+
+        //------------- read config settings --------------
+        // Done before anything is allocated, because a parsing error leaves this function immediately.
+        // The optional settings start behind command, handle, features and labels.
+        NICE::Config conf;
+        if ( nrhs > 4 )
+        {
+            conf = parseParametersERC(prhs+4,nrhs-4);
+        }
+
+        //------------- read the data --------------
+        const int iNumFeatureDimension = static_cast<int>( mxGetM( t_pArrTrainData ) ); // feature dimensions
+
+        OBJREC::Examples examplesTrain;
+
+        const bool bRet = MatlabConversion::convertDoubleRawPointersToExamples( t_pArrTrainData, t_pArrTrainLabels, examplesTrain);
+        if( !bRet )
+        {
+            // do not leak vectors that might have been created before the failure
+            releaseExampleVectors( examplesTrain );
+            mexErrMsgTxt("Train: Error creating Examples from raw feature matrix and labels.");
+        }
+
+        //----------------- train our random Forest -------------
+        OBJREC::FeaturePool fp;
+        OBJREC::VectorFeature *pVecFeature = new OBJREC::VectorFeature(iNumFeatureDimension);
+        pVecFeature->explode(fp);
+
+        OBJREC::FPCRandomForests *pRandForest = new OBJREC::FPCRandomForests(&conf,"FPCRandomForests");
+
+        pRandForest->train(fp, examplesTrain);
+
+        // NOTE(review): the forest is handed over as a raw pointer and never deleted here;
+        // presumably CodebookRandomForest takes ownership - confirm, otherwise it leaks.
+        pCodebookClusterer->setClusterForest( pRandForest );
+
+
+        //----------------- clean up -------------
+
+        delete pVecFeature;
+        pVecFeature = NULL;
+        // delete all "exploded" features, they are internally cloned in the random trees anyway
+        fp.destroy();
+
+        releaseExampleVectors( examplesTrain );
+
+        return;
+    }
+
+    // TODO: the commands 'estimate', 'uncertainty', 'testL2loss', 'store'/'save' and
+    // 'restore'/'load' of the GPHIK regression wrapper this file was derived from
+    // are not ported to CodebookRandomForest yet.
+
+    // Got here, so command not recognized
+
+    const std::string errorMsg = cmd + " -- command not recognized.";
+    mexErrMsgTxt( errorMsg.c_str() );
+
+}

+ 69 - 0
features/simplefeatures/matlab/HelperDataConversionMex.h

@@ -0,0 +1,69 @@
+#ifndef HELPERDATACONVERSIONMEX_H
+#define HELPERDATACONVERSIONMEX_H
+
+// STL includes
+#include <math.h>
+#include <matrix.h>
+#include <mex.h>
+
+// NICE-core includes
+#include <core/vector/MatrixT.h>
+#include <core/vector/VectorT.h>
+
+// Interface for conversion between Matlab and C objects
+#include "gp-hik-core/matlab/ConverterMatlabToNICE.h"
+#include "gp-hik-core/matlab/ConverterNICEToMatlab.h"
+
+#include "vislearning/cbaselib/Example.h"
+
+namespace NICE {
+
+namespace MatlabConversion {
+
+/**
+ * @brief Create an Examples class from a given full matrix of features and a matrix/vector of labels.
+ *
+ * Both MATLAB arrays are first converted to NICE containers. For every column of the feature
+ * matrix one Example is appended to p_ExamplesTrain; its vec member is a heap-allocated
+ * NICE::Vector constructed from a pointer into the converted feature matrix, its label is the
+ * corresponding label value cast to int.
+ *
+ * The caller owns the created vectors and has to delete every Example::vec once finished
+ * working with the Examples object, otherwise memory is leaked.
+ *
+ * NOTE(review): the pointer handed to the NICE::Vector constructor refers to storage of a
+ * matrix that is local to this function. This is only safe if that constructor copies the
+ * data - confirm; if it merely wraps the pointer, the vectors dangle after return.
+ * NOTE(review): stepping the pointer by the feature dimension assumes that each sample is
+ * stored contiguously in the converted matrix - confirm against NICE::Matrix.
+ *
+ * @param p_pArrTrainData double MATLAB matrix containing the features (dimension M) of N samples ( M x N matrix )
+ * @param p_pArrTrainLabels double MATLAB matrix containing the labels for N samples (1xN)
+ *
+ * @param p_ExamplesTrain Examples container the N created Example objects are appended to
+ *
+ * @return true for successful Examples creation, false if an input is missing or the sizes of
+ *         features and labels do not fit (nothing is appended in that case)
+ * @author Johannes Ruehle
+ */
+inline bool convertDoubleRawPointersToExamples( const mxArray *p_pArrTrainData, const mxArray *p_pArrTrainLabels, OBJREC::Examples &p_ExamplesTrain )
+{
+    if ( p_pArrTrainData == NULL || p_pArrTrainLabels == NULL )
+        return false;
+
+    const size_t numFeatureDimension = mxGetM( p_pArrTrainData ); // feature dimensions
+
+    NICE::Vector yValuesTrain = convertDoubleVectorToNice( p_pArrTrainLabels );
+    NICE::Matrix matDataTrain = convertDoubleMatrixToNice( p_pArrTrainData   );
+
+    const size_t numSamples = static_cast<size_t>( matDataTrain.cols() );
+
+    // Checked at runtime instead of via assert, so that release builds
+    // cannot read beyond the label vector or the feature matrix.
+    if ( static_cast<size_t>( yValuesTrain.size() ) != numSamples )
+        return false;
+    if ( static_cast<size_t>( matDataTrain.rows() ) != numFeatureDimension )
+        return false;
+
+    p_ExamplesTrain.reserve( p_ExamplesTrain.size() + numSamples );
+
+    const double *pDataPtr = matDataTrain.getDataPointer();
+
+    for ( size_t i = 0; i < numSamples; i++, pDataPtr += numFeatureDimension )
+    {
+        OBJREC::Example t_Example;
+        t_Example.vec = new NICE::Vector( pDataPtr , numFeatureDimension );
+
+        p_ExamplesTrain.push_back( std::pair<int, OBJREC::Example>( static_cast<int>( yValuesTrain[i] ), t_Example ) );
+    }
+
+    return true;
+}
+
+}
+
+}
+
+#endif //HELPERDATACONVERSIONMEX_H

+ 109 - 0
features/simplefeatures/matlab/testHelperDataConversionMex.cpp

@@ -0,0 +1,109 @@
+/** 
+* @file testHelperDataConversionMex.cpp
+* @author Alexander Freytag
+* @date 07-01-2014 (dd-mm-yyyy)
+* @brief Matlab-Interface for unit testing the conversion helpers of HelperDataConversionMex.h (creation of OBJREC::Examples from Matlab matrices).
+*/
+
+// STL includes
+#include <math.h>
+#include <matrix.h>
+#include <mex.h>
+
+// NICE-core includes
+#include <core/vector/MatrixT.h>
+#include <core/vector/VectorT.h>
+
+
+// Interface for conversion between Matlab and C objects
+#include "gp-hik-core/matlab/classHandleMtoC.h"
+#include "gp-hik-core/matlab/ConverterMatlabToNICE.h"
+#include "gp-hik-core/matlab/ConverterNICEToMatlab.h"
+
+#include "HelperDataConversionMex.h"
+
+using namespace std; //C basics
+using namespace NICE;  // nice-core
+
+// MAIN MATLAB FUNCTION
+/**
+ * @brief MEX entry point for testing the helpers of HelperDataConversionMex.h from MATLAB.
+ *
+ * The first input is the name of the test to run. Supported:
+ *  - 'convertDoubleMatrixToExamples', features, labels :
+ *        converts both arrays with convertDoubleRawPointersToExamples, prints every
+ *        created example to std::cerr, frees the examples again and returns the
+ *        success flag of the conversion as logical scalar.
+ *
+ * Any other command ends in a MATLAB error.
+ */
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+    // get the command string specifying what to do
+    if (nrhs < 1)
+        mexErrMsgTxt("No commands and options passed... Aborting!");
+
+    if( !mxIsChar( prhs[0] ) )
+        mexErrMsgTxt("First argument needs to be the command, ie.e, the unit test method to call... Aborting!");
+
+    const std::string cmd = MatlabConversion::convertMatlabToString( prhs[0] );
+
+    // every test needs at least one data argument behind the command
+    if (nrhs < 2)
+    {
+        mexErrMsgTxt("Second input should be some kind of matrix variable");
+    }
+
+    if (nlhs < 1)
+    {
+        mexErrMsgTxt("No return value defined, possible loss of data... Aborting!");
+    }
+
+    ////////////////////////////////////////
+    //  Check which method to call  //
+    ////////////////////////////////////////
+
+    if ( cmd == "convertDoubleMatrixToExamples" )
+    {
+        if (nrhs != 3)
+        {
+            mexErrMsgTxt("needs 2 matrix inputs, first the training features, second the sample labels");
+        }
+
+        const mxArray *t_pArrTrainData   = prhs[1];
+        const mxArray *t_pArrTrainLabels = prhs[2];
+
+        OBJREC::Examples t_ExamplesTrain;
+
+        const bool bConversionSuccess = MatlabConversion::convertDoubleRawPointersToExamples( t_pArrTrainData, t_pArrTrainLabels, t_ExamplesTrain );
+
+        std::cerr << "Examples size: " << t_ExamplesTrain.size() << std::endl;
+        for ( size_t i = 0; i < t_ExamplesTrain.size(); i++ )
+        {
+            const int iClass = t_ExamplesTrain[i].first;
+            OBJREC::Example &t_Example = t_ExamplesTrain[i].second ;
+
+            // the clean-up below treats a missing vector as possible, so do not dereference blindly
+            if ( t_Example.vec == NULL )
+                continue;
+
+            std::cerr << "Example["<<i<<"]" << "L:" << iClass << " data: "<< *t_Example.vec << std::endl;
+        }
+
+        // clean up
+        for ( size_t i = 0; i < t_ExamplesTrain.size(); i++ )
+        {
+            OBJREC::Example &t_Example = t_ExamplesTrain[i].second;
+
+            delete t_Example.vec;   // deleting NULL is a no-op
+            t_Example.vec = NULL;
+        }
+
+
+        // output
+        plhs[0] = mxCreateLogicalScalar( bConversionSuccess );
+
+        return;
+    }
+
+
+    // Got here, so command not recognized
+
+    const std::string errorMsg = cmd + " -- command not recognized.";
+    mexErrMsgTxt( errorMsg.c_str() );
+
+}

+ 13 - 0
features/simplefeatures/matlab/unittestCodebookRandomForestMex.m

@@ -0,0 +1,13 @@
+% brief:    Unit testing of the CodebookRandomForestMex Matlab wrapper
+% author:   Johannes Ruehle
+% date:     11-04-2014 (dd-mm-yyyy)
+
+%% test creation and deletion of a CodebookRandomForest instance
+
+% Errors are deliberately not caught here, so that a failing call fails the test.
+hClassifier = CodebookRandomForestMex('new');
+
+% 'new' has to hand back a handle to the created instance
+assert( ~isempty( hClassifier ) );
+
+CodebookRandomForestMex('delete', hClassifier);

+ 19 - 0
features/simplefeatures/matlab/unittestHelperDataConversionMex.m

@@ -0,0 +1,19 @@
+% brief:    Unit testing of the NICE::MatlabConversion functions
+% author:   Johannes Ruehle
+% date:     11-04-2014 (dd-mm-yyyy)
+
+%% test Creation of OBJREC::Examples class from sample matrix and label data
+
+numSamples  = 10;
+numFeatures = 5;
+maxClass    = 3;
+
+% features are stored column-wise: numFeatures x numSamples
+matFeatures = rand(numSamples, numFeatures, 'double')';
+% one label in 1..maxClass per sample
+matLabels = randi(maxClass, numSamples,1,'double');
+disp(matFeatures);
+try
+    bSuccess = testHelperDataConversionMex( 'convertDoubleMatrixToExamples', matFeatures, matLabels);
+    assert( bSuccess );
+catch ecpn
+    % show the details, but do not swallow the failure - otherwise this test could never fail
+    disp( ecpn );
+    rethrow( ecpn );
+end

+ 6 - 0
list_exclude_from_build.cmake

@@ -0,0 +1,6 @@
+# Matlab MEX sources: they include <mex.h> and <matrix.h>
+# and are therefore kept out of the regular build.
+set(list_exclude_from_build_SRC
+  features/simplefeatures/matlab/CodebookRandomForestMex.cpp
+  features/simplefeatures/matlab/HelperDataConversionMex.h
+  features/simplefeatures/matlab/testHelperDataConversionMex.cpp
+)
+