123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- /**
- * @brief Extremely randomized clustering forest program for Matlab input data.
- *
- * @author Johannes Ruehle
- * @date 10/05/2014
- */
- #include <string>
- #include <exception>
- #include <iostream>
- #include <fstream>
- //----------
- #include "vislearning/features/simplefeatures/CodebookRandomForest.h"
- #include "vislearning/features/fpfeatures/VectorFeature.h"
- #include "vislearning/cbaselib/FeaturePool.h"
- #ifdef NICE_USELIB_MATIO
- #include <core/matlabAccess/MatFileIO.h>
/// File-level flag, fixed to false. Not referenced in the visible part of this file;
/// presumably meant to gate extra diagnostic output (TODO confirm).
const bool verbose(false);
/// File-level flag, fixed to true. Not referenced in the visible part of this file;
/// presumably meant to gate start/end progress messages (TODO confirm).
const bool verboseStartEnd(true);
- using namespace OBJREC;
- using namespace NICE;
- using namespace std;
- #undef DEBUG_VERBOSE
- struct structCommands
- {
- QString sFunction;
- QString sFileTrainData;
- QString sFileTrainDataLabels;
- QString sConfigFile;
- QString sFileStoreClassifier; // txt file storing the config of the trained codebook rdf
- QString sFileStoreResult; // matlab mat file storing the generated histogram
- };
- bool loadMatlabMatrix(const std::string &sFilename, const std::string &matrix_name, NICE::Matrix &p_Matrix)
- {
- NICE::MatFileIO matlab_file(sFilename, MAT_ACC_RDONLY);
- #ifdef DEBUG_VERBOSE
- // Show the number of variables in the file
- int vars_in_file = matlab_file.getNumberOfVariables();
- std::cout << vars_in_file << " Variables in " << sFilename << "\n";
- // Load the matrix
- std::cout << "Loading matrix \"" << matrix_name << "\"...\n";
- #endif
- // Check if the variable is a matrix
- matvar_t* matrix_variable = matlab_file.getVariableViaName(matrix_name);
- if(matrix_variable == NULL)
- {
- std::cout << "variable is not found in mat file.\n";
- return false;
- }
- if(matrix_variable->rank != 2) {
- std::cout << "Variable is not a matrix. Rank: " << matrix_variable->rank << ".\n";
- return false;
- }
- // Read the dimensions
- int cols = matrix_variable->dims[1];
- int rows = matrix_variable->dims[0];
- std::cout << "Dimensions: " << cols << " x " << rows << "\n";
- // Read the matrix into a vector of vectors
- std::vector< std::vector<double> > matrix_vecvec(rows, std::vector<double>(cols));
- matlab_file.getFeatureMatrixViaName(matrix_vecvec, matrix_name);
- // Now, we want a NICE matrix
- //NICE::MatrixT<double> matrix(rows, cols);
- p_Matrix.resize(rows, cols);
- for(int i = 0; i < rows; i++) {
- for(int j = 0; j < cols; j++) {
- p_Matrix(i,j) = matrix_vecvec[i][j];
- }
- }
- return true;
- }
- NICE::Matrix* loadMatlabVec(const std::string &sFilename, const std::string &matrix_name)
- {
- NICE::Matrix *pMatrix = NULL;
- NICE::MatFileIO *matFile = new NICE::MatFileIO(sFilename, MAT_ACC_RDONLY );
- matvar_t *t = matFile->getVariableViaName(matrix_name);
- if ( t->class_type == MAT_C_DOUBLE)
- {
- double *pD = (double*)( t->data );
- pMatrix = new NICE::Matrix(pD , (int)t->dims[0], (int)t->dims[1], Matrix::copy );
- }
- else
- {
- std::cerr << "raw format of matlab matrix not supported" << std::endl;
- }
- Mat_VarFree(t);
- delete matFile;
- return pMatrix;
- }
- bool saveMatlabVector(const std::string &sFilename, const NICE::Vector &p_Vector, int p_iFodID)
- {
- std::ofstream ofs;
- ofs.open (sFilename.c_str(), std::ofstream::out);
- if (!ofs.is_open())
- return false;
- ofs << p_iFodID << " #fodID" << std::endl;
- ofs << p_Vector.size() << std::endl;
- for(int i=0; i<p_Vector.size(); i++)
- ofs << p_Vector[i] << std::endl;
- ofs.close();
- return true;
- }
- bool storeClassifier(const structCommands &p_Command, const OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
- {
- if( p_Command.sFileStoreClassifier.isEmpty() )
- return false;
- std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();
- std::ofstream ofs;
- ofs.open (t_sDestinationSave.c_str(), std::ofstream::out);
- p_pCodebookRandomForest->store( ofs );
- ofs.close();
- return true;
- }
- bool restoreClassifier(const structCommands &p_Command, OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
- {
- if( p_Command.sFileStoreClassifier.isEmpty() )
- return false;
- if (p_pCodebookRandomForest == NULL )
- return false;
- std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();
- std::ifstream ifs2;
- ifs2.open (t_sDestinationSave.c_str() );
- p_pCodebookRandomForest->restore( ifs2 );
- ifs2.close();
- return true;
- }
- bool createAndTrain( const structCommands &p_Command)
- {
- if( p_Command.sConfigFile.isEmpty() )
- {
- std::cout << "no config file provided. Exiting" << std::endl;
- return false;
- }
- NICE::Config t_conf = NICE::Config( p_Command.sConfigFile.toStdString() );
- Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures");
- if( t_pMatDataTrain == NULL )
- {
- std::cout << "Training data Matrix couldn't be loaded" << std::endl;
- return 0;
- }
- #ifdef DEBUG_VERBOSE
- for(int i = 0; i<10; i++)
- {
- std::cerr << (*t_pMatDataTrain)(i,0) << " ## " << (*t_pMatDataTrain)(0,i) << std::endl;
- }
- #endif
- Matrix *t_pMatDataTrainLabels = loadMatlabVec( p_Command.sFileTrainDataLabels.toStdString(), "matLabels");
- if( t_pMatDataTrainLabels == NULL )
- {
- std::cout << "Training data label Matrix couldn't be loaded" << std::endl;
- return 0;
- }
- int iNumFeatureDimension = t_pMatDataTrain->rows();
- NICE::Vector t_vecLabelsTrain(t_pMatDataTrainLabels->getDataPointer(), t_pMatDataTrainLabels->rows(), Vector::external);
- OBJREC::Examples examplesTrain;
- bool bRet = OBJREC::Examples::wrapExamplesAroundFeatureMatrix( *t_pMatDataTrain, t_vecLabelsTrain, examplesTrain );
- if( !bRet )
- {
- std::cout << "createAndTrain: Error creating Examples from raw feature matrix and labels." << std::endl;
- return 0;
- }
- //----------------- create raw feature mapping -------------
- OBJREC::FeaturePool fp;
- OBJREC::VectorFeature *pVecFeature = new OBJREC::VectorFeature(iNumFeatureDimension);
- pVecFeature->explode(fp);
- #ifdef DEBUG_VERBOSE
- //----------------- debug features -------------
- OBJREC::Example t_Exp = examplesTrain[0].second;
- NICE::Vector t_FeatVector;
- fp.calcFeatureVector(t_Exp, t_FeatVector);
- std::cerr << "first full Feature Vec: " <<t_FeatVector << std::endl;
- #endif
- //----------------- train our random Forest -------------
- OBJREC::FPCRandomForests *pRandForest = new OBJREC::FPCRandomForests(&t_conf,"RandomForest");
- pRandForest->train(fp, examplesTrain);
- //----------------- create codebook ERC clusterer -------------
- int nMaxDepth = t_conf.gI("CodebookRandomForest", "maxDepthTree",10);
- int nMaxCodebookSize = t_conf.gI("CodebookRandomForest", "maxCodebookSize",100);
- #ifdef DEBUG_VERBOSE
- std::cerr << "maxDepthTree " << nMaxDepth << std::endl;
- std::cerr << "nMaxCodebookSize " << nMaxCodebookSize << std::endl;
- #endif
- OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(pRandForest, nMaxDepth,nMaxCodebookSize);
- //----------------- store classifier in file ---------------------
- bool bSuccess = storeClassifier(p_Command, pCodebookRandomForest);
- //----------------- clean up -------------
- delete pCodebookRandomForest;
- delete pVecFeature;
- pVecFeature = NULL;
- // delete all "exploded" features, they are internally cloned in the random trees anyway
- fp.destroy();
- //
- examplesTrain.clean();
- delete t_pMatDataTrain;
- delete t_pMatDataTrainLabels;
- return true;
- }
- bool generateHistogram( const structCommands &p_Command)
- {
- Matrix *t_pMatFodID = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "fodID");
- if( t_pMatFodID == NULL )
- {
- std::cout << "Data Matrix didn't include a fodID, so couldn't be loaded" << std::endl;
- return 0;
- }
- int iFodID = (*t_pMatFodID)(0,0);
- Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures");
- if( t_pMatDataTrain == NULL )
- {
- std::cout << "Data Matrix couldn't be loaded" << std::endl;
- return 0;
- }
- //----------------- restore trained codebook forest -------------
- OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(-1,-1);
- if( !restoreClassifier(p_Command, pCodebookRandomForest ) )
- {
- std::cout << "Error restoring codebook random forest" << std::endl;
- return false;
- }
- size_t numTrainSamples = t_pMatDataTrain->cols();
- size_t iNumFeatureDimension = t_pMatDataTrain->rows();
- size_t iNumCodewords = pCodebookRandomForest->getCodebookSize();
- #ifdef DEBUG_VERBOSE
- std::cerr << "numTrainSamples " << numTrainSamples << std::endl;
- std::cerr << "iNumFeatureDimension " << iNumFeatureDimension << std::endl;
- std::cerr << "iNumCodewords " << iNumCodewords << std::endl;
- #endif
- //----------------- parse config options -------------
- bool bVerboseOutput = false;
- // if( nrhs > 3)
- // {
- // NICE::Config conf = parseParametersERC(prhs+3, nrhs-3 );
- // bVerboseOutput = conf.gB("CodebookRandomForest", "verbose", false);
- // }
- //----------------- quantize samples into histogram -------------
- NICE::Vector histogram(iNumCodewords, 0.0f);
- const double *pDataPtr = t_pMatDataTrain->getDataPointer();
- int t_iCodebookEntry; double t_fWeight; double t_fDistance;
- for (size_t i = 0; i < numTrainSamples; i++, pDataPtr+= iNumFeatureDimension )
- {
- const NICE::Vector t_VecTrainData( pDataPtr , iNumFeatureDimension);
- pCodebookRandomForest->voteVQ(t_VecTrainData, histogram, t_iCodebookEntry, t_fWeight, t_fDistance );
- if(bVerboseOutput)
- std::cerr << i << ": " << "CBEntry " << t_iCodebookEntry << " Weight: " << t_fWeight << " Distance: " << t_fDistance << std::endl;
- }
- // store histogram
- bool bSuccess = saveMatlabVector(p_Command.sFileStoreResult.toStdString(), histogram , iFodID);
- //----------------- clean up -------------
- delete pCodebookRandomForest;
- delete t_pMatDataTrain;
- return bSuccess;
- }
- #endif
/**
 * @brief Program entry point: parse the command line and run the requested function.
 *
 * Recognized options, each followed by one value:
 *   --function          "createAndTrain" or "generateHistogram"
 *   --config            config file
 *   --traindata         file with the feature data
 *   --traindatalabels   file with the labels
 *   --classifier        file the trained classifier is stored in / restored from
 *   --results           file the histogram is written to
 * --help prints a usage line and exits. Unknown arguments are reported and skipped.
 *
 * @return 0 if the requested function succeeded (or --help was given); -1 if an
 *         option value is missing, the function name is unknown or missing, the
 *         function failed or threw, or the program was built without
 *         NICE_USELIB_MATIO
 */
int main(int argc, char **argv)
{
#ifdef NICE_USELIB_MATIO
#ifndef __clang__
#ifndef __llvm__
    std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
#endif
#endif

    structCommands sCommand;

    for (int iCurrArgIdx = 1; iCurrArgIdx < argc; iCurrArgIdx++)
    {
        const QString sCmdArg(argv[iCurrArgIdx]);

        if (sCmdArg == "--help")
        {
            std::cout << "usage: " << argv[0]
                      << " --function <createAndTrain|generateHistogram>"
                      << " [--config <file>] [--traindata <file>] [--traindatalabels <file>]"
                      << " [--classifier <file>] [--results <file>]" << std::endl;
            return 0;
        }

        // Map the option name to the field that receives its value.
        QString *pTarget = NULL;
        if (sCmdArg == "--function")
            pTarget = &sCommand.sFunction;
        else if (sCmdArg == "--config")
            pTarget = &sCommand.sConfigFile;
        else if (sCmdArg == "--traindata")
            pTarget = &sCommand.sFileTrainData;
        else if (sCmdArg == "--traindatalabels")
            pTarget = &sCommand.sFileTrainDataLabels;
        else if (sCmdArg == "--results")
            pTarget = &sCommand.sFileStoreResult;
        else if (sCmdArg == "--classifier")
            pTarget = &sCommand.sFileStoreClassifier;

        if (pTarget == NULL)
        {
            std::cout << "unknown command arg: " << sCmdArg.toStdString() << std::endl;
            continue;
        }

        // Every option needs a value; without this check an option given as the
        // last argument would read argv[argc].
        if (iCurrArgIdx + 1 >= argc)
        {
            std::cerr << "missing value for command arg: " << sCmdArg.toStdString() << std::endl;
            return -1;
        }

        iCurrArgIdx++;
        *pTarget = QString(argv[iCurrArgIdx]);
    }

    ///////////////////////////////////////////////////
    bool bSuccess = false;
    try
    {
        if (sCommand.sFunction.compare("createAndTrain") == 0)
        {
            bSuccess = createAndTrain(sCommand);
        }
        else if (sCommand.sFunction.compare("generateHistogram") == 0)
        {
            bSuccess = generateHistogram(sCommand);
        }
        else
        {
            std::cerr << "unknown or missing function: \"" << sCommand.sFunction.toStdString()
                      << "\" (expected createAndTrain or generateHistogram)" << std::endl;
        }
    }
    catch (const std::exception &e)
    {
        std::cerr << "exception occured: " << e.what() << std::endl;
    }

    // Propagate the outcome so calling scripts can detect failures.
    return bSuccess ? 0 : -1;
#else
    (void) argc;
    (void) argv;
    std::cerr << "this program was built without NICE_USELIB_MATIO and cannot read Matlab files" << std::endl;
    return -1;
#endif
}
|