/**
 * @brief Extremely randomized clustering forest program for Matlab input data.
 *
 * Command line front end with two modes, selected via `--function`:
 *   - `createAndTrain`:    reads "matFeatures" (from --traindata) and "matLabels"
 *                          (from --traindatalabels), trains a random forest, wraps it
 *                          into a CodebookRandomForest and stores it in --classifier.
 *   - `generateHistogram`: restores the codebook forest from --classifier, quantizes the
 *                          "matFeatures" of --traindata and writes the histogram to --results.
 *
 * @author Johannes Ruehle
 * @date 10/05/2014
 */
#ifdef NICE_USELIB_MEX
#ifdef NICE_USELIB_MATIO

// NOTE(review): the targets of the original include directives were lost (everything
// between '<' and '>' was stripped from this file). The list below was rebuilt from
// the names this file actually uses -- confirm against version control.
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <QString>

//----------
#include "vislearning/features/simplefeatures/CodebookRandomForest.h"
#include "vislearning/features/fpfeatures/VectorFeature.h"
#include "vislearning/cbaselib/FeaturePool.h"

#ifdef NICE_USELIB_MATIO

// NOTE(review): include target lost as well; presumably the header declaring NICE::MatFileIO.
#include <core/matlabAccess/MatFileIO.h>

const bool verbose = false;
const bool verboseStartEnd = true;

using namespace OBJREC;
using namespace NICE;
using namespace std;

#undef DEBUG_VERBOSE

/** Values of all command line options understood by this program. */
struct structCommands
{
    QString sFunction;            // value of --function
    QString sFileTrainData;       // value of --traindata
    QString sFileTrainDataLabels; // value of --traindatalabels
    QString sConfigFile;          // value of --config
    QString sFileStoreClassifier; // txt file storing the config of the trained codebook rdf
    QString sFileStoreResult;     // matlab mat file storing the generated histogram
};

/**
 * @brief Load a rank-2 variable from a Matlab file into a NICE matrix.
 *
 * @param sFilename   path of the .mat file
 * @param matrix_name name of the variable inside the file
 * @param p_Matrix    output; resized to rows x cols and filled element by element
 * @return false if the variable does not exist or is not of rank 2, true otherwise
 */
bool loadMatlabMatrix(const std::string &sFilename, const std::string &matrix_name, NICE::Matrix &p_Matrix)
{
    NICE::MatFileIO matlab_file(sFilename, MAT_ACC_RDONLY);

#ifdef DEBUG_VERBOSE
    // Show the number of variables in the file
    int vars_in_file = matlab_file.getNumberOfVariables();
    std::cout << vars_in_file << " Variables in " << sFilename << "\n";

    // Load the matrix
    std::cout << "Loading matrix \"" << matrix_name << "\"...\n";
#endif

    // Check if the variable is a matrix
    matvar_t *matrix_variable = matlab_file.getVariableViaName(matrix_name);
    if (matrix_variable == NULL)
    {
        std::cout << "variable is not found in mat file.\n";
        return false;
    }

    if (matrix_variable->rank != 2)
    {
        std::cout << "Variable is not a matrix. Rank: " << matrix_variable->rank << ".\n";
        Mat_VarFree(matrix_variable);
        return false;
    }

    // Read the dimensions
    const int cols = static_cast<int>(matrix_variable->dims[1]);
    const int rows = static_cast<int>(matrix_variable->dims[0]);

    // The variable handle is only needed for the checks above. It is released here the
    // same way loadMatlabVec() releases it; previously it was never freed in this function.
    Mat_VarFree(matrix_variable);

    std::cout << "Dimensions: " << cols << " x " << rows << "\n";

    // Read the matrix into a vector of vectors
    // NOTE(review): the element type was lost in extraction; double is assumed because
    // the values are assigned into a NICE::Matrix -- confirm.
    std::vector< std::vector<double> > matrix_vecvec(rows, std::vector<double>(cols));
    matlab_file.getFeatureMatrixViaName(matrix_vecvec, matrix_name);

    // Now, we want a NICE matrix
    p_Matrix.resize(rows, cols);
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            p_Matrix(i, j) = matrix_vecvec[i][j];
        }
    }

    return true;
}

/**
 * @brief Load a double-valued rank-2 Matlab variable as a newly allocated NICE matrix.
 *
 * @param sFilename   path of the .mat file
 * @param matrix_name name of the variable inside the file
 * @return heap-allocated matrix holding a copy of the data (the caller must delete it),
 *         or NULL if the variable is missing, is not of rank 2, or is not of class double
 */
NICE::Matrix* loadMatlabVec(const std::string &sFilename, const std::string &matrix_name)
{
    NICE::MatFileIO matFile(sFilename, MAT_ACC_RDONLY);

    matvar_t *t = matFile.getVariableViaName(matrix_name);
    if (t == NULL)
    {
        // Callers test for NULL to detect a missing variable, so it must not be dereferenced.
        return NULL;
    }

    NICE::Matrix *pMatrix = NULL;
    if (t->rank != 2)
    {
        // dims[1] is read below, so anything but a rank-2 variable cannot be handled.
        std::cerr << "Variable is not a matrix. Rank: " << t->rank << "." << std::endl;
    }
    else if (t->class_type == MAT_C_DOUBLE && t->data != NULL)
    {
        double *pD = static_cast<double*>(t->data);
        // Matrix::copy: the matrix takes its own copy, so freeing the variable below is safe.
        pMatrix = new NICE::Matrix(pD, static_cast<int>(t->dims[0]), static_cast<int>(t->dims[1]), Matrix::copy);
    }
    else
    {
        std::cerr << "raw format of matlab matrix not supported" << std::endl;
    }

    Mat_VarFree(t);

    return pMatrix;
}

/**
 * @brief Write a vector and its fodID to a plain text file.
 *
 * File layout: first line "<fodID> #fodID", second line the number of elements,
 * followed by the elements.
 *
 * @param sFilename destination path
 * @param p_Vector  values to write
 * @param p_iFodID  id written to the first line
 * @return false if the file could not be opened or writing failed, true otherwise
 */
bool saveMatlabVector(const std::string &sFilename, const NICE::Vector &p_Vector, int p_iFodID)
{
    std::ofstream ofs;
    ofs.open(sFilename.c_str(), std::ofstream::out);
    if (!ofs.is_open())
        return false;

    ofs << p_iFodID << " #fodID" << std::endl;
    ofs << p_Vector.size() << std::endl;

    // NOTE(review): the loop condition and body were lost in extraction; writing one
    // element per line is a reconstruction -- confirm against version control.
    for (size_t i = 0; i < p_Vector.size(); i++)
        ofs << p_Vector[i] << std::endl;

    ofs.close();

    return !ofs.fail();
}

/**
 * @brief Store a codebook random forest in the file named by --classifier.
 *
 * NOTE(review): the head of this function was lost in extraction; only its call site and
 * its tail ("->store( ofs ); ofs.close(); return true;") survived. It was rebuilt as the
 * mirror image of restoreClassifier() -- confirm against version control.
 *
 * @param p_Command               parsed command line; sFileStoreClassifier is the destination
 * @param p_pCodebookRandomForest forest to store
 * @return false if no destination was given, the forest is NULL, or the file could not
 *         be written; true otherwise
 */
bool storeClassifier(const structCommands &p_Command, OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
{
    if (p_Command.sFileStoreClassifier.isEmpty())
        return false;

    if (p_pCodebookRandomForest == NULL)
        return false;

    std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();

    std::ofstream ofs;
    ofs.open(t_sDestinationSave.c_str(), std::ofstream::out);
    if (!ofs.is_open())
        return false;

    p_pCodebookRandomForest->store( ofs );
    ofs.close();

    return !ofs.fail();
}

/**
 * @brief Restore a codebook random forest from the file named by --classifier.
 *
 * @param p_Command               parsed command line; sFileStoreClassifier is the source
 * @param p_pCodebookRandomForest forest object that receives the restored state
 * @return false if no file was given, the forest is NULL, or the file could not be
 *         opened; true otherwise
 */
bool restoreClassifier(const structCommands &p_Command, OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
{
    if (p_Command.sFileStoreClassifier.isEmpty())
        return false;

    if (p_pCodebookRandomForest == NULL)
        return false;

    std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();

    std::ifstream ifs2;
    ifs2.open(t_sDestinationSave.c_str());
    if (!ifs2.is_open())
    {
        // Restoring from a stream that never opened would hand back an untrained forest.
        return false;
    }

    p_pCodebookRandomForest->restore( ifs2 );
    ifs2.close();

    return true;
}

/**
 * @brief Train a random forest on Matlab data and store it as a codebook forest.
 *
 * Reads "matFeatures" from sFileTrainData and "matLabels" from sFileTrainDataLabels.
 * The number of rows of the feature matrix is used as the feature dimension.
 *
 * @param p_Command parsed command line
 * @return true if training ran and the classifier file was written, false otherwise
 */
bool createAndTrain( const structCommands &p_Command)
{
    if (p_Command.sConfigFile.isEmpty())
    {
        std::cout << "no config file provided. Exiting" << std::endl;
        return false;
    }
    NICE::Config t_conf = NICE::Config( p_Command.sConfigFile.toStdString() );

    Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures");
    if (t_pMatDataTrain == NULL)
    {
        std::cout << "Training data Matrix couldn't be loaded" << std::endl;
        return false;
    }

#ifdef DEBUG_VERBOSE
    for (int i = 0; i < 10; i++)
    {
        std::cerr << (*t_pMatDataTrain)(i,0) << " ## " << (*t_pMatDataTrain)(0,i) << std::endl;
    }
#endif

    Matrix *t_pMatDataTrainLabels = loadMatlabVec( p_Command.sFileTrainDataLabels.toStdString(), "matLabels");
    if (t_pMatDataTrainLabels == NULL)
    {
        std::cout << "Training data label Matrix couldn't be loaded" << std::endl;
        delete t_pMatDataTrain;
        return false;
    }

    int iNumFeatureDimension = t_pMatDataTrain->rows();

    // Vector::external: the label vector is built on the matrix' own data pointer,
    // so the label matrix has to stay alive while the vector is in use.
    NICE::Vector t_vecLabelsTrain(t_pMatDataTrainLabels->getDataPointer(), t_pMatDataTrainLabels->rows(), Vector::external);

    OBJREC::Examples examplesTrain;

    bool bRet = OBJREC::Examples::wrapExamplesAroundFeatureMatrix( *t_pMatDataTrain, t_vecLabelsTrain, examplesTrain );
    if (!bRet)
    {
        std::cout << "createAndTrain: Error creating Examples from raw feature matrix and labels." << std::endl;
        delete t_pMatDataTrain;
        delete t_pMatDataTrainLabels;
        return false;
    }

    //----------------- create raw feature mapping -------------
    OBJREC::FeaturePool fp;
    OBJREC::VectorFeature *pVecFeature = new OBJREC::VectorFeature(iNumFeatureDimension);
    pVecFeature->explode(fp);

#ifdef DEBUG_VERBOSE
    //----------------- debug features -------------
    OBJREC::Example t_Exp = examplesTrain[0].second;
    NICE::Vector t_FeatVector;
    fp.calcFeatureVector(t_Exp, t_FeatVector);
    std::cerr << "first full Feature Vec: " << t_FeatVector << std::endl;
#endif

    //----------------- train our random Forest -------------
    // NOTE(review): the declaration of pRandForest was lost in extraction; only
    // "->train(fp, examplesTrain);" survived. The class and the config section name
    // "RandomForest" are reconstructed -- confirm against version control.
    OBJREC::FPCRandomForests *pRandForest = new OBJREC::FPCRandomForests(&t_conf, "RandomForest");
    pRandForest->train(fp, examplesTrain);

    //----------------- create codebook ERC clusterer -------------
    int nMaxDepth = t_conf.gI("CodebookRandomForest", "maxDepthTree",10);
    int nMaxCodebookSize = t_conf.gI("CodebookRandomForest", "maxCodebookSize",100);
#ifdef DEBUG_VERBOSE
    std::cerr << "maxDepthTree " << nMaxDepth << std::endl;
    std::cerr << "nMaxCodebookSize " << nMaxCodebookSize << std::endl;
#endif
    // pRandForest is handed to the codebook forest and not deleted separately below,
    // exactly as in the original clean up.
    OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(pRandForest, nMaxDepth,nMaxCodebookSize);

    //----------------- store classifier in file ---------------------
    bool bSuccess = storeClassifier(p_Command, pCodebookRandomForest);

    //----------------- clean up -------------
    delete pCodebookRandomForest;

    delete pVecFeature;
    pVecFeature = NULL;

    // delete all "exploded" features, they are internally cloned in the random trees anyway
    fp.destroy();
    //
    // examplesTrain.clean();

    delete t_pMatDataTrain;
    delete t_pMatDataTrainLabels;

    // Report a failed store instead of unconditionally claiming success.
    return bSuccess;
}

/**
 * @brief Quantize the samples of a Matlab feature matrix with a stored codebook forest.
 *
 * Reads "fodID" and "matFeatures" from sFileTrainData, restores the forest from
 * sFileStoreClassifier, votes every sample into a histogram with one bin per codeword
 * and writes the histogram together with the fodID to sFileStoreResult.
 * Columns of the feature matrix are treated as samples, rows as feature dimensions.
 *
 * @param p_Command parsed command line
 * @return true if the histogram file was written, false on any failure
 */
bool generateHistogram( const structCommands &p_Command)
{
    Matrix *t_pMatFodID = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "fodID");
    if (t_pMatFodID == NULL)
    {
        std::cout << "Data Matrix didn't include a fodID, so couldn't be loaded" << std::endl;
        return false;
    }
    const int iFodID = static_cast<int>( (*t_pMatFodID)(0,0) );
    // Only the single id value is needed; the matrix was previously never freed.
    delete t_pMatFodID;
    t_pMatFodID = NULL;

    Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures");
    if (t_pMatDataTrain == NULL)
    {
        std::cout << "Data Matrix couldn't be loaded" << std::endl;
        return false;
    }

    //----------------- restore trained codebook forest -------------
    OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(-1,-1);
    if (!restoreClassifier(p_Command, pCodebookRandomForest ))
    {
        std::cout << "Error restoring codebook random forest" << std::endl;
        delete pCodebookRandomForest;
        delete t_pMatDataTrain;
        return false;
    }

    size_t numTrainSamples = t_pMatDataTrain->cols();
    size_t iNumFeatureDimension = t_pMatDataTrain->rows();
    size_t iNumCodewords = pCodebookRandomForest->getCodebookSize();
#ifdef DEBUG_VERBOSE
    std::cerr << "numTrainSamples " << numTrainSamples << std::endl;
    std::cerr << "iNumFeatureDimension " << iNumFeatureDimension << std::endl;
    std::cerr << "iNumCodewords " << iNumCodewords << std::endl;
#endif

    //----------------- parse config options -------------
    bool bVerboseOutput = false;
    // if( nrhs > 3)
    // {
    //     NICE::Config conf = parseParametersERC(prhs+3, nrhs-3 );
    //     bVerboseOutput = conf.gB("CodebookRandomForest", "verbose", false);
    // }

    //----------------- quantize samples into histogram -------------
    NICE::Vector histogram(iNumCodewords, 0.0f);

    // The raw pointer is advanced by one feature dimension per sample.
    // NOTE(review): this assumes each sample (column) is stored contiguously -- confirm.
    const double *pDataPtr = t_pMatDataTrain->getDataPointer();
    int t_iCodebookEntry;
    double t_fWeight;
    double t_fDistance;
    for (size_t i = 0; i < numTrainSamples; i++, pDataPtr += iNumFeatureDimension )
    {
        const NICE::Vector t_VecTrainData( pDataPtr, iNumFeatureDimension);
        pCodebookRandomForest->voteVQ(t_VecTrainData, histogram, t_iCodebookEntry, t_fWeight, t_fDistance );
        if (bVerboseOutput)
            std::cerr << i << ": " << "CBEntry " << t_iCodebookEntry << " Weight: " << t_fWeight << " Distance: " << t_fDistance << std::endl;
    }

    // store histogram
    bool bSuccess = saveMatlabVector(p_Command.sFileStoreResult.toStdString(), histogram, iFodID);

    //----------------- clean up -------------
    delete pCodebookRandomForest;
    delete t_pMatDataTrain;

    return bSuccess;
}

#endif

/**
 * @brief Read the value that follows the option at argv[p_iArgIdx].
 *
 * @param argc       number of command line arguments
 * @param argv       command line arguments
 * @param p_iArgIdx  index of the option; advanced to the index of its value on success
 * @param p_sValue   receives the value
 * @return false (after printing a message) if the option is the last argument
 */
static bool readOptionValue(int argc, char **argv, int &p_iArgIdx, QString &p_sValue)
{
    if (p_iArgIdx + 1 >= argc)
    {
        std::cout << "missing value for command arg: " << argv[p_iArgIdx] << std::endl;
        return false;
    }

    p_iArgIdx++;
    p_sValue = QString(argv[p_iArgIdx]);
    return true;
}

/**
 * @brief Program entry: parse the options and run the requested function.
 *
 * Options: --function, --config, --traindata, --traindatalabels, --results,
 * --classifier (each followed by a value) and --help.
 *
 * @return 0 on success or for --help, 1 if an option value is missing, the function is
 *         unknown or failed, or an exception was caught; -1 if built without matio support
 */
int main(int argc, char **argv)
{
#ifdef NICE_USELIB_MATIO
#ifndef __clang__
#ifndef __llvm__
    std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
#endif
#endif

    structCommands sCommand;

    int iCurrArgIdx = 1;
    while (iCurrArgIdx < argc)
    {
        const QString sCmdArg(argv[iCurrArgIdx]);

        // Field that receives the value of the current option (NULL: option takes no value).
        QString *pTarget = NULL;

        if ( sCmdArg == "--function" )
        {
            pTarget = &sCommand.sFunction;
        }
        else if ( sCmdArg == "--config" )
        {
            pTarget = &sCommand.sConfigFile;
        }
        else if ( sCmdArg == "--traindata" )
        {
            pTarget = &sCommand.sFileTrainData;
        }
        else if ( sCmdArg == "--traindatalabels" )
        {
            pTarget = &sCommand.sFileTrainDataLabels;
        }
        else if ( sCmdArg == "--results" )
        {
            pTarget = &sCommand.sFileStoreResult;
        }
        else if ( sCmdArg == "--classifier" )
        {
            pTarget = &sCommand.sFileStoreClassifier;
        }
        else if ( sCmdArg == "--help" )
        {
            // print_usage();
            return 0;
        }
        else
        {
            std::cout << "unknown command arg: " << sCmdArg.toStdString() << std::endl;
        }

        if (pTarget != NULL && !readOptionValue(argc, argv, iCurrArgIdx, *pTarget))
            return 1;

        iCurrArgIdx++;
    }

    ///////////////////////////////////////////////////
    bool bSuccess = false;
    try
    {
        if ( sCommand.sFunction.compare("createAndTrain") == 0)
        {
            bSuccess = createAndTrain(sCommand);
        }
        else if ( sCommand.sFunction.compare("generateHistogram") == 0)
        {
            bSuccess = generateHistogram(sCommand);
        }
        else
        {
            std::cout << "unknown or missing function: " << sCommand.sFunction.toStdString() << std::endl;
        }
    }
    catch (const std::exception &e)
    {
        std::cerr << "exception occured: " << e.what() << std::endl;
    }

    // Propagate the outcome so calling scripts can detect a failed run.
    return bSuccess ? 0 : 1;
#else
    return -1;
#endif
}

#endif //#ifdef NICE_USELIB_MATIO
#endif