/**
 * @file progCodebookRandomForest.cpp
 * @brief Extremely randomized clustering forest program for Matlab input data.
 *
 * @author Johannes Ruehle
 * @date 10/05/2014
 */
#ifdef NICE_USELIB_MEX
#ifdef NICE_USELIB_MATIO

#include <string>
#include <exception>
#include <iostream>
#include <fstream>

#include <QString> // Qt string class used for the command line parameters

//----------

#include "vislearning/features/simplefeatures/CodebookRandomForest.h"
#include "vislearning/features/fpfeatures/VectorFeature.h"

#include "vislearning/cbaselib/FeaturePool.h"

#ifdef NICE_USELIB_MATIO
#include <core/matlabAccess/MatFileIO.h>

const bool verbose = false;
const bool verboseStartEnd = true;

using namespace OBJREC;
using namespace NICE;
using namespace std;

#undef DEBUG_VERBOSE
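
/**
 * @brief Container for the command line parameters that control the program flow.
 */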
struct structCommands
{
    QString sFunction;
    QString sFileTrainData;
    QString sFileTrainDataLabels;
    QString sConfigFile;
    QString sFileStoreClassifier; // txt file storing the config of the trained codebook rdf
    QString sFileStoreResult;     // text file storing the generated histogram (written by saveMatlabVector)
};
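
/**
 * @brief Load a 2D double matrix stored under @a matrix_name from a Matlab .mat file into a NICE::Matrix.
 * @return true on success, false if the variable is missing or is not a rank-2 matrix.
 */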
bool loadMatlabMatrix(const std::string &sFilename, const std::string &matrix_name, NICE::Matrix &p_Matrix)
{
    NICE::MatFileIO matlab_file(sFilename, MAT_ACC_RDONLY);

#ifdef DEBUG_VERBOSE
    // Show the number of variables in the file
    int vars_in_file = matlab_file.getNumberOfVariables();
    std::cout << vars_in_file << " variables in " << sFilename << "\n";

    // Load the matrix
    std::cout << "Loading matrix \"" << matrix_name << "\"...\n";
#endif

    // Check whether the variable exists and is a matrix
    matvar_t* matrix_variable = matlab_file.getVariableViaName(matrix_name);
    if( matrix_variable == NULL )
    {
        std::cout << "Variable " << matrix_name << " was not found in the mat file.\n";
        return false;
    }
    if( matrix_variable->rank != 2 )
    {
        std::cout << "Variable is not a matrix. Rank: " << matrix_variable->rank << ".\n";
        return false;
    }

    // Read the dimensions
    int rows = matrix_variable->dims[0];
    int cols = matrix_variable->dims[1];
    std::cout << "Dimensions: " << rows << " x " << cols << "\n";

    // Read the matrix into a vector of vectors
    std::vector< std::vector<double> > matrix_vecvec(rows, std::vector<double>(cols));
    matlab_file.getFeatureMatrixViaName(matrix_vecvec, matrix_name);

    // Copy the data into the NICE matrix
    p_Matrix.resize(rows, cols);
    for(int i = 0; i < rows; i++)
    {
        for(int j = 0; j < cols; j++)
        {
            p_Matrix(i,j) = matrix_vecvec[i][j];
        }
    }
    return true;
}
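
/**
 * @brief Load a double matrix stored under @a matrix_name from a Matlab .mat file.
 * @return newly allocated NICE::Matrix (caller takes ownership), or NULL if the variable
 *         is missing or not of class double.
 */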
NICE::Matrix* loadMatlabVec(const std::string &sFilename, const std::string &matrix_name)
{
    NICE::Matrix *pMatrix = NULL;

    NICE::MatFileIO *matFile = new NICE::MatFileIO(sFilename, MAT_ACC_RDONLY);
    matvar_t *t = matFile->getVariableViaName(matrix_name);
    if( t == NULL )
    {
        std::cerr << "variable " << matrix_name << " not found in mat file " << sFilename << std::endl;
        delete matFile;
        return NULL;
    }

    if( t->class_type == MAT_C_DOUBLE )
    {
        double *pD = (double*)( t->data );
        pMatrix = new NICE::Matrix(pD, (int)t->dims[0], (int)t->dims[1], Matrix::copy);
    }
    else
    {
        std::cerr << "raw format of matlab matrix not supported" << std::endl;
    }

    Mat_VarFree(t);
    delete matFile;

    return pMatrix;
}
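
/**
 * @brief Write a NICE::Vector together with its fodID to a plain text file, one value per line.
 */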
bool saveMatlabVector(const std::string &sFilename, const NICE::Vector &p_Vector, int p_iFodID)
{
    std::ofstream ofs;
    ofs.open(sFilename.c_str(), std::ofstream::out);
    if (!ofs.is_open())
        return false;

    ofs << p_iFodID << " #fodID" << std::endl;
    ofs << p_Vector.size() << std::endl;
    for(size_t i = 0; i < p_Vector.size(); i++)
        ofs << p_Vector[i] << std::endl;
    ofs.close();

    return true;
}
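
/**
 * @brief Store the trained codebook random forest to the text file given by sFileStoreClassifier.
 */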
bool storeClassifier(const structCommands &p_Command, const OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
{
    if( p_Command.sFileStoreClassifier.isEmpty() )
        return false;
    if( p_pCodebookRandomForest == NULL )
        return false;

    std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();

    std::ofstream ofs;
    ofs.open(t_sDestinationSave.c_str(), std::ofstream::out);
    p_pCodebookRandomForest->store( ofs );
    ofs.close();

    return true;
}
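
/**
 * @brief Restore a previously stored codebook random forest from the text file given by sFileStoreClassifier.
 */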
bool restoreClassifier(const structCommands &p_Command, OBJREC::CodebookRandomForest *p_pCodebookRandomForest)
{
    if( p_Command.sFileStoreClassifier.isEmpty() )
        return false;
    if( p_pCodebookRandomForest == NULL )
        return false;

    std::string t_sDestinationSave = p_Command.sFileStoreClassifier.toStdString();

    std::ifstream ifs2;
    ifs2.open( t_sDestinationSave.c_str() );
    p_pCodebookRandomForest->restore( ifs2 );
    ifs2.close();

    return true;
}
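
/**
 * @brief Train an extremely randomized clustering forest: load features ("matFeatures") and labels ("matLabels")
 *        from .mat files, wrap them as OBJREC::Examples, train an FPCRandomForests classifier, convert it into a
 *        CodebookRandomForest and finally store that codebook to disk.
 */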
bool createAndTrain( const structCommands &p_Command )
{
    if( p_Command.sConfigFile.isEmpty() )
    {
        std::cout << "no config file provided. Exiting" << std::endl;
        return false;
    }
    NICE::Config t_conf = NICE::Config( p_Command.sConfigFile.toStdString() );

    Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures" );
    if( t_pMatDataTrain == NULL )
    {
        std::cout << "Training data matrix couldn't be loaded" << std::endl;
        return false;
    }
#ifdef DEBUG_VERBOSE
    for(int i = 0; i < 10; i++)
    {
        std::cerr << (*t_pMatDataTrain)(i,0) << " ## " << (*t_pMatDataTrain)(0,i) << std::endl;
    }
#endif

    Matrix *t_pMatDataTrainLabels = loadMatlabVec( p_Command.sFileTrainDataLabels.toStdString(), "matLabels" );
    if( t_pMatDataTrainLabels == NULL )
    {
        std::cout << "Training data label matrix couldn't be loaded" << std::endl;
        delete t_pMatDataTrain;
        return false;
    }

    int iNumFeatureDimension = t_pMatDataTrain->rows();

    NICE::Vector t_vecLabelsTrain( t_pMatDataTrainLabels->getDataPointer(), t_pMatDataTrainLabels->rows(), Vector::external );

    OBJREC::Examples examplesTrain;
    bool bRet = OBJREC::Examples::wrapExamplesAroundFeatureMatrix( *t_pMatDataTrain, t_vecLabelsTrain, examplesTrain );
    if( !bRet )
    {
        std::cout << "createAndTrain: Error creating Examples from raw feature matrix and labels." << std::endl;
        delete t_pMatDataTrain;
        delete t_pMatDataTrainLabels;
        return false;
    }

    //----------------- create raw feature mapping -------------
    OBJREC::FeaturePool fp;
    OBJREC::VectorFeature *pVecFeature = new OBJREC::VectorFeature(iNumFeatureDimension);
    pVecFeature->explode(fp);

#ifdef DEBUG_VERBOSE
    //----------------- debug features -------------
    OBJREC::Example t_Exp = examplesTrain[0].second;
    NICE::Vector t_FeatVector;
    fp.calcFeatureVector(t_Exp, t_FeatVector);
    std::cerr << "first full feature vector: " << t_FeatVector << std::endl;
#endif

    //----------------- train our random forest -------------
    OBJREC::FPCRandomForests *pRandForest = new OBJREC::FPCRandomForests(&t_conf, "RandomForest");
    pRandForest->train(fp, examplesTrain);

    //----------------- create codebook ERC clusterer -------------
    int nMaxDepth = t_conf.gI("CodebookRandomForest", "maxDepthTree", 10);
    int nMaxCodebookSize = t_conf.gI("CodebookRandomForest", "maxCodebookSize", 100);
#ifdef DEBUG_VERBOSE
    std::cerr << "maxDepthTree " << nMaxDepth << std::endl;
    std::cerr << "nMaxCodebookSize " << nMaxCodebookSize << std::endl;
#endif
    OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(pRandForest, nMaxDepth, nMaxCodebookSize);

    //----------------- store classifier in file ---------------------
    bool bSuccess = storeClassifier(p_Command, pCodebookRandomForest);

    //----------------- clean up -------------
    delete pCodebookRandomForest;
    delete pVecFeature;
    pVecFeature = NULL;

    // delete all "exploded" features, they are internally cloned in the random trees anyway
    fp.destroy();

    examplesTrain.clean();
    delete t_pMatDataTrain;
    delete t_pMatDataTrainLabels;

    return bSuccess;
}
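
/**
 * @brief Quantize the features ("matFeatures") of a .mat file against a previously trained codebook random forest
 *        and store the resulting histogram together with the file's "fodID".
 */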
bool generateHistogram( const structCommands &p_Command )
{
    Matrix *t_pMatFodID = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "fodID" );
    if( t_pMatFodID == NULL )
    {
        std::cout << "Data matrix didn't include a fodID, so it couldn't be loaded" << std::endl;
        return false;
    }
    int iFodID = (*t_pMatFodID)(0,0);
    delete t_pMatFodID;

    Matrix *t_pMatDataTrain = loadMatlabVec( p_Command.sFileTrainData.toStdString(), "matFeatures" );
    if( t_pMatDataTrain == NULL )
    {
        std::cout << "Data matrix couldn't be loaded" << std::endl;
        return false;
    }

    //----------------- restore trained codebook forest -------------
    OBJREC::CodebookRandomForest *pCodebookRandomForest = new OBJREC::CodebookRandomForest(-1, -1);
    if( !restoreClassifier(p_Command, pCodebookRandomForest) )
    {
        std::cout << "Error restoring codebook random forest" << std::endl;
        delete pCodebookRandomForest;
        delete t_pMatDataTrain;
        return false;
    }

    size_t numTrainSamples = t_pMatDataTrain->cols();
    size_t iNumFeatureDimension = t_pMatDataTrain->rows();
    size_t iNumCodewords = pCodebookRandomForest->getCodebookSize();
#ifdef DEBUG_VERBOSE
    std::cerr << "numTrainSamples " << numTrainSamples << std::endl;
    std::cerr << "iNumFeatureDimension " << iNumFeatureDimension << std::endl;
    std::cerr << "iNumCodewords " << iNumCodewords << std::endl;
#endif

    //----------------- parse config options -------------
    bool bVerboseOutput = false;
    // if( nrhs > 3)
    // {
    //     NICE::Config conf = parseParametersERC(prhs+3, nrhs-3 );
    //     bVerboseOutput = conf.gB("CodebookRandomForest", "verbose", false);
    // }

    //----------------- quantize samples into histogram -------------
    NICE::Vector histogram(iNumCodewords, 0.0);

    const double *pDataPtr = t_pMatDataTrain->getDataPointer();
    int t_iCodebookEntry;
    double t_fWeight;
    double t_fDistance;
    for(size_t i = 0; i < numTrainSamples; i++, pDataPtr += iNumFeatureDimension)
    {
        const NICE::Vector t_VecTrainData( pDataPtr, iNumFeatureDimension );
        pCodebookRandomForest->voteVQ( t_VecTrainData, histogram, t_iCodebookEntry, t_fWeight, t_fDistance );
        if( bVerboseOutput )
            std::cerr << i << ": CBEntry " << t_iCodebookEntry << " weight: " << t_fWeight << " distance: " << t_fDistance << std::endl;
    }

    //----------------- store histogram -------------
    bool bSuccess = saveMatlabVector( p_Command.sFileStoreResult.toStdString(), histogram, iFodID );

    //----------------- clean up -------------
    delete pCodebookRandomForest;
    delete t_pMatDataTrain;

    return bSuccess;
}
#endif // NICE_USELIB_MATIO
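
/**
 * @brief Command line interface. Example calls (the file names are only placeholders):
 *
 *   progCodebookRandomForest --function createAndTrain --config rf.conf \
 *       --traindata train.mat --traindatalabels labels.mat --classifier codebookRF.txt
 *
 *   progCodebookRandomForest --function generateHistogram --traindata test.mat \
 *       --classifier codebookRF.txt --results histogram.txt
 */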
int main(int argc, char **argv)
{
#ifdef NICE_USELIB_MATIO
#ifndef __clang__
#ifndef __llvm__
    std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
#endif
#endif

    structCommands sCommand;

    QString sCmdArg;
    int iCurrArgIdx = 1;
    while( iCurrArgIdx < argc )
    {
        sCmdArg = QString(argv[iCurrArgIdx]);
        if( sCmdArg == "--function" )
        {
            iCurrArgIdx++;
            sCommand.sFunction = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--config" )
        {
            iCurrArgIdx++;
            sCommand.sConfigFile = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--traindata" )
        {
            iCurrArgIdx++;
            sCommand.sFileTrainData = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--traindatalabels" )
        {
            iCurrArgIdx++;
            sCommand.sFileTrainDataLabels = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--results" )
        {
            iCurrArgIdx++;
            sCommand.sFileStoreResult = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--classifier" )
        {
            iCurrArgIdx++;
            sCommand.sFileStoreClassifier = QString(argv[iCurrArgIdx]);
        }
        else if( sCmdArg == "--help" )
        {
            // print_usage();
            return 0;
        }
        else
        {
            std::cout << "unknown command line argument: " << sCmdArg.toStdString() << std::endl;
        }
        iCurrArgIdx++;
    }

    ///////////////////////////////////////////////////
    try
    {
        if( sCommand.sFunction.compare("createAndTrain") == 0 )
        {
            bool bSuccess = createAndTrain(sCommand);
            if( !bSuccess )
                std::cerr << "createAndTrain failed" << std::endl;
        }
        else if( sCommand.sFunction.compare("generateHistogram") == 0 )
        {
            bool bSuccess = generateHistogram(sCommand);
            if( !bSuccess )
                std::cerr << "generateHistogram failed" << std::endl;
        }
        else
        {
            std::cerr << "unknown function: " << sCommand.sFunction.toStdString() << std::endl;
        }
    }
    catch( std::exception &e )
    {
        std::cerr << "exception occurred: " << e.what() << std::endl;
    }
#else
    return -1;
#endif
    return 0;
}

#endif // NICE_USELIB_MATIO
#endif // NICE_USELIB_MEX