classifyDatasetGPHIK.cpp 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. /**
  2. * @file classifyDatasetGPHIK.cpp
  3. * @brief Perform classification on an arbitrary dataset with the GPHIK-Classifier
  4. * @author Alexander Freytag
  5. * @date 16-09-2013
  6. */
  7. // STL-includes
  8. #include <iostream>
  9. #include <vector>
  10. // NICE-core includes
  11. #include <core/basics/Config.h>
  12. #include <core/basics/Exception.h>
  13. #include <core/vector/MatrixT.h>
  14. #include <core/vector/SparseVectorT.h>
  15. // gp-hik-core includes
  16. #include "gp-hik-core/GPHIKClassifier.h"
  17. void readSparseExamples ( const std::string & fn, std::vector< const NICE::SparseVector * > & examples, NICE::Vector & labels )
  18. {
  19. // initially cleaning of variables
  20. examples.clear();
  21. labels.clear();
  22. std::vector<double> labels_std;
  23. labels_std.clear();
  24. std::cerr << "Reading " << fn << std::endl;
  25. std::ifstream ifs ( fn.c_str(), std::ios::in );
  26. if ( ! ifs.good() )
  27. {
  28. std::cerr << "Unable to read " << fn << std::endl;
  29. return;
  30. }
  31. // read until no new line is in the file anymore
  32. while ( !ifs.eof() )
  33. {
  34. int classno;
  35. if ( !(ifs >> classno) )
  36. break;
  37. labels_std.push_back( classno );
  38. NICE::SparseVector *v = new NICE::SparseVector;
  39. /* needed format in every line:
  40. * SVECTOR dimension size index value index value ... END
  41. * with
  42. * SVECTOR -- starting flag
  43. * dimension -- overall feature dimension and
  44. * size -- number of non-zero entries for the current feature vector
  45. * index -- integer value specifying a non-zero dimension
  46. * value -- double value specifying the value for the corresp. non-zero dimension
  47. * END -- ending flag
  48. */
  49. try
  50. {
  51. v->restore ( ifs, NICE::SparseVector::FORMAT_INDEX );
  52. }
  53. catch ( NICE::Exception excep)
  54. {
  55. std::cerr << "Error while reading features. Error message: " << excep.what() << std::endl;
  56. break;
  57. }
  58. examples.push_back ( v );
  59. }
  60. ifs.close();
  61. labels = NICE::Vector( labels_std );
  62. }
  63. void mapClassNumbersToIndices( const NICE::Vector & labels, std::map<int,int> & mapClassNoToIdx )
  64. {
  65. mapClassNoToIdx.clear();
  66. int classCnt ( 0 );
  67. for ( NICE::Vector::const_iterator it_labels = labels.begin(); it_labels != labels.end(); it_labels++ )
  68. {
  69. if ( mapClassNoToIdx.find( *it_labels ) == mapClassNoToIdx.end() )
  70. {
  71. mapClassNoToIdx.insert( std::pair<int,int>( (int) round(*it_labels), classCnt ) );
  72. classCnt++;
  73. }
  74. }
  75. }
  76. int main (int argc, char* argv[])
  77. {
  78. #ifndef __clang__
  79. #ifndef __llvm__
  80. std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  81. #endif
  82. #endif
  83. NICE::Config conf ( argc, argv );
  84. NICE::GPHIKClassifier classifier ( &conf, "GPHIKClassifier" );
  85. // ========================================================================
  86. // TRAINING STEP
  87. // ========================================================================
  88. // read training data
  89. std::vector< const NICE::SparseVector * > examplesTrain;
  90. NICE::Vector labelsTrain;
  91. std::string s_fn_trainingSet = conf.gS("main", "trainset");
  92. readSparseExamples ( s_fn_trainingSet, examplesTrain, labelsTrain );
  93. //map the occuring classes to a minimal set of indices
  94. std::map<int,int> map_classNoToClassIdx_train; // < classNo, Idx>
  95. mapClassNumbersToIndices( labelsTrain, map_classNoToClassIdx_train );
  96. //how many different classes do we have in the training set?
  97. int i_noClassesTrain ( map_classNoToClassIdx_train.size() );
  98. // train GPHIK classifier
  99. classifier.train ( examplesTrain, labelsTrain );
  100. // ========================================================================
  101. // TEST STEP
  102. // ========================================================================
  103. // read test data
  104. std::vector< const NICE::SparseVector * > examplesTest;
  105. NICE::Vector labelsTest;
  106. std::string s_fn_testSet = conf.gS("main", "testset");
  107. readSparseExamples ( s_fn_testSet, examplesTest, labelsTest );
  108. //map the occuring classes to a minimal set of indices
  109. std::map<int,int> map_classNoToClassIdx_test; // < classNo, Idx>
  110. mapClassNumbersToIndices( labelsTest, map_classNoToClassIdx_test );
  111. //how many different classes do we have in the test set?
  112. int i_noClassesTest ( map_classNoToClassIdx_test.size() );
  113. // evaluate GPHIK classifier on unseen test data
  114. int idx ( 0 );
  115. NICE::SparseVector scores; /* not needed in this evaluation, so we just declare it ones */
  116. NICE::Matrix confusion ( i_noClassesTest, i_noClassesTrain, 0.0 );
  117. for (std::vector< const NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++, idx++)
  118. {
  119. int classno_groundtruth = labelsTest( idx );
  120. int classno_predicted;
  121. classifier.classify ( *itTestExamples, classno_predicted, scores /* not needed anyway in that evaluation*/ );
  122. int idx_classno_groundtruth ( map_classNoToClassIdx_test[ classno_groundtruth ] );
  123. int idx_classno_predicted ( map_classNoToClassIdx_train[ classno_predicted ] );
  124. confusion( idx_classno_groundtruth, idx_classno_predicted ) += 1;
  125. }
  126. confusion.normalizeRowsL1();
  127. std::cerr << confusion << std::endl;
  128. std::cerr << "average recognition rate: " << confusion.trace()/confusion.rows() << std::endl;
  129. // ========================================================================
  130. // clean up memory
  131. // ========================================================================
  132. // release memore of feature vectors from training set
  133. for (std::vector< const NICE::SparseVector *>::const_iterator itTrainExamples = examplesTrain.begin(); itTrainExamples != examplesTrain.end(); itTrainExamples++ )
  134. {
  135. delete *itTrainExamples;
  136. }
  137. // release memore of feature vectors from test set
  138. for (std::vector< const NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++ )
  139. {
  140. delete *itTestExamples;
  141. }
  142. return 0;
  143. }