classifyDatasetGPHIK.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. /**
  2. * @file classifyDatasetGPHIK.cpp
  3. * @brief Perform classification on an arbitrary dataset with the GPHIK-Classifier
  4. * @author Alexander Freytag
  5. * @date 16-09-2013
  6. */
// STL-includes
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// NICE-core includes
#include <core/basics/Config.h>
#include <core/basics/Exception.h>
#include <core/vector/MatrixT.h>
#include <core/vector/SparseVectorT.h>
// gp-hik-core includes
#include "gp-hik-core/GPHIKClassifier.h"
  17. void readSparseExamples ( const std::string & _fn,
  18. std::vector< const NICE::SparseVector * > & _examples,
  19. NICE::Vector & _labels
  20. )
  21. {
  22. // initially cleaning of variables
  23. _examples.clear();
  24. _labels.clear();
  25. std::vector<double> labels_std;
  26. labels_std.clear();
  27. std::cerr << "Reading " << _fn << std::endl;
  28. std::ifstream ifs ( _fn.c_str(), std::ios::in );
  29. if ( ! ifs.good() )
  30. {
  31. std::cerr << "Unable to read " << _fn << std::endl;
  32. return;
  33. }
  34. // read until no new line is in the file anymore
  35. while ( !ifs.eof() )
  36. {
  37. int classno;
  38. if ( !(ifs >> classno) )
  39. break;
  40. labels_std.push_back( classno );
  41. NICE::SparseVector *v = new NICE::SparseVector;
  42. /* needed format in every line:
  43. * SVECTOR dimension size index value index value ... END
  44. * with
  45. * SVECTOR -- starting flag
  46. * dimension -- overall feature dimension and
  47. * size -- number of non-zero entries for the current feature vector
  48. * index -- integer value specifying a non-zero dimension
  49. * value -- double value specifying the value for the corresp. non-zero dimension
  50. * END -- ending flag
  51. */
  52. try
  53. {
  54. v->restore ( ifs, NICE::SparseVector::FORMAT_INDEX );
  55. }
  56. catch ( NICE::Exception excep)
  57. {
  58. std::cerr << "Error while reading features. Error message: " << excep.what() << std::endl;
  59. break;
  60. }
  61. _examples.push_back ( v );
  62. }
  63. ifs.close();
  64. _labels = NICE::Vector( labels_std );
  65. }
  66. void mapClassNumbersToIndices( const NICE::Vector & _labels,
  67. std::map< uint, uint > & _mapClassNoToIdx
  68. )
  69. {
  70. _mapClassNoToIdx.clear();
  71. int classCnt ( 0 );
  72. for ( NICE::Vector::const_iterator it_labels = _labels.begin(); it_labels != _labels.end(); it_labels++ )
  73. {
  74. if ( _mapClassNoToIdx.find( *it_labels ) == _mapClassNoToIdx.end() )
  75. {
  76. _mapClassNoToIdx.insert( std::pair< uint, uint >( (uint) round(*it_labels), classCnt ) );
  77. classCnt++;
  78. }
  79. }
  80. }
  81. int main (int argc, char* argv[])
  82. {
  83. #ifndef __clang__
  84. #ifndef __llvm__
  85. std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  86. #endif
  87. #endif
  88. NICE::Config conf ( argc, argv );
  89. NICE::GPHIKClassifier classifier ( &conf, "GPHIKClassifier" );
  90. // ========================================================================
  91. // TRAINING STEP
  92. // ========================================================================
  93. // read training data
  94. std::vector< const NICE::SparseVector * > examplesTrain;
  95. NICE::Vector labelsTrain;
  96. std::string s_fn_trainingSet = conf.gS("main", "trainset");
  97. readSparseExamples ( s_fn_trainingSet, examplesTrain, labelsTrain );
  98. //map the occuring classes to a minimal set of indices
  99. std::map< uint, uint > map_classNoToClassIdx_train; // < classNo, Idx>
  100. mapClassNumbersToIndices( labelsTrain, map_classNoToClassIdx_train );
  101. //how many different classes do we have in the training set?
  102. int i_noClassesTrain ( map_classNoToClassIdx_train.size() );
  103. // train GPHIK classifier
  104. classifier.train ( examplesTrain, labelsTrain );
  105. // ========================================================================
  106. // TEST STEP
  107. // ========================================================================
  108. // read test data
  109. std::vector< const NICE::SparseVector * > examplesTest;
  110. NICE::Vector labelsTest;
  111. std::string s_fn_testSet = conf.gS("main", "testset");
  112. readSparseExamples ( s_fn_testSet, examplesTest, labelsTest );
  113. //map the occuring classes to a minimal set of indices
  114. std::map< uint, uint > map_classNoToClassIdx_test; // < classNo, Idx>
  115. mapClassNumbersToIndices( labelsTest, map_classNoToClassIdx_test );
  116. //how many different classes do we have in the test set?
  117. int i_noClassesTest ( map_classNoToClassIdx_test.size() );
  118. // evaluate GPHIK classifier on unseen test data
  119. int idx ( 0 );
  120. NICE::SparseVector scores; /* not needed in this evaluation, so we just declare it ones */
  121. NICE::Matrix confusion ( i_noClassesTest, i_noClassesTrain, 0.0 );
  122. for (std::vector< const NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++, idx++)
  123. {
  124. uint classno_groundtruth = labelsTest( idx );
  125. uint classno_predicted;
  126. classifier.classify ( *itTestExamples, classno_predicted, scores /* not needed anyway in that evaluation*/ );
  127. uint idx_classno_groundtruth ( map_classNoToClassIdx_test[ classno_groundtruth ] );
  128. uint idx_classno_predicted ( map_classNoToClassIdx_train[ classno_predicted ] );
  129. confusion( idx_classno_groundtruth, idx_classno_predicted ) += 1;
  130. }
  131. confusion.normalizeRowsL1();
  132. std::cerr << confusion << std::endl;
  133. std::cerr << "average recognition rate: " << confusion.trace()/confusion.rows() << std::endl;
  134. // ========================================================================
  135. // clean up memory
  136. // ========================================================================
  137. // release memore of feature vectors from training set
  138. for (std::vector< const NICE::SparseVector *>::const_iterator itTrainExamples = examplesTrain.begin(); itTrainExamples != examplesTrain.end(); itTrainExamples++ )
  139. {
  140. delete *itTrainExamples;
  141. }
  142. // release memore of feature vectors from test set
  143. for (std::vector< const NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++ )
  144. {
  145. delete *itTestExamples;
  146. }
  147. return 0;
  148. }