classifyDatasetGPHIK.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /**
  2. * @file classifyDatasetGPHIK.cpp
  3. * @brief Perform classification on an arbitrary dataset with the GPHIK-Classifier
  4. * @author Alexander Freytag
  5. * @date 16-09-2013
  6. */
  7. // STL-includes
  8. #include <iostream>
  9. #include <vector>
  10. // NICE-core includes
  11. #include <core/basics/Config.h>
  12. #include <core/basics/Exception.h>
  13. #include <core/vector/MatrixT.h>
  14. #include <core/vector/SparseVectorT.h>
  15. // gp-hik-core includes
  16. #include "gp-hik-core/GPHIKClassifier.h"
  17. void readSparseExamples ( const std::string & fn, std::vector< NICE::SparseVector * > & examples, NICE::Vector & labels )
  18. {
  19. // initially cleaning of variables
  20. examples.clear();
  21. labels.clear();
  22. std::vector<double> labels_std;
  23. labels_std.clear();
  24. std::cerr << "Reading " << fn << std::endl;
  25. std::ifstream ifs ( fn.c_str(), std::ios::in );
  26. if ( ! ifs.good() )
  27. {
  28. std::cerr << "Unable to read " << fn << std::endl;
  29. return;
  30. }
  31. // read until no new line is in the file anymore
  32. while ( !ifs.eof() )
  33. {
  34. int classno;
  35. if ( !(ifs >> classno) )
  36. break;
  37. labels_std.push_back( classno );
  38. NICE::SparseVector *v = new NICE::SparseVector;
  39. /* needed format in every line:
  40. * SVECTOR dimension size index value index value ... END
  41. * with
  42. * SVECTOR -- starting flag
  43. * dimension -- overall feature dimension and
  44. * size -- number of non-zero entries for the current feature vector
  45. * index -- integer value specifying a non-zero dimension
  46. * value -- double value specifying the value for the corresp. non-zero dimension
  47. * END -- ending flag
  48. */
  49. try
  50. {
  51. v->restore ( ifs, NICE::SparseVector::FORMAT_INDEX );
  52. }
  53. catch ( NICE::Exception excep)
  54. {
  55. std::cerr << "Error while reading features. Error message: " << excep.what() << std::endl;
  56. break;
  57. }
  58. examples.push_back ( v );
  59. }
  60. ifs.close();
  61. labels = NICE::Vector( labels_std );
  62. }
  63. void mapClassNumbersToIndices( const NICE::Vector & labels, std::map<int,int> & mapClassNoToIdx )
  64. {
  65. mapClassNoToIdx.clear();
  66. int classCnt ( 0 );
  67. for ( NICE::Vector::const_iterator it_labels = labels.begin(); it_labels != labels.end(); it_labels++ )
  68. {
  69. if ( mapClassNoToIdx.find( *it_labels ) == mapClassNoToIdx.end() )
  70. {
  71. mapClassNoToIdx.insert( std::pair<int,int>( (int) round(*it_labels), classCnt ) );
  72. classCnt++;
  73. }
  74. }
  75. }
  76. int main (int argc, char* argv[])
  77. {
  78. std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
  79. NICE::Config conf ( argc, argv );
  80. NICE::GPHIKClassifier classifier ( &conf, "GPHIKClassifier" );
  81. // ========================================================================
  82. // TRAINING STEP
  83. // ========================================================================
  84. // read training data
  85. std::vector< NICE::SparseVector * > examplesTrain;
  86. NICE::Vector labelsTrain;
  87. std::string s_fn_trainingSet = conf.gS("main", "trainset");
  88. readSparseExamples ( s_fn_trainingSet, examplesTrain, labelsTrain );
  89. //map the occuring classes to a minimal set of indices
  90. std::map<int,int> map_classNoToClassIdx_train; // < classNo, Idx>
  91. mapClassNumbersToIndices( labelsTrain, map_classNoToClassIdx_train );
  92. //how many different classes do we have in the training set?
  93. int i_noClassesTrain ( map_classNoToClassIdx_train.size() );
  94. // train GPHIK classifier
  95. classifier.train ( examplesTrain, labelsTrain );
  96. // ========================================================================
  97. // TEST STEP
  98. // ========================================================================
  99. // read test data
  100. std::vector< NICE::SparseVector * > examplesTest;
  101. NICE::Vector labelsTest;
  102. std::string s_fn_testSet = conf.gS("main", "testset");
  103. readSparseExamples ( s_fn_testSet, examplesTest, labelsTest );
  104. //map the occuring classes to a minimal set of indices
  105. std::map<int,int> map_classNoToClassIdx_test; // < classNo, Idx>
  106. mapClassNumbersToIndices( labelsTest, map_classNoToClassIdx_test );
  107. //how many different classes do we have in the test set?
  108. int i_noClassesTest ( map_classNoToClassIdx_test.size() );
  109. // evaluate GPHIK classifier on unseen test data
  110. int idx ( 0 );
  111. NICE::SparseVector scores; /* not needed in this evaluation, so we just declare it ones */
  112. NICE::Matrix confusion ( i_noClassesTest, i_noClassesTrain, 0.0 );
  113. for (std::vector< NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++, idx++)
  114. {
  115. int classno_groundtruth = labelsTest( idx );
  116. int classno_predicted;
  117. classifier.classify ( *itTestExamples, classno_predicted, scores /* not needed anyway in that evaluation*/ );
  118. int idx_classno_groundtruth ( map_classNoToClassIdx_test[ classno_groundtruth ] );
  119. int idx_classno_predicted ( map_classNoToClassIdx_train[ classno_predicted ] );
  120. confusion( idx_classno_groundtruth, idx_classno_predicted ) += 1;
  121. }
  122. confusion.normalizeRowsL1();
  123. std::cerr << confusion << std::endl;
  124. std::cerr << "average recognition rate: " << confusion.trace()/confusion.rows() << std::endl;
  125. // ========================================================================
  126. // clean up memory
  127. // ========================================================================
  128. // release memore of feature vectors from training set
  129. for (std::vector< NICE::SparseVector *>::const_iterator itTrainExamples = examplesTrain.begin(); itTrainExamples != examplesTrain.end(); itTrainExamples++ )
  130. {
  131. delete *itTrainExamples;
  132. }
  133. // release memore of feature vectors from test set
  134. for (std::vector< NICE::SparseVector *>::const_iterator itTestExamples = examplesTest.begin(); itTestExamples != examplesTest.end(); itTestExamples++ )
  135. {
  136. delete *itTestExamples;
  137. }
  138. return 0;
  139. }