testClassifier.cpp 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /**
  2. * @file testClassifier.cpp
  3. * @brief main program for classifier evaluation
  4. * @author Erik Rodner
  5. * @date 2007-10-12
  6. */
  7. #include <objrec/nice_nonvis.h>
  8. #include <fstream>
  9. #include <iostream>
  10. #include <vislearning/cbaselib/MultiDataset.h>
  11. #include <objrec/iclassifier/icgeneric/CSGeneric.h>
  12. #include <vislearning/cbaselib/ClassificationResults.h>
  13. #include <vislearning/cbaselib/MutualInformation.h>
  14. #include "vislearning/classifier/classifierbase/FeaturePoolClassifier.h"
  15. #include <vislearning/classifier/fpclassifier/randomforest/FPCRandomForestTransfer.h>
  16. #include <vislearning/classifier/classifierinterfaces/VCFeaturePool.h>
  17. #include "core/basics/Config.h"
  18. #include <vislearning/baselib/Preprocess.h>
  19. #include <core/basics/StringTools.h>
  20. #undef DEBUG
  21. using namespace OBJREC;
  22. using namespace NICE;
  23. using namespace std;
  24. void binarizeVector( NICE::Vector & xout, const NICE::Vector & x, const NICE::Vector & thresholds )
  25. {
  26. xout.resize( x.size() );
  27. for ( size_t i = 0 ; i < x.size() ; i++ )
  28. if ( fabs( x[i] ) > thresholds[i] )
  29. xout[i] = 1.0;
  30. else
  31. xout[i] = 0.0;
  32. }
  33. void binarizeSet( LabeledSetVector & dst, const LabeledSetVector & src, const NICE::Vector & thresholds )
  34. {
  35. LOOP_ALL( src )
  36. {
  37. EACH( classno, x );
  38. NICE::Vector dstv;
  39. binarizeVector( dstv, x, thresholds );
  40. dst.add( classno, dstv );
  41. }
  42. }
  43. int main( int argc, char **argv )
  44. {
  45. fprintf( stderr, "testClassifier: init\n" );
  46. std::set_terminate( __gnu_cxx::__verbose_terminate_handler );
  47. Config conf( argc, argv );
  48. string wekafile = conf.gS( "main", "weka", "" );
  49. string trainfn = conf.gS( "main", "train", "train.vec" );
  50. string testfn = conf.gS( "main", "test", "test.vec" );
  51. int format = conf.gI( "main", "format", 0 );
  52. bool binarize = conf.gB( "main", "binarize", false );
  53. int wekaclass = conf.gI( "main", "wekaclass", 1 );
  54. string classifier_cache = conf.gS( "main", "classifiercache", "" );
  55. string classifier_cache_in = conf.gS( "main", "classifierin", "" );
  56. int numRuns = conf.gI( "main", "runs", 1 );
  57. string writeImgNet = conf.gS( "main", "imgnet", "" );
  58. // classno:text,classno:text,...
  59. string classes = conf.gS( "main", "classes", "" );
  60. int classesnb = conf.gI( "main", "classes", 0 );
  61. string classesconf = conf.gS( "main", "classesconf", "" );
  62. fprintf( stderr, "testClassifier: reading config\n" );
  63. Preprocess::Init( &conf );
  64. fprintf( stderr, "testClassifier: reading multi dataset\n" );
  65. int testMaxClassNo;
  66. int trainMaxClassNo;
  67. ClassNames *classNames;
  68. if ( classes.size() == 0 && classesnb != 0 )
  69. {
  70. classNames = new ClassNames();
  71. for ( int classno = 0 ; classno < classesnb ; classno++ )
  72. {
  73. classNames->addClass( classno, StringTools::convertToString<int> ( classno ), StringTools::convertToString<int> ( classno ) );
  74. }
  75. trainMaxClassNo = classNames->getMaxClassno();
  76. testMaxClassNo = trainMaxClassNo;
  77. }
  78. else
  79. if ( classes.size() > 0 )
  80. {
  81. classNames = new ClassNames();
  82. vector<string> classes_sub;
  83. StringTools::split( string( classes ), ',', classes_sub );
  84. for ( vector<string>::const_iterator i = classes_sub.begin();
  85. i != classes_sub.end(); i++ )
  86. {
  87. vector<string> desc;
  88. StringTools::split( *i, ':', desc );
  89. if ( desc.size() != 2 )
  90. break;
  91. int classno = StringTools::convert<int> ( desc[0] );
  92. classNames->addClass( classno, desc[1], desc[1] );
  93. }
  94. trainMaxClassNo = classNames->getMaxClassno();
  95. testMaxClassNo = trainMaxClassNo;
  96. classNames->store( cout );
  97. }
  98. else if ( classesconf.size() > 0 ) {
  99. classNames = new ClassNames();
  100. Config cConf( classesconf );
  101. classNames->readFromConfig( cConf, "*" );
  102. trainMaxClassNo = classNames->getMaxClassno();
  103. testMaxClassNo = trainMaxClassNo;
  104. }
  105. else
  106. {
  107. MultiDataset md( &conf );
  108. classNames = new ClassNames( md.getClassNames( "train" ), "*" );
  109. testMaxClassNo = md.getClassNames( "test" ).getMaxClassno();
  110. trainMaxClassNo = md.getClassNames( "train" ).getMaxClassno();
  111. }
  112. LabeledSetVector train;
  113. if ( classifier_cache_in.size() <= 0 )
  114. {
  115. fprintf( stderr, "testClassifier: Reading training dataset from %s\n", trainfn.c_str() );
  116. train.read( trainfn, format );
  117. train.printInformation();
  118. } else {
  119. fprintf( stderr, "testClassifier: skipping training set %s\n", trainfn.c_str() );
  120. }
  121. LabeledSetVector test;
  122. fprintf( stderr, "testClassifier: Reading test dataset from %s\n", testfn.c_str() );
  123. test.read( testfn, format );
  124. ClassificationResults cresults;
  125. ofstream outinet;
  126. if ( writeImgNet.length() > 0 )
  127. {
  128. outinet.open( writeImgNet.c_str() );
  129. }
  130. for ( int runs = 0 ; runs < numRuns ; runs++ ) {
  131. VecClassifier *vec_classifier = NULL;
  132. if ( conf.gS( "main", "classifier" ) == "random_forest_transfer" )
  133. {
  134. FeaturePoolClassifier *fpc = new FPCRandomForestTransfer( &conf, classNames );
  135. vec_classifier = new VCFeaturePool( &conf, fpc );
  136. } else {
  137. vec_classifier = CSGeneric::selectVecClassifier( &conf, "main" );
  138. }
  139. NICE::Vector thresholds;
  140. if ( classifier_cache_in.size() <= 0 )
  141. {
  142. if ( binarize ) {
  143. LabeledSetVector trainbin;
  144. NICE::Vector mis;
  145. MutualInformation mi;
  146. fprintf( stderr, "testClassifier: computing mutual information\n" );
  147. mi.computeThresholdsOverall( train, thresholds, mis );
  148. fprintf( stderr, "testClassifier: done!\n" );
  149. binarizeSet( trainbin, train, thresholds );
  150. vec_classifier->teach( trainbin );
  151. } else {
  152. vec_classifier->teach( train );
  153. }
  154. vec_classifier->finishTeaching();
  155. if ( classifier_cache.size() > 0 )
  156. vec_classifier->save( classifier_cache );
  157. } else {
  158. vec_classifier->setMaxClassNo( classNames->getMaxClassno() );
  159. vec_classifier->read( classifier_cache_in );
  160. }
  161. ProgressBar pb( "Classification" );
  162. pb.show();
  163. std::vector<int> count( testMaxClassNo + 1, 0 );
  164. std::vector<int> correct( testMaxClassNo + 1, 0 );
  165. MatrixT<int> confusionMatrix( testMaxClassNo + 1, trainMaxClassNo + 1, 0 );
  166. int n = test.count();
  167. LOOP_ALL( test )
  168. {
  169. EACH( classno, v );
  170. pb.update( n );
  171. #ifdef DEBUG
  172. fprintf( stderr, "\tclassification\n" );
  173. #endif
  174. ClassificationResult r;
  175. if ( binarize )
  176. {
  177. NICE::Vector vout;
  178. binarizeVector( vout, v, thresholds );
  179. r = vec_classifier->classify( vout );
  180. } else {
  181. r = vec_classifier->classify( v );
  182. }
  183. r.classno_groundtruth = classno;
  184. r.classname = classNames->text( r.classno );
  185. #ifdef DEBUG
  186. if ( r.classno == classno )
  187. fprintf( stderr, "+ classification %d (\"%s\") <-> %d (\"%s\") score=%f\n", classno,
  188. classNames->text( classno ).c_str(), r.classno, r.classname.c_str(), r.scores[r.classno] );
  189. else
  190. fprintf( stderr, "- classification %d (\"%s\") <-> %d (\"%s\") score=%f\n", classno,
  191. classNames->text( classno ).c_str(), r.classno, r.classname.c_str(), r.scores[r.classno] );
  192. r.scores.store( cerr );
  193. #endif
  194. if ( writeImgNet.length() > 0 )
  195. {
  196. for ( int z = 1; z < r.scores.size() - 1; z++ )
  197. {
  198. outinet << r.scores[z] << " ";
  199. }
  200. outinet << r.scores[r.scores.size()-1] << endl;
  201. }
  202. if ( r.classno >= 0 )
  203. {
  204. if ( classno == r.classno ) correct[classno]++;
  205. count[classno]++;
  206. if ( r.ok() ) {
  207. confusionMatrix( classno, r.classno )++;
  208. }
  209. cresults.push_back( r );
  210. }
  211. }
  212. pb.hide();
  213. if ( wekafile.size() > 0 )
  214. {
  215. string wekafile_s = wekafile;
  216. if ( numRuns > 1 )
  217. wekafile_s = wekafile_s + "." + StringTools::convertToString<int>( runs ) + ".txt";
  218. cresults.writeWEKA( wekafile_s, wekaclass );
  219. }
  220. int count_total = 0;
  221. int correct_total = 0;
  222. int classes_tested = 0;
  223. double avg_recognition = 0.0;
  224. for ( size_t classno = 0; classno < correct.size(); classno++ )
  225. {
  226. if ( count[classno] == 0 ) {
  227. fprintf( stdout, "class %d not tested !!\n", ( int )classno );
  228. } else {
  229. fprintf( stdout, "classification result class %d (\"%s\") : %5.2f %%\n",
  230. ( int )classno, classNames->text( classno ).c_str(), correct[classno]*100.0 / count[classno] );
  231. avg_recognition += correct[classno] / ( double )count[classno];
  232. classes_tested++;
  233. }
  234. count_total += count[classno];
  235. correct_total += correct[classno];
  236. }
  237. avg_recognition /= classes_tested;
  238. fprintf( stdout, "overall recognition rate : %-5.3f %%\n", correct_total*100.0 / count_total );
  239. fprintf( stdout, "average recognition rate : %-5.3f %%\n", avg_recognition*100 );
  240. fprintf( stdout, "total:%d misclassified:%d\n", count_total, count_total - correct_total );
  241. int max_count = *( max_element( count.begin(), count.end() ) );
  242. fprintf( stdout, "no of classes : %d\n", classNames->numClasses() );
  243. fprintf( stdout, "lower bound 1 : %f\n", 100.0 / ( classNames->numClasses() ) );
  244. fprintf( stdout, "lower bound 2 : %f\n", max_count * 100.0 / ( double ) count_total );
  245. cout << confusionMatrix << endl;
  246. delete vec_classifier;
  247. }
  248. delete classNames;
  249. return 0;
  250. }