/**
* @file GPHIKRawClassifier.cpp
* @brief Main interface for our GP HIK classifier (similar to the feature pool classifier interface in vislearning) (Implementation)
* @author Erik Rodner, Alexander Freytag
* @date 02/01/2012
*/

// STL includes
#include <iostream>

// NICE-core includes
#include <core/basics/numerictools.h>
#include <core/basics/Timer.h>
#include <core/algebra/ILSConjugateGradients.h>

// gp-hik-core includes
#include "GPHIKRawClassifier.h"
#include "GMHIKernelRaw.h"

using namespace std;
using namespace NICE;

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
// PROTECTED METHODS
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
// PUBLIC METHODS
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
GPHIKRawClassifier::GPHIKRawClassifier( )
{
  this->b_isTrained       = false;
  this->confSection       = "";
  this->nnz_per_dimension = NULL;
  this->q                 = NULL;
  this->gm                = NULL;
  this->solver            = NULL;

  // to make sure that all necessary variables are set up with default values,
  // we run initFromConfig with an empty config
  NICE::Config tmpConfEmpty ;
  this->initFromConfig ( &tmpConfEmpty, this->confSection );
}
GPHIKRawClassifier::GPHIKRawClassifier( const Config *_conf,
                                        const string & _confSection
                                      )
{
  ///////////
  // same code as in the empty constructor -- the duplication could be avoided
  // with C++11 constructor delegation
  ///////////
  this->b_isTrained       = false;
  this->confSection       = "";
  this->nnz_per_dimension = NULL;
  this->q                 = NULL;
  this->gm                = NULL;
  this->solver            = NULL;

  ///////////
  // here comes the new code part different from the empty constructor
  ///////////
  this->confSection = _confSection;

  // if no config was given, we either restore the classifier from an external
  // file, or run ::init with an empty config (thereby using default values)
  // when calling the train-method
  if ( _conf != NULL )
  {
    this->initFromConfig( _conf, _confSection );
  }
  else
  {
    // if no config was given, we create an empty one
    NICE::Config tmpConfEmpty ;
    this->initFromConfig ( &tmpConfEmpty, this->confSection );
  }
}
GPHIKRawClassifier::~GPHIKRawClassifier()
{
  // clean up the iterative solver and the kernel object
  // (gm stays NULL until train() has been called; deleting NULL is a no-op)
  delete this->solver;
  delete this->gm;
}
void GPHIKRawClassifier::initFromConfig(const Config *_conf,
                                        const string & _confSection
                                       )
{
  this->d_noise      = _conf->gD( _confSection, "noise", 0.01);

  this->confSection  = _confSection;
  this->b_verbose    = _conf->gB( _confSection, "verbose", false);
  this->b_debug      = _conf->gB( _confSection, "debug", false);
  this->f_tolerance  = _conf->gD( _confSection, "f_tolerance", 1e-10);

  //FIXME this is not used in that way for the standard GPHIKClassifier
  //string ilssection = "FMKGPHyperparameterOptimization";
  string ilssection       = _confSection;
  uint ils_max_iterations = _conf->gI( ilssection, "ils_max_iterations", 1000 );
  double ils_min_delta    = _conf->gD( ilssection, "ils_min_delta", 1e-7 );
  double ils_min_residual = _conf->gD( ilssection, "ils_min_residual", 1e-7 );
  bool ils_verbose        = _conf->gB( ilssection, "ils_verbose", false );

  this->solver = new ILSConjugateGradients( ils_verbose,
                                            ils_max_iterations,
                                            ils_min_delta,
                                            ils_min_residual
                                          );
}
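
// Illustrative sketch of the configuration keys read above -- this is only an
// example and not taken from the original documentation; the section name
// "GPHIKRawClassifier" is an assumption, and the values shown are simply the
// defaults used in initFromConfig:
//
//   [GPHIKRawClassifier]
//   noise              = 0.01
//   verbose            = false
//   debug              = false
//   f_tolerance        = 1e-10
//   ils_max_iterations = 1000
//   ils_min_delta      = 1e-7
//   ils_min_residual   = 1e-7
//   ils_verbose        = false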

///////////////////// ///////////////////// /////////////////////
// GET / SET
///////////////////// ///////////////////// /////////////////////

std::set<uint> GPHIKRawClassifier::getKnownClassNumbers ( ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  return this->knownClasses;
}

///////////////////// ///////////////////// /////////////////////
// CLASSIFIER STUFF
///////////////////// ///////////////////// /////////////////////
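
/**
 * Classify a single sparse example: for every class seen during training, the
 * GP predictive mean score is evaluated from the precomputed per-class tables
 * (or from the quantized lookup table if a quantization object is available);
 * the class with the largest score (or, in the binary case, the sign of the
 * single trained score) determines the returned result.
 */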
void GPHIKRawClassifier::classify ( const NICE::SparseVector * _xstar,
                                    uint & _result,
                                    SparseVector & _scores
                                  ) const
{
  if ( ! this->b_isTrained )
    fthrow(Exception, "Classifier not trained yet -- aborting!" );

  _scores.clear();

  GMHIKernelRaw::sparseVectorElement **dataMatrix = gm->getDataMatrix();

  uint maxClassNo = 0;
  for ( std::map<uint, PrecomputedType>::const_iterator i = this->precomputedA.begin() ; i != this->precomputedA.end(); i++ )
  {
    uint classno = i->first;
    maxClassNo   = std::max ( maxClassNo, classno );
    double beta  = 0;

    if ( this->q != NULL ) {
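      // a quantization object is available: the score is approximated via the
      // per-class lookup table T, indexed by the dimension and the quantization
      // bin of the test feature value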
      std::map<uint, double *>::const_iterator j = this->precomputedT.find ( classno );
      double *T = j->second;

      for ( SparseVector::const_iterator itX = _xstar->begin(); itX != _xstar->end(); itX++ )
      {
        uint dim  = itX->first;
        double v  = itX->second;
        uint qBin = q->quantize( v, dim );

        beta += T[dim * q->getNumberOfBins() + qBin];
      }
    } else {
      const PrecomputedType & A = i->second;
      std::map<uint, PrecomputedType>::const_iterator j = this->precomputedB.find ( classno );
      const PrecomputedType & B = j->second;
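
      // A[dim] and B[dim] are cumulative sums over the training examples with a
      // non-zero value in this dimension, taken in increasing order of that value:
      // A accumulates alpha_i * x_i[dim], B accumulates alpha_i. The GP mean
      // contribution of a dimension with test value fval is
      //   sum_{x_i[dim] <= fval} alpha_i * x_i[dim]  +  fval * sum_{x_i[dim] > fval} alpha_i,
      // which the loop below evaluates with one binary search per dimension
      // (the nz offset compensates for zero entries that are not stored explicitly).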
      for ( SparseVector::const_iterator itX = _xstar->begin(); itX != _xstar->end(); itX++ )
      {
        uint dim    = itX->first;
        double fval = itX->second;

        uint nnz = this->nnz_per_dimension[dim];
        uint nz  = this->num_examples - nnz;

        if ( nnz == 0 ) continue;
        if ( fval < this->f_tolerance ) continue;

        uint position = 0;

        //this->X_sorted.findFirstLargerInDimension(dim, fval, position);
        GMHIKernelRaw::sparseVectorElement fval_element;
        fval_element.value = fval;
        GMHIKernelRaw::sparseVectorElement *it = upper_bound ( dataMatrix[dim], dataMatrix[dim] + nnz, fval_element );
        position = distance ( dataMatrix[dim], it );

        bool posIsZero ( position == 0 );
        if ( !posIsZero )
          position--;

        double firstPart = 0.0;
        if ( !posIsZero && ((position-nz) < this->num_examples) )
          firstPart = (A[dim][position-nz]);

        double secondPart( B[dim][this->num_examples-1-nz] );
        if ( !posIsZero && (position >= nz) )
          secondPart -= B[dim][position-nz];

        // accumulate this dimension's contribution to the class score
        beta += firstPart + secondPart * fval;
      }
    }

    _scores[ classno ] = beta;
  }

  _scores.setDim ( *this->knownClasses.rbegin() + 1 );

  if ( this->knownClasses.size() > 2 )
  { // multi-class classification
    _result = _scores.maxElement();
  }
  else if ( this->knownClasses.size() == 2 ) // binary setting
  {
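    // during training only one of the two binary sub-problems was solved (see the
    // binary special case in train()), so a score exists for exactly one class;
    // the score of the other class is simply the negated value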
    uint class1 = *(this->knownClasses.begin());
    uint class2 = *(this->knownClasses.rbegin());
    uint class_for_which_we_have_a_score      = _scores.begin()->first;
    uint class_for_which_we_dont_have_a_score = (class1 == class_for_which_we_have_a_score ? class2 : class1);

    _scores[class_for_which_we_dont_have_a_score] = - _scores[class_for_which_we_have_a_score];

    _result = _scores[class_for_which_we_have_a_score] > 0.0 ? class_for_which_we_have_a_score : class_for_which_we_dont_have_a_score;
  }
}

/** training process */
void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
                                 const NICE::Vector & _labels
                               )
{
  // security check: examples and labels have to be of the same size
  if ( _examples.size() != _labels.size() )
  {
    fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
  }
  this->num_examples = _examples.size();

  this->knownClasses.clear();
  for ( uint i = 0; i < _labels.size(); i++ )
    this->knownClasses.insert( (uint)_labels[i] );
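
  // build a one-vs-all binary label vector (+1 for the current class, -1 for all
  // other examples) for every class that occurs in the label vector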
  std::map<uint, NICE::Vector> binLabels;
  for ( set<uint>::const_iterator j = knownClasses.begin(); j != knownClasses.end(); j++ )
  {
    uint current_class = *j;
    Vector labels_binary ( _labels.size() );
    for ( uint i = 0; i < _labels.size(); i++ )
      labels_binary[i] = ( _labels[i] == current_class ) ? 1.0 : -1.0;

    binLabels.insert ( pair<uint, NICE::Vector>( current_class, labels_binary) );
  }

  // handle the special binary case: with exactly two classes a single binary
  // sub-problem is sufficient, so only one of the two label vectors is kept
  // (classify() recovers the score of the dropped class by negation)
  if ( knownClasses.size() == 2 )
  {
    std::map<uint, NICE::Vector>::iterator it = binLabels.begin();
    it++;
    binLabels.erase( binLabels.begin(), it );
  }

  train ( _examples, binLabels );
}

void GPHIKRawClassifier::train ( const std::vector< const NICE::SparseVector *> & _examples,
                                 std::map<uint, NICE::Vector> & _binLabels
                               )
{
  // security check: every binary label vector has to match the number of examples
  for ( std::map< uint, NICE::Vector >::const_iterator binLabIt = _binLabels.begin();
        binLabIt != _binLabels.end();
        binLabIt++
      )
  {
    if ( _examples.size() != binLabIt->second.size() )
    {
      fthrow(Exception, "Given examples do not match label vector in size -- aborting!" );
    }
  }

  if ( this->b_verbose )
    std::cerr << "GPHIKRawClassifier::train" << std::endl;

  Timer t;
  t.start();

  precomputedA.clear();
  precomputedB.clear();
  precomputedT.clear();

  // sort examples in each dimension and "transpose" the feature matrix
  // (set up the GenericMatrix interface)
  if ( gm != NULL )
    delete gm;
  gm = new GMHIKernelRaw ( _examples, this->d_noise );
  nnz_per_dimension = gm->getNNZPerDimension();

  // solve the linear equation system for each class
  // be careful when parallelizing this!
  for ( map<uint, NICE::Vector>::const_iterator i = _binLabels.begin();
        i != _binLabels.end(); i++ )
  {
    uint classno = i->first;
    if ( b_verbose )
      std::cerr << "Training for class " << classno << endl;

    const Vector & y = i->second;
    Vector alpha;
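    // solve the regularized kernel system for this class with the iterative
    // conjugate-gradient solver; the HIK kernel matrix is never built explicitly,
    // GMHIKernelRaw only provides the matrix-vector products the solver needs
    // (this->d_noise was passed to it above as the GP noise parameter)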
    solver->solveLin( *gm, y, alpha );

    // compute the lookup tables A and B from the current alpha and store them for this class
    gm->updateTables(alpha);
    double **A = gm->getTableA();
    double **B = gm->getTableB();
    precomputedA.insert ( pair<uint, PrecomputedType> ( classno, A ) );
    precomputedB.insert ( pair<uint, PrecomputedType> ( classno, B ) );
  }

  t.stop();
  if ( this->b_verbose )
    std::cerr << "Time used for training: " << t.getLast() << std::endl;

  // indicate that we finished training successfully
  this->b_isTrained = true;

  // clean up all examples ??
  if ( this->b_verbose )
    std::cerr << "Learning finished" << std::endl;
}
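
// Illustrative usage sketch (not part of the original interface documentation;
// the variable names, the config file name, and the config section name below
// are made up for this example):
//
//   NICE::Config conf ( "gphik.conf" );
//   NICE::GPHIKRawClassifier classifier ( &conf, "GPHIKRawClassifier" );
//
//   std::vector< const NICE::SparseVector * > examples;  // training features, filled elsewhere
//   NICE::Vector labels;                                  // one class label per training example
//   classifier.train ( examples, labels );
//
//   uint result;
//   NICE::SparseVector scores;
//   classifier.classify ( testExample, result, scores );  // testExample is a const NICE::SparseVector*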