SLR.cpp

#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <limits>
#include <vector>

#include "vislearning/classifier/fpclassifier/logisticregression/SLR.h"
#include "vislearning/cbaselib/FeaturePool.h"
#include "core/image/ImageT.h"
//#include "core/imagedisplay/ImageDisplay.h"

#define SLRDEBUG // enable per-iteration debug output
#define featnorm // enable min-max feature normalization

using namespace OBJREC;
using namespace std;
using namespace NICE;
SLR::SLR ()
{
}
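/* Read the hyperparameters from the given config section:
 *   maxiter         - maximum number of optimization cycles
 *   resamp_decay    - decay applied to the resampling probability of a
 *                     weight that was set to zero
 *   convergence_tol - threshold on the relative weight change for convergence
 *   min_resamp      - lower bound on the resampling probability
 *   lambda          - L1 regularization strength
 *   samplesperclass - (approximate) number of training samples used per class
 */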
SLR::SLR ( const Config *_conf, string section ) : conf ( _conf )
{
  maxiter = conf->gI ( section, "maxiter", 20000 );
  resamp_decay = conf->gD ( section, "resamp_decay", 0.5 );
  convergence_tol = conf->gD ( section, "convergence_tol", 1e-7 );
  min_resamp = conf->gD ( section, "min_resamp", 0.001 );
  lambda = conf->gD ( section, "lambda", 0.1 );
  samplesperclass = conf->gD ( section, "samplesperclass", 200.0 );

  weight.setDim ( 0 );
  fdim = 0;
}
SLR::~SLR()
{
  //clean up
}
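/* Classify a single example: returns the probability
 * p(y = classno | x) = 1 / (1 + exp(-w^T x)) of the positive class.
 * If featnorm is defined, dense input vectors are first min-max
 * normalized with the ranges stored during training.
 */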
double SLR::classify ( Example & pce )
{
  double result;
  SparseVector *svec;

  if ( weight.getDim() == 0 )
    return 0.0;

  bool newvec = false;

  if ( pce.svec != NULL )
  {
    svec = pce.svec;
  }
  else
  {
    Vector x;
    x = * ( pce.vec );
#ifdef featnorm
    for ( int m = 0; m < ( int ) x.size(); m++ )
    {
      x[m] = ( x[m] - minval[m] ) / ( maxval[m] - minval[m] );
    }
#endif
    svec = new SparseVector ( x );
    svec->setDim ( x.size() );
    newvec = true;
  }

  if ( weight.size() == 0 )
    result = 0.0;
  else
  {
    result = 1.0 / ( 1.0 + exp ( -svec->innerProduct ( weight ) ) );
  }

  if ( newvec )
    delete svec;

  return result;
}
void SLR::train ( FeaturePool & _fp, Examples & examples, int classno )
{
  cout << "start train" << endl;
  fp = FeaturePool ( _fp );

  // number of training examples
  int fanz = examples.size();
  assert ( fanz >= 2 );

  // determine the feature dimension
  Vector x;
  fp.calcFeatureVector ( examples[0].second, x );
  fdim = x.size();
  assert ( fdim > 0 );

  stepwise_regression ( examples, classno );
  cout << "end train" << endl;
}
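/* restore/store persist the weight vector followed by one
 * "maxval minval" pair per feature dimension (the normalization ranges).
 */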
void SLR::restore ( istream & is, int format )
{
  weight.restore ( is, format );
  fdim = ( int ) weight.getDim();
  maxval.resize ( fdim );
  minval.resize ( fdim );
  for ( int i = 0; i < fdim; i++ )
  {
    is >> maxval[i];
    is >> minval[i];
  }
}
void SLR::store ( ostream & os, int format ) const
{
  if ( format != -9999 ) {
    weight.store ( os, format );
    for ( int i = 0; i < fdim; i++ )
    {
      os << maxval[i] << " " << minval[i] << endl;
    }
  } else {
    // format -9999: store the weight vector only, without the
    // normalization ranges
    weight.store ( os, format );
  }
}
void SLR::clear ()
{
  //TODO: implement
}
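/* Train sparse logistic regression by coordinate-wise updates. The loop
 * below appears to follow the bound-optimization scheme of Krishnapuram
 * et al.'s sparse multinomial logistic regression (SMLR): each weight is
 * updated in turn with a closed-form step and then soft-thresholded by
 * the L1 penalty, so most weights remain exactly zero. Classes are
 * balanced beforehand by random subsampling. Returns the number of
 * cycles performed, or -1 if one class has no examples.
 */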
int SLR::stepwise_regression ( Examples &x, int classno )
{
  // initialize randomization
  srand ( time ( NULL ) );
  //srand (1);

  cout << "start regression" << endl;

  // number of training examples
  int fnum = x.size();

  // create the data matrices
  GMSparseVectorMatrix X;
  GMSparseVectorMatrix Y;

  vector<int> count ( 2, 0 );
  maxval.resize ( fdim );
  minval.resize ( fdim );
  if ( x[0].second.svec == NULL ) //input: dense vectors
  {
    Vector feat;
    for ( int i = 0; i < fdim; i++ )
    {
      // use -max() here: numeric_limits<double>::min() is the smallest
      // positive value and would break the maximum for negative features
      maxval[i] = -numeric_limits<double>::max();
      minval[i] = numeric_limits<double>::max();
    }
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        minval[m] = std::min ( minval[m], feat[m] );
        maxval[m] = std::max ( maxval[m], feat[m] );
      }
#endif
      fdim = feat.size();
    }
  }
  else //input: sparse vectors
  {
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
    }
    fdim = x[0].second.svec->getDim();
  }

  if ( count[0] == 0 || count[1] == 0 )
  {
    cerr << "not enough samples " << count[0] << " : " << count[1] << endl;
    weight.setDim ( 0 );
    return -1;
  }
  double samples = std::min ( count[0], count[1] );
  samples = std::min ( samples, samplesperclass );
  //samples = std::max(samples, 200);
  //samples = samplesperclass;

  vector<double> rands ( 2, 0.0 );
  vector<double> allsizes ( 2, 0.0 );
  for ( int i = 0; i < 2; i++ )
  {
    rands[i] = 1.0; // use all samples (default if samples > count[i])
    if ( samples > 0 && samples < 1 )
      rands[i] = samples; // use relative number of samples wrt. class size
    if ( samples > 1 && samples <= count[i] )
      rands[i] = samples / ( double ) count[i]; // use (approximately) fixed absolute number of samples per class
    allsizes[i] = count[i];
    count[i] = 0;
  }
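  /* Example with hypothetical numbers: for samplesperclass = 200 and class
   * counts {1000, 50}, samples = min(min(1000, 50), 200) = 50, giving
   * rands = {0.05, 1.0}, i.e. roughly 50 examples kept from each class.
   */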
  for ( int i = 0; i < fnum; i++ )
  {
    int pos = 0;
    if ( x[i].first != classno )
      pos = 1;
    double r = ( double ) rand() / ( double ) RAND_MAX;
    if ( r > rands[pos] )
      continue;
    ++count[pos];
    if ( x[0].second.svec == NULL )
    {
      Vector feat;
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        feat[m] = ( feat[m] - minval[m] ) / ( maxval[m] - minval[m] );
      }
#endif
      X.addRow ( feat );
    }
    else
    {
      X.addRow ( x[i].second.svec );
    }
    SparseVector *v = new SparseVector ( 2 );
    ( *v ) [pos] = 1.0;
    Y.addRow ( v );
  }
  Y.setDel();
  if ( x[0].second.svec == NULL )
    X.setDel();

  for ( int i = 0; i < 2; i++ )
  {
    cerr << "Examples for class " << i << ": " << count[i] << " out of " << allsizes[i] << " with p = " << rands[i] << endl;
  }
#undef NORMALIZATION
#ifdef NORMALIZATION
  GMSparseVectorMatrix Xred;
  Xred.resize ( X.rows(), X.cols() );
  for ( int r = 0; r < ( int ) Xred.rows(); r++ )
  {
    for ( int c = 0; c < ( int ) Xred.cols(); c++ )
    {
      double tmp = X[r].get ( c );
      // cast to double: the original integer division would truncate to 0
      if ( Y[r].get ( 0 ) == 1 )
        tmp *= count[0] / ( double ) fnum;
      else
        tmp *= count[1] / ( double ) fnum;
      if ( fabs ( tmp ) > 10e-7 )
        Xred[r][c] = tmp;
    }
  }
#endif
  fnum = X.rows();

  GMSparseVectorMatrix xY;
#ifdef NORMALIZATION
  Xred.mult ( Y, xY, true );
#else
  X.mult ( Y, xY, true );
#endif

  weight.setDim ( fdim );

  // for faster computing: Xw = X*w
  GMSparseVectorMatrix Xw;
  X.mult ( weight, Xw, false, true );

  SparseVector S ( fnum );
  for ( int r = 0; r < fnum; r++ )
  {
    S[r] = 0.0;
    for ( int c = 0; c < 2; c++ )
    {
      S[r] += exp ( Xw[r].get ( c ) );
    }
  }
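  /* S[r] caches the softmax normalizer sum_c exp((Xw)[r][c]). Only column 0
   * of Xw is ever updated below, so this is effectively exp(w^T x_r) + 1,
   * the partition function of the two-class model.
   */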
  // for faster computing: ac[i] = (maxClassNo-1)/(2*maxClassNo) * Sum_j (x_i(j))^2
  // (for two classes this factor is 1/4)
  Vector ac ( fdim );
  Vector lm_2_ac ( fdim );
  for ( int f = 0; f < fdim; f++ )
  {
    ac[f] = 0.0;
    for ( int a = 0; a < fnum; a++ )
    {
      ac[f] += X[a].get ( f ) * X[a].get ( f );
    }
    ac[f] *= 0.25;
    lm_2_ac[f] = ( lambda / 2.0 ) / ac[f];
  }
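  /* Per-coordinate update used in the loop below: take the bound-optimization
   * step u = w_old + grad / ac[basis] and apply L1 soft-thresholding with
   * threshold t = lambda / (2 * ac[basis]):
   *   w_new = u - t  if u >  t
   *   w_new = u + t  if u < -t
   *   w_new = 0      otherwise
   * Weights pushed to zero are revisited only with probability p_resamp,
   * which decays toward min_resamp each time the weight stays zero.
   */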
  // initialize the iterative optimization
  double incr = numeric_limits<double>::max();
  long non_zero = 0;
  long wasted_basis = 0;
  long needed_basis = 0;

  // probability of resampling each weight
  vector<double> p_resamp;
  p_resamp.resize ( fdim );

  // loop over cycles
  long cycle = 0;
  for ( cycle = 0; cycle < maxiter; cycle++ )
  {
#ifdef SLRDEBUG
    cerr << "iteration: " << cycle << " of " << maxiter << endl;
#endif
    // zero out the diffs for assessing change
    double sum2_w_diff = 0.0;
    double sum2_w_old = 0.0;
    wasted_basis = 0;
    if ( cycle == 1 )
      needed_basis = 0;
    // update each weight
    //#pragma omp parallel for
    for ( int basis = 0; basis < fdim; basis++ ) // over all dimensions
    {
      int changed = 0;
      // get the starting weight
      double w_old = weight.get ( basis );

      // set the p_resamp if it's the first cycle
      if ( cycle == 0 )
      {
        p_resamp[basis] = 1.0;
      }

      // see if we're going to update
      double rval = ( double ) rand() / ( double ) RAND_MAX;
      if ( ( w_old != 0 ) || ( rval < p_resamp[basis] ) )
      {
        // calc the probability
        double XdotP = 0.0;
        for ( int i = 0; i < fnum; i++ )
        {
#ifdef NORMALIZATION
          double e = Xred[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#else
          double e = X[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#endif
          if ( NICE::isFinite ( e ) )
            XdotP += e;
#ifdef SLRDEBUG
          else
            throw "numerical problems";
#endif
        }
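        /* XdotP = sum_i x_i[basis] * p(class 0 | x_i), since
         * exp(Xw[i][0]) / S[i] is the model's current probability of class 0;
         * with xY = X^T Y computed above, xY[basis] - XdotP is the gradient
         * of the log-likelihood with respect to this weight.
         */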
        // get the gradient
        double grad = xY[basis].get ( 0 ) - XdotP;
        // compute the new weight
        double w_new = w_old + grad / ac[basis];
        // test whether we're within bounds
        if ( w_new > lm_2_ac[basis] )
        {
          // moved toward the bound, but keep it
          w_new -= lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the p_resamp
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else if ( w_new < -lm_2_ac[basis] )
        {
          // moved toward the bound, but keep it
          w_new += lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the p_resamp
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else
        {
          // going to zero it out
          w_new = 0.0;
          // decrease the p_resamp
          p_resamp[basis] -= ( p_resamp[basis] - min_resamp ) * resamp_decay;
          // update the number of non-zero weights
          if ( w_old == 0.0 )
          {
            // we didn't change
            changed = 0;
            // and wasted a basis
            wasted_basis += 1;
          }
          else
          {
            // we changed
            changed = 1;
            // must update num non_zero
            non_zero -= 1;
          }
        }
        // process changes if necessary
        if ( changed == 1 )
        {
          // update the expected values
          double w_diff = w_new - w_old;
          for ( int i = 0; i < fnum; i++ )
          {
            double E_old = exp ( Xw[i].get ( 0 ) );
            double val = X[i].get ( basis ) * w_diff;
            if ( Xw[i].get ( 0 ) == 0.0 )
            {
              if ( fabs ( val ) > 10e-7 )
                Xw[i][0] = val;
            }
            else
              Xw[i][0] += X[i].get ( basis ) * w_diff;
            double E_new_m = exp ( Xw[i].get ( 0 ) );
            S[i] += E_new_m - E_old;
          }

          // update the weight
          if ( w_new == 0.0 )
          {
            if ( weight.get ( basis ) != 0.0 )
            {
              weight.erase ( basis );
            }
          }
          else
            weight[basis] = w_new;

          // keep track of the sum of squared diffs
          //#pragma omp critical
          sum2_w_diff += w_diff * w_diff;
        }
        // no matter what, we keep track of the old weight
        //#pragma omp critical
        sum2_w_old += w_old * w_old;
      }
    }
    // finished a cycle, assess convergence: relative change of the weights
    incr = sqrt ( sum2_w_diff ) / ( sqrt ( sum2_w_old ) + numeric_limits<double>::epsilon() );
#ifdef SLRDEBUG
    cout << "sum2_w_diff: " << sum2_w_diff << " sum2_w_old " << sum2_w_old << endl;
    cout << "convcrit: " << incr << " tol: " << convergence_tol << endl;
#endif
    if ( incr < convergence_tol )
    {
      // we converged!!!
      break;
    }
  }
  // finished updating the weights
  cerr << "end regression after " << cycle << " steps" << endl;
  return cycle;
}
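
/* Usage sketch (hypothetical; assumes a config file and a feature pool set
 * up elsewhere, and that "SLR" is the config section holding the parameters
 * listed above):
 *
 *   Config conf ( "classifier.conf" );              // assumed config source
 *   SLR slr ( &conf, "SLR" );
 *   slr.train ( fp, examples, classno );            // fp: FeaturePool
 *   double p = slr.classify ( examples[0].second ); // p(y = classno | x)
 */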