// SLR.cpp
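
// Sparse logistic regression (SLR) for a two-class problem, trained by
// componentwise "stepwise" updates with an L1 shrinkage step. The update
// scheme below closely resembles the SMLR algorithm (sparse multinomial
// logistic regression, Krishnapuram et al., IEEE TPAMI 2005), restricted
// to the binary case.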

#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include "vislearning/classifier/fpclassifier/logisticregression/SLR.h"
#include "vislearning/cbaselib/FeaturePool.h"
#include "core/image/ImageT.h"
//#include "core/imagedisplay/ImageDisplay.h"

#include <algorithm> // std::min, std::max
#include <cmath>     // exp, sqrt, fabs
#include <cstdlib>   // rand, srand
#include <ctime>     // time
#include <iostream>
#include <limits>    // numeric_limits

#define SLRDEBUG
#define featnorm

using namespace OBJREC;
using namespace std;
using namespace NICE;

SLR::SLR ()
{
}
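
// Construct from a Config, reading the optimizer settings from the given
// section (see stepwise_regression() for how each parameter is used):
//   maxiter         - maximum number of update cycles
//   resamp_decay    - decay of the resampling probability of a weight each
//                     time it is set to zero
//   convergence_tol - threshold on the relative weight change per cycle
//   min_resamp      - lower bound for the resampling probability
//   lambda          - strength of the sparsity (L1) penalty
//   samplesperclass - number (or, if < 1, fraction) of training samples
//                     drawn per class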
SLR::SLR ( const Config *_conf, string section ) : conf ( _conf )
{
  maxiter = conf->gI ( section, "maxiter", 20000 );
  resamp_decay = conf->gD ( section, "resamp_decay", 0.5 );
  convergence_tol = conf->gD ( section, "convergence_tol", 1e-7 );
  min_resamp = conf->gD ( section, "min_resamp", 0.001 );
  lambda = conf->gD ( section, "lambda", 0.1 );
  samplesperclass = conf->gD ( section, "samplesperclass", 200.0 );
  weight.setDim ( 0 );
  fdim = 0;
}

SLR::~SLR()
{
  // clean up
}
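
// Classify a single example: returns the logistic probability
// p = 1 / (1 + exp(-w*x)) of the positive class. Dense input vectors are
// min-max normalized with the ranges remembered from training (note that a
// constant feature, i.e. maxval[m] == minval[m], would cause a division by
// zero here).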
double SLR::classify ( Example & pce )
{
  double result;
  SparseVector *svec;

  if ( weight.getDim() == 0 )
    return 0.0;

  bool newvec = false;

  if ( pce.svec != NULL )
  {
    svec = pce.svec;
  }
  else
  {
    Vector x;
    x = * ( pce.vec );
#ifdef featnorm
    for ( int m = 0; m < ( int ) x.size(); m++ )
    {
      x[m] = ( x[m] - minval[m] ) / ( maxval[m] - minval[m] );
    }
#endif
    svec = new SparseVector ( x );
    svec->setDim ( x.size() );
    newvec = true;
  }

  if ( weight.size() == 0 )
    result = 0.0;
  else
  {
    result = 1.0 / ( 1.0 + exp ( -svec->innerProduct ( weight ) ) );
  }

  if ( newvec )
    delete svec;

  return result;
}
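
// Train the classifier: determine the feature dimension from the first
// example, then run the stepwise regression against class "classno".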
void SLR::train ( FeaturePool & _fp, Examples & examples, int classno )
{
  cout << "start train" << endl;
  fp = FeaturePool ( _fp );

  // number of training examples
  int fanz = examples.size();
  assert ( fanz >= 2 );

  // determine the feature dimension
  Vector x;
  fp.calcFeatureVector ( examples[0].second, x );
  fdim = x.size();
  assert ( fdim > 0 );

  stepwise_regression ( examples, classno );
  cout << "end train" << endl;
}
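
// Serialization: the weight vector is written first, followed by one
// "maxval minval" pair per feature dimension; the special format -9999
// stores the weights only.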
void SLR::restore ( istream & is, int format )
{
  weight.restore ( is, format );
  fdim = ( int ) weight.getDim();
  maxval.resize ( fdim );
  minval.resize ( fdim );
  for ( int i = 0; i < fdim; i++ )
  {
    is >> maxval[i];
    is >> minval[i];
  }
}

void SLR::store ( ostream & os, int format ) const
{
  if ( format != -9999 ) {
    weight.store ( os, format );
    for ( int i = 0; i < fdim; i++ )
    {
      os << maxval[i] << " " << minval[i] << endl;
    }
  } else {
    weight.store ( os, format );
  }
}

void SLR::clear ()
{
  //TODO: implement
}
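
// The actual optimization. Outline:
//   1. draw a per-class random subsample of the examples and build the
//      design matrix X and the one-hot label matrix Y,
//   2. precompute xY = X^T * Y, the score cache Xw = X * w and the
//      per-dimension curvature bounds ac,
//   3. cycle over all dimensions, updating one weight at a time with a
//      bounded Newton-like step followed by soft-thresholding, until the
//      relative weight change falls below convergence_tol.
// Returns the number of cycles used, or -1 if one class has no samples.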
int SLR::stepwise_regression ( Examples &x, int classno )
{
  // initialize randomization
  srand ( time ( NULL ) );
  //srand (1);
  cout << "start regression" << endl;

  // get the number of training examples
  int fnum = x.size();

  // create the data matrices
  GMSparseVectorMatrix X;
  GMSparseVectorMatrix Y;

  vector<int> count ( 2, 0 );
  maxval.resize ( fdim );
  minval.resize ( fdim );

  if ( x[0].second.svec == NULL ) // input: dense vectors
  {
    Vector feat;
    for ( int i = 0; i < fdim; i++ )
    {
      // smallest representable double; numeric_limits<double>::min() would
      // be the smallest *positive* value and break the maximum search
      maxval[i] = -numeric_limits<double>::max();
      minval[i] = numeric_limits<double>::max();
    }
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        minval[m] = std::min ( minval[m], feat[m] );
        maxval[m] = std::max ( maxval[m], feat[m] );
      }
#endif
      fdim = feat.size();
    }
  }
  else // input: sparse vectors
  {
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
    }
    fdim = x[0].second.svec->getDim();
  }

  if ( count[0] == 0 || count[1] == 0 )
  {
    cerr << "not enough samples " << count[0] << " : " << count[1] << endl;
    weight.setDim ( 0 );
    return -1;
  }

  double samples = std::min ( count[0], count[1] );
  samples = std::min ( samples, samplesperclass );
  //samples = std::max(samples, 200);
  //samples = samplesperclass;

  vector<double> rands ( 2, 0.0 );
  vector<double> allsizes ( 2, 0.0 );
  for ( int i = 0; i < 2; i++ )
  {
    rands[i] = 1.0; // use all samples (default if samples > count[i])
    if ( samples > 0 && samples < 1 )
      rands[i] = samples; // use a relative number of samples wrt. the class size
    if ( samples > 1 && samples <= count[i] )
      rands[i] = samples / ( double ) count[i]; // use an (approximately) fixed absolute number of samples per class
    allsizes[i] = count[i];
    count[i] = 0;
  }
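
  // draw the subsample: each example survives with probability rands[pos]
  // and is appended to X (features) and Y (one-hot class labels)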
  for ( int i = 0; i < fnum; i++ )
  {
    int pos = 0;
    if ( x[i].first != classno )
      pos = 1;
    double r = ( double ) rand() / ( double ) RAND_MAX;
    if ( r > rands[pos] )
      continue;
    ++count[pos];

    if ( x[0].second.svec == NULL )
    {
      Vector feat;
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        feat[m] = ( feat[m] - minval[m] ) / ( maxval[m] - minval[m] );
      }
#endif
      X.addRow ( feat );
    }
    else
    {
      X.addRow ( x[i].second.svec );
    }

    SparseVector *v = new SparseVector ( 2 );
    ( *v ) [pos] = 1.0;
    Y.addRow ( v );
  }
  Y.setDel();
  if ( x[0].second.svec == NULL )
    X.setDel();

  for ( int i = 0; i < 2; i++ )
  {
    cerr << "Examples for class " << i << ": " << count[i] << " out of " << allsizes[i] << " with p = " << rands[i] << endl;
  }

#undef NORMALIZATION // NORMALIZATION stays undefined, so the class-reweighting block below is disabled
#ifdef NORMALIZATION
  GMSparseVectorMatrix Xred;
  Xred.resize ( X.rows(), X.cols() );
  for ( int r = 0; r < ( int ) Xred.rows(); r++ )
  {
    for ( int c = 0; c < ( int ) Xred.cols(); c++ )
    {
      double tmp = X[r].get ( c );
      if ( Y[r].get ( 0 ) == 1 )
        tmp *= count[0] / fnum;
      else
        tmp *= count[1] / fnum;
      if ( fabs ( tmp ) > 10e-7 )
        Xred[r][c] = tmp;
    }
  }
#endif

  fnum = X.rows();

  GMSparseVectorMatrix xY;
#ifdef NORMALIZATION
  Xred.mult ( Y, xY, true );
#else
  X.mult ( Y, xY, true ); // xY = X^T * Y
#endif
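
  // Initialize all weights to zero and cache the quantities needed by every
  // coordinate update: the scores Xw[i] = x_i * w, the softmax normalizers
  // S[i] = Sum_c exp(Xw[i](c)), the curvature bounds ac and the resulting
  // soft-threshold widths lm_2_ac = lambda / (2 * ac).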
  weight.setDim ( fdim );

  // for faster computation: Xw = X * w
  GMSparseVectorMatrix Xw;
  X.mult ( weight, Xw, false, true );

  SparseVector S ( fnum );
  for ( int r = 0; r < fnum; r++ )
  {
    S[r] = 0.0;
    for ( int c = 0; c < 2; c++ )
    {
      S[r] += exp ( Xw[r].get ( c ) );
    }
  }

  // for faster computation: ac[i] = (maxClassNo-1)/(2*maxClassNo) * Sum_j (x_j(i))^2,
  // which reduces to 1/4 * Sum_j (x_j(i))^2 in this two-class case
  Vector ac ( fdim );
  Vector lm_2_ac ( fdim );
  for ( int f = 0; f < fdim; f++ )
  {
    ac[f] = 0.0;
    for ( int a = 0; a < fnum; a++ )
    {
      ac[f] += X[a].get ( f ) * X[a].get ( f );
    }
    ac[f] *= 0.25;
    lm_2_ac[f] = ( lambda / 2.0 ) / ac[f];
  }

  // initialize the iterative optimization
  double incr = numeric_limits<double>::max();
  long non_zero = 0;
  long wasted_basis = 0;
  long needed_basis = 0;

  // probability of resampling each weight
  vector<double> p_resamp;
  p_resamp.resize ( fdim );

  // loop over cycles
  long cycle = 0;
  for ( cycle = 0; cycle < maxiter; cycle++ )
  {
#ifdef SLRDEBUG
    cerr << "iteration: " << cycle << " of " << maxiter << endl;
#endif
    // zero out the diffs for assessing change
    double sum2_w_diff = 0.0;
    double sum2_w_old = 0.0;
    wasted_basis = 0;
    if ( cycle == 1 )
      needed_basis = 0;

    // update each weight
    //#pragma omp parallel for
    for ( int basis = 0; basis < fdim; basis++ ) // over all dimensions
    {
      int changed = 0;
      // get the starting weight
      double w_old = weight.get ( basis );

      // set p_resamp if it's the first cycle
      if ( cycle == 0 )
      {
        p_resamp[basis] = 1.0;
      }

      // see if we're gonna update
      double rval = ( double ) rand() / ( double ) RAND_MAX;
      if ( ( w_old != 0 ) || ( rval < p_resamp[basis] ) )
      {
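        // The coordinate-wise gradient of the log-likelihood with respect
        // to weight(basis) is
        //   grad = xY[basis](0) - Sum_i X[i](basis) * exp(Xw[i](0)) / S[i],
        // i.e. the observed minus the expected feature total for the
        // target class.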
        // accumulate the expected value term (XdotP)
        double XdotP = 0.0;
        for ( int i = 0; i < fnum; i++ )
        {
#ifdef NORMALIZATION
          double e = Xred[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#else
          double e = X[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#endif
          if ( finite ( e ) )
            XdotP += e;
#ifdef SLRDEBUG
          else
            throw "numerical problems";
#endif
        }

        // get the gradient
        double grad = xY[basis].get ( 0 ) - XdotP;
        // compute the tentative new weight
        double w_new = w_old + grad / ac[basis];
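
        // Soft-thresholding: the L1 penalty pulls w_new towards zero by
        // lm_2_ac = lambda / (2 * ac); a weight that would land inside
        // [-lm_2_ac, +lm_2_ac] is set exactly to zero, which is what makes
        // the solution sparse.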
        // test whether the step moves the weight outside the shrinkage interval
        if ( w_new > lm_2_ac[basis] )
        {
          // shrink towards zero, but keep the weight
          w_new -= lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the resampling probability
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else if ( w_new < -lm_2_ac[basis] )
        {
          // shrink towards zero, but keep the weight
          w_new += lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the resampling probability
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else
        {
          // gonna zero it out
          w_new = 0.0;
          // decrease the p_resamp
          p_resamp[basis] -= ( p_resamp[basis] - min_resamp ) * resamp_decay;
          // update the number of non-zero weights
          if ( w_old == 0.0 )
          {
            // we didn't change
            changed = 0;
            // and wasted a basis
            wasted_basis += 1;
          }
          else
          {
            // we changed
            changed = 1;
            // must update num non_zero
            non_zero -= 1;
          }
        }

        // process changes if necessary
        if ( changed == 1 )
        {
          // update the expected values (cached scores Xw and normalizers S)
          double w_diff = w_new - w_old;
          for ( int i = 0; i < fnum; i++ )
          {
            double E_old = exp ( Xw[i].get ( 0 ) );
            double val = X[i].get ( basis ) * w_diff;
            if ( Xw[i].get ( 0 ) == 0.0 )
            {
              if ( fabs ( val ) > 10e-7 )
                Xw[i][0] = val;
            }
            else
              Xw[i][0] += X[i].get ( basis ) * w_diff;
            double E_new_m = exp ( Xw[i].get ( 0 ) );
            S[i] += E_new_m - E_old;
          }

          // update the weight
          if ( w_new == 0.0 )
          {
            if ( weight.get ( basis ) != 0.0 )
            {
              weight.erase ( basis );
            }
          }
          else
            weight[basis] = w_new;

          // keep track of the sum of squared diffs
          //#pragma omp critical
          sum2_w_diff += w_diff * w_diff;
        }

        // no matter what, we keep track of the old weight
        //#pragma omp critical
        sum2_w_old += w_old * w_old;
      }
    }

    // finished a cycle, assess convergence
    incr = sqrt ( sum2_w_diff ) / ( sqrt ( sum2_w_old ) + numeric_limits<double>::epsilon() );
#ifdef SLRDEBUG
    cout << "sum2_w_diff: " << sum2_w_diff << " sum2_w_old " << sum2_w_old << endl;
    cout << "convcrit: " << incr << " tol: " << convergence_tol << endl;
#endif
    if ( incr < convergence_tol )
    {
      // we converged!
      break;
    }
  }

  // finished updating the weights
  cerr << "end regression after " << cycle << " steps" << endl;
  return cycle;
}
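
// Typical usage (a minimal sketch: building the Config, FeaturePool and
// Examples depends on the surrounding vislearning framework, and the names
// "settings.conf" / "SLR" are only placeholders):
//
//   Config conf ( "settings.conf" );
//   SLR classifier ( &conf, "SLR" );
//   classifier.train ( fp, examples, classno ); // fp, examples built elsewhere
//   double p = classifier.classify ( testExample );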