// SLR.cpp

#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include "vislearning/classifier/fpclassifier/logisticregression/SLR.h"
#include "vislearning/cbaselib/FeaturePool.h"

#include <iostream>
#include <limits>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <cassert>

#define SLRDEBUG
#define featnorm

using namespace OBJREC;
using namespace std;
using namespace NICE;

SLR::SLR ()
{
}
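
// Read the optimization parameters from the config:
//   maxiter         - maximum number of update cycles
//   resamp_decay    - decay rate of the resampling probability of weights set to zero
//   convergence_tol - threshold on the relative weight change used as stopping criterion
//   min_resamp      - lower bound for the resampling probability
//   lambda          - strength of the sparsity penalty
//   samplesperclass - number (or, if below 1, fraction) of training examples drawn per class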
SLR::SLR ( const Config *_conf, string section ) : conf ( _conf )
{
  maxiter = conf->gI ( section, "maxiter", 20000 );
  resamp_decay = conf->gD ( section, "resamp_decay", 0.5 );
  convergence_tol = conf->gD ( section, "convergence_tol", 1e-7 );
  min_resamp = conf->gD ( section, "min_resamp", 0.001 );
  lambda = conf->gD ( section, "lambda", 0.1 );
  samplesperclass = conf->gD ( section, "samplesperclass", 200.0 );
  weight.setDim ( 0 );
  fdim = 0;
}

SLR::~SLR()
{
  // clean up
}
double SLR::classify ( Example & pce )
{
  double result;
  SparseVector *svec;

  if ( weight.getDim() == 0 )
    return 0.0;

  bool newvec = false;

  if ( pce.svec != NULL )
  {
    svec = pce.svec;
  }
  else
  {
    Vector x;
    x = * ( pce.vec );
#ifdef featnorm
    // scale each feature to [0,1] using the extrema seen during training
    for ( int m = 0; m < ( int ) x.size(); m++ )
    {
      x[m] = ( x[m] - minval[m] ) / ( maxval[m] - minval[m] );
    }
#endif
    svec = new SparseVector ( x );
    svec->setDim ( x.size() );
    newvec = true;
  }

  if ( weight.size() == 0 )
    result = 0.0;
  else
  {
    // logistic function of the linear score
    result = 1.0 / ( 1.0 + exp ( -svec->innerProduct ( weight ) ) );
  }

  if ( newvec )
    delete svec;

  return result;
}
void SLR::train ( FeaturePool & _fp, Examples & examples, int classno )
{
  cout << "start train" << endl;
  fp = FeaturePool ( _fp );

  // number of training examples
  int fanz = examples.size();
  assert ( fanz >= 2 );

  // determine the feature dimension
  Vector x;
  fp.calcFeatureVector ( examples[0].second, x );
  fdim = x.size();
  assert ( fdim > 0 );

  stepwise_regression ( examples, classno );
  cout << "end train" << endl;
}
void SLR::restore ( istream & is, int format )
{
  weight.restore ( is, format );
  fdim = ( int ) weight.getDim();
  maxval.resize ( fdim );
  minval.resize ( fdim );
  for ( int i = 0; i < fdim; i++ )
  {
    is >> maxval[i];
    is >> minval[i];
  }
}

void SLR::store ( ostream & os, int format ) const
{
  if ( format != -9999 ) {
    weight.store ( os, format );
    for ( int i = 0; i < fdim; i++ )
    {
      os << maxval[i] << " " << minval[i] << endl;
    }
  } else {
    weight.store ( os, format );
  }
}
void SLR::clear ()
{
  // TODO: implement
}
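
// Stepwise training of a binary (classno vs. rest) sparse logistic regression model.
// Each cycle performs component-wise updates: a weight receives a gradient step scaled
// by 1/ac[basis] and is then soft-thresholded; if the result falls inside
// +- lambda/(2*ac[basis]) it is set exactly to zero, which keeps the solution sparse.
// Weights that are currently zero are only revisited with probability p_resamp, which
// decays towards min_resamp. Returns the number of cycles performed, or -1 if one of
// the two classes has no training examples.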
int SLR::stepwise_regression ( Examples &x, int classno )
{
  // initialize randomization
  srand ( time ( NULL ) );
  //srand (1);

  cout << "start regression" << endl;

  // number of training examples
  int fnum = x.size();

  // create the data matrices
  GMSparseVectorMatrix X;
  GMSparseVectorMatrix Y;

  vector<int> count ( 2, 0 );

  maxval.resize ( fdim );
  minval.resize ( fdim );

  if ( x[0].second.svec == NULL ) // input given as dense vectors
  {
    Vector feat;
    // initialize the per-dimension feature extrema
    for ( int i = 0; i < fdim; i++ )
    {
      maxval[i] = -numeric_limits<double>::max();
      minval[i] = numeric_limits<double>::max();
    }
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        minval[m] = std::min ( minval[m], feat[m] );
        maxval[m] = std::max ( maxval[m], feat[m] );
      }
#endif
      fdim = feat.size();
    }
  }
  else // input given as sparse vectors
  {
    for ( int i = 0; i < fnum; i++ )
    {
      int pos = 0;
      if ( x[i].first != classno )
        pos = 1;
      ++count[pos];
    }
    fdim = x[0].second.svec->getDim();
  }

  if ( count[0] == 0 || count[1] == 0 )
  {
    cerr << "not enough samples " << count[0] << " : " << count[1] << endl;
    weight.setDim ( 0 );
    return -1;
  }

  double samples = std::min ( count[0], count[1] );
  samples = std::min ( samples, samplesperclass );
  //samples = std::max(samples, 200);
  //samples = samplesperclass;

  vector<double> rands ( 2, 0.0 );
  vector<double> allsizes ( 2, 0.0 );
  for ( int i = 0; i < 2; i++ )
  {
    rands[i] = 1.0; // use all samples (default if samples > count[i])
    if ( samples > 0 && samples < 1 )
      rands[i] = samples; // use a relative number of samples w.r.t. the class size
    if ( samples > 1 && samples <= count[i] )
      rands[i] = samples / ( double ) count[i]; // use an (approximately) fixed absolute number of samples per class
    allsizes[i] = count[i];
    count[i] = 0;
  }
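
  // Build the subsampled training matrices: each example is kept with probability
  // rands[pos]; X receives its (normalized) feature vector and Y the corresponding
  // one-hot target row (column 0 = class classno, column 1 = all other classes).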
  for ( int i = 0; i < fnum; i++ )
  {
    int pos = 0;
    if ( x[i].first != classno )
      pos = 1;
    double r = ( double ) rand() / ( double ) RAND_MAX;
    if ( r > rands[pos] )
      continue;
    ++count[pos];
    if ( x[0].second.svec == NULL )
    {
      Vector feat;
      fp.calcFeatureVector ( x[i].second, feat );
#ifdef featnorm
      for ( int m = 0; m < ( int ) feat.size(); m++ )
      {
        feat[m] = ( feat[m] - minval[m] ) / ( maxval[m] - minval[m] );
      }
#endif
      X.addRow ( feat );
    }
    else
    {
      X.addRow ( x[i].second.svec );
    }
    SparseVector *v = new SparseVector ( 2 );
    ( *v ) [pos] = 1.0;
    Y.addRow ( v );
  }
  Y.setDel();
  if ( x[0].second.svec == NULL )
    X.setDel();

  for ( int i = 0; i < 2; i++ )
  {
    cerr << "Examples for class " << i << ": " << count[i] << " out of " << allsizes[i] << " with p = " << rands[i] << endl;
  }
#undef NORMALIZATION
#ifdef NORMALIZATION
  GMSparseVectorMatrix Xred;
  Xred.resize ( X.rows(), X.cols() );
  for ( int r = 0; r < ( int ) Xred.rows(); r++ )
  {
    for ( int c = 0; c < ( int ) Xred.cols(); c++ )
    {
      double tmp = X[r].get ( c );
      if ( Y[r].get ( 0 ) == 1 )
        tmp *= count[0] / fnum;
      else
        tmp *= count[1] / fnum;
      if ( fabs ( tmp ) > 10e-7 )
        Xred[r][c] = tmp;
    }
  }
#endif

  fnum = X.rows();

  GMSparseVectorMatrix xY;
#ifdef NORMALIZATION
  Xred.mult ( Y, xY, true );
#else
  X.mult ( Y, xY, true );
#endif
  weight.setDim ( fdim );

  // for faster computation of Xw = X*w
  GMSparseVectorMatrix Xw;
  X.mult ( weight, Xw, false, true );

  // S[r] caches the per-example normalizer sum_c exp( Xw[r][c] )
  SparseVector S ( fnum );
  for ( int r = 0; r < fnum; r++ )
  {
    S[r] = 0.0;
    for ( int c = 0; c < 2; c++ )
    {
      S[r] += exp ( Xw[r].get ( c ) );
    }
  }

  // for faster computation of ac[i] = (maxClassNo-1)/(2*maxClassNo) * Sum_j (x_i(j))^2
  Vector ac ( fdim );
  Vector lm_2_ac ( fdim );
  for ( int f = 0; f < fdim; f++ )
  {
    ac[f] = 0.0;
    for ( int a = 0; a < fnum; a++ )
    {
      ac[f] += X[a].get ( f ) * X[a].get ( f );
    }
    ac[f] *= 0.25;
    lm_2_ac[f] = ( lambda / 2.0 ) / ac[f];
  }
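
  // lm_2_ac[f] = lambda / (2 * ac[f]) is the soft-threshold half-width used in the
  // update loop below: a weight update that stays within this range is set to zero.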
  // initialize the iterative optimization
  double incr = numeric_limits<double>::max();
  long non_zero = 0;
  long wasted_basis = 0;
  long needed_basis = 0;

  // probability of resampling each weight
  vector<double> p_resamp;
  p_resamp.resize ( fdim );

  // loop over cycles
  long cycle = 0;
  for ( cycle = 0; cycle < maxiter; cycle++ )
  {
#ifdef SLRDEBUG
    cerr << "iteration: " << cycle << " of " << maxiter << endl;
#endif
    // zero out the diffs for assessing change
    double sum2_w_diff = 0.0;
    double sum2_w_old = 0.0;
    wasted_basis = 0;
    if ( cycle == 1 )
      needed_basis = 0;

    // update each weight
    //#pragma omp parallel for
    for ( int basis = 0; basis < fdim; basis++ ) // over all feature dimensions
    {
      int changed = 0;
      // get the starting weight
      double w_old = weight.get ( basis );

      // set the p_resamp if it's the first cycle
      if ( cycle == 0 )
      {
        p_resamp[basis] = 1.0;
      }

      // decide whether to update this weight
      double rval = ( double ) rand() / ( double ) RAND_MAX;
      if ( ( w_old != 0 ) || ( rval < p_resamp[basis] ) )
      {
        // accumulate XdotP = sum_i X[i][basis] * p(class 0 | x_i),
        // where exp( Xw[i][0] ) / S[i] is the model's current probability of the target class
        double XdotP = 0.0;
        for ( int i = 0; i < fnum; i++ )
        {
#ifdef NORMALIZATION
          double e = Xred[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#else
          double e = X[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
#endif
          if ( finite ( e ) )
            XdotP += e;
#ifdef SLRDEBUG
          else
            throw "numerical problems";
#endif
        }
        // get the gradient
        double grad = xY[basis].get ( 0 ) - XdotP;
        // compute the new weight
        double w_new = w_old + grad / ac[basis];

        // test whether we are outside the soft-threshold interval
        if ( w_new > lm_2_ac[basis] )
        {
          // shrink towards zero, but keep the weight
          w_new -= lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the p_resamp
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else if ( w_new < -lm_2_ac[basis] )
        {
          // shrink towards zero, but keep the weight
          w_new += lm_2_ac[basis];
          changed = 1;
          // unmark from being zero if necessary
          if ( w_old == 0.0 )
          {
            non_zero += 1;
            // reset the p_resamp
            p_resamp[basis] = 1.0;
            // we needed the basis
            needed_basis += 1;
          }
        }
        else
        {
          // zero it out
          w_new = 0.0;
          // decrease the p_resamp
          p_resamp[basis] -= ( p_resamp[basis] - min_resamp ) * resamp_decay;
          // update the number of non-zero weights
          if ( w_old == 0.0 )
          {
            // we didn't change
            changed = 0;
            // and wasted a basis
            wasted_basis += 1;
          }
          else
          {
            // we changed
            changed = 1;
            // must update the number of non-zero weights
            non_zero -= 1;
          }
        }
        // process changes if necessary
        if ( changed == 1 )
        {
          // update the expected values
          double w_diff = w_new - w_old;
          for ( int i = 0; i < fnum; i++ )
          {
            double E_old = exp ( Xw[i].get ( 0 ) );
            double val = X[i].get ( basis ) * w_diff;
            if ( Xw[i].get ( 0 ) == 0.0 )
            {
              if ( fabs ( val ) > 10e-7 )
                Xw[i][0] = val;
            }
            else
              Xw[i][0] += X[i].get ( basis ) * w_diff;
            double E_new_m = exp ( Xw[i].get ( 0 ) );
            S[i] += E_new_m - E_old;
          }

          // update the weight
          if ( w_new == 0.0 )
          {
            if ( weight.get ( basis ) != 0.0 )
            {
              weight.erase ( basis );
            }
          }
          else
            weight[basis] = w_new;

          // keep track of the sum of squared diffs
          //#pragma omp critical
          sum2_w_diff += w_diff * w_diff;
        }
        // no matter what, keep track of the old weight
        //#pragma omp critical
        sum2_w_old += w_old * w_old;
      }
    }
    // finished a cycle, assess convergence
    incr = sqrt ( sum2_w_diff ) / ( sqrt ( sum2_w_old ) + numeric_limits<double>::epsilon() );
#ifdef SLRDEBUG
    cout << "sum2_w_diff: " << sum2_w_diff << " sum2_w_old " << sum2_w_old << endl;
    cout << "convcrit: " << incr << " tol: " << convergence_tol << endl;
    //cout << "sum2_w_old = " << sum2_w_old << " sum2_w_diff = " << sum2_w_diff << endl;
#endif
    if ( incr < convergence_tol )
    {
      // we converged!
      break;
    }
  }

  // finished updating the weights
  cerr << "end regression after " << cycle << " steps" << endl;
  return cycle;
}