- #ifdef NICE_USELIB_OPENMP
- #include <omp.h>
- #endif
- #include "vislearning/classifier/fpclassifier/logisticregression/SLR.h"
- #include "vislearning/cbaselib/FeaturePool.h"
- #include <iostream>
- #include <cmath>
- #include <cstdlib>
- #include <ctime>
- #include <cassert>
- #include <limits>
- #define SLRDEBUG
- #define featnorm
- using namespace OBJREC;
- using namespace std;
- using namespace NICE;
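- // SLR: binary sparse logistic regression. Training uses a component-wise
- // update scheme (stepwise regression) with an l1 penalty controlled by
- // 'lambda', which drives many weights to exactly zero; classify() returns
- // the probability that an example belongs to the trained class.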
- SLR::SLR ()
- {
- }
- SLR::SLR ( const Config *_conf, string section ) : conf ( _conf )
- {
- maxiter = conf->gI ( section, "maxiter", 20000 );
- resamp_decay = conf->gD ( section, "resamp_decay", 0.5 );
- convergence_tol = conf->gD ( section, "convergence_tol", 1e-7 );
- min_resamp = conf->gD ( section, "min_resamp", 0.001 );
- lambda = conf->gD ( section, "lambda", 0.1 );
- samplesperclass = conf->gD ( section, "samplesperclass", 200.0 );
- weight.setDim ( 0 );
- fdim = 0;
- }
- SLR::~SLR()
- {
- //clean up
- }
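- // Usage sketch (hypothetical setup; the FeaturePool, the Examples and the
- // config file come from the caller and are only illustrative here):
- //   Config conf ( "classifier.conf" );
- //   SLR classifier ( &conf, "SLR" );
- //   classifier.train ( fp, examples, classno );
- //   double p = classifier.classify ( examples[0].second );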
- double SLR::classify ( Example & pce )
- {
- double result;
- SparseVector *svec;
- if ( weight.getDim() == 0 )
- return 0.0;
- bool newvec = false;
- if ( pce.svec != NULL )
- {
- svec = pce.svec;
- }
- else
- {
- Vector x;
- x = * ( pce.vec );
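- // scale every dense feature to [0,1] with the min/max values recorded during
- // training (assumes maxval[m] > minval[m]; a constant feature would divide by zero)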
- #ifdef featnorm
- for ( int m = 0; m < ( int ) x.size(); m++ )
- {
- x[m] = ( x[m] - minval[m] ) / ( maxval[m] - minval[m] );
- }
- #endif
- svec = new SparseVector ( x );
- svec->setDim ( x.size() );
- newvec = true;
- }
- if ( weight.size() == 0 )
- result = 0.0;
- else
- {
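- // logistic sigmoid of the inner product <w, x>: probability of the trained class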
- result = 1.0 / ( 1.0 + exp ( -svec->innerProduct ( weight ) ) );
- }
- if ( newvec )
- delete svec;
- return result;
- }
- void SLR::train ( FeaturePool & _fp, Examples & examples, int classno )
- {
- cout << "start train" << endl;
- fp = FeaturePool ( _fp );
- // number of training examples
- int fanz = examples.size();
- assert ( fanz >= 2 );
- // determine the feature dimension
- Vector x;
- fp.calcFeatureVector ( examples[0].second, x );
- fdim = x.size();
- assert ( fdim > 0 );
- stepwise_regression ( examples, classno );
- cout << "end train" << endl;
- }
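- // persistence format: the weight vector, followed by one "maxval minval" pair
- // per feature dimension (see store() below)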
- void SLR::restore ( istream & is, int format )
- {
- weight.restore ( is, format );
- fdim = ( int ) weight.getDim();
- maxval.resize ( fdim );
- minval.resize ( fdim );
- for ( int i = 0; i < fdim; i++ )
- {
- is >> maxval[i];
- is >> minval[i];
- }
- }
- void SLR::store ( ostream & os, int format ) const
- {
- if ( format != -9999 ) {
- weight.store ( os, format );
- for ( int i = 0; i < fdim; i++ )
- {
- os << maxval[i] << " " << minval[i] << endl;
- }
- } else {
- weight.store ( os, format );
- }
- }
- void SLR::clear ()
- {
- //TODO: implement
- }
- int SLR::stepwise_regression ( Examples &x, int classno )
- {
- // initialize randomization
- srand ( time ( NULL ) );
- //srand (1);
- cout << "start regression" << endl;
- // get the number of training examples
- int fnum = x.size();
- // create the data matrix X and the one-hot label matrix Y
- GMSparseVectorMatrix X;
- GMSparseVectorMatrix Y;
- vector<int> count ( 2, 0 );
- maxval.resize ( fdim );
- minval.resize ( fdim );
- if ( x[0].second.svec == NULL ) //input: dense feature vectors
- {
- Vector feat;
- for ( int i = 0; i < fdim; i++ )
- {
- maxval[i] = -numeric_limits<double>::max(); // lowest representable value, so the running maximum below also works for negative features
- minval[i] = numeric_limits<double>::max();
- }
- for ( int i = 0; i < fnum; i++ )
- {
- int pos = 0;
- if ( x[i].first != classno )
- pos = 1;
- ++count[pos];
- fp.calcFeatureVector ( x[i].second, feat );
- #ifdef featnorm
- for ( int m = 0; m < ( int ) feat.size(); m++ )
- {
- minval[m] = std::min ( minval[m], feat[m] );
- maxval[m] = std::max ( maxval[m], feat[m] );
- }
- #endif
- fdim = feat.size();
- }
- }
- else //input: sparse feature vectors
- {
- for ( int i = 0; i < fnum; i++ )
- {
- int pos = 0;
- if ( x[i].first != classno )
- pos = 1;
- ++count[pos];
- }
- fdim = x[0].second.svec->getDim();
- }
- if ( count[0] == 0 || count[1] == 0 )
- {
- cerr << "not enought samples " << count[0] << " : " << count[1] << endl;
- weight.setDim ( 0 );
- return -1;
- }
- double samples = std::min ( count[0], count[1] );
- samples = std::min ( samples, samplesperclass );
- //samples = std::max(samples, 200);
- //samples = samplesperclass;
- vector<double> rands ( 2, 0.0 );
- vector<double> allsizes ( 2, 0.0 );
- for ( int i = 0; i < 2; i++ )
- {
- rands[i] = 1.0; //use all samples (default if samples>count[i])
- if ( samples > 0 && samples < 1 ) rands[i] = samples; //use relative number of samples wrt. class size
- if ( samples > 1 && samples <= count[i] ) rands[i] = samples / ( double ) count[i]; //use (approximately) fixed absolute number of samples for each class
- allsizes[i] = count[i];
- count[i] = 0;
- }
- for ( int i = 0; i < fnum; i++ )
- {
- int pos = 0;
- if ( x[i].first != classno )
- pos = 1;
- double r = ( double ) rand() / ( double ) RAND_MAX;
- if ( r > rands[pos] )
- continue;
- ++count[pos];
- if ( x[0].second.svec == NULL )
- {
- Vector feat;
- fp.calcFeatureVector ( x[i].second, feat );
- #ifdef featnorm
- for ( int m = 0; m < ( int ) feat.size(); m++ )
- {
- feat[m] = ( feat[m] - minval[m] ) / ( maxval[m] - minval[m] );
- }
- #endif
- X.addRow ( feat );
- }
- else
- {
- X.addRow ( x[i].second.svec );
- }
- SparseVector *v = new SparseVector ( 2 );
- ( *v ) [pos] = 1.0;
- Y.addRow ( v );
- }
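- // setDel() presumably transfers ownership: the matrices delete their heap-allocated
- // row vectors on destruction; X only owns its rows when they were built from dense features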
- Y.setDel();
- if ( x[0].second.svec == NULL )
- X.setDel();
- for ( int i = 0; i < 2; i++ )
- {
- cerr << "Examples for class " << i << ": " << count[i] << " out of " << allsizes[i] << " with p = " << rands[i] << endl;
- }
- #undef NORMALIZATION
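- // NORMALIZATION is explicitly undefined here, so the renormalization block below is compiled out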
- #ifdef NORMALIZATION
- GMSparseVectorMatrix Xred;
- Xred.resize ( X.rows(), X.cols() );
- for ( int r = 0; r < ( int ) Xred.rows(); r++ )
- {
- for ( int c = 0; c < ( int ) Xred.cols(); c++ )
- {
- double tmp = X[r].get ( c );
- if ( Y[r].get ( 0 ) == 1 )
- tmp *= count[0] / fnum;
- else
- tmp *= count[1] / fnum;
- if ( fabs ( tmp ) > 10e-7 )
- Xred[r][c] = tmp;
- }
- }
- #endif
- fnum = X.rows();
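- // xY = X^T * Y: per-feature correlation with the class indicators, the constant part of the gradient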
- GMSparseVectorMatrix xY;
- #ifdef NORMALIZATION
- Xred.mult ( Y, xY, true );
- #else
- X.mult ( Y, xY, true );
- #endif
- weight.setDim ( fdim );
- // cache Xw = X*w for fast incremental updates
- GMSparseVectorMatrix Xw;
- X.mult ( weight, Xw, false, true );
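- // S[r] = sum_c exp( Xw[r](c) ): softmax normalizer for sample r, kept up to date incrementally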
- SparseVector S ( fnum );
- for ( int r = 0; r < fnum; r++ )
- {
- S[r] = 0.0;
- for ( int c = 0; c < 2; c++ )
- {
- S[r] += exp ( Xw[r].get ( c ) );
- }
- }
- // precompute ac[f] = (maxClassNo-1)/(2*maxClassNo) * Sum_i (x_i(f))^2, which is 0.25 * Sum_i x_i(f)^2 for two classes, and the soft threshold lm_2_ac[f] = (lambda/2) / ac[f]
- Vector ac ( fdim );
- Vector lm_2_ac ( fdim );
- for ( int f = 0; f < fdim; f++ )
- {
- ac[f] = 0.0;
- for ( int a = 0; a < fnum; a++ )
- {
- ac[f] += X[a].get ( f ) * X[a].get ( f );
- }
- ac[f] *= 0.25;
- lm_2_ac[f] = ( lambda / 2.0 ) / ac[f];
- }
- // initialize the iterative optimization
- double incr = numeric_limits<double>::max();
- long non_zero = 0;
- long wasted_basis = 0;
- long needed_basis = 0;
- // prob of resample each weight
- vector<double> p_resamp;
- p_resamp.resize ( fdim );
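- // a weight that stays at zero is only revisited with probability p_resamp[basis],
- // which decays towards min_resamp below; this keeps useless dimensions cheap to skip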
- // loop over cycles
- long cycle = 0;
- for ( cycle = 0; cycle < maxiter; cycle++ )
- {
- #ifdef SLRDEBUG
- cerr << "iteration: " << cycle << " of " << maxiter << endl;
- #endif
- // zero out the diffs for assessing change
- double sum2_w_diff = 0.0;
- double sum2_w_old = 0.0;
- wasted_basis = 0;
- if ( cycle == 1 )
- needed_basis = 0;
- // update each weight
- //#pragma omp parallel for
- for ( int basis = 0; basis < fdim; basis++ ) // loop over all feature dimensions
- {
- int changed = 0;
- // get the starting weight
- double w_old = weight.get ( basis );
- // set the p_resamp if it's the first cycle
- if ( cycle == 0 )
- {
- p_resamp[basis] = 1.0;
- }
- // see if we're gonna update
- double rval = ( double ) rand() / ( double ) RAND_MAX;
- if ( ( w_old != 0 ) || ( rval < p_resamp[basis] ) )
- {
- // compute X(:,basis)^T * p, where p_i = exp( Xw_i(0) ) / S_i is the current class probability of sample i
- double XdotP = 0.0;
- for ( int i = 0; i < fnum; i++ )
- {
- #ifdef NORMALIZATION
- double e = Xred[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
- #else
- double e = X[i].get ( basis ) * exp ( Xw[i].get ( 0 ) ) / S[i];
- #endif
- if ( finite ( e ) )
- XdotP += e;
- #ifdef SLRDEBUG
- else
- throw "numerical problems";
- #endif
- }
- // get the gradient
- double grad = xY[basis].get ( 0 ) - XdotP;
- // set the new weight
- double w_new = w_old + grad / ac[basis];
- // soft-threshold the update: shrink it by lm_2_ac[basis], or zero the weight if the step stays within that bound
- if ( w_new > lm_2_ac[basis] )
- {
- // shrink the weight towards zero by the threshold, but keep it
- w_new -= lm_2_ac[basis];
- changed = 1;
- // unmark it from being zero if necessary
- if ( w_old == 0.0 )
- {
- non_zero += 1;
- // reset the p_resample
- p_resamp[basis] = 1.0;
- // we needed the basis
- needed_basis += 1;
- }
- }
- else if ( w_new < -lm_2_ac[basis] )
- {
- // shrink the weight towards zero by the threshold, but keep it
- w_new += lm_2_ac[basis];
- changed = 1;
- // unmark it from being zero if necessary
- if ( w_old == 0.0 )
- {
- non_zero += 1;
- // reset the p_resample
- p_resamp[basis] = 1.0;
- // we needed the basis
- needed_basis += 1;
- }
- }
- else
- {
- // gonna zero it out
- w_new = 0.0;
- // decrease the p_resamp
- p_resamp[basis] -= ( p_resamp[basis] - min_resamp ) * resamp_decay;
- // set the number of non-zero
- if ( w_old == 0.0 )
- {
- // we didn't change
- changed = 0;
- // and wasted a basis
- wasted_basis += 1;
- }
- else
- {
- // we changed
- changed = 1;
- // must update num non_zero
- non_zero -= 1;
- }
- }
- // process changes if necessary
- if ( changed == 1 )
- {
- // incrementally update the cached Xw and the normalizer S for the changed weight
- double w_diff = w_new - w_old;
- for ( int i = 0; i < fnum; i++ )
- {
- double E_old = exp ( Xw[i].get ( 0 ) );
- double val = X[i].get ( basis ) * w_diff;
- if ( Xw[i].get ( 0 ) == 0.0 )
- {
- if ( fabs ( val ) > 10e-7 )
- Xw[i][0] = val;
- }
- else
- Xw[i][0] += X[i].get ( basis ) * w_diff;
- double E_new_m = exp ( Xw[i].get ( 0 ) );
- S[i] += E_new_m - E_old;
- }
- // update the weight
- if ( w_new == 0.0 )
- {
- if ( weight.get ( basis ) != 0.0 )
- {
- weight.erase ( basis );
- }
- }
- else
- weight[basis] = w_new;
- // keep track of the sum of squared diffs (the square root is taken after the cycle)
- //#pragma omp critical
- sum2_w_diff += w_diff * w_diff;
- }
- // no matter what we keep track of the old
- //#pragma omp critical
- sum2_w_old += w_old * w_old;
- }
- }
- // finished a cycle, assess convergence
- incr = sqrt ( sum2_w_diff ) / ( sqrt ( sum2_w_old ) + numeric_limits<double>::epsilon() );
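- // incr: relative L2 change of the weight vector in this cycle (update norm over old norm)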
- #ifdef SLRDEBUG
- cout << "sum2_w_diff: " << sum2_w_diff << " sum2_w_old " << sum2_w_old << endl;
- cout << "convcrit: " << incr << " tol: " << convergence_tol << endl;
- //cout << "sum2_w_wold = " << sum2_w_old << " sum2_w_diff = " << sum2_w_diff << endl;
- #endif
- if ( incr < convergence_tol )
- {
- // we converged!!!
- break;
- }
- }
- // finished updating weights
- // report the number of iterations used
- cerr << "end regression after " << cycle << " steps" << endl;
- return cycle;
- }