123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451 |
- /**
- * @file GMHIKernelRaw.cpp
- * @brief Fast multiplication with histogram intersection kernel matrices (Implementation)
- * @author Erik Rodner, Alexander Freytag
- * @date 01/02/2012
- */
- #include <iostream>
- #include <core/vector/VVector.h>
- #include <core/basics/Timer.h>
- #include "GMHIKernelRaw.h"
- using namespace NICE;
- using namespace std;
- GMHIKernelRaw::GMHIKernelRaw( const std::vector< const NICE::SparseVector *> &_examples,
- const double _d_noise
- const NICE::Quantization * _q
- )
- {
- this->examples_raw = NULL;
- this->nnz_per_dimension = NULL;
- this->table_A = NULL;
- this->table_B = NULL;
- this->d_noise = _d_noise;
- this->q = _q;
- initData(_examples);
- }
- GMHIKernelRaw::~GMHIKernelRaw()
- {
- cleanupData();
- }
- void GMHIKernelRaw::cleanupData()
- {
- if ( this->examples_raw != NULL ) {
- for ( uint d = 0; d < this->num_dimension; d++ )
- if (examples_raw[d] != NULL)
- delete [] examples_raw[d];
- delete [] this->examples_raw;
- this->examples_raw = NULL;
- }
- if ( this->nnz_per_dimension != NULL ) {
- delete [] this->nnz_per_dimension;
- this->nnz_per_dimension = NULL;
- }
- if ( this->table_A != NULL ) {
- for ( uint d = 0; d < this->num_dimension; d++ )
- if (table_A[d] != NULL)
- delete [] table_A[d];
- delete [] this->table_A;
- this->table_A = NULL;
- }
- if ( this->table_B != NULL ) {
- for ( uint d = 0; d < this->num_dimension; d++ )
- if (table_B[d] != NULL)
- delete [] table_B[d];
- delete [] this->table_B;
- this->table_B = NULL;
- }
- }
- void GMHIKernelRaw::initData ( const std::vector< const NICE::SparseVector *> &_examples )
- {
- if (_examples.size() == 0 )
- fthrow(Exception, "No examples given for learning");
- cleanupData();
- this->num_dimension = _examples[0]->getDim();
- this->examples_raw = new sparseVectorElement *[num_dimension];
- this->nnz_per_dimension = new uint [num_dimension];
- this->num_examples = _examples.size();
- // waste memory and allocate a non-sparse data block
- sparseVectorElement **examples_raw_increment = new sparseVectorElement *[num_dimension];
- for (uint d = 0; d < this->num_dimension; d++)
- {
- this->examples_raw[d] = new sparseVectorElement [ this->num_examples ];
- examples_raw_increment[d] = this->examples_raw[d];
- this->nnz_per_dimension[d] = 0;
- }
- // additionally allocate a Vector with as many entries as examples
- // this vector will contain the L1 norm values of all examples + noise
- // thereby, it represents the diagonal entries of our kernel matrix for
- // the special case of minimum kernel
- this->diagonalElements.resize ( this->num_examples );
- this->diagonalElements.set ( this->d_noise );
- uint example_index = 0;
- NICE::Vector::iterator itDiagEl = this->diagonalElements.begin();
- // minor pre-allocation
- uint index;
- double value;
- double l1norm;
- for ( std::vector< const NICE::SparseVector * >::const_iterator i = _examples.begin();
- i != _examples.end();
- i++, example_index++, itDiagEl++
- )
- {
- l1norm = 0.0;
- const NICE::SparseVector *x = *i;
- for ( NICE::SparseVector::const_iterator j = x->begin(); j != x->end(); j++ )
- {
- index = j->first;
- value = j->second;
- examples_raw_increment[index]->value = value;
- examples_raw_increment[index]->example_index = example_index;
- // move to the next element
- examples_raw_increment[index]++;
- this->nnz_per_dimension[index]++;
- l1norm = l1norm + value;
- }
- *itDiagEl = *itDiagEl + l1norm;
- }
- delete [] examples_raw_increment;
- // sort along each dimension
- for (uint d = 0; d < this->num_dimension; d++)
- {
- uint nnz = this->nnz_per_dimension[d];
- if ( nnz > 1 )
- std::sort( this->examples_raw[d], this->examples_raw[d] + nnz );
- }
- // pre-allocate the A and B matrices
- this->table_A = allocateTableAorB();
- this->table_B = allocateTableAorB();
- // Quantization for classification?
- if ( this->q != NULL )
- {
- // (1) if yes, setup the parameters of the quantization object
- this->q->computeParametersFromData ( this );
- this->table_T = allocateTableT();
- }
- }
- double **GMHIKernelRaw::allocateTableAorB() const
- {
- double **table;
- table = new double *[this->num_dimension];
- for (uint i = 0; i < this->num_dimension; i++)
- {
- uint nnz = this->nnz_per_dimension[i];
- if (nnz>0) {
- table[i] = new double [ nnz ];
- } else {
- table[i] = NULL;
- }
- }
- return table;
- }
- double **GMHIKernelRaw::allocateTableT() const
- {
- double **table;
- table = new double *[this->num_dimension * this->q->getNumberOfBins()];
- return table;
- }
- void GMHIKernelRaw::copyTableAorB(double **src, double **dst) const
- {
- for (uint i = 0; i < this->num_dimension; i++)
- {
- uint nnz = this->nnz_per_dimension[i];
- if (nnz>0) {
- for (uint j = 0; j < nnz; j++)
- dst[i][j] = src[i][j];
- } else {
- dst[i] = NULL;
- }
- }
- }
- void GMHIKernelRaw::copyTableC(double **src, double **dst) const
- {
- for (uint i = 0; i < this->num_dimension; i++)
- {
- for (uint j = 0; j < this->q->getNumberOfBins(); j++)
- {
- //FIXME can we speed this up using pointer increments?
- dst[i][j] = src[i][j];
- }
- }
- }
- void GMHIKernelRaw::updateTables ( const NICE::Vector _x ) const
- {
- // pre-computions if quantization is activated
- double * prototypes;
- double * p_prototypes;
- // store prototypes
- if ( this->q != NULL)
- {
- // number of quantization bins
- uint hmax = _q->getNumberOfBins();
- double * prototypes = new double [ hmax * this->ui_d ];
- double * p_prototypes = prototypes;
- for (uint dim = 0; dim < this->ui_d; dim++)
- {
- for ( uint i = 0 ; i < hmax ; i++ )
- {
- if ( _pf != NULL )
- {
- *p_prototypes = _pf->f ( dim, _q->getPrototype( i, dim ) );
- } else
- {
- *p_prototypes = _q->getPrototype( i, dim );
- }
- p_prototypes++;
- }
- }
- }
- // start the actual computations of A, B, and optionally T
- for (uint dim = 0; dim < this->num_dimension; dim++)
- {
- double alpha_sum = 0.0;
- double alpha_times_x_sum = 0.0;
- uint nnz = nnz_per_dimension[dim];
- // loop through all elements in sorted order
- sparseVectorElement *training_values_in_dim = examples_raw[dim];
- for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
- {
- // index of the feature
- int index = training_values_in_dim->example_index;
- // element of the feature
- double elem = training_values_in_dim->value;
- alpha_times_x_sum += _x[index] * elem;
- this->table_A[dim][cntNonzeroFeat] = alpha_times_x_sum;
- alpha_sum += _x[index];
- this->table_B[dim][cntNonzeroFeat] = alpha_sum;
- if ( this->q != NULL)
- {
- // // index of the element, which is always bigger than the current value fval
- // uint index = 0;
- // // we use the quantization of the original features! the transformed feature were
- // // already used to calculate A and B, this of course assumes monotonic functions!!!
- // uint qBin = _q->quantize ( i->first, dim );
- // // the next loop is linear in max(hmax, n)
- // // REMARK: this could be changed to hmax*log(n), when
- // // we use binary search
- // for (uint j = 0; j < hmax; j++)
- // {
- // double fval = prototypes[ dim*hmax + j ];
- // double t;
- // if ( (index == 0) && (j < qBin) ) {
- // // current element is smaller than everything else
- // // resulting value = fval * sum_l=1^n alpha_l
- // t = fval*( _B[dim][this->ui_n-1 - nrZeroIndices] );
- // } else {
- // // move to next example, if necessary
- // while ( (j >= qBin) && ( index < (this->ui_n-1-nrZeroIndices)) )
- // {
- // index++;
- // iPredecessor = i;
- // i++;
- // if ( i->first != iPredecessor->first )
- // qBin = _q->quantize ( i->first, dim );
- // }
- // // compute current element in the lookup table and keep in mind that
- // // index is the next element and not the previous one
- // //NOTE pay attention: this is only valid if all entries are positive! -
- // // If not, ask whether the current feature is greater than zero. If so, subtract the nrZeroIndices, if not do not
- // if ( (j >= qBin) && ( index==(this->ui_n-1-nrZeroIndices) ) ) {
- // // the current element (fval) is equal or bigger to the element indexed by index
- // // in fact, the term B[dim][this->n-1-nrZeroIndices] - B[dim][index] is equal to zero and vanishes, which is logical, since all elements are smaller than j!
- // t = _A[dim][index];// + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index] );
- // } else {
- // // standard case
- // t = _A[dim][index-1] + fval*( _B[dim][this->ui_n-1-nrZeroIndices] - _B[dim][index-1] );
- // }
- // }
- // Tlookup[ dim*hmax + j ] = t;
- // }
- }
- }
- }
- }
- /** multiply with a vector: A*x = y */
- void GMHIKernelRaw::multiply (NICE::Vector & _y, const NICE::Vector & _x) const
- {
- // STEP 1: initialize tables A and B
- updateTables(_x);
- _y.resize( this->num_examples );
- _y.set(0.0);
- for (uint dim = 0; dim < this->num_dimension; dim++)
- {
- uint nnz = this->nnz_per_dimension[dim];
- uint nz = this->num_examples - nnz;
- if ( nnz == 0 ) {
- // all values are zero in this dimension :) and we can simply ignore the feature
- continue;
- }
- sparseVectorElement *training_values_in_dim = examples_raw[dim];
- for ( uint cntNonzeroFeat = 0; cntNonzeroFeat < nnz; cntNonzeroFeat++, training_values_in_dim++ )
- {
- uint feat = training_values_in_dim->example_index;
- uint inversePosition = cntNonzeroFeat;
- double fval = training_values_in_dim->value;
- double firstPart = this->table_A[dim][inversePosition];
- double secondPart = this->table_B[dim][nnz-1] - this->table_B[dim][inversePosition];
- _y[feat] += firstPart + fval * secondPart;
- }
- }
- for (uint feat = 0; feat < this->num_examples; feat++)
- _y[feat] += this->d_noise * _x[feat];
- }
- /** get the number of rows in A */
- uint GMHIKernelRaw::rows () const
- {
- // return the number of examples
- return num_examples;
- }
- /** get the number of columns in A */
- uint GMHIKernelRaw::cols () const
- {
- // return the number of examples
- return num_examples;
- }
- double **GMHIKernelRaw::getTableA() const
- {
- double **t = allocateTableAorB();
- copyTableAorB(this->table_A, t);
- return t;
- }
- double **GMHIKernelRaw::getTableB() const
- {
- double **t = allocateTableAorB();
- copyTableAorB(this->table_B, t);
- return t;
- }
- double **GMHIKernelRaw::getTableT() const
- {
- double **t = allocateTableT();
- copyTableT(this->table_T, t);
- return t;
- }
- uint *GMHIKernelRaw::getNNZPerDimension() const
- {
- uint *v = new uint[this->num_dimension];
- for (uint i = 0; i < this->num_dimension; i++)
- v[i] = this->nnz_per_dimension[i];
- return v;
- }
- uint NICE::GMHIKernelRaw::getNumberOfDimensions() const
- {
- return this->num_dimension;
- }
- void NICE::GMHIKernelRaw::getDiagonalElements( NICE::Vector & _diagonalElements) const
- {
- _diagonalElements = this->diagonalElements;
- }
- double NICE::GMHIKernelRaw::getLargestValue ( ) const
- {
- double vmax (0.0);
- double vtmp (0.0);
- uint tmpIdx ( 0 );
- // compare largest elements of all dimensions
- for (uint d = 0; d < this->num_dimension; d++)
- {
- uint nnz = this->nnz_per_dimension[d];
- if ( nnz > 1 )
- {
- tmpIdx = tmpIdx + nnz;
- vtmp = this->examples_raw[tmpIdx];
- if ( vtmp > vmax )
- {
- vmax = vtmp;
- }
- }
- }
- return vmax;
- }
- NICE::Vector NICE::GMHIKernelRaw::getLargestValuePerDimension ( ) const
- {
- NICE::Vector vmax ( this->get_d() );
- NICE::Vector::iterator vmaxIt = vmax.begin();
- uint tmpIdx ( 0 );
- for (uint d = 0; d < this->num_dimension; d++, vmaxIt++)
- {
- uint nnz = this->nnz_per_dimension[d];
- if ( nnz > 1 )
- {
- tmpIdx = tmpIdx + nnz;
- *vmaxIt = this->examples_raw[tmpIdx];
- }
- else
- {
- *vmaxIt = 0.0;
- }
- }
- return vmax;
- }
|