/**
* @file KernelData.cpp
* @brief Caching of kernel data (kernel matrix, Cholesky factor, explicit inverse, log-determinant)
* @author Erik Rodner
* @date 01/19/2010

*/
#include <cmath>
#include <iostream>

#include "KernelData.h"
#include "core/algebra/CholeskyRobustAuto.h"
#include "core/algebra/CholeskyRobust.h"
#include "core/vector/Algorithms.h"

#ifdef NICE_USELIB_OPENMP
#include <omp.h> // for parallel processing
#endif


#include "vector"


using namespace std;
using namespace NICE;
using namespace OBJREC;

KernelData::KernelData()
{
  // empty config
  Config conf;
  initFromConfig ( &conf, "DONTCARE" );
}

KernelData::KernelData ( const KernelData & src )
{
  kernelMatrix = src.kernelMatrix;
  inverseKernelMatrix = src.inverseKernelMatrix;
  choleskyMatrix = src.choleskyMatrix;
  // deep copy of the cached matrices
  for ( map<int, NICE::Matrix *>::const_iterator i = src.cachedMatrices.begin();
        i != src.cachedMatrices.end(); i++ )
  {
    Matrix *M = new Matrix ( * ( i->second ) );
    cachedMatrices.insert ( pair<int, NICE::Matrix *> ( i->first, M ) );
  }
  logdet = src.logdet;
  verbose = src.verbose;
  cr = src.cr->clone();
}

KernelData::KernelData ( const Config *conf, const Matrix & kernelMatrix, const string & section, bool updateCholesky )
{
  initFromConfig ( conf, section );
  this->kernelMatrix = kernelMatrix;
  if ( updateCholesky ) 
    updateCholeskyFactorization();
}

KernelData::KernelData ( const Config *conf, const string & section )
{
  initFromConfig ( conf, section );
}

void KernelData::initFromConfig ( const Config *conf, const string & section )
{
  verbose = conf->gB ( section, "verbose", false );
  string inv_method = conf->gS ( section, "robust_cholesky", "auto" );

  double noiseStep = conf->gD ( section, "rchol_noise_variance", 1e-7 );
  double minimumLogDet = conf->gD ( section, "rchol_minimum_logdet", - std::numeric_limits<double>::max() );
  bool useCuda = conf->gB ( section, "rchol_cuda", true );
  if ( verbose && useCuda )
    std::cerr << "KernelData: using the cuda implementation of cholesky decomposition (might be inaccurate)" << std::endl;

  if ( inv_method == "auto" )
  {
    if ( verbose )
      std::cerr << "KernelData: using the cholesky method with automatic regularization" << std::endl;
    cr = new CholeskyRobustAuto ( verbose, noiseStep, minimumLogDet, useCuda );
  } else {
    if ( verbose )
      std::cerr << "KernelData: using the cholesky method with static regularization" << std::endl;

    cr = new CholeskyRobust ( verbose, noiseStep, useCuda );
  }
}
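
/*
 * Example configuration (illustrative sketch; the INI-style syntax is the
 * usual NICE::Config convention, and "Kernel" is just an assumed section
 * name -- the keys are exactly the ones read above):
 *
 *   [Kernel]
 *   verbose              = false
 *   robust_cholesky      = auto
 *   rchol_noise_variance = 1e-7
 *   rchol_cuda           = true
 *
 * "auto" selects CholeskyRobustAuto (automatic regularization); any other
 * value falls back to CholeskyRobust with static regularization.
 */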

KernelData::~KernelData()
{
  delete cr;
  // the copy constructor deep-copies the cached matrices, so we own them
  for ( map<int, NICE::Matrix *>::iterator i = cachedMatrices.begin();
        i != cachedMatrices.end(); i++ )
    delete i->second;
}

void KernelData::updateCholeskyFactorization ()
{
  if ( verbose )
    std::cerr << "KernelData: kernel: " << kernelMatrix.rows() << " " << kernelMatrix.cols() << std::endl;

  if ( ( kernelMatrix.rows() <= 0 ) || ( kernelMatrix.cols() <= 0 ) )
    fthrow ( Exception, "KernelData: no kernel matrix available !" );

  if ( kernelMatrix.containsNaN() )
  {
    if ( verbose )
      std::cerr << "KernelData: kernel matrix contains NaNs (setting inverse to identity)" << std::endl;

    logdet = numeric_limits<double>::max();

    choleskyMatrix.resize ( kernelMatrix.rows(), kernelMatrix.cols() );
    choleskyMatrix.setIdentity();
  } else {
    if ( verbose )
      std::cerr << "KernelData: calculating cholesky decomposition" << std::endl;

    cr->robustChol ( kernelMatrix, choleskyMatrix );
    logdet = cr->getLastLogDet();

    if ( !std::isfinite ( logdet ) )
    {
      choleskyMatrix.resize ( kernelMatrix.rows(), kernelMatrix.cols() );
      choleskyMatrix.setIdentity();
      logdet = numeric_limits<double>::max();
    }
  }
}

void KernelData::updateInverseKernelMatrix ()
{
  if ( ! hasCholeskyFactorization() )
    updateCholeskyFactorization();
  inverseKernelMatrix.resize ( choleskyMatrix.rows(), choleskyMatrix.cols() );
  choleskyInvertLargeScale ( choleskyMatrix, inverseKernelMatrix );
}


void KernelData::computeInverseKernelMultiply ( const Vector & x, Vector & result ) const
{
  if ( choleskyMatrix.rows() == 0 )
    fthrow ( Exception, "Cholesky factorization was not initialized, use updateCholeskyFactorization() in advance" );
  choleskySolveLargeScale ( choleskyMatrix, x, result );
}
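
/*
 * Usage sketch (illustrative only; K is assumed to be a symmetric positive
 * definite NICE::Matrix and y a NICE::Vector of matching size):
 *
 *   NICE::Config conf;
 *   OBJREC::KernelData kd ( &conf, K, "Kernel", true ); // also computes the Cholesky factor
 *   NICE::Vector alpha;
 *   kd.computeInverseKernelMultiply ( y, alpha );       // alpha = K^{-1} y, O(n^2) per solve
 */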

const NICE::Matrix & KernelData::getKernelMatrix() const
{
  return kernelMatrix;
}

NICE::Matrix & KernelData::getKernelMatrix()
{
  return kernelMatrix;
}

const NICE::Matrix & KernelData::getInverseKernelMatrix() const
{
  return inverseKernelMatrix;
}

NICE::Matrix & KernelData::getInverseKernelMatrix()
{
  return inverseKernelMatrix;
}

const NICE::Matrix & KernelData::getCholeskyMatrix() const
{
  return choleskyMatrix;
}

const Matrix & KernelData::getCachedMatrix ( int i ) const
{
  map<int, NICE::Matrix *>::const_iterator it = cachedMatrices.find ( i );
  if ( it != cachedMatrices.end() )
    return * ( it->second );
  else
    fthrow ( Exception, "Cached matrix with index " << i << " is not available." );
}

void KernelData::setCachedMatrix ( int i, Matrix *m )
{
  cachedMatrices[i] = m;
}

uint KernelData::getKernelMatrixSize () const
{
  uint mysize = ( kernelMatrix.rows() == 0 ) ? ( choleskyMatrix.rows() ) : kernelMatrix.rows();
  return mysize;
}

void KernelData::getLooEstimates ( const Vector & y, Vector & muLoo, Vector & sigmaLoo ) const
{
  if ( inverseKernelMatrix.rows() != getKernelMatrixSize() )
    fthrow ( Exception, "updateInverseKernelMatrix() has to be called in advance to use this function\n" );
  if ( y.size() != inverseKernelMatrix.rows() )
    fthrow ( Exception, "inverse kernel matrix does not fit to the size of the vector of function values y\n" );

  Vector alpha;
  computeInverseKernelMultiply ( y, alpha );
  muLoo.resize ( y.size() );
  sigmaLoo.resize ( y.size() );
  for ( uint l = 0 ; l < y.size(); l++ )
  {
    // closed-form leave-one-out estimates based on the explicit inverse:
    // sigma^2_{-l} = 1 / (K^{-1})_{ll},  mu_{-l} = y_l - alpha_l * sigma^2_{-l}
    sigmaLoo[l] = 1.0 / inverseKernelMatrix ( l, l );
    muLoo[l] = y[l] - alpha[l] * sigmaLoo[l];
  }
}
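
/*
 * Usage sketch for the closed-form leave-one-out estimates (illustrative
 * only; kd, K and y as in the sketch above):
 *
 *   kd.updateInverseKernelMatrix(); // getLooEstimates() needs the explicit inverse
 *   NICE::Vector muLoo, sigmaLoo;
 *   kd.getLooEstimates ( y, muLoo, sigmaLoo );
 *   // muLoo[l]: predictive mean for example l when it is left out,
 *   // sigmaLoo[l]: the corresponding predictive variance
 */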


KernelData *KernelData::clone ( void ) const
{
  return new KernelData ( *this );
}

/**
* @brief Updates the GP likelihood if only the i-th row and column have changed. Time is O(n^2) instead of O(n^3).
* @author Alexander Lütz
* @date 01/12/2010 (dd/mm/yyyy)
*/
void KernelData::getGPLikelihoodWithOneNewRow ( const NICE::Vector & y, const double & oldLogdetK, const int & rowIndex, const NICE::Vector & newRow, const Vector & oldAlpha , Vector & newAlpha, double & loglike )
{
  // oldAlpha = K^{-1} y

  // K' new kernel matrix = exchange the row and column at position rowIndex
  // with newRow
  // try to find U and V such that K' = K + U*V with U,V having rank 2

  // rowIndex'th base vector
  Vector ei ( y.size(), 0.0 );
  ei[rowIndex] = 1.0;

  // difference between the new and the old row; the diagonal entry is halved
  // because it is touched by both the row and the column update below
  Vector a = newRow - kernelMatrix.getRow ( rowIndex ) ;
  a[rowIndex] = a[rowIndex] / 2;

  NICE::Matrix U ( y.size(), 2 );
  NICE::Matrix V ( 2, y.size() );
  for ( uint i = 0; i < y.size(); i++ )
  {
    U ( i, 0 ) = a[i];
    U ( i, 1 ) = ei[i];
    V ( 0, i ) = ei[i];
    V ( 1, i ) = a[i];
  }

  // Sherman-Morrison-Woodbury formula:
  // alpha_new = (K + UV)^{-1} y = K^{-1} y - K^{-1} U ( I + V K^{-1} U )^{-1} V K^{-1} y
  //           = oldAlpha - B ( I + V B )^{-1} V oldAlpha
  //           = oldAlpha - B F^{-1} V oldAlpha
  // with B = K^{-1} U and F = (I + V B)

  // Time complexity: 2 choleskySolve calls: O(n^2)

  NICE::Vector B_1;
  computeInverseKernelMultiply ( U.getColumn ( 0 ), B_1 );

  NICE::Vector B_2;
  computeInverseKernelMultiply ( U.getColumn ( 1 ), B_2 );

  NICE::Matrix B ( y.size(), 2 );

  for ( uint i = 0; i < y.size(); i++ )
  {
    B ( i, 0 ) = B_1[i];
    B ( i, 1 ) = B_2[i];
  }

  NICE::Matrix F ( 2, 2 );
  F.setIdentity();
  Matrix V_B ( 2, 2 );
  V_B.multiply ( V, B );
  F += V_B;

  // Time complexity: one linear system with 2 unknowns,
  // solved in O(1) via the closed-form 2x2 inverse
  NICE::Matrix F_inv = NICE::Matrix ( 2, 2 );
  double denominator = F ( 0, 0 ) * F ( 1, 1 ) - F ( 0, 1 ) * F ( 1, 0 ); // det(F)
  F_inv ( 0, 0 ) = F ( 1, 1 ) / denominator;
  F_inv ( 0, 1 ) = -F ( 0, 1 ) / denominator;
  F_inv ( 1, 0 ) = - F ( 1, 0 ) / denominator;
  F_inv ( 1, 1 ) = F ( 0, 0 ) / denominator;

  Matrix M_oldAlpha ( y.size(), 1 );
  for ( uint i = 0; i < y.size(); i++ )
  {
    M_oldAlpha ( i, 0 ) = oldAlpha[i];
  }
  NICE::Matrix V_oldAlpha;
  V_oldAlpha.multiply ( V, M_oldAlpha );
  NICE::Matrix F_inv_V_old_Alpha;
  F_inv_V_old_Alpha.multiply ( F_inv, V_oldAlpha );

  NICE::Matrix M_newAlpha;
  M_newAlpha.multiply ( B, F_inv_V_old_Alpha );

  M_newAlpha *= -1;
  M_newAlpha += M_oldAlpha;

  newAlpha = NICE::Vector ( y.size() );
  for ( uint i = 0; i < y.size(); i++ )
  {
    newAlpha[i] = M_newAlpha ( i, 0 );
  }

  // Matrix Determinant Lemma
  // http://en.wikipedia.org/wiki/Matrix_determinant_lemma
  // det(K + U*V) = det(I + V * K^{-1} * U) * det(K)
  // logdet(K + U*V) = logdet( F ) + logdet(K)
  double logdetF = log ( F ( 0, 0 ) * F ( 1, 1 ) - F ( 0, 1 ) * F ( 1, 0 ) );

  double newLogdetK = logdetF + oldLogdetK;

  logdet = newLogdetK;

  loglike = newLogdetK + newAlpha.scalarProduct ( y );
}
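
/*
 * Usage sketch (illustrative only): oldAlpha = K^{-1} y and oldLogdet are
 * assumed to come from a previous full factorization, newRow is the new
 * i-th row/column of the kernel matrix:
 *
 *   NICE::Vector newAlpha;
 *   double loglike;
 *   kd.getGPLikelihoodWithOneNewRow ( y, oldLogdet, i, newRow, oldAlpha, newAlpha, loglike );
 */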

/**
* @brief Updates the GP likelihood if only the i-th row and column have changed. Time is O(n^2) instead of O(n^3). This is only the first part; useful for multi-class problems.
* @author Alexander Lütz
* @date 01/12/2010 (dd/mm/yyyy)
*/
void KernelData::getGPLikelihoodWithOneNewRow_FirstPart ( const int & rowIndex, const NICE::Vector & newRow )
{

  // oldAlpha = K^{-1} y

  // K' new kernel matrix = exchange the row and column at position rowIndex
  // with newRow
  // try to find U and V such that K' = K + U*V with U,V having rank 2

  // rowIndex'th base vector
  Vector ei ( newRow.size(), 0.0 );
  ei[rowIndex] = 1.0;

  // difference between the new and the old row; the diagonal entry is halved
  // because it is touched by both the row and the column update below
  Vector a = newRow - kernelMatrix.getRow ( rowIndex ) ;
  a[rowIndex] = a[rowIndex] / 2;

  U.resize ( newRow.size(), 2 );
  V.resize ( 2, newRow.size() );
//  #pragma omp parallel for
  for ( uint i = 0; i < newRow.size(); i++ )
  {
    U ( i, 0 ) = a[i];
    U ( i, 1 ) = ei[i];
    V ( 0, i ) = ei[i];
    V ( 1, i ) = a[i];
  }

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- U: " << U << std::endl;
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- V: " << V << std::endl;
  }

  // Sherman-Morrison-Woodbury formula:
  // alpha_new = (K + UV)^{-1} y = K^{-1} y - K^{-1} U ( I + V K^{-1} U )^{-1} V K^{-1} y
  //           = oldAlpha - B ( I + V B )^{-1} V oldAlpha
  //           = oldAlpha - B F^{-1} V oldAlpha
  // with B = K^{-1} U and F = (I + V B)

  // Time complexity: 2 choleskySolve calls: O(n^2)


  NICE::Vector B_1;
  computeInverseKernelMultiply ( U.getColumn ( 0 ), B_1 );

  NICE::Vector B_2;
  computeInverseKernelMultiply ( U.getColumn ( 1 ), B_2 );

  B.resize ( newRow.size(), 2 );

  for ( uint i = 0; i < newRow.size(); i++ )
  {
    B ( i, 0 ) = B_1[i];
    B ( i, 1 ) = B_2[i];
  }

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- B: " << B << std::endl;
  }

  F.resize ( 2, 2 );
  F.setIdentity();
  Matrix V_B ( 2, 2 );
  V_B.multiply ( V, B );
  F += V_B;

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- F: " << F << std::endl;
  }

  // Time complexity: one linear system with 2 unknowns,
  // solved in O(1) via the closed-form 2x2 inverse
  F_inv.resize ( 2, 2 );
  double denominator = F ( 0, 0 ) * F ( 1, 1 ) - F ( 0, 1 ) * F ( 1, 0 ); // det(F)
  F_inv ( 0, 0 ) = F ( 1, 1 ) / denominator;
  F_inv ( 0, 1 ) = -F ( 0, 1 ) / denominator;
  F_inv ( 1, 0 ) = - F ( 1, 0 ) / denominator;
  F_inv ( 1, 1 ) = F ( 0, 0 ) / denominator;
  
  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- F_inv: " << F_inv << std::endl;

    // sanity check: F * F_inv should be (close to) the identity
    NICE::Matrix MultiplicationResult ( F.rows(), F_inv.cols(), 0.0 );
    MultiplicationResult.multiply ( F, F_inv );
    std::cerr << std::endl << "KernelData::getGPLikelihoodWithOneNewRow_FirstPart -- F-inversion MultiplicationResult: " << MultiplicationResult << std::endl;
  }
}

/**
* @brief Updates the GP likelihood if only the i-th row and column have changed. Time is O(n^2) instead of O(n^3). This is only the second part; useful for multi-class problems.
* @author Alexander Lütz
* @date 01/12/2010 (dd/mm/yyyy)
*/
void KernelData::getGPLikelihoodWithOneNewRow_SecondPart ( const NICE::Vector & y, const double & oldLogdetK, const Vector & oldAlpha , Vector & newAlpha, double & loglike )
{
  Matrix M_oldAlpha ( y.size(), 1 );
  for ( uint i = 0; i < y.size(); i++ )
  {
    M_oldAlpha ( i, 0 ) = oldAlpha[i];
  }
  NICE::Matrix V_oldAlpha;
  V_oldAlpha.multiply ( V, M_oldAlpha );
  NICE::Matrix F_inv_V_old_Alpha;
  F_inv_V_old_Alpha.multiply ( F_inv, V_oldAlpha );

  NICE::Matrix M_newAlpha;
  M_newAlpha.multiply ( B, F_inv_V_old_Alpha );

  M_newAlpha *= -1;
  M_newAlpha += M_oldAlpha;

  newAlpha = NICE::Vector ( y.size() );
  for ( uint i = 0; i < y.size(); i++ )
  {
    newAlpha[i] = M_newAlpha ( i, 0 );
  }

  // Matrix Determinant Lemma
  // http://en.wikipedia.org/wiki/Matrix_determinant_lemma
  // det(K + U*V) = det(I + V * K^{-1} * U) * det(K)
  // logdet(K + U*V) = logdet( F ) + logdet(K)
  double logdetF = log ( F ( 0, 0 ) * F ( 1, 1 ) - F ( 0, 1 ) * F ( 1, 0 ) );

  double newLogdetK = logdetF + oldLogdetK;

  logdet = newLogdetK;

  loglike = newLogdetK + newAlpha.scalarProduct ( y );
}
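
/*
 * Usage sketch for the two-part variant (illustrative only): the first part
 * depends only on the exchanged row, so for a multi-class problem it is
 * computed once, and the cheap second part is repeated for each class; the
 * containers yc, oldLogdets and oldAlphas are hypothetical:
 *
 *   kd.getGPLikelihoodWithOneNewRow_FirstPart ( i, newRow );
 *   for ( uint c = 0; c < numClasses; c++ )
 *   {
 *     NICE::Vector newAlpha;
 *     double loglike;
 *     kd.getGPLikelihoodWithOneNewRow_SecondPart ( yc[c], oldLogdets[c], oldAlphas[c], newAlpha, loglike );
 *   }
 */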


/**
* @brief Exchanges the i-th row and column of the stored kernel matrix and updates the explicit inverse via a rank-2 update. Time is O(n^2) instead of O(n^3).
* @author Alexander Lütz
* @date 01/09/2011 (dd/mm/yyyy)
*/
void KernelData::perform_Rank_2_Update ( const int & rowIndex, const NICE::Vector & newRow )
{
  getGPLikelihoodWithOneNewRow_FirstPart ( rowIndex, newRow );
  Matrix prod_1;
  prod_1.multiply ( V, inverseKernelMatrix );

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::perform_Rank_2_Update -- prod_1: " << prod_1 << std::endl;
  }

  Matrix prod_2;
  prod_2.multiply ( F_inv, prod_1 );

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::perform_Rank_2_Update -- prod_2: " << prod_2 << std::endl;
  }

  Matrix prod_3;
  prod_3.multiply ( B, prod_2 );

  if ( verbose )
  {
    std::cerr << std::endl << "KernelData::perform_Rank_2_Update -- prod_3: " << prod_3 << std::endl;
  }
  inverseKernelMatrix = inverseKernelMatrix - prod_3;
  
//   std::cerr << "perform rank 2 update: inverseKernelMatrix: " << inverseKernelMatrix << std::endl;

  // keep the stored kernel matrix consistent with the updated inverse
  for ( uint i = 0; i < newRow.size(); i++ )
  {
    kernelMatrix ( i, rowIndex ) = newRow[i];
    kernelMatrix ( rowIndex, i ) = newRow[i];
  }
//   std::cerr << "kernelMatrix: " << kernelMatrix << std::endl << " newRow: " << newRow << std::endl;
}
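
/*
 * Usage sketch (illustrative only): both the Cholesky factor and the
 * explicit inverse have to be valid before the update:
 *
 *   kd.updateCholeskyFactorization();
 *   kd.updateInverseKernelMatrix();
 *   kd.perform_Rank_2_Update ( i, newRow );
 *   const NICE::Matrix & Kinv = kd.getInverseKernelMatrix(); // inverse of the modified kernel
 */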

/**
* @brief Exchanges k rows and columns of the stored kernel matrix and updates the explicit inverse via a rank-2k update. Time is O(k^3 + n^2) instead of O(n^3). Alternatively, one could exchange one row at a time iteratively, which leads to O(k*n^2).
* @author Alexander Lütz
* @date 01/09/2011 (dd/mm/yyyy)
*/
void KernelData::perform_Rank_2k_Update ( const std::vector<int> & rowIndices, const std::vector<NICE::Vector> & newRows )
{
  if ( ( rowIndices.size() != 0 ) && ( rowIndices.size() == newRows.size() ) )
  {
    std::vector<NICE::Vector> unity_vectors;
    std::vector<NICE::Vector> diff_vectors;
    for ( uint j = 0; j < rowIndices.size(); j++ )
    {
      NICE::Vector unity_vector ( newRows[0].size(), 0.0 );
      unity_vector[rowIndices[j] ] = 1.0;
      unity_vectors.push_back ( unity_vector );

      NICE::Vector a = newRows[j] - kernelMatrix.getRow ( rowIndices[j] );
      // entries at the updated positions are touched by both the row and the
      // column update, so halve them to avoid double counting
      for ( uint x = 0; x < rowIndices.size(); x++ )
      {
        a[rowIndices[x] ] /= 2.0;
      }
      diff_vectors.push_back ( a );
    }

    U.resize ( newRows[0].size(), 2*rowIndices.size() );
    V.resize ( 2*rowIndices.size(), newRows[0].size() );

    for ( uint i = 0; i < newRows[0].size(); i++ )
    {
      for ( uint j = 0; j < rowIndices.size(); j++ )
      {
        U ( i, rowIndices.size() + j ) = ( unity_vectors[j] ) [i];
        U ( i, j ) = ( diff_vectors[j] ) [i];

        V ( rowIndices.size() + j, i ) = ( diff_vectors[j] ) [i];
        V ( j, i ) = ( unity_vectors[j] ) [i];
      }
    }

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- U: " << U << std::endl;
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- V: " << V  << std::endl;
    }

    if ( verbose )
    {
      // UV is only needed for the debug output below
      NICE::Matrix UV ( newRows[0].size(), newRows[0].size() );
      UV.multiply ( U, V );

      // the entries at the intersections of the updated rows and columns are
      // contained twice in U*V, so halve them for the output
      for ( int x = 0; x < ( int ) rowIndices.size(); x++ )
      {
        for ( int y = x; y < ( int ) rowIndices.size(); y++ )
        {
          UV ( rowIndices[x] , rowIndices[y] ) /= 2.0;
          if ( x != y )
            UV ( rowIndices[y] , rowIndices[x] ) /= 2.0;
        }
      }

      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- UV:"  << UV << std::endl;
    }


    B.resize ( newRows[0].size(), 2*rowIndices.size() );
    for ( uint j = 0; j < 2*rowIndices.size(); j++ )
    {
      NICE::Vector B_row;
      computeInverseKernelMultiply ( U.getColumn ( j ), B_row );
      for ( uint i = 0; i < newRows[0].size(); i++ )
      {
        B ( i, j ) = B_row[i];
      }
    }

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- B:"  << B << std::endl;
    }

    F.resize ( 2*rowIndices.size(), 2*rowIndices.size() );
    F.setIdentity();
    Matrix V_B ( 2*rowIndices.size(), 2*rowIndices.size() );
    V_B.multiply ( V, B );
    F += V_B;

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- F:"  << F << std::endl;
    }

    // invert F
    // we cannot use a Cholesky decomposition here, since F is in general not positive definite
    F_inv = invert ( F );

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- F_inv:"  << F_inv <<std::endl;
    }

    if ( verbose )
    {
      // sanity check: F * F_inv should be (close to) the identity
      NICE::Matrix MultiplicationResult ( F.rows(), F_inv.cols(), 0.0 );
      MultiplicationResult.multiply ( F, F_inv );
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- F-inversion MultiplicationResult:"  << MultiplicationResult << std::endl;
    }

    Matrix prod_1;
    prod_1.multiply ( V, inverseKernelMatrix );

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- prod_1:"  << prod_1 << std::endl;
    }

    Matrix prod_2;
    prod_2.multiply ( F_inv, prod_1 );

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- prod_2:"  << prod_2 << std::endl;
    }

    Matrix prod_3;
    prod_3.multiply ( B, prod_2 );

    if ( verbose )
    {
      std::cerr << std::endl << "KernelData::perform_Rank_2k_Update -- prod_3:"  << prod_3 << std::endl;
    }
    inverseKernelMatrix = inverseKernelMatrix - prod_3;

    //remember the new kernel entries for the next time
    for ( uint i = 0; i < rowIndices.size(); i++ )
    {
      for ( uint ik = 0; ik < kernelMatrix.rows(); ik++ )
      {
        kernelMatrix ( ik, rowIndices[i] ) = ( newRows[i] ) [ik];
        kernelMatrix ( rowIndices[i], ik ) = ( newRows[i] ) [ik];
      }
    }
  }
  else
  {
    fthrow ( Exception, "KernelData::perform_Rank_2k_Update: no row indices given or the number of indices does not match the number of new rows!" );
  }
}
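
/*
 * Usage sketch (illustrative only): exchanging two rows/columns at once;
 * newRow3 and newRow7 are hypothetical replacement rows:
 *
 *   std::vector<int> indices;
 *   std::vector<NICE::Vector> rows;
 *   indices.push_back ( 3 ); rows.push_back ( newRow3 );
 *   indices.push_back ( 7 ); rows.push_back ( newRow7 );
 *   kd.perform_Rank_2k_Update ( indices, rows ); // updates kernelMatrix and its inverse
 */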

void KernelData::delete_one_row ( const int & rowIndex )
{
  if ( ( inverseKernelMatrix.rows() != 0 ) && ( inverseKernelMatrix.cols() != 0 ) )
  {
    inverseKernelMatrix.deleteCol ( rowIndex );
    inverseKernelMatrix.deleteRow ( rowIndex );
  }

  if ( ( choleskyMatrix.rows() != 0 ) && ( choleskyMatrix.cols() != 0 ) )
  {
    choleskyMatrix.deleteCol ( rowIndex );
    choleskyMatrix.deleteRow ( rowIndex );
  }

  if ( ( kernelMatrix.rows() != 0 ) && ( kernelMatrix.cols() != 0 ) )
  {
    kernelMatrix.deleteCol ( rowIndex );
    kernelMatrix.deleteRow ( rowIndex );
  }
}

void KernelData::delete_multiple_rows ( std::vector<int> & indices )
{
  if ( ( inverseKernelMatrix.rows() >= indices.size() ) && ( inverseKernelMatrix.cols() >= indices.size() ) )
  {
    inverseKernelMatrix.deleteCols ( indices );
    inverseKernelMatrix.deleteRows ( indices );
  }

  if ( ( choleskyMatrix.rows() >= indices.size() ) && ( choleskyMatrix.cols() >= indices.size() ) )
  {
    choleskyMatrix.deleteCols ( indices );
    choleskyMatrix.deleteRows ( indices );
  }

  if ( ( kernelMatrix.rows() >= indices.size() ) && ( kernelMatrix.cols() >= indices.size() ) )
  {
    kernelMatrix.deleteCols ( indices );
    kernelMatrix.deleteRows ( indices );
  }
}
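
/*
 * Usage sketch (illustrative only): removing examples 2 and 5 from all
 * cached matrices in one call:
 *
 *   std::vector<int> indices;
 *   indices.push_back ( 2 );
 *   indices.push_back ( 5 );
 *   kd.delete_multiple_rows ( indices );
 */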

void KernelData::setKernelMatrix ( const NICE::Matrix & k_matrix )
{
  kernelMatrix = k_matrix;
}

void KernelData::increase_size_by_One()
{
  NICE::Matrix new_Kernel ( kernelMatrix.rows() + 1, kernelMatrix.cols() + 1 );
  new_Kernel.setBlock ( 0, 0, kernelMatrix );
  // zero the new last row and column (the diagonal entry is set below)
  for ( uint i = 0; i < kernelMatrix.rows(); i++ )
  {
    new_Kernel ( i, kernelMatrix.cols() ) = 0.0;
    new_Kernel ( kernelMatrix.rows(), i ) = 0.0;
  }
  new_Kernel ( kernelMatrix.rows(), kernelMatrix.cols() ) = 1.0;
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  kernelMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  kernelMatrix = new_Kernel;

  new_Kernel.setBlock ( 0, 0, inverseKernelMatrix );
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  inverseKernelMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  inverseKernelMatrix = new_Kernel;

  new_Kernel.setBlock ( 0, 0, choleskyMatrix );
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  choleskyMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  choleskyMatrix = new_Kernel;
}

void KernelData::increase_size_by_k ( const uint & k )
{
  NICE::Matrix new_Kernel ( kernelMatrix.rows() + k, kernelMatrix.cols() + k );
  new_Kernel.setBlock ( 0, 0, kernelMatrix );
  // zero the k new border rows and columns of the enlarged matrix
  for ( uint i = 0; i < kernelMatrix.rows(); i++ )
  {
    for ( uint j = 0; j < k; j++ )
    {
      new_Kernel ( i, kernelMatrix.cols() + j ) = 0.0;
      new_Kernel ( kernelMatrix.rows() + j, i ) = 0.0;
    }
  }
  // initialize the new k x k block with the identity
  for ( uint j = 0; j < k; j++ )
  {
    for ( uint jj = 0; jj < k; jj++ )
    {
      new_Kernel ( kernelMatrix.rows() + j, kernelMatrix.cols() + jj ) = ( j == jj ) ? 1.0 : 0.0;
    }
  }
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  kernelMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  kernelMatrix = new_Kernel;

  new_Kernel.setBlock ( 0, 0, inverseKernelMatrix );
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  inverseKernelMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  inverseKernelMatrix = new_Kernel;

  new_Kernel.setBlock ( 0, 0, choleskyMatrix );
  //NOTE Maybe it would be more efficient to work directly with pointers to the memory
  choleskyMatrix.resize ( new_Kernel.rows(), new_Kernel.cols() );
  choleskyMatrix = new_Kernel;
}
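
/*
 * Usage sketch (illustrative only): the new border is initialized as an
 * identity block, so kernel matrix, Cholesky factor and inverse stay
 * consistent, and a subsequent rank-2 update can fill in the true kernel
 * values for the new example without a full O(n^3) refactorization:
 *
 *   kd.increase_size_by_One();
 *   int newIndex = kd.getKernelMatrixSize() - 1;
 *   kd.perform_Rank_2_Update ( newIndex, newRow ); // newRow: kernel values of the new example
 */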