Browse Source

added spline and RANSAC regression

Frank Prüfer 11 years ago
parent
commit
f91cb353c6

+ 20 - 3
regression/linregression/LinRegression.cpp

@@ -29,13 +29,19 @@ LinRegression::LinRegression(uint dimension)
 LinRegression::LinRegression ( const LinRegression & src ) : 
 RegressionAlgorithm ( src )
 {
+<<<<<<< HEAD
   dim = src.dim;
+=======
+dim = src.dim;
+modelParams = src.modelParams;
+>>>>>>> frank
 }
 
 LinRegression::~LinRegression()
 {
 }
 
+<<<<<<< HEAD
 LinRegression* LinRegression::clone ( void ) const
 {
   return new LinRegression(*this);
@@ -54,6 +60,15 @@ void LinRegression::teach ( const NICE::VVector & x, const NICE::Vector & y ){
   
   for ( uint i = 0;i < dim;i++ )  //initialize vector of model parameters
   {
+=======
+void LinRegression::teach ( const NICE::VVector & x, const NICE::Vector & y )
+{  
+  if (dim == 0){	//dimension not specified via constructor
+    dim = x[0].size()+1;  //use full dimension of data
+  }
+  
+  for ( uint i = 0;i < dim;i++ ){  //initialize vector of model parameters
+>>>>>>> frank
     modelParams.push_back(0.0);
   }
   
@@ -117,11 +132,13 @@ void LinRegression::teach ( const NICE::VVector & x, const NICE::Vector & y ){
   }
 }
 
-std::vector<double> LinRegression::getModelParams(){
+std::vector<double> LinRegression::getModelParams()
+{
   return modelParams;
 }
 
-double LinRegression::predict ( const NICE::Vector & x ){
+double LinRegression::predict ( const NICE::Vector & x )
+{
   double y;
   if ( dim == 2 )     //two-dimensional least squares
   {  
@@ -136,4 +153,4 @@ double LinRegression::predict ( const NICE::Vector & x ){
   }
   
   return y;
-}
+}

+ 121 - 0
regression/linregression/RANSACReg.cpp

@@ -0,0 +1,121 @@
+/**
+* @file RANSACReg.cpp
+* @brief Implementation of RANSAC (RANdom SAmple Consensus) for regression purposes
+* @author Frank Prüfer
+* @date 09/10/2013
+
+*/  
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+
+#include "vislearning/regression/linregression/LinRegression.h"
+#include "vislearning/regression/linregression/RANSACReg.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+RANSACReg::RANSACReg ( const Config *_conf )
+{
+  threshold = _conf->gD("RANSACReg","threshold",0.5);
+  iter = _conf->gI("RANSACReg","iterations",10);
+}
+
+RANSACReg::RANSACReg ( const RANSACReg & src ) : RegressionAlgorithm ( src )
+{
+  threshold = src.threshold;
+  n = src.n;
+  iter = src.iter;
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  modelParams = src.modelParams;
+}
+
+RANSACReg::~RANSACReg()
+{
+}
+
+void RANSACReg::teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet )
+{
+  //for iter iterations do
+    //choose random subset of n points (n = dataSet[0].size()+1)
+    //do LinRegression on subset
+    //get modelParameters
+    //test how many points, which are not in subset, are close to model (use threshold and distance function here) -> these points are the consensus set
+    //if consensus set contains more points than any previous one, take this model as best_model
+  //maybe compute best_model again with all points of best_consensusSet  
+  //store best_model and maybe best_consensusSet
+  
+  NICE::VVector best_CS(0,0);
+  std::vector<double> best_labelCS;
+  
+  cerr<<"Size of training data: "<<dataSet.size()<<endl;
+  
+  vector<int> indices;
+  for ( uint i = 0; i < dataSet.size(); i++ )
+    indices.push_back(i);
+  
+  n = dataSet[0].size()+1;
+
+  for ( uint i = 0; i < iter; i++ ){
+    random_shuffle( indices.begin(), indices.end() );
+    NICE::VVector randDataSubset;
+    std::vector<double> randLabelSubset;
+    
+    for ( uint j = 0; j < n; j++ ){	//choose random subset of n points
+      randDataSubset.push_back( dataSet[indices[j]] );
+      randLabelSubset.push_back( labelSet[indices[j]] );
+    }
+    
+    LinRegression *linReg = new LinRegression ();
+    linReg->teach ( randDataSubset, (NICE::Vector)randLabelSubset );	//do LinRegression on subset
+    std::vector<double> tmp_modelParams = linReg->getModelParams();
+    
+    NICE::VVector current_CS;
+    std::vector<double> current_labelCS;
+    
+    for ( uint j = n; j < indices.size(); j++ ){	//compute distance between each datapoint and current model
+      double lengthNormalVector = 0; 
+      double sum = 0;
+      for ( uint k = 0; k < tmp_modelParams.size(); k++ ){
+	sum += tmp_modelParams[k] * dataSet[indices[j]][k];
+	lengthNormalVector += tmp_modelParams[k] * tmp_modelParams[k];
+      }
+      lengthNormalVector = sqrt(lengthNormalVector);
+      
+      double distance = ( sum - labelSet[indices[j]] )/ lengthNormalVector;
+//       cerr<<"distance: "<<distance<<endl;
+      
+
+      if ( abs(distance) < threshold ){	//if point is close to model, it belongs to consensus set
+	current_CS.push_back ( dataSet[indices[j]] );
+	current_labelCS.push_back ( labelSet[indices[j]] );
+      }
+    }
+    
+    if ( current_CS.size() > best_CS.size() ){	//if consensus set contains more points than any previous one, take this model as best_model
+      best_CS = current_CS;
+      best_labelCS = current_labelCS;
+    }
+  }
+  
+  cerr<<"Size of best_CS: "<<best_CS.size()<<endl;
+  LinRegression *best_linReg = new LinRegression ();	//compute best_model again with all points of best_consensusSet
+  best_linReg->teach ( best_CS, (NICE::Vector)best_labelCS );
+  modelParams = best_linReg->getModelParams();    
+}
+  
+double RANSACReg::predict ( const NICE::Vector & x )
+{
+  NICE::Vector nModel(modelParams);
+  NICE:: Vector xTmp(1,1.0);
+  xTmp.append(x);
+  double y = xTmp.scalarProduct(nModel);
+
+  return y;
+  
+}

+ 64 - 0
regression/linregression/RANSACReg.h

@@ -0,0 +1,64 @@
+/**
+* @file RANSACReg.h
+* @brief Implementation of RANSAC (RANdom SAmple Consensus) for regression purposes
+* @author Frank Prüfer
+* @date 09/10/2013
+
+*/   
+#ifndef RANSACREGINCLUDE
+#define RANSACREGINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/Config.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+namespace OBJREC
+{
+class RANSACReg : public RegressionAlgorithm
+{
+  protected:
+    /** threshold value for determining when a datum fits a model */
+    double threshold;
+    
+    /** minimum number of data required to fit the model */
+    uint n;
+    
+    /** number of iterations performed by the algorithm */
+    uint iter;
+    
+    /** vector of model parameters */
+    std::vector<double> modelParams;
+    
+    /** set of data points */
+    NICE::VVector dataSet;
+    
+    /** set of responses according to dataset */
+    std::vector<double> labelSet;
+    
+
+  public:
+    /** simple constructor */
+    RANSACReg ( const NICE::Config *conf );
+    
+    /** copy constructor */
+    RANSACReg ( const RANSACReg & src );
+    
+    /** simple destructor */
+    virtual ~RANSACReg();
+    
+    /** predict response using simple vector */
+    double predict ( const NICE::Vector & x );
+    
+    /** teach whole set at once */
+    void teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet );
+
+};
+}	//namespace
+
+
+
+#endif

+ 2 - 2
regression/progs/testNPRegression.cpp

@@ -1,6 +1,6 @@
 /**
-* @file testLinRegression.cpp
-* @brief test of linear regression
+* @file testNPRegression.cpp
+* @brief test of non-parametric regression
 * @author Frank Prüfer
 * @date 08/29/2013
 

+ 286 - 0
regression/progs/testRANSACRegression.cpp

@@ -0,0 +1,286 @@
+/**
+* @file testRANSACRegression.cpp
+* @brief test of RANSAC regression
+* @author Frank Prüfer
+* @date 09/11/2013
+
+*/
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/linregression/RANSACReg.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  int inquotes=false;
+  char c;
+  int linemax=line.length();
+  string curstring;
+  record.clear();
+
+  while(line[linepos]!=0 && linepos < linemax)
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*------------Setting up PreRDF--------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing LinRegression...";
+    RANSACReg *RReg = new RANSACReg ( &conf );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the LinRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    RReg->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-------------Testing RDF-GP--------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = RReg->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
+ 
+ 

+ 291 - 0
regression/progs/testSplineRegression.cpp

@@ -0,0 +1,291 @@
+ 
+/**
+* @file testSplineRegression.cpp
+* @brief test of spline regression
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/
+
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "core/basics/Config.h"
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+
+#include "vislearning/baselib/ICETools.h"
+
+#include "vislearning/regression/splineregression/CRSplineReg.h"
+
+using namespace OBJREC;
+using namespace NICE;
+using namespace std;
+
+void csvline_populate ( vector<string> &record,
+                       const string& line,
+                       char delimiter )
+{
+  int linepos=0;
+  int inquotes=false;
+  char c;
+  int linemax=line.length();
+  string curstring;
+  record.clear();
+
+  while(line[linepos]!=0 && linepos < linemax)
+  {
+    c = line[linepos];
+
+    if (!inquotes && curstring.length()==0 && c=='"')
+    {
+      //beginquotechar
+      inquotes=true;
+    }
+    else if (inquotes && c=='"')
+    {
+      //quotechar
+      if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
+      {
+        //encountered 2 double quotes in a row (resolves to 1 double quote)
+        curstring.push_back(c);
+        linepos++;
+      }
+      else
+      {
+        //endquotechar
+        inquotes=false;
+      }
+    }
+    else if (!inquotes && c==delimiter)
+    {
+      //end of field
+      record.push_back( curstring );
+      curstring="";
+    }
+    else if (!inquotes && (c=='\r' || c=='\n') )
+    {
+     record.push_back( curstring );
+     return;
+    }
+    else
+    {
+      curstring.push_back(c);
+    }
+    linepos++;
+  }
+  
+  record.push_back( curstring );
+}
+
+void loadData( NICE::VVector &Data,
+               NICE::Vector &y,
+               const string &path,
+               const string &xdat,
+               const string &ydat )
+{
+
+  vector<string> row;
+  string line;
+
+  cerr<<"Preloading Data...";
+  ifstream in( (path+xdat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found" <<endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int numData = 0;
+
+  while ( getline(in, line)  && in.good() )
+  {
+    csvline_populate(row, line, ',');
+    vector<double> vec;
+    for (int i = 0; i < (int)row.size(); i++)
+    {
+      double dval = 0.0;
+      dval = atof(row[i].data() );
+      vec.push_back(dval);
+    }
+    NICE::Vector nvec(vec);
+    Data.push_back(nvec);
+    numData++;
+  }
+  in.close();
+
+  cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
+
+  in.open( (path+ydat).c_str() );
+  if ( in.fail() )
+  {
+    cout << "File not found! Setting default value 0.0..." <<endl;
+    y.resize(numData);
+    y.set(0.0);
+  }
+  else
+  {
+    y.resize(numData);
+    int count = 0;
+    while(getline(in, line)  && in.good() )
+    {
+      csvline_populate(row, line, ',');
+      for ( int i = 0; i < (int)row.size(); i++ )
+      {
+        double dval = 0.0;
+        dval = atof(row[i].data() );
+        y.set(count,dval);
+        count++;
+      }
+    }
+    in.close();
+  }
+
+  cerr<<"Finished."<<endl;
+}
+
+void testFrame (  Config conf,
+		  NICE::VVector &xdata,
+		  NICE::Vector &y )
+{
+  cerr<<"\nStarting test framework..."<<endl;
+  
+  /*------------Initialize Variables-----------*/
+  ofstream storeEvalData;
+  double trainRatio = conf.gD( "debug", "training_ratio", .9 );
+  
+  int trainingSize = (int)(trainRatio*xdata.size());
+  int testingSize = xdata.size() - trainingSize;
+  
+  vector<int> indices;
+  for ( int i = 0; i < (int)xdata.size(); i++ )
+    indices.push_back(i);
+  
+  int nfolds = conf.gI( "debug", "nfolds", 10 );
+  Vector mef_v ( nfolds );
+  Vector corr_v ( nfolds );
+  Vector resub_v ( nfolds );
+  Vector diff_v ( nfolds );
+
+  bool saveConfig = conf.gB( "debug", "save_config", false );
+  
+  /*------------Store Configuration------------*/
+  string filename = conf.gS( "debug", "filename" );
+  
+  if ( saveConfig )
+  {
+    cout << "Configuration will be stored in: " << filename << "_config" << endl;
+    
+    storeEvalData.open ( (filename+"_config").c_str() );
+
+    storeEvalData.close();
+  } else
+  {
+    cout << "Configuration will not be stored." << endl;
+  }
+  
+  /*------------Setting up NPRegression--------------*/
+  for ( int k = 0; k < nfolds; k++)
+  {
+    string fold;
+    ostringstream convert;
+    convert << k;
+    fold = convert.str();
+    
+    cout << "\nFOLD " << k << ":\n======" << endl;
+    
+
+    cerr << "Initializing NPRegression...";
+    CRSplineReg *spline = new CRSplineReg ( &conf );
+    cerr << "Finished." << endl;
+    
+    cerr << "Teaching the NPRegression algorithm...";
+    NICE::VVector trainData, testData;
+    NICE::Vector trainVals ( trainingSize );
+    NICE::Vector testVals ( testingSize );
+    random_shuffle( indices.begin(), indices.end() );
+    for ( int i = 0; i < trainingSize; i++ )
+    {
+      trainData.push_back ( xdata[ indices[i] ] );
+      trainVals.set( i, y[ indices[i] ] );
+    }
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      testData.push_back ( xdata[ indices[j+trainingSize] ] );
+      testVals.set( j, y[ indices[j+trainingSize] ] );
+    }
+    
+    spline->teach ( trainData, trainVals );
+    cerr << "Finished." << endl;
+    
+    /*-------------Testing RDF-GP--------------*/
+
+    cerr << "\nGetting prediction values for all data points...";
+    NICE::Vector predictionValues( testingSize );
+    predictionValues.set ( 0.0 );
+#pragma omp parallel for    
+    for ( int j = 0; j < testingSize; j++ )
+    {
+      predictionValues[j] = spline->predict( testData[j] );
+    }
+    cerr << "Finished." << endl;
+    
+    /*---------------Evaluation----------------*/
+    NICE::Vector diff = testVals - predictionValues;
+    
+    double mod_var = diff.StdDev()*diff.StdDev();
+    double tar_var = testVals.StdDev()*testVals.StdDev();
+    mef_v.set( k, (1-mod_var/tar_var) );
+    
+    NICE::Vector meanv( predictionValues.size() );
+    meanv.set( diff.Mean() );
+    NICE::Vector lhs = diff - meanv;
+    meanv.set( testVals.Mean() );
+    NICE::Vector rhs = testVals - meanv;
+    lhs *= rhs;
+    double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
+    corr_v.set( k, corr );
+    
+    diff *= diff;
+    diff_v.set( k, diff.Mean());
+    resub_v.set( k, (diff.Mean() / tar_var) );
+  }
+  
+  /*------------------Output-------------------*/
+  cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
+  cout << "  Modelling Efficiency: " << mef_v.Mean() << endl;
+  cout << "  Correlation: " << corr_v.Mean() << endl;
+  cout << "  Mean Square Error: " << diff_v.Mean() << endl;
+  cout << "  Standardized MSE: " << resub_v.Mean() << endl;
+}
+
+
+int main (int argc, char **argv) {
+
+  Config conf ( argc, argv );   //get config from user input
+  
+  string path = conf.gS( "debug", "path", "." );
+  string dataset = conf.gS( "debug", "dataset", "flux" );
+
+  NICE::VVector xdata;
+  NICE::Vector y;
+
+  loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
+  
+  testFrame( conf, xdata, y );
+
+  return 0;
+}
+
+
+ 

+ 218 - 0
regression/splineregression/CRSplineReg.cpp

@@ -0,0 +1,218 @@
+/**
+* @file CRSplineReg.cpp
+* @brief Implementation of Catmull-Rom-Splines for regression purposes
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/  
+#ifdef NICE_USELIB_OPENMP
+#include <omp.h>
+#endif
+
+#include <iostream>
+
+#include "vislearning/regression/splineregression/CRSplineReg.h"
+#include "vislearning/regression/linregression/LinRegression.h"
+
+#include "vislearning/math/mathbase/FullVector.h"
+
+using namespace OBJREC;
+
+using namespace std;
+using namespace NICE;
+
+CRSplineReg::CRSplineReg (  const NICE::Config *_conf )
+{
+  tau = _conf->gD("CRSplineReg","tau",0.5);
+  sortDim = _conf->gI("CRSplineReg","sortDim",0);
+}
+
+CRSplineReg::CRSplineReg (  uint sDim )
+{
+  sortDim = sDim;
+}
+
+CRSplineReg::CRSplineReg ( const CRSplineReg & src ) : RegressionAlgorithm ( src )
+{
+  tau = src.tau;
+  dataSet = src.dataSet;
+  labelSet = src.labelSet;
+  sortDim = src.sortDim;
+}
+
+CRSplineReg::~CRSplineReg()
+{
+}
+
+void CRSplineReg::teach ( const NICE::VVector & _dataSet, const NICE::Vector & _labelSet)
+{
+    fprintf (stderr, "teach using all !\n");
+    //NOTE this is crucial if we clear _dataSet afterwards!
+    //therefore, take care NOT to call _dataSet.clear() somewhere out of this method
+    this->dataSet = _dataSet;
+    this->labelSet = _labelSet.std_vector();
+    
+    std::cerr << "number of known training samples: " << this->dataSet.size() << std::endl;   
+    
+}
+
+void CRSplineReg::teach ( const NICE::Vector & x, const double & y )
+{
+    std::cerr << "CRSplineReg::teach one new example" << std::endl;
+    
+    for ( size_t i = 0 ; i < x.size() ; i++ )
+      if ( isnan(x[i]) ) 
+      {
+          fprintf (stderr, "There is a NAN value in within this vector: x[%d] = %f\n", (int)i, x[i]);
+          cerr << x << endl;
+          exit(-1);
+      }
+
+    dataSet.push_back ( x );
+    
+    labelSet.push_back ( y );
+    
+    std::cerr << "number of known training samples: " << dataSet.size()<< std::endl;
+}
+
+double CRSplineReg::predict ( const NICE::Vector & x )
+{
+  
+  if ( dataSet.size() <= 0 ) {
+    fprintf (stderr, "CRSplineReg: please use the train method first\n");
+    exit(-1);
+  }
+  int dimension = dataSet[0].size();
+
+  FullVector data ( dataSet.size()+1 );
+  
+#pragma omp parallel for  
+  for ( uint i = 0; i < dataSet.size(); i++ ){
+    data[i] = dataSet[i][sortDim];
+  }
+  data[dataSet.size()] = x[sortDim];
+    
+  std::vector<int> sortedInd;
+  data.getSortedIndices(sortedInd);
+    
+  int index;
+   
+  for ( uint i = 0; i < sortedInd.size(); i++ ){
+    if ( sortedInd[i] == dataSet.size() ){
+      index = i;
+      break;
+    }
+  }
+
+  NICE::Matrix points (4,dimension+1,0.0);
+  if ( index >= 2 && index < (sortedInd.size() - 2) ){	//everything is okay
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]];           
+  }
+  else if ( index == 1 ){	//just one point left from x
+    points.setRow(0,dataSet[sortedInd[index-1]]);
+    points(0,dimension) = labelSet[sortedInd[index-1]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]];      
+  }
+  else if ( index == 0 ){	//x is the farthest left point
+    points.setRow(0,dataSet[sortedInd[index+1]]);
+    points(0,dimension) = labelSet[sortedInd[index+1]];
+    points.setRow(1,dataSet[sortedInd[index+1]]);
+    points(1,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+2]]);
+    points(3,dimension) = labelSet[sortedInd[index+2]]; 
+  }
+  else if ( index == (sortedInd.size() - 2) ){	//just one point right from x
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index+1]]);
+    points(2,dimension) = labelSet[sortedInd[index+1]];      
+    points.setRow(3,dataSet[sortedInd[index+1]]);
+    points(3,dimension) = labelSet[sortedInd[index+1]];   
+  }
+  else if ( index == (sortedInd.size() - 1) ){	//x is the farthest right point
+    points.setRow(0,dataSet[sortedInd[index-2]]);
+    points(0,dimension) = labelSet[sortedInd[index-2]];
+    points.setRow(1,dataSet[sortedInd[index-1]]);
+    points(1,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(2,dataSet[sortedInd[index-1]]);
+    points(2,dimension) = labelSet[sortedInd[index-1]];      
+    points.setRow(3,dataSet[sortedInd[index-1]]);
+    points(3,dimension) = labelSet[sortedInd[index-1]];     
+  }
+
+  double t = (x[sortDim]-points(1,sortDim)) / (points(2,sortDim)-points(1,sortDim));	//this is just some kind of heuristic
+  if ( t != t || t < 0 || t > 1){	//check if t is NAN, -inf or inf (happens in the farthest right or left case from above)
+    t = 0.5;
+  }
+
+  //P(t) = b0*P0 + b1*P1 + b2*P2 + b3*P3    
+  NICE::Vector P(dimension);
+  double y;
+  double b0,b1,b2,b3;
+    
+  b0 = tau * (-(t*t*t) + 2*t*t - t);
+  b1 = tau * (3*t*t*t - 5*t*t + 2);
+  b2 = tau * (-3*t*t*t + 4*t*t + t);
+  b3 = tau * (t*t*t - t*t);
+
+#pragma omp parallel for  
+  for ( uint i = 0; i < dimension; i++ ){
+    P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+  }
+  
+  double diff1 = (P-x).normL2();
+  uint counter = 1;
+  while ( diff1 > 1e-5 && counter <= 21){	//adjust t to fit data better
+    double tmp = t;;
+    if (tmp > 0.5)
+      tmp = 1 - tmp;
+    t += tmp/counter;
+     
+    b0 = tau * (-(t*t*t) + 2*t*t - t);
+    b1 = tau * (3*t*t*t - 5*t*t + 2);
+    b2 = tau * (-3*t*t*t + 4*t*t + t);
+    b3 = tau * (t*t*t - t*t);
+      
+    for ( uint i = 0; i < dimension; i++ ){
+      P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+    }
+    
+    double diff2 = (P-x).normL2();
+    if ( diff2 > diff1 && t > 0) {
+      t -= 2*tmp/counter;    
+	
+      b0 = tau * (-(t*t*t) + 2*t*t - t);
+      b1 = tau * (3*t*t*t - 5*t*t + 2);
+      b2 = tau * (-3*t*t*t + 4*t*t + t);
+      b3 = tau * (t*t*t - t*t);
+
+#pragma omp parallel for      
+      for ( uint i = 0; i < dimension; i++ ){
+	P[i] = b0*points(0,i) + b1*points(1,i) + b2*points(2,i) + b3*points(3,i);
+      }
+      diff1 = (P-x).normL2();
+    }
+    counter++;
+  }
+  
+  y = b0*points(0,dimension) + b1*points(1,dimension) + b2*points(2,dimension) + b3*points(3,dimension);
+
+  return y;
+  
+}

+ 65 - 0
regression/splineregression/CRSplineReg.h

@@ -0,0 +1,65 @@
+/**
+* @file CRSplineReg.h
+* @brief Implementation of Catmull-Rom-Splines for regression purposes
+* @author Frank Prüfer
+* @date 09/03/2013
+
+*/  
+#ifndef CRSPLINEREGINCLUDE
+#define CRSPLINEREGINCLUDE
+
+#include "core/vector/VectorT.h"
+#include "core/vector/VVector.h"
+#include "core/vector/MatrixT.h"
+
+#include "core/basics/Config.h"
+
+#include "vislearning/regression/regressionbase/RegressionAlgorithm.h"
+
+namespace OBJREC
+{
+class CRSplineReg : public RegressionAlgorithm
+{
+  protected:
+    /** smoothness parameter */
+    double tau;
+    
+    /** dimension which is used for sorting the data (maybe use something like PCA to determine this variable) */
+    uint sortDim;
+    
+    /** set of data points */
+    NICE::VVector dataSet;
+    
+    /** set of responses according to dataset */
+    std::vector<double> labelSet;
+  
+  public:
+    /** simple constructor */
+    CRSplineReg( const NICE::Config *_conf );
+    
+    /** simple constructor specifying in which dimension data should be sorted*/
+    CRSplineReg( uint sDim );
+    
+    /** copy constructor */
+    CRSplineReg ( const CRSplineReg & src );
+    
+    /** simple destructor */
+    virtual ~CRSplineReg();
+    
+    /** predict response using simple vector */
+    double predict ( const NICE::Vector & x );
+    
+    /** teach whole set at once */
+    void teach ( const NICE::VVector & dataSet, const NICE::Vector & labelSet );
+
+    /** teach one data point at a time */
+    void teach ( const NICE::Vector & x, const double & y );
+  
+};
+}	//namespace
+
+
+
+
+
+#endif

+ 8 - 0
regression/splineregression/Makefile

@@ -0,0 +1,8 @@
+#TARGETS_FROM:=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM)
+#$(info recursively going up: $(TARGETS_FROM) ($(shell pwd)))
+
+all:
+
+%:
+	$(MAKE) TARGETS_FROM=$(notdir $(patsubst %/,%,$(shell pwd)))/$(TARGETS_FROM) -C .. $@
+

+ 103 - 0
regression/splineregression/Makefile.inc

@@ -0,0 +1,103 @@
+# LIBRARY-DIRECTORY-MAKEFILE
+# conventions:
+# - all subdirectories containing a "Makefile.inc" are considered sublibraries
+#   exception: "progs/" and "tests/" subdirectories!
+# - all ".C", ".cpp" and ".c" files in the current directory are linked to a
+#   library
+# - the library depends on all sublibraries 
+# - the library name is created with $(LIBNAME), i.e. it will be somehow
+#   related to the directory name and with the extension .a
+#   (e.g. lib1/sublib -> lib1_sublib.a)
+# - the library will be added to the default build list ALL_LIBRARIES
+
+# --------------------------------
+# - remember the last subdirectory
+#
+# set the variable $(SUBDIR) correctly to the current subdirectory. this
+# variable can be used throughout the current makefile.inc. The many 
+# SUBDIR_before, _add, and everything are only required so that we can recover
+# the previous content of SUBDIR before exiting the makefile.inc
+
+SUBDIR_add:=$(dir $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
+SUBDIR_before:=$(SUBDIR)
+SUBDIR:=$(strip $(SUBDIR_add))
+SUBDIR_before_$(SUBDIR):=$(SUBDIR_before)
+ifeq "$(SUBDIR)" "./"
+SUBDIR:=
+endif
+
+# ------------------------
+# - include subdirectories
+#
+# note the variables $(SUBDIRS_OF_$(SUBDIR)) are required later on to recover
+# the dependencies automatically. if you handle dependencies on your own, you
+# can also dump the $(SUBDIRS_OF_$(SUBDIR)) variable, and include the
+# makefile.inc of the subdirectories on your own...
+
+SUBDIRS_OF_$(SUBDIR):=$(patsubst %/Makefile.inc,%,$(wildcard $(SUBDIR)*/Makefile.inc))
+include $(SUBDIRS_OF_$(SUBDIR):%=%/Makefile.inc)
+
+# ----------------------------
+# - include local dependencies
+#
+# you can specify libraries needed by the individual objects or by the whole
+# directory. the object specific additional libraries are only considered
+# when compiling the specific object files
+# TODO: update documentation...
+
+-include $(SUBDIR)libdepend.inc
+
+$(foreach d,$(filter-out %progs %tests,$(SUBDIRS_OF_$(SUBDIR))),$(eval $(call PKG_DEPEND_INT,$(d))))
+
+# ---------------------------
+# - objects in this directory
+#
+# the use of the variable $(OBJS) is not mandatory. it is mandatory however
+# to update $(ALL_OBJS) in a way that it contains the path and name of
+# all objects. otherwise we can not include the appropriate .d files.
+
+OBJS:=$(patsubst %.cpp,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.cpp))) \
+      $(patsubst %.C,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.C))) \
+	  $(shell grep -ls Q_OBJECT $(SUBDIR)*.h | sed -e's@^@/@;s@.*/@$(OBJDIR)moc_@;s@\.h$$@.o@') \
+      $(patsubst %.c,$(OBJDIR)%.o,$(notdir $(wildcard $(SUBDIR)*.c)))
+ALL_OBJS += $(OBJS)
+
+# ----------------------------
+# - binaries in this directory
+#
+# output of binaries in this directory. none of the variables has to be used.
+# but everything you add to $(ALL_LIBRARIES) and $(ALL_BINARIES) will be
+# compiled with `make all`. be sure again to add the files with full path.
+
+LIBRARY_BASENAME:=$(call LIBNAME,$(SUBDIR))
+ifneq "$(SUBDIR)" ""
+ALL_LIBRARIES+=$(LIBDIR)$(LIBRARY_BASENAME).$(LINK_FILE_EXTENSION)
+endif
+
+# ---------------------
+# - binary dependencies
+#
+# there is no way of determining the binary dependencies automatically, so we
+# follow conventions. the current library depends on all sublibraries.
+# all other dependencies have to be added manually by specifying, that the
+# current .pc file depends on some other .pc file. binaries depending on
+# libraries should exclusively use the .pc files as well.
+
+ifeq "$(SKIP_BUILD_$(OBJDIR))" "1"
+$(LIBDIR)$(LIBRARY_BASENAME).a:
+else
+$(LIBDIR)$(LIBRARY_BASENAME).a:$(OBJS) \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).a,.$(LINK_FILE_EXTENSION))
+endif
+
+$(PKGDIR)$(LIBRARY_BASENAME).pc: \
+	$(call PRINT_INTLIB_DEPS,$(PKGDIR)$(LIBRARY_BASENAME).pc,.pc)
+
+# -------------------
+# - subdir management
+#
+# as the last step, always add this line to correctly recover the subdirectory
+# of the makefile including this one!
+
+SUBDIR:=$(SUBDIR_before_$(SUBDIR))
+

+ 1 - 0
regression/splineregression/libdepend.inc

@@ -0,0 +1 @@
+$(call PKG_DEPEND_INT,vislearning/regression/regressionbase)