|
@@ -0,0 +1,286 @@
|
|
|
+/**
|
|
|
+* @file testRANSACRegression.cpp
|
|
|
+* @brief test of RANSAC regression
|
|
|
+* @author Frank Prüfer
|
|
|
+* @date 09/11/2013
|
|
|
+
|
|
|
+*/
|
|
|
+
|
|
|
+#include <sstream>
|
|
|
+#include <iostream>
|
|
|
+#include <fstream>
|
|
|
+#include <sstream>
|
|
|
+#include <string>
|
|
|
+#include <vector>
|
|
|
+#include <stdlib.h>
|
|
|
+#include <assert.h>
|
|
|
+
|
|
|
+#include "core/basics/Config.h"
|
|
|
+#include "core/vector/VectorT.h"
|
|
|
+#include "core/vector/VVector.h"
|
|
|
+
|
|
|
+#include "vislearning/baselib/ICETools.h"
|
|
|
+
|
|
|
+#include "vislearning/regression/linregression/RANSACReg.h"
|
|
|
+
|
|
|
+using namespace OBJREC;
|
|
|
+using namespace NICE;
|
|
|
+using namespace std;
|
|
|
+
|
|
|
/**
 * @brief Split one line of CSV text into its fields.
 *
 * A field may be wrapped in double quotes. Inside a quoted field the delimiter
 * and line-break characters are taken literally, and two consecutive double
 * quotes stand for a single one. A double quote only opens a quoted field when
 * it is the first character of that field. Parsing stops at the first unquoted
 * '\r' or '\n', at the first NUL character, or at the end of the line.
 *
 * @param record    output; cleared first, then filled with one entry per field
 *                  (always at least one entry, possibly the empty string)
 * @param line      the text to split
 * @param delimiter the field separator
 */
void csvline_populate ( std::vector<std::string> &record,
                        const std::string &line,
                        char delimiter )
{
  typedef std::string::size_type size_type;

  const size_type linemax = line.length();
  size_type linepos = 0;
  bool inquotes = false;
  std::string curstring;
  record.clear();

  // Bounds check first; the NUL test keeps the historical "stop at NUL" behaviour.
  while ( linepos < linemax && line[linepos] != 0 )
  {
    const char c = line[linepos];

    if ( !inquotes && curstring.empty() && c == '"' )
    {
      // opening quote of a quoted field
      inquotes = true;
    }
    else if ( inquotes && c == '"' )
    {
      if ( (linepos + 1 < linemax) && (line[linepos + 1] == '"') )
      {
        // two double quotes in a row resolve to one literal double quote
        curstring.push_back ( c );
        linepos++;
      }
      else
      {
        // closing quote
        inquotes = false;
      }
    }
    else if ( !inquotes && c == delimiter )
    {
      // end of field
      record.push_back ( curstring );
      curstring.clear();
    }
    else if ( !inquotes && (c == '\r' || c == '\n') )
    {
      // end of record: emit the pending field and ignore the rest of the line
      record.push_back ( curstring );
      return;
    }
    else
    {
      curstring.push_back ( c );
    }
    linepos++;
  }

  // last field of a line that had no terminating line break
  record.push_back ( curstring );
}
|
|
|
+
|
|
|
+void loadData( NICE::VVector &Data,
|
|
|
+ NICE::Vector &y,
|
|
|
+ const string &path,
|
|
|
+ const string &xdat,
|
|
|
+ const string &ydat )
|
|
|
+{
|
|
|
+
|
|
|
+ vector<string> row;
|
|
|
+ string line;
|
|
|
+
|
|
|
+ cerr<<"Preloading Data...";
|
|
|
+ ifstream in( (path+xdat).c_str() );
|
|
|
+ if ( in.fail() )
|
|
|
+ {
|
|
|
+ cout << "File not found" <<endl;
|
|
|
+ exit(EXIT_FAILURE);
|
|
|
+ }
|
|
|
+
|
|
|
+ int numData = 0;
|
|
|
+
|
|
|
+ while ( getline(in, line) && in.good() )
|
|
|
+ {
|
|
|
+ csvline_populate(row, line, ',');
|
|
|
+ vector<double> vec;
|
|
|
+ for (int i = 0; i < (int)row.size(); i++)
|
|
|
+ {
|
|
|
+ double dval = 0.0;
|
|
|
+ dval = atof(row[i].data() );
|
|
|
+ vec.push_back(dval);
|
|
|
+ }
|
|
|
+ NICE::Vector nvec(vec);
|
|
|
+ Data.push_back(nvec);
|
|
|
+ numData++;
|
|
|
+ }
|
|
|
+ in.close();
|
|
|
+
|
|
|
+ cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
|
|
|
+
|
|
|
+ in.open( (path+ydat).c_str() );
|
|
|
+ if ( in.fail() )
|
|
|
+ {
|
|
|
+ cout << "File not found! Setting default value 0.0..." <<endl;
|
|
|
+ y.resize(numData);
|
|
|
+ y.set(0.0);
|
|
|
+ }
|
|
|
+ else
|
|
|
+ {
|
|
|
+ y.resize(numData);
|
|
|
+ int count = 0;
|
|
|
+ while(getline(in, line) && in.good() )
|
|
|
+ {
|
|
|
+ csvline_populate(row, line, ',');
|
|
|
+ for ( int i = 0; i < (int)row.size(); i++ )
|
|
|
+ {
|
|
|
+ double dval = 0.0;
|
|
|
+ dval = atof(row[i].data() );
|
|
|
+ y.set(count,dval);
|
|
|
+ count++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ in.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ cerr<<"Finished."<<endl;
|
|
|
+}
|
|
|
+
|
|
|
+void testFrame ( Config conf,
|
|
|
+ NICE::VVector &xdata,
|
|
|
+ NICE::Vector &y )
|
|
|
+{
|
|
|
+ cerr<<"\nStarting test framework..."<<endl;
|
|
|
+
|
|
|
+ /*------------Initialize Variables-----------*/
|
|
|
+ ofstream storeEvalData;
|
|
|
+ double trainRatio = conf.gD( "debug", "training_ratio", .9 );
|
|
|
+
|
|
|
+ int trainingSize = (int)(trainRatio*xdata.size());
|
|
|
+ int testingSize = xdata.size() - trainingSize;
|
|
|
+
|
|
|
+ vector<int> indices;
|
|
|
+ for ( int i = 0; i < (int)xdata.size(); i++ )
|
|
|
+ indices.push_back(i);
|
|
|
+
|
|
|
+ int nfolds = conf.gI( "debug", "nfolds", 10 );
|
|
|
+ Vector mef_v ( nfolds );
|
|
|
+ Vector corr_v ( nfolds );
|
|
|
+ Vector resub_v ( nfolds );
|
|
|
+ Vector diff_v ( nfolds );
|
|
|
+
|
|
|
+ bool saveConfig = conf.gB( "debug", "save_config", false );
|
|
|
+
|
|
|
+ /*------------Store Configuration------------*/
|
|
|
+ string filename = conf.gS( "debug", "filename" );
|
|
|
+
|
|
|
+ if ( saveConfig )
|
|
|
+ {
|
|
|
+ cout << "Configuration will be stored in: " << filename << "_config" << endl;
|
|
|
+
|
|
|
+ storeEvalData.open ( (filename+"_config").c_str() );
|
|
|
+
|
|
|
+ storeEvalData.close();
|
|
|
+ } else
|
|
|
+ {
|
|
|
+ cout << "Configuration will not be stored." << endl;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*------------Setting up PreRDF--------------*/
|
|
|
+ for ( int k = 0; k < nfolds; k++)
|
|
|
+ {
|
|
|
+ string fold;
|
|
|
+ ostringstream convert;
|
|
|
+ convert << k;
|
|
|
+ fold = convert.str();
|
|
|
+
|
|
|
+ cout << "\nFOLD " << k << ":\n======" << endl;
|
|
|
+
|
|
|
+
|
|
|
+ cerr << "Initializing LinRegression...";
|
|
|
+ RANSACReg *RReg = new RANSACReg ( &conf );
|
|
|
+ cerr << "Finished." << endl;
|
|
|
+
|
|
|
+ cerr << "Teaching the LinRegression algorithm...";
|
|
|
+ NICE::VVector trainData, testData;
|
|
|
+ NICE::Vector trainVals ( trainingSize );
|
|
|
+ NICE::Vector testVals ( testingSize );
|
|
|
+ random_shuffle( indices.begin(), indices.end() );
|
|
|
+ for ( int i = 0; i < trainingSize; i++ )
|
|
|
+ {
|
|
|
+ trainData.push_back ( xdata[ indices[i] ] );
|
|
|
+ trainVals.set( i, y[ indices[i] ] );
|
|
|
+ }
|
|
|
+ for ( int j = 0; j < testingSize; j++ )
|
|
|
+ {
|
|
|
+ testData.push_back ( xdata[ indices[j+trainingSize] ] );
|
|
|
+ testVals.set( j, y[ indices[j+trainingSize] ] );
|
|
|
+ }
|
|
|
+
|
|
|
+ RReg->teach ( trainData, trainVals );
|
|
|
+ cerr << "Finished." << endl;
|
|
|
+
|
|
|
+ /*-------------Testing RDF-GP--------------*/
|
|
|
+
|
|
|
+ cerr << "\nGetting prediction values for all data points...";
|
|
|
+ NICE::Vector predictionValues( testingSize );
|
|
|
+ predictionValues.set ( 0.0 );
|
|
|
+ for ( int j = 0; j < testingSize; j++ )
|
|
|
+ {
|
|
|
+ predictionValues[j] = RReg->predict( testData[j] );
|
|
|
+ }
|
|
|
+ cerr << "Finished." << endl;
|
|
|
+
|
|
|
+ /*---------------Evaluation----------------*/
|
|
|
+ NICE::Vector diff = testVals - predictionValues;
|
|
|
+
|
|
|
+ double mod_var = diff.StdDev()*diff.StdDev();
|
|
|
+ double tar_var = testVals.StdDev()*testVals.StdDev();
|
|
|
+ mef_v.set( k, (1-mod_var/tar_var) );
|
|
|
+
|
|
|
+ NICE::Vector meanv( predictionValues.size() );
|
|
|
+ meanv.set( diff.Mean() );
|
|
|
+ NICE::Vector lhs = diff - meanv;
|
|
|
+ meanv.set( testVals.Mean() );
|
|
|
+ NICE::Vector rhs = testVals - meanv;
|
|
|
+ lhs *= rhs;
|
|
|
+ double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
|
|
|
+ corr_v.set( k, corr );
|
|
|
+
|
|
|
+ diff *= diff;
|
|
|
+ diff_v.set( k, diff.Mean());
|
|
|
+ resub_v.set( k, (diff.Mean() / tar_var) );
|
|
|
+ }
|
|
|
+
|
|
|
+ /*------------------Output-------------------*/
|
|
|
+ cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
|
|
|
+ cout << " Modelling Efficiency: " << mef_v.Mean() << endl;
|
|
|
+ cout << " Correlation: " << corr_v.Mean() << endl;
|
|
|
+ cout << " Mean Square Error: " << diff_v.Mean() << endl;
|
|
|
+ cout << " Standardized MSE: " << resub_v.Mean() << endl;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+int main (int argc, char **argv) {
|
|
|
+
|
|
|
+ Config conf ( argc, argv ); //get config from user input
|
|
|
+
|
|
|
+ string path = conf.gS( "debug", "path", "." );
|
|
|
+ string dataset = conf.gS( "debug", "dataset", "flux" );
|
|
|
+
|
|
|
+ NICE::VVector xdata;
|
|
|
+ NICE::Vector y;
|
|
|
+
|
|
|
+ loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
|
|
|
+
|
|
|
+ testFrame( conf, xdata, y );
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|