/** * @file testRegressionGP.cpp * @brief Test program for Gaussian process (GP) regression: calls loadData on two CSV file names, then for each of nfolds rounds reshuffles the sample indices, trains on the first part, predicts the remaining part and finally prints the per-round statistics averaged over all rounds. * @note NOTE(review): this copy of the file looks damaged - the include targets and template arguments are missing and some statements are cut off mid-expression; restore it from the original source before building. * @author Sven Sickert * @date 07/11/2013 */ #include #include #include #include #include #include #include #include #include "core/basics/Config.h" #include "core/vector/VectorT.h" #include "core/vector/VVector.h" //#include "vislearning/baselib/ICETools.h" #include "vislearning/regression/gpregression/RegGaussianProcess.h" #include "vislearning/math/kernels/KernelExp.h" using namespace OBJREC; using namespace NICE; using namespace std; void csvline_populate ( vector &record, const string& line, char delimiter ) { int linepos=0; int inquotes=false; char c; int linemax=line.length(); string curstring; record.clear(); while(line[linepos]!=0 && linepos < linemax) { c = line[linepos]; if (!inquotes && curstring.length()==0 && c=='"') { //beginquotechar inquotes=true; } else if (inquotes && c=='"') { //quotechar if ( (linepos+1 row; string line; cerr<<"Preloading Data..."; ifstream in( (path+xdat).c_str() ); if ( in.fail() ) { cout << "File not found" < indices; for ( int i = 0; i < (int)xdata.size(); i++ ) indices.push_back(i); int nfolds = confRDF.gI( "debug", "nfolds", 10 ); Vector mef_v ( nfolds ); Vector corr_v ( nfolds ); Vector resub_v ( nfolds ); Vector diff_v ( nfolds ); KernelExp *kernel_template = new KernelExp ( confRDF.gD("Kernel", "log_rbf_gamma", -2.5), 0.0 ); /*--------------Setting up GP----------------*/ for ( int k = 0; k < nfolds; k++) { string fold; ostringstream convert; convert << k; fold = convert.str(); cout << "\nFOLD " << k << ":\n======" << endl; cerr << "Initializing GP regression..."; Kernel *kernel_function = NULL; kernel_function = new KernelExp ( *(kernel_template) ); RegGaussianProcess *regGP = new RegGaussianProcess( &confRDF, kernel_function, "GPRegression" ); NICE::VVector trainData, testData; NICE::Vector trainVals ( trainingSize ); NICE::Vector testVals ( testingSize ); random_shuffle( indices.begin(), indices.end() ); for ( int i = 0; i < trainingSize; i++ ) { 
trainData.push_back ( xdata[ indices[i] ] ); trainVals.set( i, y[ indices[i] ] ); } /* the testingSize shuffled indices that follow the training portion form the test set */ for ( int j = 0; j < testingSize; j++ ) { testData.push_back ( xdata[ indices[j+trainingSize] ] ); testVals.set( j, y[ indices[j+trainingSize] ] ); } cerr << "Finished." << endl; cerr << "Teaching the GP regression..."; regGP->teach( trainData, trainVals ); cerr << "Finished." << endl; /*---------------Testing GP----------------*/ cerr << "\nGetting prediction values for all data points..."; NICE::Vector predictionValues( testingSize ); predictionValues.set ( 0.0 ); for ( int j = 0; j < testingSize; j++ ) { predictionValues[j] = regGP->predict( testData[j] ); } cerr << "Finished." << endl; /*---------------Evaluation----------------*/ /* diff holds the residuals: test target minus prediction */ NICE::Vector diff = testVals - predictionValues; /* dump each target/prediction pair to stderr */ for (int j = 0; j < testingSize; j++) cerr << testVals[j] << " " << predictionValues[j] << endl; /* squared standard deviations of the residuals and of the test targets; NOTE(review): tar_var is used as a divisor below and is zero when all test targets are equal - confirm this cannot happen */ double mod_var = diff.StdDev()*diff.StdDev(); double tar_var = testVals.StdDev()*testVals.StdDev(); /* per-fold value printed below as "Modelling Efficiency": 1 - mod_var/tar_var */ mef_v.set( k, (1-mod_var/tar_var) ); /* per-fold value printed below as "Correlation": mean of (diff - mean(diff)) times (testVals - mean(testVals)), divided by the product of both standard deviations; assumes operator*= on NICE::Vector is element-wise - TODO confirm. NOTE(review): this pairs the residuals, not the predictions, with the targets - confirm that is intended */ NICE::Vector meanv( predictionValues.size() ); meanv.set( diff.Mean() ); NICE::Vector lhs = diff - meanv; meanv.set( testVals.Mean() ); NICE::Vector rhs = testVals - meanv; lhs *= rhs; double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() ); corr_v.set( k, corr ); /* diff is overwritten with diff*diff (presumably element-wise squares - TODO confirm); its mean is stored as the value printed below as "Mean Square Error", and that mean divided by tar_var as "Standardized MSE" */ diff *= diff; diff_v.set( k, diff.Mean()); resub_v.set( k, (diff.Mean() / tar_var) ); /* NOTE(review): no delete for regGP or kernel_function is visible in this loop - possible per-fold leak unless ownership is taken elsewhere; verify */ } /*------------------Output-------------------*/ /* print each statistic averaged over all nfolds rounds */ cout << "\nSimple Cross Validation Stats:\n==============================" << endl; cout << " Modelling Efficiency: " << mef_v.Mean() << endl; cout << " Correlation: " << corr_v.Mean() << endl; cout << " Mean Square Error: " << diff_v.Mean() << endl; cout << " Standardized MSE: " << resub_v.Mean() << endl; } /* Program entry point: builds the Config from config.conf inside a hard-coded data directory, loads the data via loadData and runs testFrame; argc and argv are not used. */ int main (int argc, char **argv) { string path = "/home/sickert/data/cosre-MPI/regression-fluxcom/_DATA/"; Config confRDF(path+"config.conf"); //Config for RDF NICE::VVector xdata; NICE::Vector y; /*----------Load dataset---------*/ 
loadData(xdata, y, path, "flux_x.csv", "flux_y.csv"); //load all data testFrame( confRDF, xdata, y ); return 0; }