// =============================================================================
// Reviewer documentation for this translation unit
// =============================================================================
// Purpose: command-line test program that loads a CSV data set and runs a
// repeated random train/test split evaluation of LinRegression, then prints
// averaged quality statistics.
//
// NOTE(review): the text below has lost its line breaks and, in several
// places, everything that stood between a '<' and a later '>' (header names of
// the nine bare "#include" directives, template arguments such as the element
// type of "vector", and whole runs of code). Because the line breaks are gone,
// each "//" comment now also swallows the code that follows it on the same
// physical line. Restore the file from version control before building; the
// notes below describe only what is still visible.
//
// csvline_populate(record, line, delimiter)
//   Clears `record`, then walks `line` character by character while the
//   current character is not 0 and the position is below line.length().
//   A '"' seen outside quotes while the current field is still empty switches
//   `inquotes` on. A '"' seen inside quotes starts a check involving
//   `linepos+1`; the rest of that branch and of the function is missing.
//   NOTE(review): how `delimiter` is used and how fields are appended to
//   `record` cannot be confirmed from the visible text.
//
// loadData (signature missing; main calls it as
//           loadData(xdata, y, path, dataset+"_x.csv", dataset+"_y.csv"))
//   Prints "Preloading Data..." to cerr and opens the file named `path+xdat`.
//   If the stream reports failure it prints "File not found" to cout; what
//   happens next is missing. In the visible fragment every entry of `row` is
//   converted with atof(), collected in `vec`, wrapped in a NICE::Vector and
//   appended to `Data`, and `numData` is incremented; afterwards the stream is
//   closed and "Finished." is printed to cerr.
//   NOTE(review): the header of the enclosing read loop, the call that fills
//   `row`, and all code that fills `y` are not visible.
//   NOTE(review): `path` and `xdat` are concatenated without a separator, so
//   main's defaults ("." and "flux_x.csv") yield ".flux_x.csv" - confirm that
//   callers are expected to pass a path ending in '/'.
//
// testFrame (signature and first statements missing; main calls it as
//            testFrame(conf, xdata, y))
//   Visible setup: fills `indices` with 0 .. xdata.size()-1; reads
//   debug/nfolds (default 10), debug/save_config (default false) and
//   debug/filename from `conf`; allocates one result vector of length nfolds
//   per statistic. When save_config is true, the file filename+"_config" is
//   opened and closed again straight away - the visible code writes nothing
//   into it.
//   Per fold k: creates a LinRegression with `new`, shuffles `indices` with
//   random_shuffle, uses the first `trainingSize` shuffled indices as training
//   set and the following `testingSize` indices as test set, calls teach() on
//   the training data and predict() on every test sample.
//   Statistics per fold, with diff = testVals - predictionValues:
//     mef_v[k]   = 1 - StdDev(diff)^2 / StdDev(testVals)^2
//     corr_v[k]  = Mean((diff - Mean(diff)) * (testVals - Mean(testVals)))
//                  / sqrt(StdDev(diff)^2 * StdDev(testVals)^2)
//                  (the product is written as `lhs *= rhs`, presumably
//                  element-wise - confirm against NICE::Vector)
//     diff_v[k]  = Mean(diff * diff)
//     resub_v[k] = Mean(diff * diff) / StdDev(testVals)^2
//   After the loop the mean of each statistic over all folds is printed to
//   cout under the heading "Simple Cross Validation Stats".
//   NOTE(review): the definitions of `storeEvalData`, `trainingSize` and
//   `testingSize` are not visible.
//   NOTE(review): no `delete linReg` is visible, so one LinRegression object
//   appears to be leaked per fold.
//   NOTE(review): std::random_shuffle was removed in C++17; std::shuffle with
//   an explicit engine is the replacement.
//   NOTE(review): `tar_var` is used as a divisor without a zero check.
//   NOTE(review): the value printed as "Correlation" is computed between the
//   residual `diff` and the targets, not between predictions and targets -
//   confirm that this is intended.
//   NOTE(review): the section banners "Setting up PreRDF" and "Testing RDF-GP"
//   do not match the LinRegression code they label; they look carried over
//   from another test program.
//
// main(argc, argv)
//   Builds a Config from the command line, reads debug/path (default ".") and
//   debug/dataset (default "flux"), loads <dataset>_x.csv and <dataset>_y.csv
//   through loadData, runs testFrame on the result and returns 0.
// =============================================================================
/** * @file testLinRegression.cpp * @brief test of linear regression * @author Frank Prüfer * @date 08/13/2013 */ #include #include #include #include #include #include #include #include #include "core/basics/Config.h" #include "core/vector/VectorT.h" #include "core/vector/VVector.h" #include "vislearning/baselib/ICETools.h" #include "vislearning/regression/linregression/LinRegression.h" using namespace OBJREC; using namespace NICE; using namespace std; void csvline_populate ( vector &record, const string& line, char delimiter ) { int linepos=0; int inquotes=false; char c; int linemax=line.length(); string curstring; record.clear(); while(line[linepos]!=0 && linepos < linemax) { c = line[linepos]; if (!inquotes && curstring.length()==0 && c=='"') { //beginquotechar inquotes=true; } else if (inquotes && c=='"') { //quotechar if ( (linepos+1 row; string line; cerr<<"Preloading Data..."; ifstream in( (path+xdat).c_str() ); if ( in.fail() ) { cout << "File not found" < vec; for (int i = 0; i < (int)row.size(); i++) { double dval = 0.0; dval = atof(row[i].data() ); vec.push_back(dval); } NICE::Vector nvec(vec); Data.push_back(nvec); numData++; } in.close(); cerr<<"Finished."< indices; for ( int i = 0; i < (int)xdata.size(); i++ ) indices.push_back(i); int nfolds = conf.gI( "debug", "nfolds", 10 ); Vector mef_v ( nfolds ); Vector corr_v ( nfolds ); Vector resub_v ( nfolds ); Vector diff_v ( nfolds ); bool saveConfig = conf.gB( "debug", "save_config", false ); /*------------Store Configuration------------*/ string filename = conf.gS( "debug", "filename" ); if ( saveConfig ) { cout << "Configuration will be stored in: " << filename << "_config" << endl; storeEvalData.open ( (filename+"_config").c_str() ); storeEvalData.close(); } else { cout << "Configuration will not be stored." 
<< endl; } /*------------Setting up PreRDF--------------*/ for ( int k = 0; k < nfolds; k++) { string fold; ostringstream convert; convert << k; fold = convert.str(); cout << "\nFOLD " << k << ":\n======" << endl; cerr << "Initializing LinRegression..."; LinRegression *linReg = new LinRegression (); cerr << "Finished." << endl; cerr << "Teaching the LinRegression algorithm..."; NICE::VVector trainData, testData; NICE::Vector trainVals ( trainingSize ); NICE::Vector testVals ( testingSize ); random_shuffle( indices.begin(), indices.end() ); for ( int i = 0; i < trainingSize; i++ ) { trainData.push_back ( xdata[ indices[i] ] ); trainVals.set( i, y[ indices[i] ] ); } for ( int j = 0; j < testingSize; j++ ) { testData.push_back ( xdata[ indices[j+trainingSize] ] ); testVals.set( j, y[ indices[j+trainingSize] ] ); } linReg->teach ( trainData, trainVals ); cerr << "Finished." << endl; /*-------------Testing RDF-GP--------------*/ cerr << "\nGetting prediction values for all data points..."; NICE::Vector predictionValues( testingSize ); predictionValues.set ( 0.0 ); for ( int j = 0; j < testingSize; j++ ) { predictionValues[j] = linReg->predict( testData[j] ); } cerr << "Finished." 
<< endl; /*---------------Evaluation----------------*/ NICE::Vector diff = testVals - predictionValues; double mod_var = diff.StdDev()*diff.StdDev(); double tar_var = testVals.StdDev()*testVals.StdDev(); mef_v.set( k, (1-mod_var/tar_var) ); NICE::Vector meanv( predictionValues.size() ); meanv.set( diff.Mean() ); NICE::Vector lhs = diff - meanv; meanv.set( testVals.Mean() ); NICE::Vector rhs = testVals - meanv; lhs *= rhs; double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() ); corr_v.set( k, corr ); diff *= diff; diff_v.set( k, diff.Mean()); resub_v.set( k, (diff.Mean() / tar_var) ); } /*------------------Output-------------------*/ cout << "\nSimple Cross Validation Stats:\n==============================" << endl; cout << " Modelling Efficiency: " << mef_v.Mean() << endl; cout << " Correlation: " << corr_v.Mean() << endl; cout << " Mean Square Error: " << diff_v.Mean() << endl; cout << " Standardized MSE: " << resub_v.Mean() << endl; } int main (int argc, char **argv) { Config conf ( argc, argv ); //get config from user input string path = conf.gS( "debug", "path", "." ); string dataset = conf.gS( "debug", "dataset", "flux" ); NICE::VVector xdata; NICE::Vector y; loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data testFrame( conf, xdata, y ); return 0; }