testNPRegression.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /**
  2. * @file testNPRegression.cpp
  3. * @brief test of non-parametric regression
  4. * @author Frank Prüfer
  5. * @date 08/29/2013
  6. */
#ifdef NICE_USELIB_OPENMP
#include <omp.h>
#endif

#include <assert.h>
#include <stdlib.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "core/basics/Config.h"
#include "core/vector/VectorT.h"
#include "core/vector/VVector.h"
#include "vislearning/baselib/ICETools.h"
#include "vislearning/regression/npregression/RegKNN.h"
  23. using namespace OBJREC;
  24. using namespace NICE;
  25. using namespace std;
  26. void csvline_populate ( vector<string> &record,
  27. const string& line,
  28. char delimiter )
  29. {
  30. int linepos=0;
  31. int inquotes=false;
  32. char c;
  33. int linemax=line.length();
  34. string curstring;
  35. record.clear();
  36. while(line[linepos]!=0 && linepos < linemax)
  37. {
  38. c = line[linepos];
  39. if (!inquotes && curstring.length()==0 && c=='"')
  40. {
  41. //beginquotechar
  42. inquotes=true;
  43. }
  44. else if (inquotes && c=='"')
  45. {
  46. //quotechar
  47. if ( (linepos+1 <linemax) && (line[linepos+1]=='"') )
  48. {
  49. //encountered 2 double quotes in a row (resolves to 1 double quote)
  50. curstring.push_back(c);
  51. linepos++;
  52. }
  53. else
  54. {
  55. //endquotechar
  56. inquotes=false;
  57. }
  58. }
  59. else if (!inquotes && c==delimiter)
  60. {
  61. //end of field
  62. record.push_back( curstring );
  63. curstring="";
  64. }
  65. else if (!inquotes && (c=='\r' || c=='\n') )
  66. {
  67. record.push_back( curstring );
  68. return;
  69. }
  70. else
  71. {
  72. curstring.push_back(c);
  73. }
  74. linepos++;
  75. }
  76. record.push_back( curstring );
  77. }
  78. void loadData( NICE::VVector &Data,
  79. NICE::Vector &y,
  80. const string &path,
  81. const string &xdat,
  82. const string &ydat )
  83. {
  84. vector<string> row;
  85. string line;
  86. cerr<<"Preloading Data...";
  87. ifstream in( (path+xdat).c_str() );
  88. if ( in.fail() )
  89. {
  90. cout << "File not found" <<endl;
  91. exit(EXIT_FAILURE);
  92. }
  93. int numData = 0;
  94. while ( getline(in, line) && in.good() )
  95. {
  96. csvline_populate(row, line, ',');
  97. vector<double> vec;
  98. for (int i = 0; i < (int)row.size(); i++)
  99. {
  100. double dval = 0.0;
  101. dval = atof(row[i].data() );
  102. vec.push_back(dval);
  103. }
  104. NICE::Vector nvec(vec);
  105. Data.push_back(nvec);
  106. numData++;
  107. }
  108. in.close();
  109. cerr<<"Finished."<<endl<<"Starting to get preloaded Labels...";
  110. in.open( (path+ydat).c_str() );
  111. if ( in.fail() )
  112. {
  113. cout << "File not found! Setting default value 0.0..." <<endl;
  114. y.resize(numData);
  115. y.set(0.0);
  116. }
  117. else
  118. {
  119. y.resize(numData);
  120. int count = 0;
  121. while(getline(in, line) && in.good() )
  122. {
  123. csvline_populate(row, line, ',');
  124. for ( int i = 0; i < (int)row.size(); i++ )
  125. {
  126. double dval = 0.0;
  127. dval = atof(row[i].data() );
  128. y.set(count,dval);
  129. count++;
  130. }
  131. }
  132. in.close();
  133. }
  134. cerr<<"Finished."<<endl;
  135. }
  136. void testFrame ( Config conf,
  137. NICE::VVector &xdata,
  138. NICE::Vector &y )
  139. {
  140. cerr<<"\nStarting test framework..."<<endl;
  141. /*------------Initialize Variables-----------*/
  142. ofstream storeEvalData;
  143. double trainRatio = conf.gD( "debug", "training_ratio", .9 );
  144. int trainingSize = (int)(trainRatio*xdata.size());
  145. int testingSize = xdata.size() - trainingSize;
  146. vector<int> indices;
  147. for ( int i = 0; i < (int)xdata.size(); i++ )
  148. indices.push_back(i);
  149. int nfolds = conf.gI( "debug", "nfolds", 10 );
  150. Vector mef_v ( nfolds );
  151. Vector corr_v ( nfolds );
  152. Vector resub_v ( nfolds );
  153. Vector diff_v ( nfolds );
  154. bool saveConfig = conf.gB( "debug", "save_config", false );
  155. /*------------Store Configuration------------*/
  156. string filename = conf.gS( "debug", "filename" );
  157. if ( saveConfig )
  158. {
  159. cout << "Configuration will be stored in: " << filename << "_config" << endl;
  160. storeEvalData.open ( (filename+"_config").c_str() );
  161. storeEvalData.close();
  162. } else
  163. {
  164. cout << "Configuration will not be stored." << endl;
  165. }
  166. /*------------Setting up NPRegression--------------*/
  167. for ( int k = 0; k < nfolds; k++)
  168. {
  169. string fold;
  170. ostringstream convert;
  171. convert << k;
  172. fold = convert.str();
  173. cout << "\nFOLD " << k << ":\n======" << endl;
  174. cerr << "Initializing NPRegression...";
  175. RegKNN *knn = new RegKNN (&conf, NULL);
  176. cerr << "Finished." << endl;
  177. cerr << "Teaching the NPRegression algorithm...";
  178. NICE::VVector trainData, testData;
  179. NICE::Vector trainVals ( trainingSize );
  180. NICE::Vector testVals ( testingSize );
  181. random_shuffle( indices.begin(), indices.end() );
  182. for ( int i = 0; i < trainingSize; i++ )
  183. {
  184. trainData.push_back ( xdata[ indices[i] ] );
  185. trainVals.set( i, y[ indices[i] ] );
  186. }
  187. for ( int j = 0; j < testingSize; j++ )
  188. {
  189. testData.push_back ( xdata[ indices[j+trainingSize] ] );
  190. testVals.set( j, y[ indices[j+trainingSize] ] );
  191. }
  192. knn->teach ( trainData, trainVals );
  193. cerr << "Finished." << endl;
  194. /*-------------Testing RDF-GP--------------*/
  195. cerr << "\nGetting prediction values for all data points...";
  196. NICE::Vector predictionValues( testingSize );
  197. predictionValues.set ( 0.0 );
  198. #pragma omp parallel for
  199. for ( int j = 0; j < testingSize; j++ )
  200. {
  201. predictionValues[j] = knn->predict( testData[j] );
  202. }
  203. cerr << "Finished." << endl;
  204. /*---------------Evaluation----------------*/
  205. NICE::Vector diff = testVals - predictionValues;
  206. double mod_var = diff.StdDev()*diff.StdDev();
  207. double tar_var = testVals.StdDev()*testVals.StdDev();
  208. mef_v.set( k, (1-mod_var/tar_var) );
  209. NICE::Vector meanv( predictionValues.size() );
  210. meanv.set( diff.Mean() );
  211. NICE::Vector lhs = diff - meanv;
  212. meanv.set( testVals.Mean() );
  213. NICE::Vector rhs = testVals - meanv;
  214. lhs *= rhs;
  215. double corr = lhs.Mean() / sqrt( diff.StdDev()*diff.StdDev()*testVals.StdDev()*testVals.StdDev() );
  216. corr_v.set( k, corr );
  217. diff *= diff;
  218. diff_v.set( k, diff.Mean());
  219. resub_v.set( k, (diff.Mean() / tar_var) );
  220. }
  221. /*------------------Output-------------------*/
  222. cout << "\nSimple Cross Validation Stats:\n==============================" << endl;
  223. cout << " Modelling Efficiency: " << mef_v.Mean() << endl;
  224. cout << " Correlation: " << corr_v.Mean() << endl;
  225. cout << " Mean Square Error: " << diff_v.Mean() << endl;
  226. cout << " Standardized MSE: " << resub_v.Mean() << endl;
  227. }
  228. int main (int argc, char **argv) {
  229. Config conf ( argc, argv ); //get config from user input
  230. string path = conf.gS( "debug", "path", "." );
  231. string dataset = conf.gS( "debug", "dataset", "flux" );
  232. NICE::VVector xdata;
  233. NICE::Vector y;
  234. loadData(xdata, y, path, (dataset+"_x.csv"), (dataset+"_y.csv") ); //load all data
  235. testFrame( conf, xdata, y );
  236. return 0;
  237. }