# RunExperiment.py (8.3 KB)
  1. #! /usr/bin/python
  2. import scipy.io
  3. import numpy
  4. import time
  5. import socket
  6. import datetime
  7. import pickle
  8. ###
  9. import sys
  10. import os
  11. sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))
  12. ###
  13. import helperFunctions
  14. import methodSelection
  15. import datasetAcquisition
  16. ###
  17. if len(sys.argv) != 2:
  18. raise Exception('No config file given!')
  19. print ''
  20. print ' -- config -- '
  21. print ''
  22. defaultFname = os.path.join(os.path.dirname(sys.argv[1]),os.pardir,'setup.cfg')
  23. if not os.path.isfile(defaultFname):
  24. defaultFname = sys.argv[1]
  25. expSetup = helperFunctions.getConfig(sys.argv[1], 'experiment', 'extExpSetup', defaultFname, 'str', True)
  26. alMethod = helperFunctions.getConfig(sys.argv[1], 'activeLearning', 'method', None, 'str', True)
  27. identifier = helperFunctions.getConfig(sys.argv[1], 'experiment', 'identifier', alMethod, 'str', True)
  28. sigmaN = helperFunctions.getConfig(expSetup, 'activeLearning', 'sigmaN', None, 'float', True)
  29. kernel = helperFunctions.getConfig(expSetup, 'activeLearning', 'kernel', None, 'str', True)
  30. gamma = helperFunctions.getConfig(expSetup, 'activeLearning', 'gamma', None, 'float', True)
  31. numKernelCores = helperFunctions.getConfig(expSetup, 'activeLearning', 'numKernelCores', None, 'int', True)
  32. numRndInits = helperFunctions.getConfig(expSetup, 'experiment', 'numRndInits', None, 'int', True)
  33. indicesFileName = helperFunctions.getConfig(expSetup, 'experiment', 'indicesFileName', None, 'str', True)
  34. numSteps = helperFunctions.getConfig(expSetup, 'experiment', 'numSteps', 100, 'int', True)
  35. startInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'startInitIdx', 0, 'int', True)
  36. endInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'endInitIdx', numRndInits - 1, 'int', True)
  37. numTestSamples = helperFunctions.getConfig(expSetup, 'experiment', 'numTestSamples', None, 'int', True)
  38. continueExperiment = helperFunctions.getConfig(sys.argv[1], 'experiment', 'continueExperiment', False, 'bool', True)
  39. continueExperiment = helperFunctions.getConfig(expSetup, 'experiment', 'continueExperiment', continueExperiment, 'bool', True)
  40. writeAttemptsNmb = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsNmb', 5, 'int', True)
  41. writeAttemptsDelay = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsDelay', 30, 'int', True)
  42. if startInitIdx != endInitIdx:
  43. resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results.mat', 'str', True)
  44. else:
  45. resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results' + str(startInitIdx) + '.mat', 'str', True)
  46. print ''
  47. print 'host:', socket.gethostname()
  48. print 'pid:', os.getpid()
  49. print 'now:', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')
  50. print 'git:', helperFunctions.getGitHash()
  51. print ''
  52. sys.stdout.flush()
  53. if not os.path.isdir(os.path.dirname(os.path.abspath(resultsFileName))) or not os.path.exists(os.path.dirname(os.path.abspath(resultsFileName))):
  54. raise Exception('Results path deas not exist!')
  55. if os.getcwd() != os.path.dirname(os.path.abspath(resultsFileName)):
  56. print ''
  57. print '>>> current path != results path <<<'
  58. print 'current:', os.getcwd()
  59. print 'rerults:', os.path.dirname(os.path.abspath(resultsFileName))
  60. print ''
  61. ###
  62. print 'loading data ...'
  63. sys.stdout.flush()
  64. x,y = datasetAcquisition.readData(expSetup)
  65. ###
  66. if continueExperiment:
  67. print ''
  68. print 'loading previous results ...'
  69. sys.stdout.flush()
  70. loaded = scipy.io.loadmat(resultsFileName)
  71. queriedIdxs = loaded['queriedIdxs']
  72. timeNeeded = loaded['timeNeeded']
  73. errors_singleRMSEs = loaded['errors_singleRMSEs']
  74. errors_meanRMSEs = loaded['errors_meanRMSEs']
  75. startInitIdx = int(loaded['lastRndInitIdx'])
  76. print 'resuming with init {} / {}'.format(startInitIdx + 1,endInitIdx + 1)
  77. else:
  78. queriedIdxs = numpy.zeros((numRndInits,numSteps))
  79. timeNeeded = numpy.zeros((numRndInits,numSteps))
  80. errors_singleRMSEs = numpy.zeros((numRndInits,numSteps + 1,y.shape[1]))
  81. errors_meanRMSEs = numpy.zeros((numRndInits,numSteps + 1))
  82. ###
  83. pickleIn = open(indicesFileName)
  84. indices = pickle.load(pickleIn)
  85. pickleIn.close()
  86. ###
  87. timePast = 0
  88. totalRuns = (endInitIdx + 1)*numSteps - startInitIdx*numSteps
  89. queriesPast = 0
  90. t0 = time.time()
  91. ###
  92. for rndInitIdx in range(startInitIdx, endInitIdx + 1):
  93. print ''
  94. trainIdxs = indices['trainIdxs'][rndInitIdx]
  95. testIdxs = indices['testIdxs'][rndInitIdx]
  96. poolIdxs = numpy.delete(numpy.asarray(range(y.shape[0])), numpy.concatenate((trainIdxs,testIdxs)))
  97. xTest = x[testIdxs,:]
  98. yTest = y[testIdxs,:]
  99. xTrain = x[trainIdxs,:]
  100. yTrain = y[trainIdxs,:]
  101. xPool = x[poolIdxs,:]
  102. yPool = y[poolIdxs,:]
  103. orgIdxs = numpy.asmatrix(range(1,yPool.shape[0] + 1))
  104. regressor = methodSelection.selectActiveLearning(alMethod, sys.argv[1])
  105. regressor.train(xTrain, yTrain, sigmaN=sigmaN, gamma=gamma, kernel=kernel, numKernelCores=numKernelCores)
  106. print ''
  107. sys.stdout.flush()
  108. preds = regressor.predict(xTest)
  109. errors_singleRMSEs[rndInitIdx,0,:] = helperFunctions.getSingleRMSEs(yTest, preds)
  110. errors_meanRMSEs[rndInitIdx,0] = helperFunctions.getMeanRMSE(yTest, preds)
  111. print 'xTrain:', xTrain.shape, ', yTrain:', yTrain.shape, ', xTest:', xTest.shape, ', yTest:', yTest.shape, ', xPool:', xPool.shape, ', yPool:', yPool.shape
  112. print 'next rndInit:', rndInitIdx, ', initial meanRMSE:', errors_meanRMSEs[rndInitIdx,0]
  113. sys.stdout.flush()
  114. for step in range(numSteps):
  115. t1 = time.time()
  116. alScores = regressor.calcAlScores(xPool)
  117. if alScores.shape[0] != xPool.shape[0]:
  118. raise Exception('alScores.shape[0] != xPool.shape[0]')
  119. if alScores.shape[1] != 1:
  120. raise Exception('alScores.shape[1] != 1')
  121. if not numpy.all(numpy.isfinite(alScores)):
  122. raise Exception('not numpy.all(numpy.isfinite(alScores))')
  123. chosenIdx = numpy.argmax(alScores, axis=0).item(0)
  124. newX = xPool[chosenIdx,:]
  125. newY = yPool[chosenIdx,:]
  126. regressor.update(newX, newY)
  127. queriedIdxs[rndInitIdx,step] = orgIdxs[0,chosenIdx]
  128. preds = regressor.predict(xTest)
  129. errors_singleRMSEs[rndInitIdx,step+1,:] = helperFunctions.getSingleRMSEs(yTest, preds)
  130. errors_meanRMSEs[rndInitIdx,step+1] = helperFunctions.getMeanRMSE(yTest, preds)
  131. xPool = numpy.delete(xPool, (chosenIdx), axis=0)
  132. yPool = numpy.delete(yPool, (chosenIdx), axis=0)
  133. orgIdxs = numpy.delete(orgIdxs, (chosenIdx), axis=1)
  134. t2 = time.time()
  135. timeNeeded[rndInitIdx,step] = (t1 - t2)
  136. queriesPast = queriesPast + 1
  137. timeStats = helperFunctions.estTimeLeft(t0, t2, queriesPast, totalRuns)
  138. print queriesPast, '/', totalRuns, '- t past:', '%.3f'%(timeStats[0]/3600.0), 'h, ~t per pass:', '%.3f'%timeStats[1], 's, ~t left:', '%.3f'%(timeStats[2]/3600.0), 'h, ~t total:', '%.3f'%(timeStats[3]/3600.0), 'h'
  139. print 'chosenIdx:', chosenIdx, ', rndInit:', rndInitIdx, ', step:', step, ', meanRMSE:', '%.5f'%errors_meanRMSEs[rndInitIdx,step+1]
  140. sys.stdout.flush()
  141. writeAttempt = 0
  142. while True:
  143. try:
  144. scipy.io.savemat(resultsFileName, dict(queriedIdxs=queriedIdxs,
  145. name=identifier,
  146. identifier=identifier,
  147. timeNeeded=timeNeeded,
  148. lastRndInitIdx=rndInitIdx,
  149. errors_singleRMSEs=errors_singleRMSEs,
  150. errors_meanRMSEs=errors_meanRMSEs)
  151. break
  152. except:
  153. writeAttempt = writeAttempt + 1
  154. if writeAttempt >= writeAttemptsNmb:
  155. raise Exception('ERROR: Writing file {} failed {} times!'.format(resultsFileName, writeAttempt))
  156. print ''
  157. print 'WARNING: Writing file {} failed ({} / {}, retry after {} seconds)!'.format(resultsFileName, writeAttempt, writeAttemptsNmb, writeAttemptsDelay)
  158. sys.stdout.flush()
  159. time.sleep(writeAttemptsDelay)
  160. print 'done'
  161. print 'now', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')