#! /usr/bin/python import scipy.io import numpy import time import socket import datetime import pickle ### import sys import os sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir)) ### import helperFunctions import methodSelection import datasetAcquisition ### if len(sys.argv) != 2: raise Exception('No config file given!') print '' print ' -- config -- ' print '' defaultFname = os.path.join(os.path.dirname(sys.argv[1]),os.pardir,'setup.cfg') if not os.path.isfile(defaultFname): defaultFname = sys.argv[1] expSetup = helperFunctions.getConfig(sys.argv[1], 'experiment', 'extExpSetup', defaultFname, 'str', True) alMethod = helperFunctions.getConfig(sys.argv[1], 'activeLearning', 'method', None, 'str', True) identifier = helperFunctions.getConfig(sys.argv[1], 'experiment', 'identifier', alMethod, 'str', True) sigmaN = helperFunctions.getConfig(expSetup, 'activeLearning', 'sigmaN', None, 'float', True) kernel = helperFunctions.getConfig(expSetup, 'activeLearning', 'kernel', None, 'str', True) gamma = helperFunctions.getConfig(expSetup, 'activeLearning', 'gamma', None, 'float', True) numKernelCores = helperFunctions.getConfig(expSetup, 'activeLearning', 'numKernelCores', None, 'int', True) numRndInits = helperFunctions.getConfig(expSetup, 'experiment', 'numRndInits', None, 'int', True) indicesFileName = helperFunctions.getConfig(expSetup, 'experiment', 'indicesFileName', None, 'str', True) numSteps = helperFunctions.getConfig(expSetup, 'experiment', 'numSteps', 100, 'int', True) startInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'startInitIdx', 0, 'int', True) endInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'endInitIdx', numRndInits - 1, 'int', True) numTestSamples = helperFunctions.getConfig(expSetup, 'experiment', 'numTestSamples', None, 'int', True) continueExperiment = helperFunctions.getConfig(sys.argv[1], 'experiment', 'continueExperiment', False, 'bool', True) continueExperiment = helperFunctions.getConfig(expSetup, 'experiment', 'continueExperiment', continueExperiment, 'bool', True) writeAttemptsNmb = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsNmb', 5, 'int', True) writeAttemptsDelay = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsDelay', 30, 'int', True) if startInitIdx != endInitIdx: resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results.mat', 'str', True) else: resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results' + str(startInitIdx) + '.mat', 'str', True) print '' print 'host:', socket.gethostname() print 'pid:', os.getpid() print 'now:', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S') print 'git:', helperFunctions.getGitHash() print '' sys.stdout.flush() if not os.path.isdir(os.path.dirname(os.path.abspath(resultsFileName))) or not os.path.exists(os.path.dirname(os.path.abspath(resultsFileName))): raise Exception('Results path deas not exist!') if os.getcwd() != os.path.dirname(os.path.abspath(resultsFileName)): print '' print '>>> current path != results path <<<' print 'current:', os.getcwd() print 'rerults:', os.path.dirname(os.path.abspath(resultsFileName)) print '' ### print 'loading data ...' sys.stdout.flush() x,y = datasetAcquisition.readData(expSetup) ### if continueExperiment: print '' print 'loading previous results ...' sys.stdout.flush() loaded = scipy.io.loadmat(resultsFileName) queriedIdxs = loaded['queriedIdxs'] timeNeeded = loaded['timeNeeded'] errors_singleRMSEs = loaded['errors_singleRMSEs'] errors_meanRMSEs = loaded['errors_meanRMSEs'] startInitIdx = int(loaded['lastRndInitIdx']) print 'resuming with init {} / {}'.format(startInitIdx + 1,endInitIdx + 1) else: queriedIdxs = numpy.zeros((numRndInits,numSteps)) timeNeeded = numpy.zeros((numRndInits,numSteps)) errors_singleRMSEs = numpy.zeros((numRndInits,numSteps + 1,y.shape[1])) errors_meanRMSEs = numpy.zeros((numRndInits,numSteps + 1)) ### pickleIn = open(indicesFileName) indices = pickle.load(pickleIn) pickleIn.close() ### timePast = 0 totalRuns = (endInitIdx + 1)*numSteps - startInitIdx*numSteps queriesPast = 0 t0 = time.time() ### for rndInitIdx in range(startInitIdx, endInitIdx + 1): print '' trainIdxs = indices['trainIdxs'][rndInitIdx] testIdxs = indices['testIdxs'][rndInitIdx] poolIdxs = numpy.delete(numpy.asarray(range(y.shape[0])), numpy.concatenate((trainIdxs,testIdxs))) xTest = x[testIdxs,:] yTest = y[testIdxs,:] xTrain = x[trainIdxs,:] yTrain = y[trainIdxs,:] xPool = x[poolIdxs,:] yPool = y[poolIdxs,:] orgIdxs = numpy.asmatrix(range(1,yPool.shape[0] + 1)) regressor = methodSelection.selectActiveLearning(alMethod, sys.argv[1]) regressor.train(xTrain, yTrain, sigmaN=sigmaN, gamma=gamma, kernel=kernel, numKernelCores=numKernelCores) print '' sys.stdout.flush() preds = regressor.predict(xTest) errors_singleRMSEs[rndInitIdx,0,:] = helperFunctions.getSingleRMSEs(yTest, preds) errors_meanRMSEs[rndInitIdx,0] = helperFunctions.getMeanRMSE(yTest, preds) print 'xTrain:', xTrain.shape, ', yTrain:', yTrain.shape, ', xTest:', xTest.shape, ', yTest:', yTest.shape, ', xPool:', xPool.shape, ', yPool:', yPool.shape print 'next rndInit:', rndInitIdx, ', initial meanRMSE:', errors_meanRMSEs[rndInitIdx,0] sys.stdout.flush() for step in range(numSteps): t1 = time.time() alScores = regressor.calcAlScores(xPool) if alScores.shape[0] != xPool.shape[0]: raise Exception('alScores.shape[0] != xPool.shape[0]') if alScores.shape[1] != 1: raise Exception('alScores.shape[1] != 1') if not numpy.all(numpy.isfinite(alScores)): raise Exception('not numpy.all(numpy.isfinite(alScores))') chosenIdx = numpy.argmax(alScores, axis=0).item(0) newX = xPool[chosenIdx,:] newY = yPool[chosenIdx,:] regressor.update(newX, newY) queriedIdxs[rndInitIdx,step] = orgIdxs[0,chosenIdx] preds = regressor.predict(xTest) errors_singleRMSEs[rndInitIdx,step+1,:] = helperFunctions.getSingleRMSEs(yTest, preds) errors_meanRMSEs[rndInitIdx,step+1] = helperFunctions.getMeanRMSE(yTest, preds) xPool = numpy.delete(xPool, (chosenIdx), axis=0) yPool = numpy.delete(yPool, (chosenIdx), axis=0) orgIdxs = numpy.delete(orgIdxs, (chosenIdx), axis=1) t2 = time.time() timeNeeded[rndInitIdx,step] = (t1 - t2) queriesPast = queriesPast + 1 timeStats = helperFunctions.estTimeLeft(t0, t2, queriesPast, totalRuns) print queriesPast, '/', totalRuns, '- t past:', '%.3f'%(timeStats[0]/3600.0), 'h, ~t per pass:', '%.3f'%timeStats[1], 's, ~t left:', '%.3f'%(timeStats[2]/3600.0), 'h, ~t total:', '%.3f'%(timeStats[3]/3600.0), 'h' print 'chosenIdx:', chosenIdx, ', rndInit:', rndInitIdx, ', step:', step, ', meanRMSE:', '%.5f'%errors_meanRMSEs[rndInitIdx,step+1] sys.stdout.flush() writeAttempt = 0 while True: try: scipy.io.savemat(resultsFileName, dict(queriedIdxs=queriedIdxs, name=identifier, identifier=identifier, timeNeeded=timeNeeded, lastRndInitIdx=rndInitIdx, errors_singleRMSEs=errors_singleRMSEs, errors_meanRMSEs=errors_meanRMSEs) break except: writeAttempt = writeAttempt + 1 if writeAttempt >= writeAttemptsNmb: raise Exception('ERROR: Writing file {} failed {} times!'.format(resultsFileName, writeAttempt)) print '' print 'WARNING: Writing file {} failed ({} / {}, retry after {} seconds)!'.format(resultsFileName, writeAttempt, writeAttemptsNmb, writeAttemptsDelay) sys.stdout.flush() time.sleep(writeAttemptsDelay) print 'done' print 'now', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')