#! /usr/bin/python
# LifelongLearning / gpEMOCreg

import scipy.io
import numpy
import time
import socket
import datetime
import pickle

###

import sys
import os
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))

###

import helperFunctions
import methodSelection
import datasetAcquisition

###

if len(sys.argv) != 2:
    raise Exception('No config file given!')

print ''
print ' -- config -- '
print ''

defaultFname =  os.path.join(os.path.dirname(sys.argv[1]),os.pardir,'setup.cfg')
if not os.path.isfile(defaultFname):
    defaultFname = sys.argv[1]

expSetup = helperFunctions.getConfig(sys.argv[1], 'experiment', 'extExpSetup', defaultFname, 'str', True)
alMethod = helperFunctions.getConfig(sys.argv[1], 'activeLearning', 'method', None, 'str', True)
identifier = helperFunctions.getConfig(sys.argv[1], 'experiment', 'identifier', alMethod, 'str', True)

sigmaN = helperFunctions.getConfig(expSetup, 'activeLearning', 'sigmaN', None, 'float', True)
kernel = helperFunctions.getConfig(expSetup, 'activeLearning', 'kernel', None, 'str', True)
gamma = helperFunctions.getConfig(expSetup, 'activeLearning', 'gamma', None, 'float', True)
numKernelCores = helperFunctions.getConfig(expSetup, 'activeLearning', 'numKernelCores', None, 'int', True)

numRndInits = helperFunctions.getConfig(expSetup, 'experiment', 'numRndInits', None, 'int', True)
indicesFileName = helperFunctions.getConfig(expSetup, 'experiment', 'indicesFileName', None, 'str', True)
numSteps = helperFunctions.getConfig(expSetup, 'experiment', 'numSteps', 100, 'int', True)
startInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'startInitIdx', 0, 'int', True)
endInitIdx = helperFunctions.getConfig(expSetup, 'experiment', 'endInitIdx', numRndInits - 1, 'int', True)
numTestSamples = helperFunctions.getConfig(expSetup, 'experiment', 'numTestSamples', None, 'int', True)

continueExperiment = helperFunctions.getConfig(sys.argv[1], 'experiment', 'continueExperiment', False, 'bool', True)
continueExperiment = helperFunctions.getConfig(expSetup, 'experiment', 'continueExperiment', continueExperiment, 'bool', True)

writeAttemptsNmb = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsNmb', 5, 'int', True)
writeAttemptsDelay = helperFunctions.getConfig(expSetup, 'experiment', 'writeAttemptsDelay', 30, 'int', True)

if startInitIdx != endInitIdx:
    resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results.mat', 'str', True)
else:
    resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.path.dirname(os.path.abspath(sys.argv[1])) + '/results' + str(startInitIdx) + '.mat', 'str', True)

print ''
print 'host:', socket.gethostname()
print 'pid:', os.getpid()
print 'now:', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')
print 'git:', helperFunctions.getGitHash()
print ''
sys.stdout.flush()

if not os.path.isdir(os.path.dirname(os.path.abspath(resultsFileName))) or not os.path.exists(os.path.dirname(os.path.abspath(resultsFileName))):
    raise Exception('Results path deas not exist!')

if os.getcwd() != os.path.dirname(os.path.abspath(resultsFileName)):
    print ''
    print '>>> current path != results path <<<'
    print 'current:', os.getcwd()
    print 'rerults:', os.path.dirname(os.path.abspath(resultsFileName))
    print ''

###

print 'loading data ...'
sys.stdout.flush()

x,y = datasetAcquisition.readData(expSetup)

###

if continueExperiment:

    print ''
    print 'loading previous results ...'
    sys.stdout.flush()

    loaded = scipy.io.loadmat(resultsFileName)

    queriedIdxs = loaded['queriedIdxs']
    timeNeeded = loaded['timeNeeded']
    errors_singleRMSEs = loaded['errors_singleRMSEs']
    errors_meanRMSEs = loaded['errors_meanRMSEs']

    startInitIdx = int(loaded['lastRndInitIdx'])
    print 'resuming with init {} / {}'.format(startInitIdx + 1,endInitIdx + 1)

else:
    queriedIdxs = numpy.zeros((numRndInits,numSteps))
    timeNeeded = numpy.zeros((numRndInits,numSteps))
    errors_singleRMSEs = numpy.zeros((numRndInits,numSteps + 1,y.shape[1]))
    errors_meanRMSEs = numpy.zeros((numRndInits,numSteps + 1))

###

# Load the pickled index object; the main loop reads indices['trainIdxs'] and
# indices['testIdxs'] from it, one entry per random init.
# Pickle data is binary, so the file is opened in 'rb' mode, and the with-block
# closes it even when unpickling fails.
# NOTE: unpickling can execute arbitrary code -- indicesFileName must point to
# a trusted file.
with open(indicesFileName, 'rb') as pickleIn:
    indices = pickle.load(pickleIn)

###

# Progress bookkeeping for the time estimates printed after every query.
queriesPast = 0
timePast = 0
# Queries this run performs: numSteps for each init in [startInitIdx, endInitIdx].
totalRuns = (endInitIdx + 1 - startInitIdx) * numSteps
t0 = time.time()

###

for rndInitIdx in range(startInitIdx, endInitIdx + 1):

    print ''
    trainIdxs = indices['trainIdxs'][rndInitIdx]
    testIdxs = indices['testIdxs'][rndInitIdx]
    poolIdxs = numpy.delete(numpy.asarray(range(y.shape[0])), numpy.concatenate((trainIdxs,testIdxs)))

    xTest = x[testIdxs,:]
    yTest = y[testIdxs,:]
    xTrain = x[trainIdxs,:]
    yTrain = y[trainIdxs,:]
    xPool = x[poolIdxs,:]
    yPool = y[poolIdxs,:]

    orgIdxs = numpy.asmatrix(range(1,yPool.shape[0] + 1))

    regressor = methodSelection.selectActiveLearning(alMethod, sys.argv[1])
    regressor.train(xTrain, yTrain, sigmaN=sigmaN, gamma=gamma, kernel=kernel, numKernelCores=numKernelCores)
    print ''
    sys.stdout.flush()

    preds = regressor.predict(xTest)

    errors_singleRMSEs[rndInitIdx,0,:] = helperFunctions.getSingleRMSEs(yTest, preds)
    errors_meanRMSEs[rndInitIdx,0] = helperFunctions.getMeanRMSE(yTest, preds)

    print 'xTrain:', xTrain.shape, ', yTrain:', yTrain.shape, ', xTest:', xTest.shape, ', yTest:', yTest.shape, ', xPool:', xPool.shape, ', yPool:', yPool.shape
    print 'next rndInit:', rndInitIdx, ', initial meanRMSE:', errors_meanRMSEs[rndInitIdx,0]
    sys.stdout.flush()

    for step in range(numSteps):

        t1 = time.time()

        alScores = regressor.calcAlScores(xPool)

        if alScores.shape[0] != xPool.shape[0]:
            raise Exception('alScores.shape[0] != xPool.shape[0]')

        if alScores.shape[1] != 1:
            raise Exception('alScores.shape[1] != 1')

        if not numpy.all(numpy.isfinite(alScores)):
            raise Exception('not numpy.all(numpy.isfinite(alScores))')

        chosenIdx = numpy.argmax(alScores, axis=0).item(0)

        newX = xPool[chosenIdx,:]
        newY = yPool[chosenIdx,:]

        regressor.update(newX, newY)

        queriedIdxs[rndInitIdx,step] = orgIdxs[0,chosenIdx]

        preds = regressor.predict(xTest)

        errors_singleRMSEs[rndInitIdx,step+1,:] = helperFunctions.getSingleRMSEs(yTest, preds)
        errors_meanRMSEs[rndInitIdx,step+1] = helperFunctions.getMeanRMSE(yTest, preds)

        xPool = numpy.delete(xPool, (chosenIdx), axis=0)
        yPool = numpy.delete(yPool, (chosenIdx), axis=0)
        orgIdxs = numpy.delete(orgIdxs, (chosenIdx), axis=1)

        t2 = time.time()
        timeNeeded[rndInitIdx,step] = (t1 - t2)

        queriesPast = queriesPast + 1
        timeStats = helperFunctions.estTimeLeft(t0, t2, queriesPast, totalRuns)

        print queriesPast, '/', totalRuns, '- t past:', '%.3f'%(timeStats[0]/3600.0), 'h, ~t per pass:', '%.3f'%timeStats[1], 's, ~t left:', '%.3f'%(timeStats[2]/3600.0), 'h, ~t total:', '%.3f'%(timeStats[3]/3600.0), 'h'
        print 'chosenIdx:', chosenIdx,  ', rndInit:', rndInitIdx, ', step:', step, ', meanRMSE:', '%.5f'%errors_meanRMSEs[rndInitIdx,step+1]
        sys.stdout.flush()

    writeAttempt = 0
    while True:
        try:
            scipy.io.savemat(resultsFileName, dict(queriedIdxs=queriedIdxs,
                                                name=identifier,
                                                identifier=identifier,
                                                timeNeeded=timeNeeded,
                                                lastRndInitIdx=rndInitIdx,
                                                errors_singleRMSEs=errors_singleRMSEs,
                                                errors_meanRMSEs=errors_meanRMSEs)
            break
        except:
            writeAttempt = writeAttempt + 1
            if writeAttempt >= writeAttemptsNmb:
                raise Exception('ERROR: Writing file {} failed {} times!'.format(resultsFileName, writeAttempt))
            print ''
            print 'WARNING: Writing file {} failed ({} / {}, retry after {} seconds)!'.format(resultsFileName, writeAttempt, writeAttemptsNmb, writeAttemptsDelay)
            sys.stdout.flush()
            time.sleep(writeAttemptsDelay)

print 'done'
print 'now', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')