#! /usr/bin/python import scipy.io import numpy import time import socket import datetime ### import sys import os sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir)) ### import helperFunctions import methodSelection import datasetAcquisition ### if len(sys.argv) != 2: raise Exception('No config file given!') print '' print ' -- config I -- ' print '' defaultFname = os.path.join(os.path.dirname(sys.argv[1]),os.pardir,'setup.cfg') if not os.path.isfile(defaultFname): defaultFname = sys.argv[1] setupFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'setupFileName', defaultFname, 'str', True) numTasks = helperFunctions.getConfig(setupFileName, 'experiment', 'numTasks', 3, 'int', True) numRndInits = helperFunctions.getConfig(setupFileName, 'experiment', 'numRndInits', 3, 'int', True) numSteps = helperFunctions.getConfig(setupFileName, 'experiment', 'numSteps', 500, 'int', True) numCls = helperFunctions.getConfig(setupFileName, 'experiment', 'numCls', 80, 'int', True) forbiddenCls = helperFunctions.getConfig(setupFileName, 'experiment', 'forbiddenCls', [], 'intList', True) notificationPath = helperFunctions.getConfig(setupFileName, 'experiment', 'notificationPath', None, 'str', True) writeAttemptsNmb = helperFunctions.getConfig(setupFileName, 'experiment', 'writeAttemptsNmb', 5, 'int', True) writeAttemptsDelay = helperFunctions.getConfig(setupFileName, 'experiment', 'writeAttemptsDelay', 30, 'int', True) print '' print ' -- config II -- ' print '' rejectNoise = helperFunctions.getConfig(sys.argv[1], 'experiment', 'rejectNoise', True, 'bool', True) continueExperiment = helperFunctions.getConfig(sys.argv[1], 'experiment', 'continueExperiment', False, 'bool', True) useApproximation = helperFunctions.getConfig(sys.argv[1], 'experiment', 'prepareApproximation', False, 'bool', True) alMethod = helperFunctions.getConfig(sys.argv[1], 'activeLearning', 'method', None, 'str', True) rewMethod = helperFunctions.getConfig(sys.argv[1], 'reweighting', 'method', 'None', 'str', True) startTaskIdx = helperFunctions.getConfig(sys.argv[1], 'experiment', 'startTaskIdx', 0, 'int', True) endTaskIdx = helperFunctions.getConfig(sys.argv[1], 'experiment', 'endTaskIdx', numTasks - 1, 'int', True) if (startTaskIdx != endTaskIdx) or (numTasks < 2): resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.getcwd() + '/results.mat', 'str', True) else: resultsFileName = helperFunctions.getConfig(sys.argv[1], 'experiment', 'resultsFileName', os.getcwd() + '/results' + str(startTaskIdx) + '.mat', 'str', True) identifier = helperFunctions.getConfig(sys.argv[1], 'experiment', 'identifier', os.path.basename(os.path.dirname(resultsFileName)), 'str', True) print '' print 'host:', socket.gethostname() print 'pid:', os.getpid() print 'now:', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S') print 'git:', helperFunctions.getGitHash() sys.stdout.flush() if identifier is None or resultsFileName is None or alMethod is None: raise Exception('ERROR: Config incomplete!') if not os.path.isdir(os.path.dirname(os.path.abspath(resultsFileName))) or not os.path.exists(os.path.dirname(os.path.abspath(resultsFileName))): raise Exception('ERROR: Results path does not exist!') if os.getcwd() != os.path.dirname(os.path.abspath(resultsFileName)): print '' print 'WARNING: current path != results path' print 'current:', os.getcwd() print 'rerults:', os.path.dirname(os.path.abspath(resultsFileName)) ### if continueExperiment and os.path.isfile(resultsFileName): print '' print 'loading previous results ...' sys.stdout.flush() tmp = scipy.io.loadmat(resultsFileName)['results'] values = tmp.item(0) names = list(tmp.dtype.names) confMats = numpy.asarray(values[names.index('confMats')], dtype=numpy.float) queriedIdxs = numpy.asarray(values[names.index('queriedIdxs')], dtype=numpy.float) #name = values[names.index('name')].item(0) identifier = values[names.index('identifier')].item(0) knownCls = numpy.asarray(values[names.index('knownCls')], dtype=numpy.float) timeNeeded = numpy.asarray(values[names.index('timeNeeded')], dtype=numpy.float) startTaskIdx = values[names.index('lastTaskIdx')].item(0) startRndInitIdx = values[names.index('lastRndInitIdx')].item(0) + 1 else: queriedIdxs = numpy.zeros((numTasks,numRndInits,numSteps)) timeNeeded = numpy.zeros((numTasks,numRndInits,numSteps)) confMats = numpy.zeros((numTasks,numRndInits,numSteps + 1,numCls,numCls)) knownCls = numpy.zeros((numTasks,numRndInits,numSteps + 1)) startRndInitIdx = 0 ### #numpy.random.seed(int(time.time()*1000.0)) timePast = 0 totalRuns = (endTaskIdx + 1)*numRndInits*numSteps - startTaskIdx*numRndInits*numSteps - startRndInitIdx*numSteps queriesPast = 0 for taskIdx in range(startTaskIdx, endTaskIdx + 1): for rndInitIdx in range(startRndInitIdx, numRndInits): if notificationPath is not None: helperFunctions.writeNotification(notificationPath, 'status__' + identifier + '__' + socket.gethostname() + '__' + str(taskIdx) + '__' + str(rndInitIdx)) print'' print 'loading data ...' xTrain, yTrain, xPool, yPool, xTest, yTest = datasetAcquisition.readDataForInit(taskIdx, rndInitIdx, setupFileName) print'' print 'training models ...' classifier = methodSelection.selectActiveLearning(alMethod, sys.argv[1]) classifier.train(xTrain, yTrain) reweighter = methodSelection.selectReweighter(rewMethod, sys.argv[1]) reweighter.train(xTrain, yTrain) sys.stdout.flush() pred = classifier.test(xTest) confMats[taskIdx,rndInitIdx,0,:,:] = helperFunctions.confusionMatrix(yTest, pred) knownCls[taskIdx,rndInitIdx,0] = classifier.yUni.shape[1] print '' print 'next task:', taskIdx, ', next rndInit:', rndInitIdx print 'xTrain: {}, yTrain: {} [#cls: {}], xPool: {}, yPool: {} [#cls: {}, #noise: {}], xTest: {}, yTest: {} [#cls: {}]'.format(xTrain.shape, yTrain.shape, len(numpy.unique(numpy.asarray(yTrain))), xPool.shape, yPool.shape, len(numpy.unique(numpy.asarray(yPool))), numpy.sum(yPool==-1), xTest.shape, yTest.shape, len(numpy.unique(numpy.asarray(yTest)))) print 'initial acc:', helperFunctions.getAvgAcc(confMats[taskIdx,rndInitIdx,0,:,:]), ', initial knownCls:', int(knownCls[taskIdx,rndInitIdx,0]) sys.stdout.flush() orgIdxs = numpy.asmatrix(range(1,yPool.shape[0] + 1)) if useApproximation: print 'prepare approximation ...' print sys.stdout.flush() classifier.prepareApprox(xPool) for step in range(numSteps): t0 = time.time() alScores1 = classifier.getAlScores(xPool) alScores2 = reweighter.reweight(alScores1, xPool) chosenIdx = numpy.argmax(alScores2, axis=0).item(0) newX = xPool[chosenIdx,:] newY = yPool[chosenIdx,:] reweighter.update(newX, newY) if not(rejectNoise and newY == -1): classifier.update(newX, newY) if newY == -1: print '-- updated with noise' else: print '-- noise drawn and rejected' queriedIdxs[taskIdx,rndInitIdx,step] = orgIdxs[0,chosenIdx] pred = classifier.test(xTest, True) confMats[taskIdx,rndInitIdx,step + 1,:,:] = helperFunctions.confusionMatrix(yTest, pred) knownCls[taskIdx,rndInitIdx,step + 1] = classifier.yUni.shape[1] - (classifier.yUni == -1).any() xPool = numpy.delete(xPool, (chosenIdx), axis=0) yPool = numpy.delete(yPool, (chosenIdx), axis=0) orgIdxs = numpy.delete(orgIdxs, (chosenIdx), axis=1) if useApproximation: classifier.clearApprox(chosenIdx) t1 = time.time() timeNeeded[taskIdx,rndInitIdx,step] = (t1 - t0) queriesPast = queriesPast + 1 timePast = timePast + float(t1 - t0) timePerPass = timePast / float(queriesPast) timeOva = timePerPass * totalRuns estTimeLeft = timeOva - timePast print queriesPast, '/', totalRuns, '- time past:', '%.3f'%(timePast/3600.0), 'h, avg. time per pass:', '%.3f'%timePerPass, 's, est. time left:', '%.3f'%(estTimeLeft/3600.0), 'h, est. time over all:', '%.3f'%(timeOva/3600.0), 'h' print 'chosenIdx:', chosenIdx, ', task:', taskIdx, ', rndInit:', rndInitIdx, ', step:', step, ', acc:', '%.5f'%helperFunctions.getAvgAcc(confMats[taskIdx,rndInitIdx,step + 1,:,:]), ', knownCls:', int(knownCls[taskIdx,rndInitIdx,step + 1]) sys.stdout.flush() results = dict(confMats=confMats, queriedIdxs=queriedIdxs, name=identifier, identifier=identifier, knownCls=knownCls, timeNeeded=timeNeeded, lastTaskIdx=taskIdx, lastRndInitIdx=rndInitIdx) writeAttempt = 0 while True: try: scipy.io.savemat(resultsFileName, dict(results=results)) break except: writeAttempt = writeAttempt + 1 if writeAttempt >= writeAttemptsNmb: raise Exception('ERROR: Writing file {} failed {} times!'.format(resultsFileName, writeAttempt)) print '' print 'WARNING: Writing file {} failed ({} / {}, retry after {} seconds)!'.format(resultsFileName, writeAttempt, writeAttemptsNmb, writeAttemptsDelay) sys.stdout.flush() time.sleep(writeAttemptsDelay) startRndInitIdx = 0 if notificationPath is not None: helperFunctions.writeNotification(notificationPath, 'status__' + identifier + '__' + socket.gethostname() + '__done') print 'done' print 'now', datetime.datetime.strftime(datetime.datetime.now(), '%d.%m.%Y %H:%M:%S')