#! /usr/bin/python

import numpy
import scipy.cluster.vq
import pickle
import sys
import os

# Put the parent directory of this file on the module search path before
# importing the project-local modules below.
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))

import helperFunctions
import activeLearningLinGPprototype


class Classifier(activeLearningLinGPprototype.ClassifierPrototype):
    """Active-learning classifier that scores samples with an approximated EMOC.

    Builds on ``ClassifierPrototype``; the attributes ``self.X``, ``self.yUni``
    and ``self.invCreg`` as well as the methods ``calcSigmaF``, ``infer`` and
    ``calcProbs`` are used here but not defined here (presumably provided by
    the base class -- confirm).

    NOTE(review): the matrix expressions below use ``*`` between arrays and
    ``numpy.repeat(..., axis=1)`` on the result of ``numpy.sum(..., axis=1)``;
    this presumably relies on the data being ``numpy.matrix`` objects -- confirm.
    """

    def __init__(self, sigmaN = 0.00178, usePde = True, configFile=None):
        """Initialise the base classifier and read the approximation settings.

        Args:
            sigmaN: forwarded unchanged to ``ClassifierPrototype.__init__``.
            usePde: fallback for the 'usePde' option when it is read through
                ``helperFunctions.getConfig``.
            configFile: forwarded to the base class and to
                ``helperFunctions.getConfig``.
        """
        activeLearningLinGPprototype.ClassifierPrototype.__init__(self, sigmaN=sigmaN, configFile=configFile)

        # Options are looked up in the 'activeLearning' section; the fourth
        # argument is the fallback value handed to getConfig.
        self.usePde = helperFunctions.getConfig(configFile, 'activeLearning', 'usePde', usePde, 'bool', True)
        self.approxMode = helperFunctions.getConfig(configFile, 'activeLearning', 'approxMode', 'rnd', 'str', True)
        self.approxSize = helperFunctions.getConfig(configFile, 'activeLearning', 'approxSize', 500, 'int', True)

        # Caches filled by prepareApprox() and reset/trimmed by clearApprox().
        self.cachedClusters = None
        self.cachedApprox = None
        self.cachedDensity = None

    def calcEMOC(self, x, allX=None, density=None):
        """Return the EMOC score for every sample in ``x``.

        Args:
            x: samples, x.shape = (number of samples, feat dim).
            allX: optional data forwarded to ``approxSum``.
            density: optional density values forwarded to ``approxSum``.

        Returns:
            Element-wise product of ``(term1 * approxSum(...)).T`` and the
            probability-weighted label term ``term3``.
        """
        containsNoise = (self.yUni == -1).any()

        invCregDotX = self.invCreg*x.T
        # Column-wise sum of x.T (element-wise) invCreg*x.T, one value per sample.
        quadForm = numpy.sum(numpy.multiply(x.T, invCregDotX), axis=0)

        sigmaF = self.calcSigmaF(x, quadForm)
        infY = self.infer(x, containsNoise)
        probs = self.calcProbs(x, infY, sigmaF)

        term1 = 1.0/(1.0 + quadForm)

        # Absolute distance of the inferred outputs to the labels +1 and -1.
        pro = numpy.absolute(infY - 1)
        contra = numpy.absolute(infY + 1)

        # Per column: row sum of `contra`, with that column's own `contra`
        # entry swapped for its `pro` entry.
        diff = numpy.repeat(numpy.sum(contra,axis=1),contra.shape[1],axis=1)
        diff = (diff - contra + pro)

        term3 = numpy.sum(numpy.multiply(probs,diff),axis=1)

        approx = self.approxSum(x, allX, invCregDotX, density)
        return numpy.multiply(numpy.multiply(term1, approx).T, term3)

    def calcEMOCpde(self, x, allX=None):
        """Return the EMOC scores of ``x`` multiplied by a density term.

        Args:
            x: samples, x.shape = (number of samples, feat dim).
            allX: optional data; defaults to ``self.X`` with ``x`` appended.

        Returns:
            ``calcEMOC`` scores multiplied element-wise with the density, which
            is taken from ``self.cachedDensity`` when that cache is set and
            computed with ``calcDensity`` otherwise.
        """
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)

        if self.cachedDensity is None:
            density = self.calcDensity(x, allX)
        else:
            density = self.cachedDensity

        scores = self.calcEMOC(x, allX, density)
        return numpy.multiply(scores, density)

    def calcAlScores(self, x):
        """Return the active-learning scores for ``x``.

        Dispatches to ``calcEMOCpde`` when ``self.usePde`` is set and to
        ``calcEMOC`` otherwise; x.shape = (number of samples, feat dim).
        """
        allX = numpy.append(self.X, x, axis=0)

        if self.usePde:
            return self.calcEMOCpde(x, allX)
        else:
            return self.calcEMOC(x, allX)

    def approxSum(self, x, allX=None, invCregDotX=None, density=None):
        """Return the approximated sum term used by ``calcEMOC``.

        Behaviour per ``self.approxMode``:
          * 'Call': return ``self.cachedApprox`` as is.
          * 'clustering': column-wise sum of ``|cachedClusters * invCregDotX|``.
          * 'rnd': same sum over at most ``self.approxSize`` randomly permuted
            rows of ``allX``.

        Args:
            x: samples, x.shape = (number of samples, feat dim).
            allX: optional data; defaults to ``self.X`` with ``x`` appended.
            invCregDotX: optional precomputed ``self.invCreg*x.T``.
            density: accepted but not used by this method.

        Raises:
            Exception: if ``self.approxMode`` is none of the modes above.
        """
        # NOTE(review): nothing in this class assigns cachedApprox a non-None
        # value, and prepareApprox() rejects 'Call' -- confirm intended usage.
        if self.approxMode == 'Call':
            return self.cachedApprox

        if invCregDotX is None:
            invCregDotX = self.invCreg*x.T

        if self.approxMode == 'clustering':
            return numpy.sum(numpy.absolute(self.cachedClusters*invCregDotX), axis=0)

        amount = min(self.approxSize, self.X.shape[0] + x.shape[0])
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)

        if self.approxMode == 'rnd':
            chosen = numpy.random.permutation(allX.shape[0])[:amount]
            return numpy.sum(numpy.absolute(allX[chosen, :]*invCregDotX), axis=0)
        else:
            raise Exception('Approximation mode > %s < unknown!'%self.approxMode)

    def prepareApprox(self, x):
        """Fill the caches needed before scoring ``x``.

        Caches the density when ``self.usePde`` is set and, in 'clustering'
        mode, runs k-means on ``self.X`` with ``x`` appended and stores the
        first element of the returned tuple in ``self.cachedClusters``.
        Modes 'rnd' and 'knn' need no further preparation.

        Raises:
            Exception: if ``self.approxMode`` is not 'rnd', 'knn' or
                'clustering'.
        """
        allX = numpy.append(self.X, x, axis=0)

        if self.usePde:
            self.cachedDensity = self.calcDensity(x, allX)

        # NOTE(review): 'knn' is accepted here but approxSum() raises for it.
        if self.approxMode == 'rnd' or self.approxMode == 'knn':
            return

        if self.approxMode == 'clustering':
            amount = min(self.approxSize, allX.shape[0])
            self.cachedClusters = scipy.cluster.vq.kmeans(allX, amount)[0]
            # Single-argument print(...) is valid on Python 2 and Python 3 and
            # emits the same text as the former Python 2 print statement.
            print('data points: %s , cluster requested: %s , cluster found: %s'
                  % (allX.shape[0], amount, self.cachedClusters.shape[0]))
            return
        else:
            raise Exception('Approximation mode > %s < unknown!'%self.approxMode)

    def clearApprox(self, idx=None):
        """Reset the caches, or drop a single entry from them.

        Args:
            idx: when None, ``cachedApprox`` and ``cachedClusters`` are reset
                (and ``cachedDensity`` too if ``self.usePde`` is set).
                Otherwise entry ``idx`` along axis 0 is deleted from
                ``cachedApprox`` and, if ``self.usePde`` is set, from
                ``cachedDensity`` -- each only when that cache is populated.
                ``cachedClusters`` is left untouched in that case.
        """
        if idx is None:
            self.cachedApprox = None
            self.cachedClusters = None
            if self.usePde:
                self.cachedDensity = None
        else:
            if self.cachedApprox is not None:
                self.cachedApprox = numpy.delete(self.cachedApprox, (idx), axis=0)
            if self.usePde and self.cachedDensity is not None:
                self.cachedDensity = numpy.delete(self.cachedDensity, (idx), axis=0)

    def calcDensity(self, x, allX=None):
        """Return ``x`` times the transposed column mean of ``allX``.

        Args:
            x: samples, x.shape = (number of samples, feat dim).
            allX: optional data; defaults to ``self.X`` with ``x`` appended.
        """
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)

        return x*numpy.mean(allX, axis=0).T