123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- #! /usr/bin/python
- import numpy
- import scipy.cluster.vq
- import pickle
- import sys
- import os
- sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))
- import helperFunctions
- import activeLearningLinGPprototype
class Classifier(activeLearningLinGPprototype.ClassifierPrototype):
    """Active-learning classifier that scores candidates with an EMOC criterion.

    The score of a candidate combines three factors (see ``calcEMOC``): a
    regularisation term ``1 / (1 + x^T invCreg x)``, an approximated sum of
    absolute responses over (a subset of) the data, and a probability-weighted
    label term.  Optionally the score is multiplied by a density estimate
    (``usePde``).

    Attributes set here:
        usePde         -- whether ``calcAlScores`` weights scores by density.
        approxMode     -- how ``approxSum`` approximates the data sum
                          ('rnd', 'clustering', 'Call'; see NOTE in
                          ``prepareApprox`` about 'knn').
        approxSize     -- maximum number of samples / clusters used.
        cachedClusters -- k-means centroids prepared by ``prepareApprox``.
        cachedApprox   -- precomputed approximation returned in 'Call' mode;
                          never filled in by this class itself.
        cachedDensity  -- density values prepared by ``prepareApprox``.

    NOTE(review): ``self.X``, ``self.yUni``, ``self.invCreg``,
    ``self.configFile``, ``calcSigmaF``, ``infer`` and ``calcProbs`` are not
    defined here; presumably they come from ``ClassifierPrototype``.  The
    arithmetic below uses ``*`` as a matrix product, so the data is presumably
    held as ``numpy.matrix`` -- confirm against the parent class.
    """

    def __init__(self,
                 sigmaN=0.00178,
                 usePde=True,
                 configFile=None):
        """Initialise the parent classifier and read the approximation settings.

        ``usePde`` is read from the ``configFile`` argument, whereas the two
        approximation options are read from ``self.configFile`` (as set by the
        parent constructor); this mirrors the original behaviour.
        """
        activeLearningLinGPprototype.ClassifierPrototype.__init__(
            self, sigmaN=sigmaN, configFile=configFile)

        self.usePde = helperFunctions.getConfig(
            configFile, 'activeLearning', 'usePde', usePde, 'bool', True)
        self.approxMode = helperFunctions.getConfig(
            self.configFile, 'activeLearning', 'approxMode', 'rnd', 'str', True)
        self.approxSize = helperFunctions.getConfig(
            self.configFile, 'activeLearning', 'approxSize', 500, 'int', True)

        self.cachedClusters = None
        self.cachedApprox = None
        self.cachedDensity = None

    # x.shape = (number of samples, feat dim)
    def calcEMOC(self, x, allX=None, density=None):
        """Return the EMOC score of every candidate row in ``x``.

        ``allX`` and ``density`` are only forwarded to ``approxSum``.
        """
        containsNoise = (self.yUni == -1).any()

        invCregDotX = self.invCreg * x.T
        # Per-candidate quadratic form x_i^T * invCreg * x_i.
        quadForm = numpy.sum(numpy.multiply(x.T, invCregDotX), axis=0)

        sigmaF = self.calcSigmaF(x, quadForm)
        infY = self.infer(x, containsNoise)
        probs = self.calcProbs(x, infY, sigmaF)

        term1 = 1.0 / (1.0 + quadForm)

        # Evaluate the approximation exactly once: in 'rnd' mode every call
        # draws a fresh random subset, so computing it twice wastes time and
        # makes the returned value depend on a second, unrelated draw.
        summedChange = self.approxSum(x, allX, invCregDotX, density)

        # |infY - 1| / |infY + 1|: distance of the inferred output to the
        # labels +1 / -1 respectively.
        pro = numpy.absolute(infY - 1)
        contra = numpy.absolute(infY + 1)

        # Row-wise sum of ``contra`` replicated to every column, then the
        # column's own ``contra`` entry is swapped for its ``pro`` entry.
        diff = numpy.repeat(numpy.sum(contra, axis=1), contra.shape[1], axis=1)
        diff = (diff - contra + pro)

        term3 = numpy.sum(numpy.multiply(probs, diff), axis=1)

        return numpy.multiply(numpy.multiply(term1, summedChange).T, term3)

    # x.shape = (number of samples, feat dim)
    def calcEMOCpde(self, x, allX=None):
        """Return the EMOC scores of ``x`` multiplied element-wise by a density.

        The cached density (from ``prepareApprox``) is used when available,
        otherwise it is computed on the fly for ``x``.
        """
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)

        if self.cachedDensity is None:
            density = self.calcDensity(x, allX)
        else:
            density = self.cachedDensity

        scores = self.calcEMOC(x, allX, density)
        return numpy.multiply(scores, density)

    # x.shape = (number of samples, feat dim)
    def calcAlScores(self, x):
        """Return the active-learning score for every candidate row in ``x``.

        Dispatches to the density-weighted variant when ``usePde`` is set.
        """
        allX = numpy.append(self.X, x, axis=0)
        if self.usePde:
            return self.calcEMOCpde(x, allX)
        return self.calcEMOC(x, allX)

    def approxSum(self, x, allX=None, invCregDotX=None, density=None):
        """Approximate, per candidate, the summed absolute response over the data.

        Modes (``self.approxMode``):
            'Call'       -- return ``self.cachedApprox`` unchanged.
            'clustering' -- sum over the cached k-means centroids.
            'rnd'        -- sum over a random subset of at most
                            ``self.approxSize`` rows of ``allX``.

        ``density`` is accepted for interface compatibility but not used here.

        Raises:
            ValueError: if ``self.approxMode`` is none of the modes above.
        """
        if self.approxMode == 'Call':
            return self.cachedApprox

        if invCregDotX is None:
            invCregDotX = self.invCreg * x.T

        if self.approxMode == 'clustering':
            return numpy.sum(
                numpy.absolute(self.cachedClusters * invCregDotX), axis=0)

        if self.approxMode != 'rnd':
            # Fail before building the (potentially large) concatenated matrix.
            raise ValueError(
                'Approximation mode > %s < unknown!' % self.approxMode)

        amount = min(self.approxSize, self.X.shape[0] + x.shape[0])
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)

        # Random subset without replacement: first ``amount`` entries of a
        # permutation of all row indices.
        subset = numpy.random.permutation(allX.shape[0])[:amount]
        return numpy.sum(numpy.absolute(allX[subset, :] * invCregDotX), axis=0)

    def prepareApprox(self, x):
        """Precompute the caches needed before scoring the candidates ``x``.

        Fills ``cachedDensity`` when ``usePde`` is set and ``cachedClusters``
        in 'clustering' mode.

        Raises:
            ValueError: if ``self.approxMode`` is not 'rnd', 'knn' or
                'clustering'.
        """
        allX = numpy.append(self.X, x, axis=0)

        if self.usePde:
            self.cachedDensity = self.calcDensity(x, allX)

        # NOTE(review): 'knn' is accepted here but ``approxSum`` in this class
        # raises for it, while 'Call' is handled by ``approxSum`` but rejected
        # here -- presumably subclasses cover these modes; confirm.
        if self.approxMode == 'rnd' or self.approxMode == 'knn':
            return

        if self.approxMode == 'clustering':
            amount = min(self.approxSize, allX.shape[0])
            # kmeans returns (codebook, distortion); only the centroids are
            # kept.  It may return fewer centroids than requested.
            self.cachedClusters = scipy.cluster.vq.kmeans(allX, amount)[0]
            # Single pre-formatted string: identical output to the former
            # Python 2 print statement, and valid on Python 3 as well.
            print('data points: %s , cluster requested: %s , cluster found: %s'
                  % (allX.shape[0], amount, self.cachedClusters.shape[0]))
            return

        raise ValueError(
            'Approximation mode > %s < unknown!' % self.approxMode)

    def clearApprox(self, idx=None):
        """Drop cached approximation data.

        With ``idx`` omitted all caches are reset (the density cache only when
        ``usePde`` is set).  Otherwise only row(s) ``idx`` are removed from
        ``cachedApprox`` and ``cachedDensity``; ``cachedClusters`` is left
        untouched.
        """
        if idx is None:
            self.cachedApprox = None
            self.cachedClusters = None
            if self.usePde:
                self.cachedDensity = None
            return

        if self.cachedApprox is not None:
            self.cachedApprox = numpy.delete(self.cachedApprox, idx, axis=0)
        if self.usePde and self.cachedDensity is not None:
            self.cachedDensity = numpy.delete(self.cachedDensity, idx, axis=0)

    def calcDensity(self, x, allX=None):
        """Return the product of ``x`` with the transposed column-mean of ``allX``.

        ``allX`` defaults to ``self.X`` with ``x`` appended row-wise.
        NOTE(review): with ``numpy.matrix`` inputs this is the inner product of
        each candidate with the mean sample -- confirm the input type.
        """
        if allX is None:
            allX = numpy.append(self.X, x, axis=0)
        return x * numpy.mean(allX, axis=0).T
|