#! /usr/bin/python
# LifelongLearning / gpEMOC

import numpy
import scipy.cluster.vq
import pickle

import sys
import os
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))
import helperFunctions

import activeLearningLinGPprototype

class Classifier(activeLearningLinGPprototype.ClassifierPrototype):

  def __init__(self,
               sigmaN = 0.00178,
               usePde = True,
               configFile=None):
    """Set up the classifier and read the approximation settings.

    sigmaN     -- forwarded unchanged to the ClassifierPrototype constructor.
    usePde     -- default handed to helperFunctions.getConfig for the
                  'usePde' option of section 'activeLearning'; presumably a
                  value in the config file takes precedence (confirm against
                  helperFunctions.getConfig).
    configFile -- forwarded to the ClassifierPrototype constructor and used
                  for the 'usePde' lookup.
    """

    prototype = activeLearningLinGPprototype.ClassifierPrototype
    prototype.__init__(self, sigmaN=sigmaN, configFile=configFile)

    section = 'activeLearning'
    readOption = helperFunctions.getConfig

    # NOTE(review): 'usePde' is looked up with the raw configFile argument,
    # the two options below with self.configFile (presumably set by the
    # prototype constructor) -- confirm that both refer to the same file.
    self.usePde = readOption(configFile, section, 'usePde', usePde, 'bool', True)
    self.approxMode = readOption(self.configFile, section, 'approxMode', 'rnd', 'str', True)
    self.approxSize = readOption(self.configFile, section, 'approxSize', 500, 'int', True)

    # caches filled by prepareApprox() and reset by clearApprox()
    self.cachedClusters = self.cachedApprox = self.cachedDensity = None
    

  # x.shape = (number of samples, feat dim)
  def calcEMOC(self, x, allX=None, density=None):
    """Compute the EMOC score of every sample in x.

    x       -- samples to score, one per row (number of samples, feat dim);
               the `*` products assume numpy.matrix semantics -- TODO confirm.
    allX    -- optional pool of samples handed on to approxSum().
    density -- optional density values handed on to approxSum().

    Returns the element-wise product of 1/(1 + tmpVec2), the approxSum()
    estimate and the probability-weighted difference term.

    approxSum() is evaluated exactly once per call, so in 'rnd' mode only a
    single random subset is drawn.
    """

    containsNoise = (self.yUni == -1).any()

    # invCreg * x^T is needed here and again inside approxSum()
    tmpVec1 = self.invCreg*x.T
    # column-wise sum of x^T .* (invCreg * x^T): one value per sample
    tmpVec2 = numpy.sum(numpy.multiply(x.T,tmpVec1), axis=0)

    sigmaF = self.calcSigmaF(x, tmpVec2)

    infY = self.infer(x, containsNoise)
    probs = self.calcProbs(x, infY, sigmaF)

    term1 = 1.0/(1.0 + tmpVec2)

    approxSum = self.approxSum(x, allX, tmpVec1, density)

    # absolute distance of every inferred output to the values +1 and -1
    pro = numpy.absolute(infY - 1)
    contra = numpy.absolute(infY + 1)
    # per entry: row sum of contra, minus that entry's contra, plus its pro
    diff = numpy.repeat(numpy.sum(contra,axis=1),contra.shape[1],axis=1)
    diff = (diff - contra + pro)

    term3 = numpy.sum(numpy.multiply(probs,diff),axis=1)

    return numpy.multiply(numpy.multiply(term1, approxSum).T, term3)


  # x.shape = (number of samples, feat dim)
  def calcEMOCpde(self, x, allX=None):
    """Return the calcEMOC() scores of x multiplied element-wise by the density.

    x    -- samples to score, one per row.
    allX -- optional sample pool; defaults to self.X with x appended along
            axis 0.

    The density comes from self.cachedDensity when that is set, otherwise it
    is computed on the fly with calcDensity().
    """

    if allX is None:
        allX = numpy.append(self.X, x, axis=0)

    density = self.cachedDensity
    if density is None:
        density = self.calcDensity(x, allX)

    return numpy.multiply(self.calcEMOC(x, allX, density), density)


  # x.shape = (number of samples, feat dim)
  def calcAlScores(self, x):
    """Return the active-learning scores of x.

    Dispatches to calcEMOCpde() when self.usePde is set and to calcEMOC()
    otherwise; both receive self.X with x appended along axis 0 as pool.
    """

    pool = numpy.append(self.X, x, axis=0)
    scorer = self.calcEMOCpde if self.usePde else self.calcEMOC

    return scorer(x, pool)


  def approxSum(self, x, allX=None, invCregDotX=None, density=None):
    """Return the sum term of the score according to self.approxMode.

    x           -- samples to score, one per row.
    allX        -- optional sample pool for 'rnd' mode; defaults to self.X
                   with x appended along axis 0.
    invCregDotX -- optional precomputed self.invCreg*x.T.
    density     -- accepted but not used by this implementation.

    Modes:
      'Call'       -- return self.cachedApprox unchanged.
      'clustering' -- column sums of |self.cachedClusters * invCregDotX|.
      'rnd'        -- column sums of |rows * invCregDotX| over a random
                      subset of at most self.approxSize rows of allX.

    Raises Exception for any other mode.
    """

    mode = self.approxMode

    if mode == 'Call':
        return self.cachedApprox

    if invCregDotX is None:
        invCregDotX = self.invCreg*x.T

    if mode == 'clustering':
        projected = self.cachedClusters*invCregDotX
        return numpy.sum(numpy.absolute(projected), axis=0)

    # never ask for more rows than self.X and x provide together
    amount = min(self.approxSize, self.X.shape[0] + x.shape[0])

    if allX is None:
        allX = numpy.append(self.X, x, axis=0)

    if mode != 'rnd':
        raise Exception('Approximation mode > %s < unknown!'%mode)

    # random subset of rows: first `amount` entries of a row permutation
    picked = numpy.random.permutation(allX.shape[0])[:amount]
    projected = allX[picked, :]*invCregDotX

    return numpy.sum(numpy.absolute(projected), axis=0)


  def prepareApprox(self, x):
    """Fill the caches that the scoring methods read for the samples x.

    x -- samples, one per row; they are appended to self.X along axis 0 to
         form the pool used below.

    When self.usePde is set, self.cachedDensity is computed with
    calcDensity(). For mode 'clustering', self.cachedClusters is set to the
    k-means codebook of the pool, with at most self.approxSize clusters
    requested. Modes 'rnd' and 'knn' need no further preparation.

    Raises Exception for any other mode.
    """

    allX = numpy.append(self.X, x, axis=0)

    ###

    if self.usePde:
        self.cachedDensity = self.calcDensity(x, allX)

    ###

    if self.approxMode == 'rnd' or self.approxMode == 'knn':
        return

    ###

    if self.approxMode == 'clustering':
        amount = min(self.approxSize, allX.shape[0])
        # kmeans returns (codebook, distortion); only the codebook is kept
        self.cachedClusters = scipy.cluster.vq.kmeans(allX, amount)[0]
        # One pre-formatted argument: prints the same text with the Python 2
        # print statement and the Python 3 print function.
        print('data points: %s , cluster requested: %s , cluster found: %s'
              % (allX.shape[0], amount, self.cachedClusters.shape[0]))
        return

    raise Exception('Approximation mode > %s < unknown!'%self.approxMode)


  def clearApprox(self, idx=None):
    """Reset the caches, or drop entries from them.

    idx -- None resets cachedApprox and cachedClusters (and cachedDensity
           when self.usePde is set). Otherwise idx is handed to
           numpy.delete(..., axis=0) to remove the matching entries from
           cachedApprox and, when self.usePde is set, from cachedDensity;
           cachedClusters is left untouched in that case.
    """

    if idx is None:
        self.cachedApprox = None
        self.cachedClusters = None
        if self.usePde:
            self.cachedDensity = None
        return

    approx = self.cachedApprox
    if approx is not None:
        self.cachedApprox = numpy.delete(approx, idx, axis=0)

    if self.usePde:
        cached = self.cachedDensity
        if cached is not None:
            self.cachedDensity = numpy.delete(cached, idx, axis=0)


  def calcDensity(self, x, allX=None):
    """Return x multiplied by the transposed column mean of allX.

    x    -- samples, one per row.
    allX -- optional sample pool; defaults to self.X with x appended along
            axis 0.

    Assumes numpy.matrix inputs, so `*` is a matrix product and the result
    holds one value per row of x -- TODO confirm.
    """

    pool = numpy.append(self.X, x, axis=0) if allX is None else allX
    meanSample = numpy.mean(pool, axis=0)

    return x*meanSample.T