activeLearningLinGPemocApprox.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #! /usr/bin/python
  2. import numpy
  3. import scipy.cluster.vq
  4. import pickle
  5. import sys
  6. import os
  7. sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))
  8. import helperFunctions
  9. import activeLearningLinGPprototype
  10. class Classifier(activeLearningLinGPprototype.ClassifierPrototype):
  11. def __init__(self,
  12. sigmaN = 0.00178,
  13. usePde = True,
  14. configFile=None):
  15. activeLearningLinGPprototype.ClassifierPrototype.__init__(self, sigmaN=sigmaN, configFile=configFile)
  16. self.usePde = helperFunctions.getConfig(configFile, 'activeLearning', 'usePde', usePde, 'bool', True)
  17. self.approxMode = helperFunctions.getConfig(self.configFile, 'activeLearning', 'approxMode', 'rnd', 'str', True)
  18. self.approxSize = helperFunctions.getConfig(self.configFile, 'activeLearning', 'approxSize', 500, 'int', True)
  19. self.cachedClusters = None
  20. self.cachedApprox = None
  21. self.cachedDensity = None
  22. # x.shape = (number of samples, feat dim)
  23. def calcEMOC(self, x, allX=None, density=None):
  24. containsNoise = (self.yUni == -1).any()
  25. tmpVec1 = self.invCreg*x.T
  26. tmpVec2 = numpy.sum(numpy.multiply(x.T,tmpVec1), axis=0)
  27. sigmaF = self.calcSigmaF(x, tmpVec2)
  28. infY = self.infer(x, containsNoise)
  29. probs = self.calcProbs(x, infY, sigmaF)
  30. term1 = 1.0/(1.0 + tmpVec2)
  31. approxSum = self.approxSum(x, allX, tmpVec1, density)
  32. pro = numpy.absolute(infY - 1)
  33. contra = numpy.absolute(infY + 1)
  34. diff = numpy.repeat(numpy.sum(contra,axis=1),contra.shape[1],axis=1)
  35. diff = (diff - contra + pro)
  36. term3 = numpy.sum(numpy.multiply(probs,diff),axis=1)
  37. res = numpy.multiply(numpy.multiply(term1, approxSum).T, term3)
  38. return numpy.multiply(numpy.multiply(term1, self.approxSum(x, allX, tmpVec1, density)).T, term3)
  39. # x.shape = (number of samples, feat dim)
  40. def calcEMOCpde(self, x, allX=None):
  41. if allX is None:
  42. allX = numpy.append(self.X, x, axis=0)
  43. if self.cachedDensity is None:
  44. density = self.calcDensity(x, allX)
  45. else:
  46. density = self.cachedDensity
  47. scores = self.calcEMOC(x, allX, density)
  48. return numpy.multiply(scores, density)
  49. # x.shape = (feat dim, number of samples)
  50. def calcAlScores(self, x):
  51. allX = numpy.append(self.X, x, axis=0)
  52. if self.usePde:
  53. return self.calcEMOCpde(x, allX)
  54. else:
  55. return self.calcEMOC(x, allX)
  56. def approxSum(self, x, allX=None, invCregDotX=None, density=None):
  57. if self.approxMode == 'Call':
  58. return self.cachedApprox
  59. ###
  60. if invCregDotX is None:
  61. invCregDotX = self.invCreg*x.T
  62. ###
  63. if self.approxMode == 'clustering':
  64. return numpy.sum(numpy.absolute(self.cachedClusters*invCregDotX), axis=0)
  65. ###
  66. amount = min(self.approxSize, self.X.shape[0] + x.shape[0])
  67. if allX is None:
  68. allX = numpy.append(self.X, x, axis=0)
  69. ###
  70. if self.approxMode == 'rnd':
  71. return numpy.sum(numpy.absolute(allX[numpy.random.permutation(allX.shape[0])[:amount], :]*invCregDotX), axis=0)
  72. else:
  73. raise Exception('Approximation mode > %s < unknown!'%self.approxMode)
  74. def prepareApprox(self, x):
  75. allX = numpy.append(self.X, x, axis=0)
  76. ###
  77. if self.usePde:
  78. self.cachedDensity = self.calcDensity(x, allX)
  79. ###
  80. if self.approxMode == 'rnd' or self.approxMode == 'knn':
  81. return
  82. ###
  83. if self.approxMode == 'clustering':
  84. amount = min(self.approxSize, allX.shape[0])
  85. self.cachedClusters = scipy.cluster.vq.kmeans(allX, amount)[0]
  86. print 'data points:', allX.shape[0], ', cluster requested:', amount, ', cluster found:', self.cachedClusters.shape[0]
  87. return
  88. else:
  89. raise Exception('Approximation mode > %s < unknown!'%self.approxMode)
  90. def clearApprox(self, idx=None):
  91. if idx is None:
  92. self.cachedApprox = None
  93. self.cachedClusters = None
  94. if self.usePde:
  95. self.cachedDensity = None
  96. else:
  97. if self.cachedApprox is not None:
  98. self.cachedApprox = numpy.delete(self.cachedApprox, (idx), axis=0)
  99. if self.usePde and self.cachedDensity is not None:
  100. self.cachedDensity = numpy.delete(self.cachedDensity, (idx), axis=0)
  101. def calcDensity(self, x, allX=None):
  102. if allX is None:
  103. allX = numpy.append(self.X, x, axis=0)
  104. return x*numpy.mean(allX, axis=0).T