activeLearningLinGPemocApprox.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #! /usr/bin/python
  2. import numpy
  3. import scipy.cluster.vq
  4. import pickle
  5. import sys
  6. import os
  7. sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),os.pardir))
  8. import helperFunctions
  9. import activeLearningLinGPprototype
  10. class Classifier(activeLearningLinGPprototype.ClassifierPrototype):
  11. def __init__(self,
  12. sigmaN = 0.00178,
  13. usePde = True,
  14. configFile=None):
  15. activeLearningLinGPprototype.ClassifierPrototype.__init__(self, sigmaN=sigmaN, configFile=configFile)
  16. self.usePde = helperFunctions.getConfig(configFile, 'activeLearning', 'usePde', usePde, 'bool', True)
  17. self.approxMode = helperFunctions.getConfig(configFile, 'activeLearning', 'approxMode', 'rnd', 'str', True)
  18. self.approxSize = helperFunctions.getConfig(configFile, 'activeLearning', 'approxSize', 500, 'int', True)
  19. self.cachedClusters = None
  20. self.cachedApprox = None
  21. self.cachedDensity = None
  22. # x.shape = (number of samples, feat dim)
  23. def calcEMOC(self, x, allX=None, density=None):
  24. containsNoise = (self.yUni == -1).any()
  25. tmpVec1 = self.invCreg*x.T
  26. tmpVec2 = numpy.sum(numpy.multiply(x.T,tmpVec1), axis=0)
  27. sigmaF = self.calcSigmaF(x, tmpVec2)
  28. infY = self.infer(x, containsNoise)
  29. probs = self.calcProbs(x, infY, sigmaF)
  30. term1 = 1.0/(1.0 + tmpVec2)
  31. pro = numpy.absolute(infY - 1)
  32. contra = numpy.absolute(infY + 1)
  33. diff = numpy.repeat(numpy.sum(contra,axis=1),contra.shape[1],axis=1)
  34. diff = (diff - contra + pro)
  35. term3 = numpy.sum(numpy.multiply(probs,diff),axis=1)
  36. return numpy.multiply(numpy.multiply(term1, self.approxSum(x, allX, tmpVec1, density)).T, term3)
  37. # x.shape = (number of samples, feat dim)
  38. def calcEMOCpde(self, x, allX=None):
  39. if allX is None:
  40. allX = numpy.append(self.X, x, axis=0)
  41. if self.cachedDensity is None:
  42. density = self.calcDensity(x, allX)
  43. else:
  44. density = self.cachedDensity
  45. scores = self.calcEMOC(x, allX, density)
  46. return numpy.multiply(scores, density)
  47. # x.shape = (number of samples, feat dim)
  48. def calcAlScores(self, x):
  49. allX = numpy.append(self.X, x, axis=0)
  50. if self.usePde:
  51. return self.calcEMOCpde(x, allX)
  52. else:
  53. return self.calcEMOC(x, allX)
  54. def approxSum(self, x, allX=None, invCregDotX=None, density=None):
  55. if self.approxMode == 'Call':
  56. return self.cachedApprox
  57. ###
  58. if invCregDotX is None:
  59. invCregDotX = self.invCreg*x.T
  60. ###
  61. if self.approxMode == 'clustering':
  62. return numpy.sum(numpy.absolute(self.cachedClusters*invCregDotX), axis=0)
  63. ###
  64. amount = min(self.approxSize, self.X.shape[0] + x.shape[0])
  65. if allX is None:
  66. allX = numpy.append(self.X, x, axis=0)
  67. ###
  68. if self.approxMode == 'rnd':
  69. return numpy.sum(numpy.absolute(allX[numpy.random.permutation(allX.shape[0])[:amount], :]*invCregDotX), axis=0)
  70. else:
  71. raise Exception('Approximation mode > %s < unknown!'%self.approxMode)
  72. def prepareApprox(self, x):
  73. allX = numpy.append(self.X, x, axis=0)
  74. ###
  75. if self.usePde:
  76. self.cachedDensity = self.calcDensity(x, allX)
  77. ###
  78. if self.approxMode == 'rnd' or self.approxMode == 'knn':
  79. return
  80. ###
  81. if self.approxMode == 'clustering':
  82. amount = min(self.approxSize, allX.shape[0])
  83. self.cachedClusters = scipy.cluster.vq.kmeans(allX, amount)[0]
  84. print 'data points:', allX.shape[0], ', cluster requested:', amount, ', cluster found:', self.cachedClusters.shape[0]
  85. return
  86. else:
  87. raise Exception('Approximation mode > %s < unknown!'%self.approxMode)
  88. def clearApprox(self, idx=None):
  89. if idx is None:
  90. self.cachedApprox = None
  91. self.cachedClusters = None
  92. if self.usePde:
  93. self.cachedDensity = None
  94. else:
  95. if self.cachedApprox is not None:
  96. self.cachedApprox = numpy.delete(self.cachedApprox, (idx), axis=0)
  97. if self.usePde and self.cachedDensity is not None:
  98. self.cachedDensity = numpy.delete(self.cachedDensity, (idx), axis=0)
  99. def calcDensity(self, x, allX=None):
  100. if allX is None:
  101. allX = numpy.append(self.X, x, axis=0)
  102. return x*numpy.mean(allX, axis=0).T