# TSGeneration.py (4.5 KB)
  1. import pandas as pd
  2. import numpy as np
  3. def genSynchronizedTimeSeries(lenTS=1000, minlenI=3, maxlenI=100,
  4. sigma = 0.01, seed=None):
  5. """
  6. Generates synthetic time series data e.g. for method evaluation
  7. :param lenTS: length of generated time series (s1 and s2 need to have equal
  8. length)
  9. :param minlenI, minimum length of synchronized interval
  10. :param maxlenI: maximum length of synchronized interval
  11. :param seed: Can be set to reproduce results
  12. :return
  13. s1: First time series
  14. s2: Second time series
  15. syncI: Array of size with synchronized intervals. Each first
  16. value startindex and each second value length of synchronized interval.
  17. """
  18. np.random.seed(seed=seed)
  19. # Generate two uniform distributed vectors
  20. s1 = pd.Series(np.random.uniform(0, 1, lenTS))
  21. s2 = pd.Series(np.random.uniform(0, 1, lenTS))
  22. # Calculate average over each 11 points
  23. s1.rolling(window=11, min_periods=5, center=True)
  24. s2.rolling(window=11, min_periods=5, center=True)
  25. # Sample synchronous intervals
  26. i = 0
  27. count = 0
  28. syncI = []
  29. while i < lenTS:
  30. lenI = np.random.randint(minlenI, maxlenI)
  31. if lenI+i >= lenTS:
  32. lenI = lenTS-i
  33. if count % 2 == 1:
  34. s2[i:i + lenI] = s1[i:i + lenI] + np.random.normal(0,sigma,1)
  35. syncI += [(i, lenI)]
  36. else:
  37. s2[i:i + lenI] = np.mean(s2[i:i + lenI]) + np.random.uniform(0,1,
  38. lenI)
  39. count += 1
  40. i += lenI
  41. # syncI = np.reshape(syncI, (-1,2))
  42. return s1, s2, syncI
  43. def glaette(s, window):
  44. """
  45. smooth time series
  46. :param s: time series
  47. :param window: window size for smoothing
  48. :return smoothed time series
  49. """
  50. import numpy as np
  51. ret = []
  52. for i, val in enumerate(s[window:]):
  53. i += window
  54. ret += [np.mean(s[i-window:i+1])]
  55. return np.array(ret)
  56. def genShiftedSynchronizedTimeSeries(lents=1000,
  57. lenints=[25, 125],
  58. lennoints=[25, 125],
  59. noiseSigma=0.5,
  60. shift=4,
  61. shiftbothsides=False,
  62. seed=None):
  63. """
  64. Generates synthetic time series data with one time series being shifed,
  65. e.g. for method evaluation
  66. :param lenTS: length of generated time series (s1 and s2 need to have equal
  67. length)
  68. :param lenints: minimum and maximum length for intervals being synchronized
  69. :param lennoints: length of sequences which are not synchronized
  70. :param noiseSigma: factor to multiply noise by
  71. :param shift: shift value for synchronized intervals
  72. :param shiftbothsides: if true shift intervals both back and forth in
  73. time, if false intervals are shifted into one direction only. default:
  74. False
  75. :param seed: Can be set to reproduce results
  76. :return
  77. s1: First time series
  78. s2: Second time series
  79. syncI: Array of size with synchronized intervals. Each first
  80. value startindex and each second value length of synchronized interval.
  81. """
  82. import numpy as np
  83. np.random.seed(seed=seed)
  84. # Generate two uniform distributed vectors
  85. s1 = glaette(np.random.randn(lents), 10)
  86. s2 = glaette(np.random.randn(lents), 10)
  87. # Sample synchronous intervals
  88. i = shift
  89. count = 0
  90. syncI = []
  91. while i < lents + shift:
  92. if count % 2 == 1:
  93. lenI = np.random.randint(lenints[0], lenints[1])
  94. else:
  95. lenI = np.random.randint(lennoints[0], lennoints[1])
  96. # stop if interval longer than time series
  97. if lenI + i >= lents:
  98. break
  99. if shiftbothsides:
  100. # alternate between shift back and shift forth
  101. # later: randomly shift back and forth
  102. if count % 2 == 1:
  103. if count % 4 == 1:
  104. s2[i:i + lenI] = -1 * s1[i - shift:i + lenI - shift].copy()
  105. syncI += [(i, lenI, 1)]
  106. else:
  107. s2[i:i + lenI] = -1 * s1[i + shift:i + lenI + shift].copy()
  108. syncI += [(i, lenI, -1)]
  109. else:
  110. if count % 2 == 1:
  111. s2[i:i + lenI] = s1[i - shift:i + lenI - shift].copy()
  112. syncI += [(i, lenI, 1)]
  113. count += 1
  114. i += lenI
  115. # syncI = np.reshape(syncI, (-1,2))
  116. s2 += np.random.randn(len(s2)) * noiseSigma
  117. return s1, s2, syncI