Browse Source

add dataset transformation

phillip.rothenbeck 1 year ago
parent
commit
24317d8fcc
2 changed files with 86 additions and 0 deletions
  1. 86 0
      datasets/transform_SIR.py
  2. BIN
      visualizations/SIR.png

+ 86 - 0
datasets/transform_SIR.py

@@ -0,0 +1,86 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pylab as plt
+
+# Directory prefix (trailing slash included) that is prepended to the CSV file
+# names passed to pd.read_csv below. The path is relative, so it is resolved
+# against the current working directory of the process.
+dataset_path = 'Covid_19_DINN/datasets/COVID-19-Todesfaelle_in_Deutschland/'
+
+def transform_general_data():
+    """Function to generate the SIR split from the data in the COVID-19-Todesfaelle_in_Deutschland dataset.
+    """
+    # read the data
+    df = pd.read_csv(dataset_path + 'COVID-19-Todesfaelle_Deutschland.csv')
+
+    # population of germany at the end of 2019
+    N = 83100000
+    S, I, R = np.zeros(df.shape[0]), np.zeros(df.shape[0]), np.zeros(df.shape[0])
+
+    # S_0 = N - I_0
+    S[0] = N - df['Faelle_gesamt'][0]
+    # I_0 = overall cases at the day - overall death cases at the day
+    I[0] = df['Faelle_gesamt'][0] - df['Todesfaelle_gesamt'][0]
+    # R_0 = overall death cases at the day
+    R[0] = df['Todesfaelle_gesamt'][0]
+
+    # the recovery time is 14 days
+    recovery_queue = np.zeros(14)
+    
+    for day in range(1, df.shape[0]):
+        infections = df['Faelle_gesamt'][day] - df['Faelle_gesamt'][day-1]
+        deaths = df['Todesfaelle_neu'][day]
+        recoveries = recovery_queue[0]
+
+        S[day] = S[day-1] - infections
+        I[day] = I[day-1] + infections - deaths - recoveries
+        R[day] = R[day-1] + deaths + recoveries
+
+        # update recovery queue
+        if I[day] < 0:
+            recovery_queue[-1] -= I[day] 
+            I[day] = 0
+
+        recovery_queue[:-1] = recovery_queue[1:]
+        recovery_queue[-1] = infections
+
+    # plot graphs
+    t = np.arange(0, df.shape[0], 1)
+    plt.plot(t, S, label='Susceptible')
+    plt.plot(t, I, label='Infectionous')
+    plt.plot(t, R, label='Removed')
+    plt.legend()
+    # plt.yscale('log')
+    plt.savefig('Covid_19_DINN/visualizations/SIR.png')
+
+def transform_state_data(state='Sachsen', N=4100000):
+    df = pd.read_csv(dataset_path + 'COVID-19-Todesfaelle_Bundeslaender.csv')
+    S, I, R = np.zeros(df.shape[0]), np.zeros(df.shape[0]), np.zeros(df.shape[0])
+    S[0] = N - df['Faelle_gesamt'][0]
+    I[0] = df['Faelle_gesamt'][0] - df['Todesfaelle_gesamt'][0]
+    R[0] = df['Todesfaelle_gesamt'][0]
+    recovery_queue = np.zeros(14)
+    
+    for day in range(1, df.shape[0]):
+        infections = df['Faelle_gesamt'][day] - df['Faelle_gesamt'][day-1]
+        deaths = df['Todesfaelle_neu'][day]
+        recoveries = recovery_queue[0]
+
+        S[day] = S[day-1] - infections
+        I[day] = I[day-1] + infections - deaths - recoveries
+        R[day] = R[day-1] + deaths + recoveries
+
+        if I[day] < 0:
+            recovery_queue[-1] -= I[day] 
+            I[day] = 0
+
+        recovery_queue[:-1] = recovery_queue[1:]
+        recovery_queue[-1] = infections
+
+    t = np.arange(0, df.shape[0], 1)
+    plt.plot(t, S, label='Susceptible')
+    plt.plot(t, I, label='Infectionous')
+    plt.plot(t, R, label='Removed')
+    plt.legend()
+    # plt.yscale('log')
+    plt.savefig('Covid_19_DINN/visualizations/SIR.png')
+
+transform_general_data()
+

BIN
visualizations/SIR.png