|
@@ -1,72 +1,124 @@
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
+from datetime import date, timedelta
|
|
|
|
|
|
from src.plotter import Plotter
|
|
|
|
|
|
-state_lookup = {'Schleswig Holstein' : (1, 2897000),
|
|
|
- 'Hamburg' : (2, 1841000),
|
|
|
- 'Niedersachsen' : (3, 7982000),
|
|
|
- 'Bremen' : (4, 569352),
|
|
|
- 'Nordrhein-Westfalen' : (5, 17930000),
|
|
|
- 'Hessen' : (6, 6266000),
|
|
|
- 'Rheinland-Pfalz' : (7, 4085000),
|
|
|
- 'Baden-Württemberg' : (8, 11070000),
|
|
|
- 'Bayern' : (9, 13080000),
|
|
|
- 'Saarland' : (10, 990509),
|
|
|
- 'Berlin' : (11, 3645000),
|
|
|
- 'Brandenburg' : (12, 2641000),
|
|
|
- 'Mecklenburg-Vorpommern' : (13, 1610000),
|
|
|
- 'Sachsen' : (14, 4078000),
|
|
|
- 'Sachsen-Anhalt' : (15, 2208000),
|
|
|
- 'Thüringen' : (16, 2143000)}
|
|
|
-
|
|
|
-def transform_data(plotter:Plotter, alpha=1/14, state_name='Germany', time_range=1200, plot_name='', plot_title='', sample_rate=1, model='SIR', plot_size=(12,6), yscale_log=False, plot_legend=True):
|
|
|
# Lookup table used by transform_data for every state_name other than
# 'Germany': state name -> (state ID, N).
#   * state ID is compared against ``IdLandkreis // 1000`` to single out the
#     rows of one state in the state-level infection dataset.
#   * N is the total from which the infections are subtracted to obtain the
#     susceptible group (S[0] = N - infections[0]).
# NOTE(review): the keys are presumably the German federal states and N their
# population; the first key is spelled without a hyphen, so callers must pass
# 'Schleswig Holstein' exactly as written here.
state_lookup = {'Schleswig Holstein': (1, 2897000),
                'Hamburg': (2, 1841000),
                'Niedersachsen': (3, 7982000),
                'Bremen': (4, 569352),
                'Nordrhein-Westfalen': (5, 17930000),
                'Hessen': (6, 6266000),
                'Rheinland-Pfalz': (7, 4085000),
                'Baden-Württemberg': (8, 11070000),
                'Bayern': (9, 13080000),
                'Saarland': (10, 990509),
                'Berlin': (11, 3645000),
                'Brandenburg': (12, 2641000),
                'Mecklenburg-Vorpommern': (13, 1610000),
                'Sachsen': (14, 4078000),
                'Sachsen-Anhalt': (15, 2208000),
                'Thüringen': (16, 2143000)}
|
|
|
+
|
|
|
+
|
|
|
def daterange(start_date: date, end_date: date):
    """Generate the consecutive days of the half-open interval [start_date, end_date).

    Args:
        start_date (date): First day that is yielded.
        end_date (date): Day at which the iteration stops; it is not yielded itself.

    Yields:
        date: ``start_date`` plus 0, 1, 2, ... days. Nothing is yielded when
        ``end_date`` is not later than ``start_date``.
    """
    span = int((end_date - start_date).days)
    yield from (start_date + timedelta(offset) for offset in range(span))
|
|
|
+
|
|
|
+
|
|
|
def transform_jh_germany_data(plotter: Plotter,
                              time_range=50,
                              sample_rate=1,
                              model='SIR'):
    """Function to generate the SIR split for Germany from the Johns Hopkins daily reports.

    One CSV file per day is read from the ``csse_covid_19_daily_reports``
    directory, starting at 2020-01-31 and ending before 2020-03-20. The rows
    whose ``Country/Region`` equals 'Germany' are summed up, the I and R
    curves are plotted, and the sampled ``[t, S, I, R]`` array is written to
    ``datasets/{model}_JH_Germany_{sample_rate}.csv``.

    Args:
        plotter (Plotter): Plotter object to plot the I and R curves.
        time_range (int, optional): Number of days in the generated series. At most this many
            daily reports are read; days without a report get zero new cases, so the last
            S, I and R values are carried forward. Defaults to 50.
        sample_rate (int, optional): Sample rate used to sample the timepoints. Defaults to 1.
        model (str, optional): Only used as prefix of the output file name; S, I and R are
            always written. Defaults to 'SIR'.
    """
    N = 83100000
    state_name = 'Germany'
    data_directory = 'datasets/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports'
    start_date = date(2020, 1, 31)
    end_date = date(2020, 3, 20)
    columns = ('Confirmed', 'Deaths', 'Recovered')

    # never read more reports than fit into the requested time range
    report_dates = list(daterange(start_date, end_date))[:time_range]
    loaded_days = len(report_dates)

    # extract data: one running total per column and day
    totals = np.zeros((len(columns), loaded_days))
    for i, single_date in enumerate(report_dates):
        file_date = single_date.strftime("%m-%d-%Y")
        date_df = pd.read_csv(f"{data_directory}/{file_date}.csv")
        date_df = date_df.loc[date_df['Country/Region'] == state_name]
        for row, column in enumerate(columns):
            # summing yields a scalar for zero, one or several matching rows
            totals[row, i] = date_df[column].fillna(0).sum()

    # The daily reports contain running totals, while the recurrence below
    # expects the change per day (transform_data differences its cumulative
    # case column in the same way). Element 0 keeps the first total.
    daily = np.diff(totals, axis=1, prepend=0)
    infections = np.zeros(time_range)
    deaths = np.zeros(time_range)
    recoveries = np.zeros(time_range)
    infections[:loaded_days] = daily[0]
    deaths[:loaded_days] = daily[1]
    recoveries[:loaded_days] = daily[2]

    # generate groups
    S, I, R = np.zeros(time_range), np.zeros(time_range), np.zeros(time_range)
    S[0] = N - infections[0]
    I[0] = infections[0]
    R[0] = 0

    for day in range(1, time_range):
        S[day] = S[day - 1] - infections[day]
        I[day] = I[day - 1] + infections[day] - deaths[day] - recoveries[day]
        R[day] = R[day - 1] + deaths[day] + recoveries[day]
        # the clamped value is fed into the next day, so this stays a loop
        if I[day] < 0:
            I[day] = 0

    t = np.arange(0, time_range, 1)

    plotter.plot(t, [I, R], ["I", "R"], "JH_data", "JH Data", (6, 6))

    groups = [S, I, R]
    COVID_Data = np.asarray([t[0::sample_rate]] +
                            [group[0::sample_rate] for group in groups])

    np.savetxt(
        f"datasets/{model}_JH_{state_name.replace(' ', '_').replace('-', '_').replace('ü','ue')}_{sample_rate}.csv", COVID_Data, delimiter=",")
|
|
|
+
|
|
|
+
|
|
|
+def transform_data(plotter: Plotter, alpha=1 / 14, state_name='Germany', time_range=1200, sample_rate=1, model='SIR'):
|
|
|
"""Function to generate the SIR split from the data in the COVID-19-Todesfaelle_in_Deutschland dataset.
|
|
|
|
|
|
Args:
|
|
|
plotter (Plotter): Plotter object to plot dataset curves.
|
|
|
dataset_path (str, optional): Path to the dataset directory. Defaults to 'datasets/COVID-19-Todesfaelle_in_Deutschland/'.
|
|
|
- plot_name (str, optional): Name of the plot file. Defaults to ''.
|
|
|
- plot_title (str, optional): Title of the plot. Defaults to ''.
|
|
|
sample_rate (int, optional): Sample rate used to sample the timepoints. Defaults to 1.
|
|
|
exclude (list, optional): List of groups that are to excluded from the plot. Defaults to [].
|
|
|
- plot_size (tuple, optional): Size of the plot in (x, y) format. Defaults to (12,6).
|
|
|
- yscale_log (bool, optional): Controls if the y axis of the plot will be scaled in log scale. Defaults to False.
|
|
|
- plot_legend (bool, optional): Controls if the legend is to be plotted. Defaults to True.
|
|
|
"""
|
|
|
# read the data
|
|
|
|
|
|
-
|
|
|
infections = np.zeros(time_range)
|
|
|
deaths = np.zeros(time_range)
|
|
|
recoveries = np.zeros(time_range)
|
|
|
if state_name == 'Germany':
|
|
|
- df = pd.read_csv('datasets/COVID-19-Todesfaelle_in_Deutschland/COVID-19-Todesfaelle_Deutschland.csv')
|
|
|
+ df = pd.read_csv(
|
|
|
+ 'datasets/COVID-19-Todesfaelle_in_Deutschland/COVID-19-Todesfaelle_Deutschland.csv')
|
|
|
N = 83100000
|
|
|
infections[0] = df['Faelle_gesamt'][0]
|
|
|
deaths[0] = df['Todesfaelle_neu'][0]
|
|
|
|
|
|
recovery_queue = np.zeros(14)
|
|
|
for i in range(1, time_range):
|
|
|
- infections[i] = df['Faelle_gesamt'][i] - df['Faelle_gesamt'][i-1]
|
|
|
+ infections[i] = df['Faelle_gesamt'][i] - df['Faelle_gesamt'][i - 1]
|
|
|
deaths[i] = df['Todesfaelle_neu'][i]
|
|
|
recoveries[i] = recovery_queue[0]
|
|
|
|
|
|
recovery_queue[:-1] = recovery_queue[1:]
|
|
|
recovery_queue[-1] = infections[i]
|
|
|
else:
|
|
|
- df = pd.read_csv('datasets/state_data/Aktuell_Deutschland_SarsCov2_Infektionen.csv')
|
|
|
+ df = pd.read_csv(
|
|
|
+ 'datasets/state_data/Aktuell_Deutschland_SarsCov2_Infektionen.csv')
|
|
|
state_ID, N = state_lookup[state_name]
|
|
|
|
|
|
# single out a state
|
|
|
state_IDs = df['IdLandkreis'] // 1000
|
|
|
df = df.loc[state_IDs == state_ID]
|
|
|
|
|
|
- # sort entries by state
|
|
|
+ # sort entries by date
|
|
|
df = df.sort_values('Refdatum')
|
|
|
df = df.reset_index(drop=True)
|
|
|
|
|
|
- # collect cases
|
|
|
+ # collect cases
|
|
|
entry_idx = 0
|
|
|
day = 0
|
|
|
date = df['Refdatum'][entry_idx]
|
|
@@ -78,7 +130,8 @@ def transform_data(plotter:Plotter, alpha=1/14, state_name='Germany', time_range
|
|
|
deaths[day] += df['AnzahlTodesfall'][entry_idx]
|
|
|
entry_idx += 1
|
|
|
# move day index by difference between the current and next date
|
|
|
- day += (pd.to_datetime(df['Refdatum'][entry_idx])-pd.to_datetime(date)).days
|
|
|
+ day += (pd.to_datetime(df['Refdatum']
|
|
|
+ [entry_idx]) - pd.to_datetime(date)).days
|
|
|
date = df['Refdatum'][entry_idx]
|
|
|
|
|
|
recovery_queue = np.zeros(14)
|
|
@@ -89,48 +142,122 @@ def transform_data(plotter:Plotter, alpha=1/14, state_name='Germany', time_range
|
|
|
recovery_queue[:-1] = recovery_queue[1:]
|
|
|
recovery_queue[-1] = infections[i]
|
|
|
week_counter -= 1
|
|
|
-
|
|
|
+
|
|
|
df = df.drop(df.index[time_range:])
|
|
|
- S, I, R = np.zeros(df.shape[0]), np.zeros(df.shape[0]), np.zeros(df.shape[0])
|
|
|
+ S, I, R = np.zeros(df.shape[0]), np.zeros(
|
|
|
+ df.shape[0]), np.zeros(df.shape[0])
|
|
|
# generate groups
|
|
|
S[0] = N - infections[0]
|
|
|
I[0] = infections[0]
|
|
|
R[0] = 0
|
|
|
if model == 'I':
|
|
|
for day in range(1, time_range):
|
|
|
- S[day] = S[day-1] - infections[day]
|
|
|
- I[day] = I[day-1] + infections[day] - I[day-1] * alpha
|
|
|
- R[day] = R[day-1] + I[day-1] * alpha
|
|
|
+ S[day] = S[day - 1] - infections[day]
|
|
|
+ I[day] = I[day - 1] + infections[day] - I[day - 1] * alpha
|
|
|
+ R[day] = R[day - 1] + I[day - 1] * alpha
|
|
|
else:
|
|
|
for day in range(1, time_range):
|
|
|
- S[day] = S[day-1] - infections[day]
|
|
|
- I[day] = I[day-1] + infections[day] - deaths[day] - recoveries[day]
|
|
|
- R[day] = R[day-1] + deaths[day] + recoveries[day]
|
|
|
+ S[day] = S[day - 1] - infections[day]
|
|
|
+ I[day] = I[day - 1] + infections[day] - \
|
|
|
+ deaths[day] - recoveries[day]
|
|
|
+ R[day] = R[day - 1] + deaths[day] + recoveries[day]
|
|
|
if I[day] < 0:
|
|
|
I[day] = 0
|
|
|
-
|
|
|
+
|
|
|
t = np.arange(0, time_range, 1)
|
|
|
|
|
|
# select, which group is to be outputted
|
|
|
groups = []
|
|
|
if 'S' in model:
|
|
|
groups.append(S)
|
|
|
-
|
|
|
+
|
|
|
if 'I' in model:
|
|
|
groups.append(I)
|
|
|
|
|
|
if 'R' in model:
|
|
|
groups.append(R)
|
|
|
|
|
|
- plotter.plot(t,
|
|
|
- groups,
|
|
|
- [*model],
|
|
|
- state_name.replace(' ', '_').replace('-', '_').replace('ü','ue') + f"_{model}" + f"_{int(1/alpha)}",
|
|
|
- state_name,
|
|
|
- (6,6),
|
|
|
- xlabel='time / days',
|
|
|
+ plotter.plot(t,
|
|
|
+ groups,
|
|
|
+ [*model],
|
|
|
+ state_name.replace(' ', '_').replace(
|
|
|
+ '-', '_').replace('ü', 'ue') + f"_{model}" + f"_{int(1/alpha)}",
|
|
|
+ state_name,
|
|
|
+ (6, 6),
|
|
|
+ xlabel='time / days',
|
|
|
ylabel='amount of people')
|
|
|
|
|
|
- COVID_Data = np.asarray([t[0::sample_rate]] + [group[0::sample_rate] for group in groups])
|
|
|
+ COVID_Data = np.asarray([t[0::sample_rate]] +
|
|
|
+ [group[0::sample_rate] for group in groups])
|
|
|
+
|
|
|
+ np.savetxt(
|
|
|
+ f"datasets/{model}_RKI_{state_name.replace(' ', '_').replace('-', '_').replace('ü','ue')}_{sample_rate}_{int(1/alpha)}.csv", COVID_Data, delimiter=",")
|
|
|
+
|
|
|
+
|
|
|
def transform_paper_data():
    """Function to generate the SIR split from a hard-coded table of infected people in Germany.

    The table has one row per day (day 13 of month 2 through day 19 of
    month 3). The resulting ``[t, S, I, R]`` array is written to
    ``datasets/SIR_Paper_Germany_{int(1/alpha)}.csv``; the ``datasets``
    directory has to exist already.

    Recoveries are not taken from data: each day a fixed share ``alpha`` of
    the previous day's infectious group is moved to R.
    """
    # NOTE(review): the other functions in this file use 83100000 for
    # Germany — confirm that 70000000 is intended here.
    N = 70000000
    alpha = 0.07
    state_name = 'Germany'

    # Data: [day of month, month, infected people]
    data = [
        [1.30000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.40000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.50000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.60000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.70000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.80000000e+01, 2.00000000e+00, 1.50000000e+01],
        [1.90000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.00000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.10000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.20000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.30000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.40000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.50000000e+01, 2.00000000e+00, 1.50000000e+01],
        [2.60000000e+01, 2.00000000e+00, 1.70000000e+01],
        [2.70000000e+01, 2.00000000e+00, 2.10000000e+01],
        [2.80000000e+01, 2.00000000e+00, 4.70000000e+01],
        [2.90000000e+01, 2.00000000e+00, 5.70000000e+01],
        [1.00000000e+00, 3.00000000e+00, 1.11000000e+02],
        [2.00000000e+00, 3.00000000e+00, 1.29000000e+02],
        [3.00000000e+00, 3.00000000e+00, 1.57000000e+02],
        [4.00000000e+00, 3.00000000e+00, 1.96000000e+02],
        [5.00000000e+00, 3.00000000e+00, 2.62000000e+02],
        [6.00000000e+00, 3.00000000e+00, 4.00000000e+02],
        [7.00000000e+00, 3.00000000e+00, 6.84000000e+02],
        [8.00000000e+00, 3.00000000e+00, 8.47000000e+02],
        [9.00000000e+00, 3.00000000e+00, 9.02000000e+02],
        [1.00000000e+01, 3.00000000e+00, 1.13900000e+03],
        [1.10000000e+01, 3.00000000e+00, 1.29600000e+03],
        [1.20000000e+01, 3.00000000e+00, 1.56700000e+03],
        [1.30000000e+01, 3.00000000e+00, 2.36900000e+03],
        [1.40000000e+01, 3.00000000e+00, 3.06200000e+03],
        [1.50000000e+01, 3.00000000e+00, 3.79500000e+03],
        [1.60000000e+01, 3.00000000e+00, 4.83800000e+03],
        [1.70000000e+01, 3.00000000e+00, 6.01200000e+03],
        [1.80000000e+01, 3.00000000e+00, 7.15600000e+03],
        [1.90000000e+01, 3.00000000e+00, 8.19800000e+03],
    ]

    # Creating a Pandas DataFrame
    df = pd.DataFrame(data, columns=["Day", "Month", "Infected people"])
    infected = df["Infected people"].to_numpy()
    # one timepoint per table row, so the table cannot drift out of sync
    time_range = infected.shape[0]

    S, I, R = np.zeros(time_range), np.zeros(time_range), np.zeros(time_range)
    # generate groups; the first table row provides the initial condition
    S[0] = N - infected[0]
    I[0] = infected[0]
    R[0] = 0
    # NOTE(review): every table value is treated as that day's new
    # infections. The values never decrease, so they may be running totals
    # instead — confirm against the source of the table.
    for day in range(1, time_range):
        S[day] = S[day - 1] - infected[day]
        I[day] = I[day - 1] + infected[day] - I[day - 1] * alpha
        R[day] = R[day - 1] + I[day - 1] * alpha

    COVID_Data = np.asarray([np.arange(0, time_range, 1)] +
                            [S, I, R])

    np.savetxt(
        f"datasets/SIR_Paper_{state_name.replace(' ', '_').replace('-', '_').replace('ü','ue')}_{int(1/alpha)}.csv", COVID_Data, delimiter=",")
|