"""Plot cumulative infection counts from ``greece.csv`` against a sigmoid.

The script reads a pipe-separated file of infection records, totals the
infections per calendar day, accumulates those totals, and draws them
together with a logistic curve whose parameters are fixed by hand.
"""

import datetime

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

# Column names applied to the CSV, in file order.
COLUMN_NAMES = ["date", "infections", "age", "gender", "area", "source"]

# Format of the values in the "date" column, e.g. "26.02.2020".
DATE_FORMAT = "%d.%m.%Y"

# Hand-picked logistic parameters used for the plotted curve. The automatic
# fit is disabled (see main()), so these are estimates, not fitted values.
CURVE_MAX = 300
CURVE_MIDPOINT = 15
CURVE_GROWTH = 0.2


def sigmoid(x, L, x0, k):
    """Evaluate the logistic function ``L / (1 + exp(-k * (x - x0)))``.

    Args:
        x: Scalar or NumPy array of points at which to evaluate the curve.
        L: Upper asymptote (maximum value) of the curve.
        x0: Midpoint; the curve equals ``L / 2`` at ``x == x0``.
        k: Growth rate (steepness) of the curve.

    Returns:
        The curve value(s) at ``x``, with the same shape as ``x``.
    """
    return L / (1 + np.exp(-k * (x - x0)))


def load_infections(path="greece.csv"):
    """Load the infection records into a DataFrame indexed by date.

    Args:
        path: Location of the pipe-separated input; the first column holds
            dates written as ``DATE_FORMAT`` (surrounding whitespace is
            stripped before parsing).

    Returns:
        A DataFrame with the columns from ``COLUMN_NAMES`` (the date column
        as a datetime index) plus ``elapsed-days`` (whole days since the
        first row's date) and ``cumulative-infections`` (running total of
        ``infections`` in row order).
    """
    df = pd.read_csv(path, names=COLUMN_NAMES, sep="|", index_col=[0])

    # Convert the textual index into real datetimes.
    df.index = df.index.map(
        lambda text: datetime.datetime.strptime(text.strip(), DATE_FORMAT)
    )

    # Day zero is the date of the first row in the file.
    day_0 = df.index[0]
    df["elapsed-days"] = df.index.map(lambda day: (day - day_0).days)
    df["cumulative-infections"] = df["infections"].cumsum()
    return df


def cumulative_per_day(df):
    """Collapse the records to one cumulative total per calendar day.

    Args:
        df: Date-indexed DataFrame with an ``infections`` column, such as the
            one returned by ``load_infections``.

    Returns:
        A ``(elapsed_days, cumulative_infections)`` pair of NumPy arrays with
        one entry per distinct date, ordered by date. Elapsed days are counted
        from the date of the first row of ``df``.
    """
    day_0 = df.index[0]

    # Select the numeric column *before* aggregating so the text columns are
    # never summed; group on the index itself rather than on its name.
    per_day = df.groupby(level=0)["infections"].sum()
    running_total = per_day.cumsum()
    elapsed = per_day.index.map(lambda day: (day - day_0).days)
    return np.array(elapsed), np.array(running_total)


def plot_fit(xdata, ydata, curve_x, curve_y):
    """Show the observed points and the sigmoid curve in one figure.

    Args:
        xdata: Elapsed days for the observed data points.
        ydata: Cumulative infections for the observed data points.
        curve_x: X values of the sigmoid curve.
        curve_y: Y values of the sigmoid curve.
    """
    # Imported lazily so the numeric helpers above stay importable on
    # machines without a plotting backend.
    import matplotlib.pyplot as plt

    plt.plot(xdata, ydata, "o", label="data")
    # NOTE: the original script had its x-axis label disabled; it is left
    # off here so the figure is unchanged.
    plt.ylabel("Number tested positive")
    plt.plot(curve_x, curve_y, label="fit")
    plt.ylim(0, 500)
    plt.legend(loc="best")
    plt.show()


def main():
    """Load the data, build the sigmoid estimate, and display the plot."""
    df = load_infections("greece.csv")
    xdata, ydata = cumulative_per_day(df)

    # Automatic fitting is disabled; the hand-picked constants are used
    # instead of:
    # [max, midpoint, growth], pcov = curve_fit(sigmoid, xdata, ydata)
    curve_x = np.linspace(0, 100, 100)
    curve_y = sigmoid(curve_x, CURVE_MAX, CURVE_MIDPOINT, CURVE_GROWTH)

    plot_fit(xdata, ydata, curve_x, curve_y)


if __name__ == "__main__":
    main()