"""Plot aggregated U.S. COVID-19 statistics from the covidtracking.com API.

The script downloads the JSON document at ``URL``, sums every numeric column
per date, adds exponential moving averages (EMA) for a fixed set of columns,
prints the aggregated table and a few correlations, and saves a figure to
``us-stats`` (or ``us-stats-log`` when ``LOGARITHMIC`` is true).
"""

from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas.io.common import urlopen

URL = "https://covidtracking.com/api/v1/states/daily.json"
ALPHA = 0.1
LOGARITHMIC = False

# Columns that get an ``<name>_ema_<alpha>`` companion column in ``get_data``.
_EMA_COLUMNS = (
    "death",
    "positive",
    "negative",
    "recovered",
    "hospitalized",
    "inIcuCurrently",
)

# (source column, legend title, line colour) for each EMA curve, in draw order.
_EMA_SERIES = (
    ("positive", "Positive", "blue"),
    # The "recovered" curve was commented out in the original script (and was
    # mislabelled "Positive" there); it stays disabled.
    # ("recovered", "Recovered", "black"),
    ("negative", "Negative", "green"),
    ("death", "Deaths", "red"),
    ("hospitalized", "In Hospitals", "yellow"),
    ("inIcuCurrently", "In ICU", "purple"),
)


def fetch_data(*, url: str) -> DataFrame:
    """Download the JSON document at *url* and parse it into a DataFrame.

    pandas is asked to apply date conversion to the ``date`` and
    ``dateChecked`` columns. The HTTP response is closed once parsing is done.
    """
    with urlopen(url) as response:
        return pd.read_json(response, convert_dates=["date", "dateChecked"])


def calc_ema(*, col_name: str, d: DataFrame, alpha: float = 0.5) -> None:
    """Add an exponential moving average of ``d[col_name]`` to *d* in place.

    The new column is named ``f"{col_name}_ema_{alpha}"`` and is computed with
    ``Series.ewm(alpha=alpha, adjust=True).mean()``. Nothing is returned.
    """
    ema_name = f"{col_name}_ema_{alpha}"
    d[ema_name] = d[col_name].ewm(alpha=alpha, adjust=True).mean()


# noinspection PyShadowingNames
def get_data(*, url=URL, alpha=ALPHA) -> DataFrame:
    """Fetch the data, aggregate it per date and add the EMA columns.

    Rows sharing a ``date`` are summed (presumably one row per state per day;
    confirm against the API), so the result is indexed by date. Each column in
    ``_EMA_COLUMNS`` gets an ``<name>_ema_<alpha>`` companion column.
    """
    df = fetch_data(url=url)
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    # Be explicit about summing numeric columns only: older pandas dropped
    # non-numeric columns silently, while pandas 2.x would try to sum string
    # and datetime columns as well.
    df = df.groupby(by="date").sum(numeric_only=True)
    for col_name in _EMA_COLUMNS:
        calc_ema(col_name=col_name, d=df, alpha=alpha)
    return df


# noinspection PyShadowingNames
def create_fig(*, df: DataFrame, log=LOGARITHMIC, alpha=ALPHA) -> None:
    """Draw the statistics chart on the current pyplot figure.

    ``totalTestResults`` is drawn as bars and each entry of ``_EMA_SERIES`` as
    a line read from the matching ``<column>_ema_<alpha>`` column, so *alpha*
    must be the value that was used to build *df*. When *log* is true the bars
    are drawn with ``log=True`` and the title and y-label say so. The figure
    is neither shown nor saved here.
    """
    log_suffix = " (logarithmic)" if log else ""

    plt.grid(True)
    plt.bar(
        df.index,
        df["totalTestResults"],
        label="Total Test Results",
        color="tab:grey",
        log=log,
    )
    for column, title, color in _EMA_SERIES:
        plt.plot(
            df[f"{column}_ema_{alpha}"],
            label=f"{title} EMA α={alpha}",
            color=color,
            lw=3,
        )

    # The title reports the last date present in the aggregated index.
    upto = df.index[-1].strftime("%d %b, %Y")
    plt.suptitle(f"The U.S. COVID-19 Stats as of {upto}{log_suffix}")
    plt.gca().set_ylabel(f"Number of people{log_suffix}")
    plt.gca().set_xlabel("Days")
    plt.annotate(
        "Source: covidtracking.com",
        xy=(1, 0),
        xycoords=("axes fraction", "figure fraction"),
        xytext=(0, 10),
        textcoords="offset points",
        ha="right",
        va="bottom",
    )
    plt.legend(loc="upper left")
    plt.gcf().set_size_inches([12.8, 9.6])
    plt.tight_layout(rect=[0, 0.03, 1, 0.97])


def calc_cors(*, d: DataFrame, method="pearson") -> Dict[str, float]:
    """Return pairwise correlations between selected columns of *d*.

    *method* is passed straight to ``Series.corr``. The result maps a
    human-readable pair name (e.g. ``"Positive/Death"``) to its coefficient.
    """
    pairs = (
        ("Positive/Death", "positive", "death"),
        ("Positive/Negative", "positive", "negative"),
        ("Total/Negative", "totalTestResults", "negative"),
        ("Total/Positive", "totalTestResults", "positive"),
    )
    return {
        label: d[left].corr(d[right], method=method)
        for label, left, right in pairs
    }


df = get_data()
print(
    df[
        [
            "totalTestResults",
            "positive",
            "negative",
            "death",
            "hospitalized",
            "inIcuCurrently",
            "recovered",
        ]
    ]
)

corr_method = "pearson"
print(f"Correlations ({corr_method.capitalize()})")
for name, value in calc_cors(d=df, method=corr_method).items():
    print(f" {name}: {np.round(value, 4)}")

create_fig(df=df, log=LOGARITHMIC)
plt.savefig(f"us-stats{'-log' if LOGARITHMIC else ''}")