using DataFrames, Plots, CSV, Dates, Statistics

# Weekly COVID-19 death counts from the CDC NCHS provisional data set,
# aggregated per (week, age group), turned into percentage columns, plotted
# as a bar chart and exported to CSV.

# File locations are kept in one place so they are easy to change.
const INPUT_CSV = "C:/Users/andrew.bannerman/Desktop/Julia/scripts/Provisional_COVID-19_Deaths_by_Week__Sex__and_Age.csv"
const OUTPUT_CSV = "C:/Users/andrew.bannerman/Desktop/Julia/scripts/weekly_VAX.csv"
const OUTPUT_PLOT = "C:/Users/andrew.bannerman/Desktop/Julia/scripts/covid19_deaths_by_age_group.png"

# Return the rows of `df` whose `Age_Group` column equals `label`.
rows_for_age(df, label) = df[df.Age_Group .== label, :]

# Load the raw table and give the columns identifier-friendly names.
# rename! with a vector replaces names positionally, so the file is expected
# to have exactly these eight columns in this order.
nhcs = CSV.read(INPUT_CSV, DataFrame, header=true, delim=",")
colnames = ["Data_as_of", "State", "MMWR_Week", "End_Week", "Sex", "Age_Group",
            "Total_Deaths", "COVID19_Deaths"]
rename!(nhcs, colnames)

# Keep only the rows that aggregate over both sexes.
nhcs = nhcs[nhcs.Sex .== "All Sex", :]

# Sum COVID-19 deaths for every (week ending date, age group) pair.
gdf = groupby(nhcs, [:End_Week, :Age_Group])
sum_out = combine(gdf, :COVID19_Deaths => sum)
print(unique(sum_out.Age_Group))

# One sub-table per age group. `all_ages` was previously named `all`, which
# shadowed Base.all and fails if `all` has already been used in the session.
all_ages = rows_for_age(sum_out, "All Ages")
Under_1 = rows_for_age(sum_out, "Under 1 year")
one_to_four = rows_for_age(sum_out, "1-4 Years")
five_to_fourteen = rows_for_age(sum_out, "5-14 Years")
fifteen_to_twenty_four = rows_for_age(sum_out, "15-24 Years")
twenty_five_to_34 = rows_for_age(sum_out, "25-34 Years")
thirty_five_to_44 = rows_for_age(sum_out, "35-44 Years")
fourty_five_to_54 = rows_for_age(sum_out, "45-54 Years")
fifty_five_to_64 = rows_for_age(sum_out, "55-64 Years")
sixy_five_to_74 = rows_for_age(sum_out, "65-74 Years")
seventy_five_to_84 = rows_for_age(sum_out, "75-84 Years")
eighty_five_over = rows_for_age(sum_out, "85 Years and Over")

# Side-by-side (wide) table. hcat requires every sub-table to have the same
# number of rows; makeunique=true suffixes the repeated column names.
all_out = hcat(all_ages, Under_1, one_to_four, five_to_fourteen,
               fifteen_to_twenty_four, twenty_five_to_34, thirty_five_to_44,
               fourty_five_to_54, fifty_five_to_64, sixy_five_to_74,
               seventy_five_to_84, eighty_five_over, makeunique=true)

plot(sum_out.End_Week, sum_out.COVID19_Deaths_sum)

# Same aggregation for deaths from all causes.
sum_out1 = combine(gdf, :Total_Deaths => sum)

# Express every row as a percentage of the FIRST row of its table.
# NOTE(review): with grouping by (End_Week, Age_Group) the first row is a
# single week/age-group cell, not a grand total -- confirm this is the
# intended baseline.
sum_out.perc = round.(
    (sum_out.COVID19_Deaths_sum ./ sum_out.COVID19_Deaths_sum[1]) .* 100;
    digits=2,
)
sum_out1.perc = round.(
    (sum_out1.Total_Deaths_sum ./ sum_out1.Total_Deaths_sum[1]) .* 100;
    digits=2,
)

# Bar chart of the percentages, skipping the baseline row itself.
plot_data = sum_out[2:end, :]
p1 = bar(plot_data.Age_Group, plot_data.perc, xrotation=60,
         title="Deaths involving (COVID-19) reported to NCHS")
# savefig needs a target file name; calling it with only the plot is a MethodError.
savefig(p1, OUTPUT_PLOT)

# Export the wide per-age-group table.
# NOTE(review): the original wrote an undefined variable `array`; `all_out`
# is assumed to be the intended table -- confirm.
CSV.write(OUTPUT_CSV, all_out)