Skip to content

Instantly share code, notes, and snippets.

@Tadge-Analytics
Created December 1, 2021 22:51
Show Gist options
  • Select an option

  • Save Tadge-Analytics/4ed92c5d40e31eab39d81a7e030c24e0 to your computer and use it in GitHub Desktop.

Select an option

Save Tadge-Analytics/4ed92c5d40e31eab39d81a7e030c24e0 to your computer and use it in GitHub Desktop.

Revisions

  1. Tadge-Analytics created this gist Dec 1, 2021.
    63 changes: 63 additions & 0 deletions preppin_data_wk_48.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@

    library(tidyverse)
    library(readxl)


    ###################################################################


    import <-
    read_excel("PD 2021 Wk 48 Input.xlsx", col_names = F) %>%
    janitor::clean_names() %>%
    mutate(row_id = row_number())


    xx <-
    import %>%
    mutate(left_col_is_blank = is.na(x1)) %>%
    group_by(left_col_is_blank) %>%
    mutate(grouping_of_info_set = if_else(left_col_is_blank, 1+row_number(), NA_real_)) %>%
    ungroup() %>%
    fill(grouping_of_info_set) %>%
    mutate(grouping_of_info_set = replace_na(grouping_of_info_set, 1)) %>%
    filter(!left_col_is_blank) %>%
    select(-left_col_is_blank) %>%
    group_by(grouping_of_info_set) %>%
    mutate(row_id_by_set = row_number()) %>%
    select(-row_id) %>%
    gather(key, value, -row_id_by_set, -grouping_of_info_set) %>%
    arrange(grouping_of_info_set, key, row_id_by_set) %>%
    mutate(location = if_else(key == "x1" & row_id_by_set == 1, value, NA_character_),
    time_frame = if_else(key != "x1" & row_id_by_set == 1, value, NA_character_)) %>%
    fill(location, time_frame) %>%
    arrange(grouping_of_info_set, row_id_by_set, key) %>%
    mutate(metric_name = if_else(key == "x1" & row_id_by_set != 1, value, NA_character_)) %>%
    fill(metric_name) %>%
    ungroup() %>%
    filter(!is.na(time_frame), !is.na(metric_name)) %>%
    mutate(time_frame = word(time_frame, -1, sep = " ") %>% as.numeric()) %>%
    mutate(value = as.numeric(value),
    metric_name_has_units = str_detect(metric_name, fixed("(")),
    units_of_value = if_else(metric_name_has_units, str_extract(metric_name, "(?<=\\().+?(?=\\))"), NA_character_),
    value_muiltiplier = case_when(units_of_value == "m" ~ 1E6,
    units_of_value == "k" ~ 1E3,
    TRUE ~ 1),
    new_value = value * value_muiltiplier,
    new_metric_name = if_else(metric_name_has_units, word(metric_name, 1, -2, sep = " "), metric_name)) %>%
    select(location, time_frame, new_metric_name, new_value) %>%
    spread(new_metric_name, new_value) %>%
    janitor::clean_names()





    # if you want to make it look a bit closer to the output they asked for

    yy <-
    xx %>%
    gather(clean_measure_names, true_value, -location, -time_frame)