Created
January 18, 2026 06:52
-
-
Save ar-puuk/8fcf769d4d8582e5fc099f347406385f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Package setup -----------------------------------------------------------
library(tidyverse)   # dplyr/tidyr verbs and |> pipelines used throughout
library(tidycensus)  # get_acs() / get_decennial() Census API wrappers
library(mapgl)       # maplibre_view() interactive map (step 5)
library(spopt)       # skater() regionalization (step 4)
library(sf)          # st_transform() / st_drop_geometry()
library(spdep)       # poly2nb() / card() contiguity checks (step 3)

# Cache downloaded TIGER/Line shapefiles between runs.
options(tigris_use_cache = TRUE)

# Projected CRS used for all spatial operations.
# NOTE(review): original comment said "NAD83 / Utah North", but EPSG:26912 is
# NAD83 / UTM zone 12N (NAD83 / Utah North is EPSG:32142) — confirm intent.
project_crs <- "EPSG:26912"
# ----------------------------------------------------------------------------
# 0. Set up Geography
# ----------------------------------------------------------------------------
# Resolve the study area (five Wasatch Front counties) to FIPS codes.
state_abb <- "UT"
county_names <- c("Box Elder", "Weber", "Davis", "Salt Lake", "Utah")

# Convert the state abbreviation to its two-digit FIPS code.
# NOTE(review): `tidycensus:::` reaches into unexported internals; these
# helpers are not part of the public API and may break on package updates.
state_fips <- tidycensus:::validate_state(state = state_abb)

# Convert each county name to its three-digit FIPS code.
county_fips <- vapply(
  county_names,
  function(x) tidycensus:::validate_county(state = state_abb, county = x),
  character(1)
)

# Full five-digit county FIPS codes (state + county), e.g. "49035".
fips_codes <- paste0(state_fips, county_fips)
fips_codes
# ----------------------------------------------------------------------------
# 1. FETCH DATA AT THREE LEVELS
# ----------------------------------------------------------------------------

# --- LEVEL A: TRACT (Education) ---
# Educational attainment (table B15003) is only published down to tract level.
print("Fetching Tract Data (Education)...")
df_tract <- get_acs(
  geography = "tract",
  variables = c(
    pop_25 = "B15003_001",  # population 25 years and over (denominator)
    bach   = "B15003_022",  # bachelor's degree
    mast   = "B15003_023",  # master's degree
    prof   = "B15003_024",  # professional school degree
    phd    = "B15003_025"   # doctorate
  ),
  state = state_fips,
  county = county_fips,
  year = 2022,
  output = "wide",
  geometry = FALSE
) |>
  transmute(
    TRACT_ID = GEOID,
    # tidycensus appends an 'E' suffix to estimates in wide output.
    # Guard zero-population tracts, which would otherwise produce NaN.
    pct_degree = if_else(
      pop_25E > 0,
      (bachE + mastE + profE + phdE) / pop_25E,
      NA_real_
    )
  )
# --- LEVEL B: BLOCK GROUP (Income & Cars) ---
# Median household income (B19013) and vehicle availability (B25044) are
# available at the block-group level.
print("Fetching Block Group Data (Income/Cars)...")
df_bg <- get_acs(
  geography = "block group",
  variables = c(
    med_inc     = "B19013_001",  # median household income (dollars)
    tot_hh      = "B25044_001",  # total occupied housing units (denominator)
    own_no_car  = "B25044_003",  # owner-occupied, no vehicle available
    rent_no_car = "B25044_010"   # renter-occupied, no vehicle available
  ),
  state = state_fips,
  county = county_fips,
  year = 2022,
  output = "wide",
  geometry = FALSE
) |>
  transmute(
    BG_ID = GEOID,
    bg_med_income = med_incE,
    # Guard block groups with zero households, which would otherwise give NaN.
    bg_pct_no_car = if_else(
      tot_hhE > 0,
      (own_no_carE + rent_no_carE) / tot_hhE,
      NA_real_
    )
  )
# --- LEVEL C: BLOCK (Geometry & HH Size) ---
# The atomic spatial units come from the 2020 Decennial Census (DHC summary
# file); these carry the polygon geometry that will be dissolved into TAZs.
print("Fetching Block Data (Geometry & HH Size)...")
df_block <- get_decennial(
  geography = "block",
  variables = c(
    pop    = "P1_001N",  # total population
    hunits = "H1_001N"   # total housing units
  ),
  state = state_fips,
  county = county_fips,
  year = 2020,
  output = "wide",
  sumfile = "dhc",
  geometry = TRUE
) |>
  # Project into the analysis CRS before any contiguity/clustering work.
  st_transform(crs = project_crs)
# ----------------------------------------------------------------------------
# 2. HIERARCHICAL JOIN
# ----------------------------------------------------------------------------
# Census GEOIDs nest: Block (15 digits) -> Block Group (first 12 digits)
# -> Tract (first 11 digits). Derive the parent IDs by substring and join the
# coarser-level attributes down onto every block.
taz_input <- df_block |>
  mutate(
    BG_ID = substr(GEOID, 1, 12),
    TRACT_ID = substr(GEOID, 1, 11),
    # Block-level average household size (population / housing units).
    # if_else (vs base ifelse) is type-stable and keeps a clean double;
    # empty blocks get 0 instead of a division-by-zero result.
    hh_size = if_else(hunits > 0, pop / hunits, 0)
  ) |>
  # Each block belongs to exactly one BG and one tract; declaring the
  # relationship makes dplyr error on accidental key duplication (fan-out).
  left_join(df_bg, by = "BG_ID", relationship = "many-to-one") |>
  left_join(df_tract, by = "TRACT_ID", relationship = "many-to-one") |>
  # Keep only populated residential blocks with usable covariates.
  filter(
    hunits > 0,
    !is.na(bg_med_income),  # removes blocks in BGs with suppressed data
    !is.na(pct_degree)
  )

print(paste("Data ready. Total Residential Blocks:", nrow(taz_input)))
# ----------------------------------------------------------------------------
# 3. CLEANING SPATIAL ISLANDS
# ----------------------------------------------------------------------------
# Filtering out unpopulated blocks (roads, parks, water) can leave some
# remaining blocks with no queen-contiguous neighbor. The clustering step
# needs a connected graph, so isolated blocks are dropped first.
neighbors <- spdep::poly2nb(taz_input, queen = TRUE)

# card() returns each polygon's neighbor count; zero neighbors == island.
island_flags <- spdep::card(neighbors) == 0

taz_input_clean <- dplyr::filter(taz_input, !island_flags)
print(paste("Removed", sum(island_flags), "islands."))
# ----------------------------------------------------------------------------
# 4. REGIONALIZATION (SKATER ALGORITHM)
# ----------------------------------------------------------------------------
# SKATER prunes a minimum spanning tree over the contiguity graph, which
# scales much better than Max-P for 10k+ polygons.
# Target: approximately 3,300 TAZs across the study area.
num_tazs <- 3300

print("Scaling variables...")
# Standardize the clustering attributes so no single variable dominates the
# dissimilarity metric. as.numeric() strips the matrix attributes that
# scale() attaches, leaving clean double vectors. across() keeps the four
# transformations in one place instead of four copy-pasted lines.
cluster_vars <- c("hh_size", "bg_med_income", "bg_pct_no_car", "pct_degree")
taz_input_ready <- taz_input_clean |>
  mutate(across(
    all_of(cluster_vars),
    \(x) as.numeric(scale(x)),
    .names = "{.col}_sc"
  ))

print("Running SKATER clustering... (This may take 1-2 minutes)")
taz_result <- spopt::skater(
  taz_input_ready,
  # Cluster on the standardized (_sc) columns only.
  attrs = paste0(cluster_vars, "_sc"),
  n_regions = num_tazs,
  bridge_islands = TRUE
)
print("Regionalization Complete.")
# ----------------------------------------------------------------------------
# 5. VISUALIZATION
# ----------------------------------------------------------------------------
# skater() retains the input columns and adds a .region assignment, so the
# original un-scaled attributes (e.g. bg_med_income) are still available.

# Summary statistics per TAZ.
# NOTE(review): avg_income is an unweighted mean of block-group median
# incomes across member blocks; weighting by households may be preferable.
taz_stats <- taz_result |>
  st_drop_geometry() |>
  group_by(.region) |>
  summarise(
    taz_pop = sum(pop),                              # total 2020 population
    taz_hh = sum(hunits),                            # total housing units
    avg_income = mean(bg_med_income, na.rm = TRUE),  # original dollar amount
    pct_degree = mean(pct_degree, na.rm = TRUE),     # original proportion
    blocks_count = n()                               # blocks merged into TAZ
  )
print(head(taz_stats))

# Interactive map colored by TAZ assignment, legend disabled.
# (Corrected comment: the original claimed borders were dropped via lwd = 0,
# but no such argument is passed to maplibre_view.)
mapgl::maplibre_view(taz_result, column = ".region", legend = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment