Skip to content

Instantly share code, notes, and snippets.

@ar-puuk
Created January 18, 2026 06:52
Show Gist options
  • Select an option

  • Save ar-puuk/8fcf769d4d8582e5fc099f347406385f to your computer and use it in GitHub Desktop.

Select an option

Save ar-puuk/8fcf769d4d8582e5fc099f347406385f to your computer and use it in GitHub Desktop.
# TAZ-building workflow: aggregate 2020 Census blocks along Utah's Wasatch
# Front into Traffic Analysis Zones using SKATER regionalization.
library(tidyverse)  # dplyr/tidyr pipelines and the |> workflow helpers
library(tidycensus) # ACS / Decennial Census data access
library(mapgl)      # MapLibre-based interactive mapping
library(spopt)      # spatial optimization (skater regionalization)
library(sf)         # simple features geometry handling
library(spdep)      # spatial neighbor graphs (poly2nb) for island detection
options(tigris_use_cache = TRUE) # cache downloaded TIGER shapefiles locally
project_crs <- "EPSG:26912" # NAD83 / UTM zone 12N (planar CRS covering Utah)
# ----------------------------------------------------------------------------
# 0. Set up Geography
# ----------------------------------------------------------------------------
state_abb <- "UT"
county_names <- c("Box Elder", "Weber", "Davis", "Salt Lake", "Utah")

# Convert the state abbreviation to its two-digit FIPS code.
# NOTE(review): validate_state()/validate_county() are unexported tidycensus
# internals (accessed via `:::`); they work today but may change without
# notice between package versions.
state_fips <- tidycensus:::validate_state(state = state_abb)

# Convert each county name to its three-digit FIPS code.
county_fips <- vapply(
  county_names,
  function(x) tidycensus:::validate_county(state = state_abb, county = x),
  character(1)
)

# Full five-digit county identifiers: two-digit state + three-digit county.
fips_codes <- paste0(state_fips, county_fips)
fips_codes
# ----------------------------------------------------------------------------
# 1. FETCH DATA AT THREE LEVELS
# ----------------------------------------------------------------------------
# --- LEVEL A: TRACT (Education) ---
# Educational attainment (B15003) is only published at the tract level.
print("Fetching Tract Data (Education)...")
df_tract <- get_acs(
  geography = "tract",
  variables = c(
    pop_25 = "B15003_001", # population 25 years and over (denominator)
    bach = "B15003_022",   # bachelor's degree
    mast = "B15003_023",   # master's degree
    prof = "B15003_024",   # professional school degree
    phd = "B15003_025"     # doctorate
  ),
  state = state_fips,
  county = county_fips,
  year = 2022,
  output = "wide",
  geometry = FALSE
) |>
  transmute(
    TRACT_ID = GEOID,
    # In wide output tidycensus suffixes estimates with 'E' (and MOEs with 'M')
    pct_degree = (bachE + mastE + profE + phdE) / pop_25E
  )
# --- LEVEL B: BLOCK GROUP (Income & Cars) ---
# Median income (B19013) and vehicle availability (B25044) are available
# at the block-group level.
print("Fetching Block Group Data (Income/Cars)...")
df_bg <- get_acs(
  geography = "block group",
  variables = c(
    med_inc = "B19013_001",    # median household income
    tot_hh = "B25044_001",     # total households (denominator)
    own_no_car = "B25044_003", # owner-occupied households with no vehicle
    rent_no_car = "B25044_010" # renter-occupied households with no vehicle
  ),
  state = state_fips,
  county = county_fips,
  year = 2022,
  output = "wide",
  geometry = FALSE
) |>
  transmute(
    BG_ID = GEOID,
    bg_med_income = med_incE,
    # Share of households (owner + renter) without a vehicle
    bg_pct_no_car = (own_no_carE + rent_no_carE) / tot_hhE
  )
# --- LEVEL C: BLOCK (Geometry & HH Size) ---
# The 2020 Decennial Census (DHC summary file) supplies the atomic spatial
# units — block geometry plus population and housing-unit counts.
print("Fetching Block Data (Geometry & HH Size)...")
df_block <- get_decennial(
  geography = "block",
  variables = c(
    pop = "P1_001N",   # total population
    hunits = "H1_001N" # total housing units
  ),
  state = state_fips,
  county = county_fips,
  year = 2020,
  output = "wide",
  sumfile = "dhc",
  geometry = TRUE
) |>
  # Reproject to the planar analysis CRS so contiguity checks are reliable
  st_transform(project_crs)
# ----------------------------------------------------------------------------
# 2. HIERARCHICAL JOIN
# ----------------------------------------------------------------------------
# Census GEOIDs nest hierarchically:
#   Block (15 digits) -> Block Group (first 12) -> Tract (first 11)
# so higher-level attributes can be joined down to blocks via substring keys.
taz_input <- df_block |>
  mutate(
    BG_ID = substr(GEOID, 1, 12),
    TRACT_ID = substr(GEOID, 1, 11),
    # Block-level household size (Pop / Housing Units); empty blocks get 0
    # instead of NaN/Inf from division by zero.
    hh_size = ifelse(hunits > 0, pop / hunits, 0)
  ) |>
  left_join(df_bg, by = "BG_ID") |>       # attach block-group attributes
  left_join(df_tract, by = "TRACT_ID") |> # attach tract attributes
  # Keep only populated residential blocks with complete covariates
  filter(
    hunits > 0,
    !is.na(bg_med_income), # drops blocks in BGs with suppressed/missing data
    !is.na(pct_degree)     # drops blocks in tracts with suppressed/missing data
  )
print(paste("Data ready. Total Residential Blocks:", nrow(taz_input)))
# ----------------------------------------------------------------------------
# 3. CLEANING SPATIAL ISLANDS
# ----------------------------------------------------------------------------
# Filtering out empty blocks (roads/parks) punches holes in the coverage and
# can leave isolated polygons. The clustering step needs a connected graph,
# so drop every block with zero queen-contiguity neighbors.
neighbor_counts <- spdep::card(spdep::poly2nb(taz_input, queen = TRUE))
is_island <- neighbor_counts == 0
taz_input_clean <- taz_input[!is_island, ]
print(paste("Removed", sum(is_island), "islands."))
# ----------------------------------------------------------------------------
# 4. REGIONALIZATION (SKATER ALGORITHM)
# ----------------------------------------------------------------------------
# SKATER is tree-based and scales far better than Max-P for 10k+ polygons.
num_tazs <- 3300 # target number of TAZs for the study area

print("Scaling variables...")
# Standardize every clustering variable into a new *_sc column.
# as.numeric() strips the matrix attributes that scale() returns so the
# columns remain plain vectors.
taz_input_ready <- taz_input_clean |>
  mutate(across(
    c(hh_size, bg_med_income, bg_pct_no_car, pct_degree),
    \(x) as.numeric(scale(x)),
    .names = "{.col}_sc"
  ))

print("Running SKATER clustering... (This may take 1-2 minutes)")
taz_result <- spopt::skater(
  taz_input_ready,
  # Cluster on the standardized (_sc) columns only
  attrs = c("hh_size_sc", "bg_med_income_sc", "bg_pct_no_car_sc", "pct_degree_sc"),
  n_regions = num_tazs,
  bridge_islands = TRUE
)
print("Regionalization Complete.")
# ----------------------------------------------------------------------------
# 5. VISUALIZATION
# ----------------------------------------------------------------------------
# A. skater() keeps the input columns on its output, so the original
#    un-scaled values (e.g. 'bg_med_income') are still attached.
# B. Per-TAZ summary statistics (.region = cluster id assigned by skater)
taz_stats <- taz_result |>
  st_drop_geometry() |>
  group_by(.region) |>
  summarise(
    taz_pop = sum(pop),
    taz_hh = sum(hunits),
    # Report the original (un-scaled) dollar amounts and shares
    avg_income = mean(bg_med_income, na.rm = TRUE),
    pct_degree = mean(pct_degree, na.rm = TRUE),
    blocks_count = n()
  )
print(head(taz_stats))
# C. Interactive map of the resulting TAZs, colored by region id
mapgl::maplibre_view(taz_result, column = ".region", legend = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment