Skip to content

Instantly share code, notes, and snippets.

@SermetPekin
Last active March 31, 2026 07:21
Show Gist options
  • Select an option

  • Save SermetPekin/ec5aaa7289469606e62e06d1c712f190 to your computer and use it in GitHub Desktop.

Select an option

Save SermetPekin/ec5aaa7289469606e62e06d1c712f190 to your computer and use it in GitHub Desktop.
simulation for Andrew Gelman's blog titled "These are the three ways of attacking a statistical problem"
# testing out schedule balancing on nfl 4-game starts
# https://statmodeling.stat.columbia.edu/2026/03/30/these-are-the-three-ways-of-attacking-a-statistical-problem-illustrated-with-the-nfl-example/#comment-2412639
invlogit <- function(x) { 1 / (1 + exp(-x)) }
# Andrew's split normal for ability (bottom-heavy)
rnorm_split <- function(n, mu, sigma_neg, sigma_pos) {
z <- rnorm(n, 0, 1)
ifelse(z < 0, mu + sigma_neg * z, mu + sigma_pos * z)
}
# generic match simulator for a group of teams
sim_week_matchups <- function(team_pool, history, abilities, b) {
teams <- sample(team_pool)
pairs <- list()
while(length(teams) > 1) {
t1 <- teams[1]
paired <- FALSE
# try to find an opponent we haven't played
for (i in 2:length(teams)) {
t2 <- teams[i]
if (!(t2 %in% history[[t1]])) {
pairs[[length(pairs) + 1]] <- c(t1, t2)
history[[t1]] <- c(history[[t1]], t2)
history[[t2]] <- c(history[[t2]], t1)
teams <- teams[-c(1, i)]
paired <- TRUE
break
}
}
# fallback if stuck
if (!paired) {
t2 <- teams[2]
pairs[[length(pairs) + 1]] <- c(t1, t2)
teams <- teams[-c(1, 2)]
}
}
return(list(pairs = pairs, history = history))
}
# main simulation wrapper
run_nfl_sim <- function(n_seasons = 10000, n_teams = 32, mode = "random", swaps = 0) {
all_records <- vector("character", n_seasons * n_teams)
idx <- 1
b <- 0.2 # home field
for (s in 1:n_seasons) {
abilities <- rnorm_split(n_teams, 0, 1.0, 0.7)
records <- rep("", n_teams)
if (mode == "random") {
# everyone in one big pool
tiers <- list(1:n_teams)
} else {
# split into 4 tiers based on underlying ability
ordered <- order(abilities, decreasing = TRUE)
tiers <- list(ordered[1:8], ordered[9:16], ordered[17:24], ordered[25:32])
# add noise to the preseason tiers
if (swaps > 0) {
swapped_out <- c()
for(i in 1:4) {
drop_idx <- sample(1:8, swaps)
swapped_out <- c(swapped_out, tiers[[i]][drop_idx])
tiers[[i]] <- tiers[[i]][-drop_idx]
}
shuffled <- sample(swapped_out)
for(i in 1:4) {
start <- (i - 1) * swaps + 1
tiers[[i]] <- c(tiers[[i]], shuffled[start:(start + swaps - 1)])
}
}
}
# play 4 weeks
for(tier in tiers) {
history <- lapply(1:n_teams, function(x) integer(0))
for(week in 1:4) {
res <- sim_week_matchups(tier, history, abilities, b)
history <- res$history
for (p in res$pairs) {
t1 <- p[1]; t2 <- p[2]
p_win <- invlogit(abilities[t1] - abilities[t2] + b * sample(c(-1, 1), 1))
if (runif(1) < p_win) {
records[t1] <- paste0(records[t1], "W")
records[t2] <- paste0(records[t2], "L")
} else {
records[t1] <- paste0(records[t1], "L")
records[t2] <- paste0(records[t2], "W")
}
}
}
}
end_idx <- idx + n_teams - 1
all_records[idx:end_idx] <- records
idx <- end_idx + 1
}
# tabulate and format
res <- as.data.frame(table(all_records))
names(res) <- c("seq", "count")
res$pct <- round(100 * res$count / sum(res$count), 2)
return(res[order(-res$count), ])
}
# --- run simulations ---
set.seed(2026)
cat("\n1. random schedule (baseline)\n")
random_res <- run_nfl_sim(mode = "random")
print(head(random_res, 4))
print(tail(random_res, 4))
cat("\n2. perfectly balanced by tier (good vs good, bad vs bad)\n")
balanced_res <- run_nfl_sim(mode = "tiered", swaps = 0)
print(head(balanced_res, 4))
print(tail(balanced_res, 4))
cat("\n3. imperfectly balanced (2 teams swapped into wrong tier)\n")
noisy_res <- run_nfl_sim(mode = "tiered", swaps = 2)
print(head(noisy_res, 4))
print(tail(noisy_res, 4))
cat("\n4. imperfectly balanced (1 team swapped into wrong tier)\n")
noisy_res <- run_nfl_sim(mode = "tiered", swaps = 1)
print(head(noisy_res, 4))
print(tail(noisy_res, 4))
@SermetPekin
Copy link
Copy Markdown
Author

  1. random schedule (baseline)
    seq count   pct
1  LLLL 34152 10.67
16 WWWW 32709 10.22
8  LWWW 19920  6.22
14 WWLW 19860  6.21
    seq count  pct
7  LWWL 16216 5.07
11 WLWL 16170 5.05
6  LWLW 16132 5.04
10 WLLW 16067 5.02
  1. perfectly balanced by tier (good vs good, bad vs bad)
    seq count  pct
1  LLLL 22961 7.18
16 WWWW 22797 7.12
15 WWWL 20292 6.34
3  LLWL 20203 6.31
    seq count  pct
7  LWWL 19043 5.95
13 WWLL 19028 5.95
4  LLWW 19023 5.94
11 WLWL 18849 5.89
  1. imperfectly balanced (2 teams swapped into wrong tier)
    seq count  pct
1  LLLL 28533 8.92
16 WWWW 27674 8.65
14 WWLW 19955 6.24
8  LWWW 19836 6.20
    seq count  pct
11 WLWL 17898 5.59
7  LWWL 17893 5.59
10 WLLW 17802 5.56
6  LWLW 17730 5.54
  1. imperfectly balanced (1 team swapped into wrong tier)
    seq count  pct
1  LLLL 25816 8.07
16 WWWW 25459 7.96
15 WWWL 19928 6.23
8  LWWW 19913 6.22
    seq count  pct
13 WWLL 18450 5.77
7  LWWL 18393 5.75
10 WLLW 18362 5.74
11 WLWL 18314 5.72

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment