SermetPekin · March 31, 2026 07:21 · SermetPekin · Mar 30, 2026 · SermetPekin · Mar 31, 2026
diff --git a/nfl-example.R b/nfl-example.R
 # testing out schedule balancing on nfl 4-game starts
 # https://statmodeling.stat.columbia.edu/2026/03/30/these-are-the-three-ways-of-attacking-a-statistical-problem-illustrated-with-the-nfl-example/#comment-2412639

 invlogit <- function(x) { 1 / (1 + exp(-x)) }

 # Andrew's split normal for ability (bottom-heavy)
 rnorm_split <- function(n, mu, sigma_neg, sigma_pos) {
  z <- rnorm(n, 0, 1)
  ifelse(z < 0, mu + sigma_neg * z, mu + sigma_pos * z)
 }

 # generic match simulator for a group of teams
 sim_week_matchups <- function(team_pool, history, abilities, b) {
  teams <- sample(team_pool)
  pairs <- list()
  
  while(length(teams) > 1) {
    t1 <- teams[1]
    paired <- FALSE
    
    # try to find an opponent we haven't played
    for (i in 2:length(teams)) {
      t2 <- teams[i]
      if (!(t2 %in% history[[t1]])) {
        pairs[[length(pairs) + 1]] <- c(t1, t2)
        history[[t1]] <- c(history[[t1]], t2)
        history[[t2]] <- c(history[[t2]], t1)
        teams <- teams[-c(1, i)]
        paired <- TRUE
        break
      }
    }
    
    # fallback if stuck
    if (!paired) {
      t2 <- teams[2]
      pairs[[length(pairs) + 1]] <- c(t1, t2)
      teams <- teams[-c(1, 2)]
    }
  }
  
  return(list(pairs = pairs, history = history))
 }

 # main simulation wrapper
 run_nfl_sim <- function(n_seasons = 10000, n_teams = 32, mode = "random", swaps = 0) {
  all_records <- vector("character", n_seasons * n_teams)
  idx <- 1
  b <- 0.2 # home field
  
  for (s in 1:n_seasons) {
    abilities <- rnorm_split(n_teams, 0, 1.0, 0.7)
    records <- rep("", n_teams)
    
    if (mode == "random") {
      # everyone in one big pool
      tiers <- list(1:n_teams) 
    } else {
      # split into 4 tiers based on underlying ability
      ordered <- order(abilities, decreasing = TRUE)
      tiers <- list(ordered[1:8], ordered[9:16], ordered[17:24], ordered[25:32])
      
      # add noise to the preseason tiers
      if (swaps > 0) {
        swapped_out <- c()
        for(i in 1:4) {
          drop_idx <- sample(1:8, swaps)
          swapped_out <- c(swapped_out, tiers[[i]][drop_idx])
          tiers[[i]] <- tiers[[i]][-drop_idx]
        }
        
        shuffled <- sample(swapped_out)
        for(i in 1:4) {
          start <- (i - 1) * swaps + 1
          tiers[[i]] <- c(tiers[[i]], shuffled[start:(start + swaps - 1)])
        }
      }
    }
    
    # play 4 weeks
    for(tier in tiers) {
      history <- lapply(1:n_teams, function(x) integer(0))
      
      for(week in 1:4) {
        res <- sim_week_matchups(tier, history, abilities, b)
        history <- res$history
        
        for (p in res$pairs) {
          t1 <- p[1]; t2 <- p[2]
          p_win <- invlogit(abilities[t1] - abilities[t2] + b * sample(c(-1, 1), 1))
          
          if (runif(1) < p_win) {
            records[t1] <- paste0(records[t1], "W")
            records[t2] <- paste0(records[t2], "L")
          } else {
            records[t1] <- paste0(records[t1], "L")
            records[t2] <- paste0(records[t2], "W")
          }
        }
      }
    }
    
    end_idx <- idx + n_teams - 1
    all_records[idx:end_idx] <- records
    idx <- end_idx + 1
  }
  
  # tabulate and format
  res <- as.data.frame(table(all_records))
  names(res) <- c("seq", "count")
  res$pct <- round(100 * res$count / sum(res$count), 2)
  return(res[order(-res$count), ])
 }

 # --- run simulations ---
 set.seed(2026)

 cat("\n1. random schedule (baseline)\n")
 random_res <- run_nfl_sim(mode = "random")
 print(head(random_res, 4))
 print(tail(random_res, 4))

 cat("\n2. perfectly balanced by tier (good vs good, bad vs bad)\n")
 balanced_res <- run_nfl_sim(mode = "tiered", swaps = 0)
 print(head(balanced_res, 4))
 print(tail(balanced_res, 4))

 cat("\n3. imperfectly balanced (2 teams swapped into wrong tier)\n")
 noisy_res <- run_nfl_sim(mode = "tiered", swaps = 2)
 print(head(noisy_res, 4))
 print(tail(noisy_res, 4))

 cat("\n4. imperfectly balanced (1 team swapped into wrong tier)\n")
 noisy_res <- run_nfl_sim(mode = "tiered", swaps = 1)
 print(head(noisy_res, 4))
 print(tail(noisy_res, 4))
	# testing out schedule balancing on nfl 4-game starts
	# https://statmodeling.stat.columbia.edu/2026/03/30/these-are-the-three-ways-of-attacking-a-statistical-problem-illustrated-with-the-nfl-example/#comment-2412639

	invlogit <- function(x) { 1 / (1 + exp(-x)) }

	# Andrew's split normal for ability (bottom-heavy)
	rnorm_split <- function(n, mu, sigma_neg, sigma_pos) {
	z <- rnorm(n, 0, 1)
	ifelse(z < 0, mu + sigma_neg * z, mu + sigma_pos * z)
	}

	# generic match simulator for a group of teams
	sim_week_matchups <- function(team_pool, history, abilities, b) {
	teams <- sample(team_pool)
	pairs <- list()

	while(length(teams) > 1) {
	t1 <- teams[1]
	paired <- FALSE

	# try to find an opponent we haven't played
	for (i in 2:length(teams)) {
	t2 <- teams[i]
	if (!(t2 %in% history[[t1]])) {
	pairs[[length(pairs) + 1]] <- c(t1, t2)
	history[[t1]] <- c(history[[t1]], t2)
	history[[t2]] <- c(history[[t2]], t1)
	teams <- teams[-c(1, i)]
	paired <- TRUE
	break
	}
	}

	# fallback if stuck
	if (!paired) {
	t2 <- teams[2]
	pairs[[length(pairs) + 1]] <- c(t1, t2)
	teams <- teams[-c(1, 2)]
	}
	}

	return(list(pairs = pairs, history = history))
	}

	# main simulation wrapper
	run_nfl_sim <- function(n_seasons = 10000, n_teams = 32, mode = "random", swaps = 0) {
	all_records <- vector("character", n_seasons * n_teams)
	idx <- 1
	b <- 0.2 # home field

	for (s in 1:n_seasons) {
	abilities <- rnorm_split(n_teams, 0, 1.0, 0.7)
	records <- rep("", n_teams)

	if (mode == "random") {
	# everyone in one big pool
	tiers <- list(1:n_teams)
	} else {
	# split into 4 tiers based on underlying ability
	ordered <- order(abilities, decreasing = TRUE)
	tiers <- list(ordered[1:8], ordered[9:16], ordered[17:24], ordered[25:32])

	# add noise to the preseason tiers
	if (swaps > 0) {
	swapped_out <- c()
	for(i in 1:4) {
	drop_idx <- sample(1:8, swaps)
	swapped_out <- c(swapped_out, tiers[[i]][drop_idx])
	tiers[[i]] <- tiers[[i]][-drop_idx]
	}

	shuffled <- sample(swapped_out)
	for(i in 1:4) {
	start <- (i - 1) * swaps + 1
	tiers[[i]] <- c(tiers[[i]], shuffled[start:(start + swaps - 1)])
	}
	}
	}

	# play 4 weeks
	for(tier in tiers) {
	history <- lapply(1:n_teams, function(x) integer(0))

	for(week in 1:4) {
	res <- sim_week_matchups(tier, history, abilities, b)
	history <- res$history

	for (p in res$pairs) {
	t1 <- p[1]; t2 <- p[2]
	p_win <- invlogit(abilities[t1] - abilities[t2] + b * sample(c(-1, 1), 1))

	if (runif(1) < p_win) {
	records[t1] <- paste0(records[t1], "W")
	records[t2] <- paste0(records[t2], "L")
	} else {
	records[t1] <- paste0(records[t1], "L")
	records[t2] <- paste0(records[t2], "W")
	}
	}
	}
	}

	end_idx <- idx + n_teams - 1
	all_records[idx:end_idx] <- records
	idx <- end_idx + 1
	}

	# tabulate and format
	res <- as.data.frame(table(all_records))
	names(res) <- c("seq", "count")
	res$pct <- round(100 * res$count / sum(res$count), 2)
	return(res[order(-res$count), ])
	}

	# --- run simulations ---
	set.seed(2026)

	cat("\n1. random schedule (baseline)\n")
	random_res <- run_nfl_sim(mode = "random")
	print(head(random_res, 4))
	print(tail(random_res, 4))

	cat("\n2. perfectly balanced by tier (good vs good, bad vs bad)\n")
	balanced_res <- run_nfl_sim(mode = "tiered", swaps = 0)
	print(head(balanced_res, 4))
	print(tail(balanced_res, 4))

	cat("\n3. imperfectly balanced (2 teams swapped into wrong tier)\n")
	noisy_res <- run_nfl_sim(mode = "tiered", swaps = 2)
	print(head(noisy_res, 4))
	print(tail(noisy_res, 4))

	cat("\n4. imperfectly balanced (1 team swapped into wrong tier)\n")
	noisy_res <- run_nfl_sim(mode = "tiered", swaps = 1)
	print(head(noisy_res, 4))
	print(tail(noisy_res, 4))
No results found