@finbarrtimbers
finbarrtimbers / diagram.tex
Created April 3, 2026 03:12
Continuous batching, tikz diagram
% ===================== SUBFIGURE A: Static batching =====================
\begin{subfigure}{\linewidth}
\centering
\begin{tikzpicture}
% Grid (4 rows x 8 cols)
\gridlines{0}{0}{4}{8}
% ----- PROMPTS: different lengths per row -----
% Row index: 1 = top
% Row 1: length 2
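The snippet above sets up subfigure A, the static-batching grid. As a point of reference (not part of the gist), a minimal Python sketch contrasting the two policies the full figure illustrates: static batching holds every slot until the longest sequence in the batch finishes, while continuous batching refills a slot as soon as its sequence completes.

# Illustrative comparison, not from the gist: decode steps needed under each policy.
def static_batching_steps(lengths, slots):
    # Each group of `slots` sequences runs until its longest member finishes.
    steps = 0
    for i in range(0, len(lengths), slots):
        steps += max(lengths[i:i + slots])
    return steps

def continuous_batching_steps(lengths, slots):
    # A freed slot immediately picks up the next waiting sequence.
    finish_times = [0] * slots
    for length in lengths:
        s = min(range(slots), key=lambda i: finish_times[i])
        finish_times[s] += length
    return max(finish_times)

lengths = [2, 5, 3, 8, 1, 6, 4, 7]  # made-up per-sequence decode lengths
print(static_batching_steps(lengths, slots=4))      # 15 decode steps
print(continuous_batching_steps(lengths, slots=4))  # 12 decode steps for this order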
beaker-session() {
  # Defaults
  local cluster="ai2/hammond"
  # Parse flags
  while (( $# > 0 )); do
    case "$1" in
      -h|--help)
        echo "beaker-session [-h|--help] [-c|--cluster CLUSTER|--cluster=CLUSTER]"
        return 0
@finbarrtimbers
finbarrtimbers / count_flops.py
Last active July 27, 2025 19:21
count model flops
def calculate_model_usage_per_token(model_path: str) -> int:
    """
    Calculate actual FLOPs per token for a transformer model using torch FlopCounterMode.

    Args:
        model_path: Path to the actual model for precise measurement

    Returns:
        FLOPs per token as integer.
    """
============================================================
BENCHMARK SUMMARY
============================================================
Model: hamishivi/qwen2_5_openthoughts2
Total batches: 5
Batch size: 256
Unique prompts per batch: 32
Num rollouts: 8
Max tokens: 32000
------------------------------------------------------------
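The parameters above are internally consistent: 32 unique prompts × 8 rollouts = 256 sequences per batch, and a quick check of the run's overall scale:

# Sanity check of the benchmark configuration above.
unique_prompts, num_rollouts, batches, max_tokens = 32, 8, 5, 32_000
batch_size = unique_prompts * num_rollouts              # 256, matches the summary
total_generations = batches * batch_size                # 1,280 sequences overall
max_generated_tokens = total_generations * max_tokens   # at most 40,960,000 tokens
print(batch_size, total_generations, max_generated_tokens)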
@finbarrtimbers
finbarrtimbers / decoder.tex
Created July 17, 2025 14:31
Tikz diagram of a decoder block
% Transformer Decoder Layer (minimal, cross‑attn removed)
% TikZ diagram mimicking the iconic style from "Attention Is All You Need".
% Residual arrows fully inside the layer box, bifurcating around blocks.
% Compile with: pdflatex decoder_layer.tex
\documentclass[tikz,border=10pt]{standalone}
\usepackage{tikz}
\usetikzlibrary{arrows.meta,positioning,decorations.pathreplacing,calc,fit}
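The preamble above is for a TikZ drawing of the layer; for orientation, a minimal PyTorch sketch (not part of the gist) of the block the diagram depicts: masked self-attention and a position-wise feed-forward network, each wrapped in a post-norm residual connection, with cross-attention omitted.

import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    # Self-attention + FFN with post-norm residuals; cross-attention removed.
    def __init__(self, d_model=512, n_heads=8, d_ff=2048):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, batch_first=True)
        self.ffn = nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x, attn_mask=None):
        # Residual branch around masked self-attention.
        attn_out, _ = self.self_attn(x, x, x, attn_mask=attn_mask, need_weights=False)
        x = self.norm1(x + attn_out)
        # Residual branch around the feed-forward block.
        return self.norm2(x + self.ffn(x))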
#!/bin/bash
# Runs the benchmark on gantry. Takes one argument which is the response length.
# Usage: ./gantry_run_benchmark.sh [response_length]
# E.g. $ ./gantry_run_benchmark.sh 64000
set -e
# Set default value for response_length
response_length=64000
# If first argument exists and is a number, use it as response_length
import torch

class SimpleAdam(torch.optim.Optimizer):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        super().__init__(params, defaults={'lr': lr})
        self.state = {}
        self.t = 0
        self.betas = betas
        self.eps = eps
        for group in self.param_groups:
            for p in group['params']:
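The preview stops during state initialization. A minimal sketch, not the gist's actual continuation, of the standard bias-corrected Adam update those per-parameter buffers would feed; the function name and state layout below are illustrative.

import torch

def adam_update_sketch(p, grad, state, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
    # One bias-corrected Adam step for a single tensor (illustrative only).
    beta1, beta2 = betas
    state['t'] += 1
    state['m'].mul_(beta1).add_(grad, alpha=1 - beta1)            # first moment
    state['v'].mul_(beta2).addcmul_(grad, grad, value=1 - beta2)  # second moment
    m_hat = state['m'] / (1 - beta1 ** state['t'])                # bias correction
    v_hat = state['v'] / (1 - beta2 ** state['t'])
    p.add_(m_hat / (v_hat.sqrt() + eps), alpha=-lr)               # parameter update

# Usage with freshly zeroed moment buffers:
p = torch.randn(4)
state = {'t': 0, 'm': torch.zeros_like(p), 'v': torch.zeros_like(p)}
adam_update_sketch(p, grad=torch.randn(4), state=state)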
@finbarrtimbers
finbarrtimbers / gist:921a1b83ef50dd482be6647b35fe0246
Last active January 26, 2024 21:39
Mistral weights, with shapes
[
("embed_tokens.weight", torch.Size([32000, 4096])),
("layers.0.self_attn.q_proj.weight", torch.Size([4096, 4096])),
("layers.0.self_attn.k_proj.weight", torch.Size([1024, 4096])),
("layers.0.self_attn.v_proj.weight", torch.Size([1024, 4096])),
("layers.0.self_attn.o_proj.weight", torch.Size([4096, 4096])),
("layers.0.mlp.gate_proj.weight", torch.Size([14336, 4096])),
("layers.0.mlp.up_proj.weight", torch.Size([14336, 4096])),
("layers.0.mlp.down_proj.weight", torch.Size([4096, 14336])),
("layers.0.input_layernorm.weight", torch.Size([4096])),
@finbarrtimbers
finbarrtimbers / prepare.py
Created March 29, 2023 16:04
Script to calculate tokens in bookcorpus
# This is a modified version of https://github.com/karpathy/nanoGPT/blob/master/data/openwebtext/prepare.py.
import os
import requests
import tiktoken
import numpy as np
import tarfile
import glob
import shutil
# download the bookcorpus dataset. Note: this needs to be concatenated.
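The preview ends at the download step. A minimal sketch of the token counting itself, assuming the BookCorpus archives have already been extracted and concatenated into plain-text files; the file location is hypothetical, and the GPT-2 encoding matches what nanoGPT's prepare.py uses.

import glob
import tiktoken

enc = tiktoken.get_encoding("gpt2")
total_tokens = 0
for path in glob.glob("bookcorpus/*.txt"):  # hypothetical location of the concatenated text
    with open(path, "r", encoding="utf-8") as f:
        total_tokens += len(enc.encode_ordinary(f.read()))
print(f"total tokens: {total_tokens:,}")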