Skip to content

Instantly share code, notes, and snippets.

@mlelarge
Created March 23, 2026 21:27
Show Gist options
  • Select an option

  • Save mlelarge/693c20d21f6c4362e07967f536734685 to your computer and use it in GitHub Desktop.

Select an option

Save mlelarge/693c20d21f6c4362e07967f536734685 to your computer and use it in GitHub Desktop.
HW3 llm-efficiency auto-grading
#!/usr/bin/env python3
"""
Self-grading script for LLM Efficiency homework.
Parses your test output files and computes your grade.
Usage: python self_grade.py
This script does NOT run any code. It reads the output files
produced by test_and_submit.sh (in results/ or results_manual/)
and computes your score.
Scoring:
KV Cache: passed_tests / 21 * 10 (out of 10)
LoRA: passed_tests / 33 * 10 (out of 10)
Total: out of 20
"""
import re
import sys
from pathlib import Path
# Total number of tests in each pytest suite; used as the scoring
# denominators (each suite is worth 10 points — see module docstring).
KV_TOTAL = 21
LORA_TOTAL = 33
def find_latest_results_dir() -> Path | None:
"""Find the latest timestamped results directory."""
candidates = []
for folder in ["results", "results_manual"]:
root = Path(folder)
if root.is_dir():
for d in root.iterdir():
if d.is_dir() and re.match(r"\d{8}_\d{6}", d.name):
candidates.append(d)
if not candidates:
return None
return sorted(candidates, key=lambda p: p.name)[-1]
def parse_pytest(filepath: Path) -> tuple[int, int, list[str]]:
    """Parse pytest output. Returns (passed, failed, list_of_failed_test_names)."""
    if not filepath.is_file():
        return 0, 0, []

    output = filepath.read_text(errors="replace")

    # The last "===== N passed, M failed ... =====" banner is the final summary.
    n_passed = n_failed = 0
    banners = re.findall(r"=+ (.+?) =+\s*$", output, re.MULTILINE)
    if banners:
        summary = banners[-1]
        passed_match = re.search(r"(\d+) passed", summary)
        failed_match = re.search(r"(\d+) failed", summary)
        n_passed = int(passed_match.group(1)) if passed_match else 0
        n_failed = int(failed_match.group(1)) if failed_match else 0

    # Individual "FAILED tests/..." lines name the failing tests.
    return n_passed, n_failed, re.findall(r"FAILED (tests/\S+)", output)
def parse_demo_accuracies(filepath: Path) -> list[str]:
    """Extract 'final score' lines from demo output."""
    if not filepath.is_file():
        return []
    contents = filepath.read_text(errors="replace")
    # Collect every line (sans newline) that mentions "final score".
    return [m.group(0) for m in re.finditer(r".*final score.*", contents)]
def _report_suite(title: str, result_file: Path, total_tests: int) -> tuple[int, float]:
    """Print one test-suite section of the report.

    Parses *result_file* as pytest output, prints the pass/fail breakdown
    under a 48-character banner, and returns (passed_count, score_out_of_10).
    A missing file counts as zero passed tests.
    """
    passed, failed, failed_tests = parse_pytest(result_file)
    score = round(passed / total_tests * 10, 2)
    # Pad the banner so "-- <title> ----" is always 48 characters wide.
    print(f"-- {title} " + "-" * (48 - len(title) - 4))
    if result_file.is_file():
        print(f" Tests passed: {passed} / {total_tests}")
        if failed > 0:
            print(f" Tests failed: {failed}")
            print()
            print(" Failed tests:")
            for name in failed_tests:
                print(f" {name}")
    else:
        print(f" No test output found ({result_file})")
    print(f" Score: {score} / 10")
    print()
    return passed, score


def _report_demo(demo_file: Path) -> None:
    """Print the 'final score' lines from one demo output file (informational)."""
    if demo_file.is_file():
        print(f" {demo_file.name}:")
        for line in parse_demo_accuracies(demo_file):
            print(f" {line.strip()}")
    else:
        print(f" {demo_file.name}: not found")
    print()


def main():
    """Locate the latest results directory, grade both suites, print the report.

    Exits with status 1 if no results directory is found.
    """
    results_dir = find_latest_results_dir()
    print("=" * 48)
    print(" LLM Efficiency - Self Grading")
    if results_dir:
        print(f" Using results from: {results_dir}/")
    print("=" * 48)
    print()
    if not results_dir:
        print("ERROR: No results directory found.")
        print("Run test_and_submit.sh first to generate your outputs.")
        sys.exit(1)

    # ── Graded test suites ────────────────────────────────────────
    kv_passed, kv_score = _report_suite(
        "KV Cache", results_dir / "test_kv_cache.txt", KV_TOTAL
    )
    lora_passed, lora_score = _report_suite(
        "LoRA", results_dir / "test_lora.txt", LORA_TOTAL
    )

    # ── Demo outputs (informational) ─────────────────────────────
    print("-- Demo Outputs (informational) " + "-" * 16)
    print()
    _report_demo(results_dir / "demo_sort_kv.txt")
    _report_demo(results_dir / "demo_sort_lora.txt")

    # Benchmark output: prefer the timestamped copy, fall back to the
    # in-tree file written by the benchmark itself.
    benchmark = results_dir / "benchmark_kv.txt"
    if not benchmark.is_file():
        benchmark = Path("kv_cache/benchmark_results.txt")
    if benchmark.is_file():
        print(f" {benchmark.name}:")
        for line in benchmark.read_text(errors="replace").splitlines():
            # Only echo the interesting lines (model names, timings, speedups).
            if re.search(r"(gpt|Context T|speedup|\d+\.\d+x)", line):
                print(f" {line.strip()}")
    else:
        print(" benchmark: not found")
    print()

    # ── Total ─────────────────────────────────────────────────────
    total = round(kv_score + lora_score, 2)
    print("=" * 48)
    print(f" TOTAL SCORE: {total} / 20")
    print()
    print(f" KV Cache: {kv_score} / 10 ({kv_passed}/{KV_TOTAL} tests)")
    print(f" LoRA: {lora_score} / 10 ({lora_passed}/{LORA_TOTAL} tests)")
    print("=" * 48)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment