Skip to content

Instantly share code, notes, and snippets.

@mlelarge
Created March 23, 2026 21:05
Show Gist options
  • Select an option

  • Save mlelarge/9cde613b997eea5d683f483a593eebaa to your computer and use it in GitHub Desktop.

Select an option

Save mlelarge/9cde613b997eea5d683f483a593eebaa to your computer and use it in GitHub Desktop.
HW2 controlled-generation auto-grading
#!/usr/bin/env python3
"""Compute your grade from the test_and_submit log file.
Usage:
1. Run ./test_and_submit.sh (this produces a test_and_submit_YYYYMMDD_HHMMSS.log)
2. Run: python grade_student.py
The script automatically finds the latest test_and_submit_*.log in the
current directory and parses the pytest results for each part.
Grading: each part is worth 8 points (total /24).
"""
import glob
import re
import sys
# Expected test counts per part, keyed by the human-readable part name
# used in main()'s report.
EXPECTED = {
    "Part 1 (Structured Generation)": 55,
    "Part 2 (Meta-Generation)": 26,
    "Part 3 (Self-Correction)": 30,  # 3 are Verus integration tests, skipped if Verus is not installed
}
# Each part contributes equally to the total grade (3 parts x 8 = 24 points).
POINTS_PER_PART = 8
# Matches a pytest terminal summary line, e.g.
# "===== 5 passed, 1 skipped in 3.21s =====", capturing the comma-separated
# counts portion ("5 passed, 1 skipped") in group 1.
SUMMARY_RE = re.compile(r"=+\s*(.*?)\s+in\s+[\d.]+s\s*=+", re.IGNORECASE)
def parse_summary_line(line):
    """Parse a pytest summary line and return (passed, failed, skipped, errors).

    A summary line looks like ``===== 5 passed, 1 failed in 0.12s =====``.
    Returns None when *line* is not a pytest summary line; unrecognized
    categories (e.g. warnings) are ignored.
    """
    hit = SUMMARY_RE.search(line)
    if hit is None:
        return None

    counts = {"passed": 0, "failed": 0, "skipped": 0, "errors": 0}
    for chunk in hit.group(1).split(","):
        piece = re.match(r"(\d+)\s+(\w+)", chunk.strip())
        if piece is None:
            continue  # not a "<number> <word>" fragment
        value = int(piece.group(1))
        label = piece.group(2).lower()
        # pytest prints "1 error" but "2 errors"; normalize both.
        if label in ("error", "errors"):
            counts["errors"] = value
        elif label in counts:
            counts[label] = value
    return counts["passed"], counts["failed"], counts["skipped"], counts["errors"]
def parse_log(log_path):
    """Parse a test_and_submit log and return results per part.

    Returns a dict mapping "part1"/"part2"/"part3" to the
    (passed, failed, skipped, errors) tuple from the last pytest summary
    line found in that part's section, or None when the section (or its
    summary) is absent from the log.
    """
    with open(log_path, "r", errors="replace") as handle:
        text = handle.read()

    markers = [
        ("part1", "--- Part 1 tests ---"),
        ("part2", "--- Part 2 tests ---"),
        ("part3", "--- Part 3 tests ---"),
    ]
    results = {key: None for key, _ in markers}

    for idx, (key, marker) in enumerate(markers):
        begin = text.find(marker)
        if begin == -1:
            continue  # this part's tests never ran

        # A section extends to the next part's marker, or to end of file
        # for the last part (or when the next marker is missing).
        end = len(text)
        if idx + 1 < len(markers):
            nxt = text.find(markers[idx + 1][1], begin)
            if nxt != -1:
                end = nxt

        # Keep the *last* summary line in the section, matching reruns.
        for raw_line in text[begin:end].split("\n"):
            summary = parse_summary_line(raw_line)
            if summary is not None:
                results[key] = summary
    return results
def main():
    """Find the newest test_and_submit log, grade each part, and print a report.

    Exits with status 1 when no log file is present. Each part is graded
    out of POINTS_PER_PART; skipped tests (the optional Verus integration
    tests) are excluded from the denominator so they are genuinely not
    penalized, matching the message printed next to the skip count.
    """
    # Find the latest log file. Lexicographic sort is chronological here
    # because the filenames embed a YYYYMMDD_HHMMSS timestamp.
    logs = sorted(glob.glob("test_and_submit_*.log"))
    if not logs:
        print("ERROR: No test_and_submit_*.log file found.")
        print("Run ./test_and_submit.sh first, then re-run this script.")
        sys.exit(1)
    log_path = logs[-1]
    print(f"Reading: {log_path}\n")
    results = parse_log(log_path)
    part_names = {
        "part1": "Part 1 (Structured Generation)",
        "part2": "Part 2 (Meta-Generation)",
        "part3": "Part 3 (Self-Correction)",
    }
    total_grade = 0.0
    max_grade = POINTS_PER_PART * 3
    print("=" * 60)
    print(" YOUR GRADE")
    print("=" * 60)
    for part_key in ["part1", "part2", "part3"]:
        name = part_names[part_key]
        expected = EXPECTED[name]
        if results[part_key] is None:
            print(f"\n {name}")
            print(f" Result: NOT FOUND (tests did not run or crashed)")
            print(f" Grade: 0 / {POINTS_PER_PART}")
            continue
        passed, failed, skipped, errors = results[part_key]
        # BUG FIX: the original computed passed/expected, which silently
        # penalized skipped Verus tests despite the "not penalized" message
        # below. Exclude skips from the denominator; max() guards against a
        # zero denominator and min() caps the grade at full points.
        denominator = max(expected - skipped, 1)
        part_grade = round(min(passed, denominator) / denominator * POINTS_PER_PART, 2)
        total_grade += part_grade
        print(f"\n {name}")
        print(f" Tests: {passed} / {expected} passed")
        if failed:
            print(f" Failed: {failed}")
        if skipped:
            print(f" Skipped: {skipped} (Verus integration tests — not penalized)")
        if errors:
            print(f" Errors: {errors}")
        print(f" Grade: {part_grade:.2f} / {POINTS_PER_PART}")
    total_grade = round(total_grade, 2)
    print()
    print("=" * 60)
    print(f" TOTAL: {total_grade:.2f} / {max_grade}")
    print("=" * 60)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment