HW2 controlled-generation auto-grading
#!/usr/bin/env python3
"""Compute your grade from the test_and_submit log file.

Usage:
  1. Run ./test_and_submit.sh (this produces a test_and_submit_YYYYMMDD_HHMMSS.log)
  2. Run: python grade_student.py

The script automatically finds the latest test_and_submit_*.log in the
current directory and parses the pytest results for each part.

Grading: each part is worth 8 points (total /24).
"""
import glob
import re
import sys

# Expected test counts per part
EXPECTED = {
    "Part 1 (Structured Generation)": 55,
    "Part 2 (Meta-Generation)": 26,
    "Part 3 (Self-Correction)": 30,  # 3 are Verus integration tests, skipped if Verus is not installed
}

POINTS_PER_PART = 8

SUMMARY_RE = re.compile(r"=+\s*(.*?)\s+in\s+[\d.]+s\s*=+", re.IGNORECASE)


def parse_summary_line(line):
    """Parse a pytest summary line and return (passed, failed, skipped, errors)."""
    passed = failed = skipped = errors = 0
    m = SUMMARY_RE.search(line)
    if not m:
        return None
    summary = m.group(1)
    for part in summary.split(","):
        part = part.strip()
        num_match = re.match(r"(\d+)\s+(\w+)", part)
        if num_match:
            count = int(num_match.group(1))
            kind = num_match.group(2).lower()
            if kind == "passed":
                passed = count
            elif kind == "failed":
                failed = count
            elif kind == "skipped":
                skipped = count
            elif kind in ("error", "errors"):
                errors = count
    return passed, failed, skipped, errors
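
# Example: parse_summary_line("===== 52 passed, 3 failed, 1 skipped in 12.34s =====")
# returns (52, 3, 1, 0); a line without a pytest summary returns None.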


def parse_log(log_path):
    """Parse a test_and_submit log and return results per part."""
    with open(log_path, "r", errors="replace") as f:
        content = f.read()
    results = {"part1": None, "part2": None, "part3": None}
    part_markers = [
        ("part1", "--- Part 1 tests ---"),
        ("part2", "--- Part 2 tests ---"),
        ("part3", "--- Part 3 tests ---"),
    ]
    for i, (part_key, marker) in enumerate(part_markers):
        start = content.find(marker)
        if start == -1:
            continue
        if i + 1 < len(part_markers):
            next_marker = part_markers[i + 1][1]
            end = content.find(next_marker, start)
            if end == -1:
                end = len(content)
        else:
            end = len(content)
        section = content[start:end]
        for line in section.split("\n"):
            parsed = parse_summary_line(line)
            if parsed is not None:
                results[part_key] = parsed
    return results
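
# parse_log expects the log to contain one section per part, e.g.:
#   --- Part 1 tests ---
#   ...pytest output...
#   ===== 55 passed in 10.21s =====
# If a section contains several summary lines, the last one found is kept.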


def main():
    # Find the latest log file
    logs = sorted(glob.glob("test_and_submit_*.log"))
    if not logs:
        print("ERROR: No test_and_submit_*.log file found.")
        print("Run ./test_and_submit.sh first, then re-run this script.")
        sys.exit(1)
    log_path = logs[-1]
    print(f"Reading: {log_path}\n")
    results = parse_log(log_path)
    part_names = {
        "part1": "Part 1 (Structured Generation)",
        "part2": "Part 2 (Meta-Generation)",
        "part3": "Part 3 (Self-Correction)",
    }
    total_grade = 0.0
    max_grade = POINTS_PER_PART * 3
    print("=" * 60)
    print(" YOUR GRADE")
    print("=" * 60)
    for part_key in ["part1", "part2", "part3"]:
        name = part_names[part_key]
        expected = EXPECTED[name]
        if results[part_key] is None:
            print(f"\n {name}")
            print(" Result: NOT FOUND (tests did not run or crashed)")
            print(f" Grade: 0 / {POINTS_PER_PART}")
            continue
        passed, failed, skipped, errors = results[part_key]
        # Skipped tests (Verus integration) are removed from the denominator so they are not penalized.
        part_grade = round(passed / max(expected - skipped, 1) * POINTS_PER_PART, 2)
        total_grade += part_grade
        print(f"\n {name}")
        print(f" Tests: {passed} / {expected} passed")
        if failed:
            print(f" Failed: {failed}")
        if skipped:
            print(f" Skipped: {skipped} (Verus integration tests — not penalized)")
        if errors:
            print(f" Errors: {errors}")
        print(f" Grade: {part_grade:.2f} / {POINTS_PER_PART}")
    total_grade = round(total_grade, 2)
    print()
    print("=" * 60)
    print(f" TOTAL: {total_grade:.2f} / {max_grade}")
    print("=" * 60)


if __name__ == "__main__":
    main()
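
To sanity-check the parsing logic without running the full test suite, the two helpers can be exercised directly. This is a minimal sketch, assuming the script above is saved as grade_student.py; the summary line and sample log below are made up for illustration:

from grade_student import parse_summary_line, parse_log

# A typical pytest summary line maps to (passed, failed, skipped, errors).
print(parse_summary_line("===== 52 passed, 3 failed, 1 skipped in 12.34s ====="))
# -> (52, 3, 1, 0)

# A synthetic log containing the three part markers the grader looks for.
sample = (
    "--- Part 1 tests ---\n===== 55 passed in 10.0s =====\n"
    "--- Part 2 tests ---\n===== 24 passed, 2 failed in 5.0s =====\n"
    "--- Part 3 tests ---\n===== 27 passed, 3 skipped in 8.0s =====\n"
)
with open("sample.log", "w") as f:
    f.write(sample)
print(parse_log("sample.log"))
# -> {'part1': (55, 0, 0, 0), 'part2': (24, 2, 0, 0), 'part3': (27, 0, 3, 0)}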