mlelarge · March 23, 2026 21:27
diff --git a/grade_hw3.py b/grade_hw3.py
 #!/usr/bin/env python3
 """
 Self-grading script for LLM Efficiency homework.
 Parses your test output files and computes your grade.

 Usage:  python self_grade.py

 This script does NOT run any code. It reads the output files
 produced by test_and_submit.sh (in results/ or results_manual/)
 and computes your score.

 Scoring:
  KV Cache:  passed_tests / 21 * 10  (out of 10)
  LoRA:      passed_tests / 33 * 10  (out of 10)
  Total: out of 20
 """

 import re
 import sys
 from pathlib import Path

 KV_TOTAL = 21
 LORA_TOTAL = 33


 def find_latest_results_dir() -> Path | None:
    """Find the latest timestamped results directory."""
    candidates = []
    for folder in ["results", "results_manual"]:
        root = Path(folder)
        if root.is_dir():
            for d in root.iterdir():
                if d.is_dir() and re.match(r"\d{8}_\d{6}", d.name):
                    candidates.append(d)
    if not candidates:
        return None
    return sorted(candidates, key=lambda p: p.name)[-1]


 def parse_pytest(filepath: Path) -> tuple[int, int, list[str]]:
    """Parse pytest output. Returns (passed, failed, list_of_failed_test_names)."""
    if not filepath.is_file():
        return 0, 0, []

    text = filepath.read_text(errors="replace")
    passed = 0
    failed = 0

    # Find the last summary line
    matches = re.findall(r"=+ (.+?) =+\s*$", text, re.MULTILINE)
    if matches:
        summary = matches[-1]
        p = re.search(r"(\d+) passed", summary)
        f = re.search(r"(\d+) failed", summary)
        if p:
            passed = int(p.group(1))
        if f:
            failed = int(f.group(1))

    # Extract failed test names
    failed_tests = re.findall(r"FAILED (tests/\S+)", text)

    return passed, failed, failed_tests


 def parse_demo_accuracies(filepath: Path) -> list[str]:
    """Extract 'final score' lines from demo output."""
    if not filepath.is_file():
        return []
    text = filepath.read_text(errors="replace")
    return re.findall(r".*final score.*", text)


 def main():
    results_dir = find_latest_results_dir()

    print("=" * 48)
    print("  LLM Efficiency - Self Grading")
    if results_dir:
        print(f"  Using results from: {results_dir}/")
    print("=" * 48)
    print()

    if not results_dir:
        print("ERROR: No results directory found.")
        print("Run test_and_submit.sh first to generate your outputs.")
        sys.exit(1)

    # ── KV Cache Tests ────────────────────────────────────────────

    kv_file = results_dir / "test_kv_cache.txt"
    kv_passed, kv_failed, kv_failed_tests = parse_pytest(kv_file)
    kv_score = round(kv_passed / KV_TOTAL * 10, 2)

    print("-- KV Cache " + "-" * 36)
    if kv_file.is_file():
        print(f"  Tests passed: {kv_passed} / {KV_TOTAL}")
        if kv_failed > 0:
            print(f"  Tests failed: {kv_failed}")
            print()
            print("  Failed tests:")
            for t in kv_failed_tests:
                print(f"    {t}")
    else:
        print(f"  No test output found ({kv_file})")
    print(f"  Score: {kv_score} / 10")
    print()

    # ── LoRA Tests ────────────────────────────────────────────────

    lora_file = results_dir / "test_lora.txt"
    lora_passed, lora_failed, lora_failed_tests = parse_pytest(lora_file)
    lora_score = round(lora_passed / LORA_TOTAL * 10, 2)

    print("-- LoRA " + "-" * 40)
    if lora_file.is_file():
        print(f"  Tests passed: {lora_passed} / {LORA_TOTAL}")
        if lora_failed > 0:
            print(f"  Tests failed: {lora_failed}")
            print()
            print("  Failed tests:")
            for t in lora_failed_tests:
                print(f"    {t}")
    else:
        print(f"  No test output found ({lora_file})")
    print(f"  Score: {lora_score} / 10")
    print()

    # ── Demo outputs (informational) ─────────────────────────────

    print("-- Demo Outputs (informational) " + "-" * 16)
    print()

    demo_kv = results_dir / "demo_sort_kv.txt"
    if demo_kv.is_file():
        print("  demo_sort_kv.txt:")
        for line in parse_demo_accuracies(demo_kv):
            print(f"    {line.strip()}")
    else:
        print("  demo_sort_kv.txt: not found")
    print()

    demo_lora = results_dir / "demo_sort_lora.txt"
    if demo_lora.is_file():
        print("  demo_sort_lora.txt:")
        for line in parse_demo_accuracies(demo_lora):
            print(f"    {line.strip()}")
    else:
        print("  demo_sort_lora.txt: not found")
    print()

    benchmark = results_dir / "benchmark_kv.txt"
    if not benchmark.is_file():
        benchmark = Path("kv_cache/benchmark_results.txt")
    if benchmark.is_file():
        print(f"  {benchmark.name}:")
        text = benchmark.read_text(errors="replace")
        for line in text.splitlines():
            if re.search(r"(gpt|Context T|speedup|\d+\.\d+x)", line):
                print(f"    {line.strip()}")
    else:
        print("  benchmark: not found")
    print()

    # ── Total ─────────────────────────────────────────────────────

    total = round(kv_score + lora_score, 2)

    print("=" * 48)
    print(f"  TOTAL SCORE: {total} / 20")
    print()
    print(f"  KV Cache:  {kv_score} / 10  ({kv_passed}/{KV_TOTAL} tests)")
    print(f"  LoRA:      {lora_score} / 10  ({lora_passed}/{LORA_TOTAL} tests)")
    print("=" * 48)


 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Self-grading script for LLM Efficiency homework.
	Parses your test output files and computes your grade.

	Usage: python self_grade.py

	This script does NOT run any code. It reads the output files
	produced by test_and_submit.sh (in results/ or results_manual/)
	and computes your score.

	Scoring:
	KV Cache: passed_tests / 21 * 10 (out of 10)
	LoRA: passed_tests / 33 * 10 (out of 10)
	Total: out of 20
	"""

	import re
	import sys
	from pathlib import Path

	KV_TOTAL = 21
	LORA_TOTAL = 33


	def find_latest_results_dir() -> Path \| None:
	"""Find the latest timestamped results directory."""
	candidates = []
	for folder in ["results", "results_manual"]:
	root = Path(folder)
	if root.is_dir():
	for d in root.iterdir():
	if d.is_dir() and re.match(r"\d{8}_\d{6}", d.name):
	candidates.append(d)
	if not candidates:
	return None
	return sorted(candidates, key=lambda p: p.name)[-1]


	def parse_pytest(filepath: Path) -> tuple[int, int, list[str]]:
	"""Parse pytest output. Returns (passed, failed, list_of_failed_test_names)."""
	if not filepath.is_file():
	return 0, 0, []

	text = filepath.read_text(errors="replace")
	passed = 0
	failed = 0

	# Find the last summary line
	matches = re.findall(r"=+ (.+?) =+\s*$", text, re.MULTILINE)
	if matches:
	summary = matches[-1]
	p = re.search(r"(\d+) passed", summary)
	f = re.search(r"(\d+) failed", summary)
	if p:
	passed = int(p.group(1))
	if f:
	failed = int(f.group(1))

	# Extract failed test names
	failed_tests = re.findall(r"FAILED (tests/\S+)", text)

	return passed, failed, failed_tests


	def parse_demo_accuracies(filepath: Path) -> list[str]:
	"""Extract 'final score' lines from demo output."""
	if not filepath.is_file():
	return []
	text = filepath.read_text(errors="replace")
	return re.findall(r".final score.", text)


	def main():
	results_dir = find_latest_results_dir()

	print("=" * 48)
	print(" LLM Efficiency - Self Grading")
	if results_dir:
	print(f" Using results from: {results_dir}/")
	print("=" * 48)
	print()

	if not results_dir:
	print("ERROR: No results directory found.")
	print("Run test_and_submit.sh first to generate your outputs.")
	sys.exit(1)

	# ── KV Cache Tests ────────────────────────────────────────────

	kv_file = results_dir / "test_kv_cache.txt"
	kv_passed, kv_failed, kv_failed_tests = parse_pytest(kv_file)
	kv_score = round(kv_passed / KV_TOTAL * 10, 2)

	print("-- KV Cache " + "-" * 36)
	if kv_file.is_file():
	print(f" Tests passed: {kv_passed} / {KV_TOTAL}")
	if kv_failed > 0:
	print(f" Tests failed: {kv_failed}")
	print()
	print(" Failed tests:")
	for t in kv_failed_tests:
	print(f" {t}")
	else:
	print(f" No test output found ({kv_file})")
	print(f" Score: {kv_score} / 10")
	print()

	# ── LoRA Tests ────────────────────────────────────────────────

	lora_file = results_dir / "test_lora.txt"
	lora_passed, lora_failed, lora_failed_tests = parse_pytest(lora_file)
	lora_score = round(lora_passed / LORA_TOTAL * 10, 2)

	print("-- LoRA " + "-" * 40)
	if lora_file.is_file():
	print(f" Tests passed: {lora_passed} / {LORA_TOTAL}")
	if lora_failed > 0:
	print(f" Tests failed: {lora_failed}")
	print()
	print(" Failed tests:")
	for t in lora_failed_tests:
	print(f" {t}")
	else:
	print(f" No test output found ({lora_file})")
	print(f" Score: {lora_score} / 10")
	print()

	# ── Demo outputs (informational) ─────────────────────────────

	print("-- Demo Outputs (informational) " + "-" * 16)
	print()

	demo_kv = results_dir / "demo_sort_kv.txt"
	if demo_kv.is_file():
	print(" demo_sort_kv.txt:")
	for line in parse_demo_accuracies(demo_kv):
	print(f" {line.strip()}")
	else:
	print(" demo_sort_kv.txt: not found")
	print()

	demo_lora = results_dir / "demo_sort_lora.txt"
	if demo_lora.is_file():
	print(" demo_sort_lora.txt:")
	for line in parse_demo_accuracies(demo_lora):
	print(f" {line.strip()}")
	else:
	print(" demo_sort_lora.txt: not found")
	print()

	benchmark = results_dir / "benchmark_kv.txt"
	if not benchmark.is_file():
	benchmark = Path("kv_cache/benchmark_results.txt")
	if benchmark.is_file():
	print(f" {benchmark.name}:")
	text = benchmark.read_text(errors="replace")
	for line in text.splitlines():
	if re.search(r"(gpt\|Context T\|speedup\|\d+\.\d+x)", line):
	print(f" {line.strip()}")
	else:
	print(" benchmark: not found")
	print()

	# ── Total ─────────────────────────────────────────────────────

	total = round(kv_score + lora_score, 2)

	print("=" * 48)
	print(f" TOTAL SCORE: {total} / 20")
	print()
	print(f" KV Cache: {kv_score} / 10 ({kv_passed}/{KV_TOTAL} tests)")
	print(f" LoRA: {lora_score} / 10 ({lora_passed}/{LORA_TOTAL} tests)")
	print("=" * 48)


	if __name__ == "__main__":
	main()
No results found