Created
February 12, 2026 00:39
-
-
Save ljw1004/ebd96122641074ea884aa68e277fb7c7 to your computer and use it in GitHub Desktop.
Revisions
-
ljw1004 created this gist
Feb 12, 2026 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,221 @@ #!/usr/bin/env python3 """ PostToolUse hook: periodic LEARNINGS.md reminder. On first invocation for an agent/subagent, outputs the full LEARNINGS.md instructions. Thereafter, outputs a short reminder about every 10 assistant turns. Does nothing if no LEARNINGS.md exists in the project root. This works on a per-agent or per-subagent basis. This is tricky because hooks don't tell us who they fired for. We work around this by looking for which transcript file contains this hook's tool_use_id in its most recent assistant turn. This works because claude gets the assistant response back and writes it in its entirety to the transcript file (as a sequence of assistant lines), and only afterwards does it start processing tool-uses. Therefore by the time it gets to PostToolUseHook then the assistant tool-use for that hook is assuredly the most recent assistant turn. We rely on the disk layout of {session_id}.jsonl for the main agent and {session_id}/subagents/*.jsonl for subagent transcripts. We keep a per-session cache "{session_id}.cache.json" that combines main and subagents, and parse only incremental updates to the session and subagent transcript files, tracking each file's byte length and protecting the cache with flock. This hook is stateless: it determines when to fire by scanning the transcript to identify the assistant turn index for the current tool_use_id. That's a lot of file to read! It tries to be fast by using string-match heuristics instead of json-parsing. """ import fcntl import glob import json import os import sys from typing import TypedDict, cast FIRST_MESSAGE = """\ <system-reminder> There is a file LEARNINGS.md in this project. As you work, consult LEARNINGS.md to build on previous experience. When you encounter a mistake that seems like it could be common, check LEARNINGS.md for relevant notes — and if nothing is written yet, record what you learned. Guidelines: - Record insights about problem constraints, strategies that worked or failed, and lessons learned - Update or remove memories that turn out to be wrong or outdated - Organize memory semantically by topic, not chronologically - Keep it under 200 lines. If it's longer, use additional files `LEARNINGS-{TOPIC}.md` and reference them from the main file. - Use the Write and Edit tools to update the file - Since this file is project-scope and shared with your team via version control, tailor your memories to this project </system-reminder>""" REMINDER_MESSAGE = "<system-reminder>Consult LEARNINGS.md and update it if you learn something new.</system-reminder>" class CacheEntry(TypedDict): byte_offset: int latest_tool_use_ids: list[str] in_assistant: bool # whether the file ended mid-assistant-clump when last read type Cache = dict[str, CacheEntry] # relpath (relative to project dir) -> CacheEntry def update_cache(transcript_dir: str, session_id: str) -> Cache: """Load and incrementally update the per-session cache of transcript tool_use_ids. Returns the updated cache (keyed by rel_path from transcript_dir). The cache (at transcript_dir/{session_id}.cache.json) is flock-protected so parallel hook invocations don't corrupt it. Invariant: we can trust that at the moment we're invoked, all transcript files for main agent and subagent have complete jsonl lines. """ cache_path = os.path.join(transcript_dir, session_id + ".cache.json") os.makedirs(transcript_dir, exist_ok=True) lock_fd = open(cache_path, "a+") fcntl.flock(lock_fd, fcntl.LOCK_EX) try: lock_fd.seek(0) raw = lock_fd.read() if raw.strip(): try: cache = cast(Cache, json.loads(raw)) except json.JSONDecodeError: cache = {} lock_fd.seek(0) lock_fd.truncate() lock_fd.write("{}") lock_fd.flush() else: cache = {} rel_paths = [session_id + ".jsonl"] if os.path.isfile(os.path.join(transcript_dir, session_id + ".jsonl")) else [] for path in glob.glob(os.path.join(transcript_dir, session_id, "subagents", "*.jsonl")): rel_paths.append(os.path.relpath(path, transcript_dir)) for rel_path in rel_paths: entry = cache.get(rel_path) or CacheEntry(byte_offset=0, latest_tool_use_ids=[], in_assistant=False) cache[rel_path] = _update_file_entry(transcript_dir, rel_path, entry) lock_fd.seek(0) lock_fd.truncate() lock_fd.write(json.dumps(cache)) lock_fd.flush() return cache finally: fcntl.flock(lock_fd, fcntl.LOCK_UN) lock_fd.close() def _update_file_entry(directory: str, rel_path: str, entry: CacheEntry) -> CacheEntry: """Read new bytes from a transcript file and update the cache entry.""" full_path = os.path.join(directory, rel_path) try: file_size = os.path.getsize(full_path) except OSError: return entry cached_offset = entry.get("byte_offset", 0) if file_size == cached_offset: return entry if file_size < cached_offset: cached_offset = 0 # file shrunk — reparse try: with open(full_path, "rb") as f: f.seek(cached_offset) new_bytes = f.read() except OSError: return entry # Collect tool_use_ids from the most recent clump of consecutive assistant # entries. Reset when a new clump starts (assistant after non-assistant). latest_ids = entry.get("latest_tool_use_ids", []) saw_non_assistant = not entry.get("in_assistant", False) for line in new_bytes.split(b"\n"): line = line.strip() if not line: continue try: obj = json.loads(line) except json.JSONDecodeError: continue if obj.get("type") == "assistant": if saw_non_assistant: latest_ids = [] # new clump — discard previous saw_non_assistant = False raw_content = obj.get("message", {}).get("content", []) if isinstance(raw_content, list): content = cast(list[dict[str, object]], raw_content) ids = [str(b["id"]) for b in content if b.get("type") == "tool_use" and "id" in b] latest_ids.extend(ids) else: saw_non_assistant = True return {"byte_offset": file_size, "latest_tool_use_ids": latest_ids, "in_assistant": not saw_non_assistant} # --- Transcript analysis --- def every_n_turns(transcript_path: str, self_tool_use_id: str, frequency: int) -> int | None: """This function is for PostToolUseHooks that want to fire every {frequency} assistant turns. A "turn" is a clump of consecutive assistant lines, broken only by user lines (other line types like system/progress don't break a clump). The definition of an assistant line is type="assistant" and message.role="assistant". The definition of a user line is type="user" and message.role="user". But, I don't want to have to json-parse every single line in the (very long) transcript! This function uses string-matching heuristics instead. The function works by scanning the transcript file. We decree that the first tool_use after an N-turn boundary is the one that triggers the reminder. This function returns 0 for the first time in the transcript, >0 for subsequent times, and None otherwise. """ is_in_assistant_turn = False assistant_turn_index = -1 min_turn_of_next_trigger = 0 has_seen_tool_this_turn = False with open(transcript_path) as f: for line in f: prefix = line[:1024] if ',"type":"progress","data":{' in prefix: # to avoid the following string-match tests firing false positives continue elif '"type":"user","message":{"role":"user",' in prefix: is_in_assistant_turn = False elif '"type":"message"' in prefix and '"role":"assistant"' in prefix: if not is_in_assistant_turn: assistant_turn_index += 1 is_in_assistant_turn = True has_seen_tool_this_turn = False if '"type":"tool_use"' in prefix: if assistant_turn_index >= min_turn_of_next_trigger and not has_seen_tool_this_turn: has_seen_tool_this_turn = True if self_tool_use_id in line: # invariant: self_tool_use_id is necessarily in the most recent assistant turn return min_turn_of_next_trigger else: min_turn_of_next_trigger = assistant_turn_index + frequency return None def main() -> None: # PostToolUseHook receive the following on its stdin input_data = json.loads(sys.stdin.read()) tool_use_id: str = input_data["tool_use_id"] session_id: str = input_data["session_id"] transcript_path: str = input_data["transcript_path"] cwd: str = input_data["cwd"] learnings_path = os.path.join(cwd, "LEARNINGS.md") if os.path.isfile(learnings_path): transcript_dir = os.path.dirname(transcript_path) cache = update_cache(transcript_dir, session_id) rel_path = next((p for p, e in cache.items() if tool_use_id in e["latest_tool_use_ids"]), None) if rel_path is not None: i = every_n_turns(os.path.join(transcript_dir, rel_path), tool_use_id, 10) if i is not None: print(json.dumps({"hookSpecificOutput":{"hookEventName":"PostToolUse","additionalContext": FIRST_MESSAGE if i == 0 else REMINDER_MESSAGE}})) if __name__ == "__main__": main()