ljw1004 · February 12, 2026 00:39 · Feb 12, 2026
diff --git a/learning-hook.py b/learning-hook.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+"""
+PostToolUse hook: periodic LEARNINGS.md reminder.
+
+On first invocation for an agent/subagent, outputs the full LEARNINGS.md
+instructions. Thereafter, outputs a short reminder about every 10 assistant turns.
+Does nothing if no LEARNINGS.md exists in the project root.
+
+This works on a per-agent or per-subagent basis. This is tricky because
+hooks don't tell us who they fired for. We work around this by looking for
+which transcript file contains this hook's tool_use_id in its most recent
+assistant turn. This works because claude gets the assistant response back
+and writes it in its entirety to the transcript file (as a sequence of assistant
+lines), and only afterwards does it start processing tool-uses. Therefore,
+by the time the PostToolUse hook runs, the assistant tool-use that fired the
+hook is assuredly in the most recent assistant turn. We rely on the disk
+layout of {session_id}.jsonl for the main agent and {session_id}/subagents/*.jsonl
+for subagent transcripts. We keep a per-session cache "{session_id}.cache.json"
+that combines main and subagents, and parse only incremental updates to the session
+and subagent transcript files, tracking each file's byte length and protecting
+the cache with flock.
+
+The hook keeps no trigger state of its own (the cache only maps tool_use_ids to
+transcript files): it decides when to fire by rescanning the whole transcript
+to find the assistant turn index of the current tool_use_id. That's a lot of
+file to read, so that scan uses string-match heuristics instead of json-parsing.
+"""
+
+import fcntl
+import glob
+import json
+import os
+import sys
+from typing import TypedDict, cast
+
+# Full instructions, emitted the first time the hook triggers in a transcript.
+FIRST_MESSAGE = """\
+<system-reminder>
+There is a file LEARNINGS.md in this project.
+
+As you work, consult LEARNINGS.md to build on previous experience. When you
+encounter a mistake that seems like it could be common, check LEARNINGS.md
+for relevant notes — and if nothing is written yet, record what you learned.
+Guidelines:
+- Record insights about problem constraints, strategies that worked or failed,
+and lessons learned
+- Update or remove memories that turn out to be wrong or outdated
+- Organize memory semantically by topic, not chronologically
+- Keep it under 200 lines. If it's longer, use additional files
+`LEARNINGS-{TOPIC}.md` and reference them from the main file.
+- Use the Write and Edit tools to update the file
+- Since this file is project-scope and shared with your team via version
+control, tailor your memories to this project
+</system-reminder>"""
+# Short nudge, emitted on every later trigger.
+REMINDER_MESSAGE = "<system-reminder>Consult LEARNINGS.md and update it if you learn something new.</system-reminder>"
+
+
+class CacheEntry(TypedDict):  # cached scan state for one transcript file
+    byte_offset: int  # how far into the file (in bytes) the last scan got
+    latest_tool_use_ids: list[str]  # tool_use ids in the most recent assistant clump seen
+    in_assistant: bool  # whether the file ended mid-assistant-clump when last read
+type Cache = dict[str, CacheEntry]  # relpath (relative to transcript dir) -> CacheEntry
+
+
+def update_cache(transcript_dir: str, session_id: str) -> Cache:
+    """Load, incrementally refresh and persist the per-session tool_use_id cache.
+
+    The cache file is transcript_dir/{session_id}.cache.json. It is held under an
+    exclusive flock for the whole read-update-write cycle so that parallel hook
+    invocations don't corrupt it. Unparseable or non-object cache contents are
+    treated as an empty cache and replaced by the final write.
+
+    Invariant: we can trust that at the moment we're invoked, all transcript
+    files for main agent and subagent have complete jsonl lines.
+
+    Returns the updated cache, keyed by path relative to transcript_dir.
+    """
+    cache_path = os.path.join(transcript_dir, session_id + ".cache.json")
+    os.makedirs(transcript_dir, exist_ok=True)
+    # "a+" creates the file if missing without truncating it before we hold the lock;
+    # `with` closes the descriptor even if flock itself raises.
+    with open(cache_path, "a+") as lock_fd:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX)
+        try:
+            lock_fd.seek(0)
+            try:
+                loaded = json.loads(lock_fd.read() or "{}")
+            except json.JSONDecodeError:
+                loaded = None
+            # Valid JSON of the wrong shape (e.g. [] or null) is as useless as garbage.
+            cache = cast(Cache, loaded) if isinstance(loaded, dict) else {}
+
+            main_rel = session_id + ".jsonl"
+            rel_paths = [main_rel] if os.path.isfile(os.path.join(transcript_dir, main_rel)) else []
+            for path in glob.glob(os.path.join(transcript_dir, session_id, "subagents", "*.jsonl")):
+                rel_paths.append(os.path.relpath(path, transcript_dir))
+            for rel_path in rel_paths:
+                entry = cache.get(rel_path) or CacheEntry(byte_offset=0, latest_tool_use_ids=[], in_assistant=False)
+                cache[rel_path] = _update_file_entry(transcript_dir, rel_path, entry)
+
+            # In append mode writes land at end-of-file, which is offset 0 after truncating.
+            lock_fd.seek(0)
+            lock_fd.truncate()
+            lock_fd.write(json.dumps(cache))
+            lock_fd.flush()
+            return cache
+        finally:
+            fcntl.flock(lock_fd, fcntl.LOCK_UN)
+
+
+def _update_file_entry(directory: str, rel_path: str, entry: CacheEntry) -> CacheEntry:
+    """Scan bytes appended to a transcript since `entry`; return the updated entry.
+
+    Collects tool_use ids from the latest clump of consecutive assistant lines. `entry` is not
+    mutated, and is returned as-is when the file is unreadable or unchanged in size. A shrunken
+    file is rescanned from scratch; a trailing unparseable fragment is retried on the next call.
+    """
+    start = entry.get("byte_offset", 0)
+    latest_ids = list(entry.get("latest_tool_use_ids", []))  # copy: don't mutate the caller's list
+    in_clump = bool(entry.get("in_assistant", False))
+    full_path = os.path.join(directory, rel_path)
+    try:
+        file_size = os.path.getsize(full_path)
+        if file_size == start:
+            return entry
+        if file_size < start:
+            start, latest_ids, in_clump = 0, [], False  # file shrunk — drop stale state, reparse
+        with open(full_path, "rb") as f:
+            f.seek(start)
+            new_bytes = f.read()
+    except OSError:
+        return entry
+    end = start + len(new_bytes)  # what was actually read, even if the file grew meanwhile
+    pieces = new_bytes.split(b"\n")
+    for i, raw in enumerate(pieces):
+        try:
+            obj = json.loads(raw) if raw.strip() else None
+        except ValueError:  # JSONDecodeError, or UnicodeDecodeError from undecodable bytes
+            end -= len(raw) if i == len(pieces) - 1 else 0  # unterminated last line: retry next call
+            continue
+        if not isinstance(obj, dict):
+            continue  # blank line or non-object JSON: ignore, like an unparseable line
+        if obj.get("type") != "assistant":
+            in_clump = False
+            continue
+        if not in_clump:
+            latest_ids, in_clump = [], True  # new clump — discard the previous one's ids
+        message = obj.get("message")
+        content = message.get("content") if isinstance(message, dict) else None
+        if isinstance(content, list):
+            latest_ids.extend(str(b["id"]) for b in content if isinstance(b, dict) and b.get("type") == "tool_use" and "id" in b)
+    return {"byte_offset": end, "latest_tool_use_ids": latest_ids, "in_assistant": in_clump}
+
+
+# --- Transcript analysis ---
+
+def every_n_turns(transcript_path: str, self_tool_use_id: str, frequency: int) -> int | None:
+    """Decide whether this PostToolUse hook invocation is the one that should fire.
+
+    A "turn" is a clump of consecutive assistant lines, broken only by user lines
+    (other line types like system/progress don't break a clump). We decree that
+    the first tool_use at or after an N-turn boundary triggers the reminder, and the
+    next boundary is `frequency` turns after the turn that triggered.
+
+    To avoid json-parsing a (very long) transcript, lines are classified by
+    substring checks on their first 1024 characters: a user line contains
+    '"type":"user","message":{"role":"user",', an assistant line contains both
+    '"type":"message"' and '"role":"assistant"', and progress lines are skipped.
+
+    Returns 0 if self_tool_use_id is the first trigger in the transcript, the
+    (positive) boundary turn index if it is a later trigger, and None otherwise.
+    """
+    turn_index = -1  # index of the assistant turn currently being scanned
+    next_trigger_turn = 0  # earliest turn whose first tool_use may trigger
+    in_turn = False
+    tool_seen = False
+    # Decode as UTF-8 (assumed transcript encoding — TODO confirm), not the locale default.
+    with open(transcript_path, encoding="utf-8", errors="replace") as f:
+        for line in f:
+            prefix = line[:1024]
+            if ',"type":"progress","data":{' in prefix:
+                continue  # progress payloads could false-positive the matches below
+            if '"type":"user","message":{"role":"user",' in prefix:
+                in_turn = False
+                continue
+            if not ('"type":"message"' in prefix and '"role":"assistant"' in prefix):
+                continue
+            if not in_turn:
+                turn_index, in_turn, tool_seen = turn_index + 1, True, False
+            if tool_seen or turn_index < next_trigger_turn or '"type":"tool_use"' not in prefix:
+                continue
+            tool_seen = True
+            if self_tool_use_id in line:
+                # invariant: self_tool_use_id is necessarily in the most recent assistant turn
+                return next_trigger_turn
+            next_trigger_turn = turn_index + frequency
+    return None
+
+
+def main() -> None:
+    """Hook entry point: read the PostToolUse payload on stdin, print a reminder if due."""
+    payload = json.loads(sys.stdin.read())
+    tool_use_id: str = payload["tool_use_id"]
+    session_id: str = payload["session_id"]
+    transcript_dir = os.path.dirname(payload["transcript_path"])
+    if not os.path.isfile(os.path.join(payload["cwd"], "LEARNINGS.md")):
+        return  # no LEARNINGS.md in cwd: stay silent
+    # Identify the (sub)agent transcript whose latest assistant clump holds our tool use.
+    owner = next((p for p, e in update_cache(transcript_dir, session_id).items() if tool_use_id in e["latest_tool_use_ids"]), None)
+    if owner is None:
+        return
+    turn = every_n_turns(os.path.join(transcript_dir, owner), tool_use_id, 10)
+    if turn is None:
+        return
+    context = FIRST_MESSAGE if turn == 0 else REMINDER_MESSAGE
+    print(json.dumps({"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": context}}))
+
+
+if __name__ == "__main__":  # run the hook only when executed as a script, not on import
+    main()
No results found