Skip to content

Instantly share code, notes, and snippets.

@cast42
Last active March 11, 2026 07:38
Show Gist options
  • Select an option

  • Save cast42/ce87e1743e950bdf4bd1c8e2421cf097 to your computer and use it in GitHub Desktop.

Select an option

Save cast42/ce87e1743e950bdf4bd1c8e2421cf097 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pydantic-ai-slim[web]",
# "bm25s",
# "uvicorn",
# "openai",
# ]
# ///
"""
Minimal Pydantic-AI + BM25 demo
Run locally:
export OPENAI_API_KEY=your_key
uv run script.py
Run from gist:
export OPENAI_API_KEY=your_key
uv run https://gist.githubusercontent.com/.../raw
"""
from __future__ import annotations
import os
import re
import sys
from dataclasses import dataclass
import bm25s
from pydantic_ai import Agent
# -------------------------------------------------------------------
# Check API key early
# -------------------------------------------------------------------
# Fail fast with an actionable message when the OpenAI key is missing;
# the Agent defined below cannot make any model calls without it.
if "OPENAI_API_KEY" not in os.environ:
    print(
        "\n❌ OPENAI_API_KEY is not set.\n\n"
        "Set it first:\n\n"
        "export OPENAI_API_KEY=your_key\n\n"
        "Then run the script again.\n"
    )
    # Non-zero exit so shells and CI treat the missing key as a failure.
    sys.exit(1)
# -------------------------------------------------------------------
# Embedded Markdown documents
# -------------------------------------------------------------------
# The entire knowledge base: filename -> markdown text. These three small
# documents are chunked and BM25-indexed at import time (see index.build()).
MARKDOWN_DOCS: dict[str, str] = {
    "bm25.md": """
# BM25
BM25 is a lexical search ranking function.
It ranks documents based on how often query terms appear
while correcting for document length.
BM25 is widely used in search engines and retrieval systems.
""",
    "pydantic-ai.md": """
# Pydantic AI
Pydantic AI is a framework for building AI agents in Python.
Key ideas:
- Agents
- Tools
- Structured outputs
- Dependency injection
It also provides a built-in web chat interface for development.
""",
    "rag.md": """
# Retrieval Augmented Generation
Retrieval Augmented Generation (RAG) combines:
1. information retrieval
2. large language models
First documents are retrieved.
Then the LLM uses those documents to generate an answer.
""",
}
# -------------------------------------------------------------------
# Markdown chunking
# -------------------------------------------------------------------
def chunk_markdown(text: str, max_chars: int = 600) -> list[str]:
    """Split markdown *text* into chunks of at most *max_chars* characters.

    Paragraphs (runs of text separated by blank lines) are greedily packed
    into a chunk until adding the next paragraph would exceed *max_chars*.
    A single paragraph longer than *max_chars* still becomes its own chunk.

    Args:
        text: Markdown source to split.
        max_chars: Soft upper bound on chunk length in characters.

    Returns:
        List of chunk strings; empty list for blank input.
    """
    chunks: list[str] = []
    buffer: list[str] = []
    used = 0
    for raw in re.split(r"\n\s*\n", text):
        para = raw.strip()
        if not para:
            continue
        # Start a new chunk only when the buffer is non-empty AND full;
        # an oversized first paragraph is accepted as-is.
        if buffer and used + len(para) > max_chars:
            chunks.append("\n\n".join(buffer))
            buffer, used = [para], len(para)
        else:
            buffer.append(para)
            used += len(para)
    if buffer:
        chunks.append("\n\n".join(buffer))
    return chunks
# -------------------------------------------------------------------
# BM25 Index
# -------------------------------------------------------------------
@dataclass
class Chunk:
    """One retrievable unit: a piece of text plus its source document name."""
    # Name of the markdown document (key in MARKDOWN_DOCS) the chunk came from.
    doc: str
    # The chunk's markdown content, as produced by chunk_markdown().
    text: str
class MarkdownBM25Index:
    """In-memory BM25 index over the embedded markdown documents.

    Call build() once before search(); build() chunks every document in
    MARKDOWN_DOCS and indexes the chunks with bm25s.
    """

    def __init__(self) -> None:
        # Chunk i in this list corresponds to corpus row i in the retriever,
        # so retrieve() result ids index directly into self.chunks.
        self.chunks: list[Chunk] = []
        self.retriever = bm25s.BM25()

    def build(self) -> None:
        """Chunk all documents in MARKDOWN_DOCS and index them."""
        corpus: list[str] = []
        for name, doc in MARKDOWN_DOCS.items():
            for piece in chunk_markdown(doc):
                self.chunks.append(Chunk(name, piece))
                corpus.append(piece)
        tokens = bm25s.tokenize(corpus, stopwords="en")
        self.retriever.index(tokens)

    def search(self, query: str, k: int = 2) -> list[Chunk]:
        """Return up to *k* best-matching chunks for *query*.

        Args:
            query: Free-text search query.
            k: Maximum number of results; clamped to the corpus size
               because bm25s.retrieve raises when asked for more results
               than there are indexed documents.

        Returns:
            Matching chunks, best first; empty list if the index is empty.
        """
        if not self.chunks:
            # Guard: retrieving from an empty index would crash.
            return []
        k = min(k, len(self.chunks))
        q = bm25s.tokenize(query, stopwords="en")
        results, _scores = self.retriever.retrieve(q, k=k)
        ids = results[0].tolist()
        return [self.chunks[i] for i in ids]
# Build the index eagerly at import time so the web chat can answer its
# first query without a warm-up step.
index = MarkdownBM25Index()
index.build()
# -------------------------------------------------------------------
# Agent
# -------------------------------------------------------------------
# Single agent backed by an OpenAI model. The instructions force a call to
# the search_markdown tool so answers are grounded in retrieved snippets
# rather than in the model's parametric memory.
agent = Agent(
    "openai:gpt-5.2",
    instructions=(
        "You answer questions about a markdown knowledge base. "
        "Always call the search_markdown tool before answering. "
        "Base your answer on the retrieved snippets."
    ),
)
@agent.tool_plain
def search_markdown(query: str) -> str:
    """Search markdown documents using BM25."""
    # NOTE: the docstring above doubles as the tool description sent to the
    # model, so it is kept exactly as authored.
    hits = index.search(query)
    if not hits:
        return "No results found."
    # Number the snippets and join them with a visual separator so the model
    # can cite individual sources.
    snippets = [
        f"[{position}] Source: {hit.doc}\n{hit.text}"
        for position, hit in enumerate(hits, start=1)
    ]
    return "\n\n---\n\n".join(snippets)
# -------------------------------------------------------------------
# Web Chat UI
# -------------------------------------------------------------------
# ASGI app exposing Pydantic AI's built-in development chat UI for the agent.
app = agent.to_web()
# -------------------------------------------------------------------
# Start server
# -------------------------------------------------------------------
if __name__ == "__main__":
    # Import uvicorn lazily so merely importing this module (e.g. by an
    # external ASGI server) does not require starting our own server.
    import uvicorn

    print("\n🚀 Starting BM25 + Pydantic-AI demo")
    print("Open: http://127.0.0.1:8000\n")
    # Serves on uvicorn's default host (127.0.0.1) at port 8000.
    uvicorn.run(app, port=8000)
@cast42
Copy link
Author

cast42 commented Mar 11, 2026

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment