Skip to content

Instantly share code, notes, and snippets.

@cast42
Last active March 11, 2026 07:38
Show Gist options
  • Select an option

  • Save cast42/ce87e1743e950bdf4bd1c8e2421cf097 to your computer and use it in GitHub Desktop.

Select an option

Save cast42/ce87e1743e950bdf4bd1c8e2421cf097 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pydantic-ai-slim[web]",
# "bm25s",
# "uvicorn",
# "openai",
# ]
# ///
"""
Minimal Pydantic-AI + BM25 demo
Run locally:
export OPENAI_API_KEY=your_key
uv run script.py
Run from gist:
export OPENAI_API_KEY=your_key
uv run https://gist.githubusercontent.com/.../raw
"""
from __future__ import annotations
import os
import re
import sys
from dataclasses import dataclass
import bm25s
from pydantic_ai import Agent
# -------------------------------------------------------------------
# Check API key early
# -------------------------------------------------------------------
# Fail fast with an actionable message when the OpenAI key is missing;
# the Agent defined below cannot make any model calls without it.
if "OPENAI_API_KEY" not in os.environ:
    print(
        "\n❌ OPENAI_API_KEY is not set.\n\n"
        "Set it first:\n\n"
        "export OPENAI_API_KEY=your_key\n\n"
        "Then run the script again.\n"
    )
    # Non-zero exit so shells and CI treat the missing key as a failure.
    sys.exit(1)
# -------------------------------------------------------------------
# Embedded Markdown documents
# -------------------------------------------------------------------
# The entire knowledge base: filename -> markdown text. These three small
# documents are chunked and BM25-indexed at import time (see index.build()).
MARKDOWN_DOCS: dict[str, str] = {
    "bm25.md": """
# BM25
BM25 is a lexical search ranking function.
It ranks documents based on how often query terms appear
while correcting for document length.
BM25 is widely used in search engines and retrieval systems.
""",
    "pydantic-ai.md": """
# Pydantic AI
Pydantic AI is a framework for building AI agents in Python.
Key ideas:
- Agents
- Tools
- Structured outputs
- Dependency injection
It also provides a built-in web chat interface for development.
""",
    "rag.md": """
# Retrieval Augmented Generation
Retrieval Augmented Generation (RAG) combines:
1. information retrieval
2. large language models
First documents are retrieved.
Then the LLM uses those documents to generate an answer.
""",
}
# -------------------------------------------------------------------
# Markdown chunking
# -------------------------------------------------------------------
def chunk_markdown(text: str, max_chars: int = 600) -> list[str]:
    """Split markdown *text* into chunks of at most *max_chars* characters.

    Paragraphs (runs of text separated by blank lines) are greedily packed
    into a chunk until adding the next paragraph would exceed *max_chars*.
    A single paragraph longer than *max_chars* still becomes its own chunk.

    Args:
        text: Markdown source to split.
        max_chars: Soft upper bound on chunk length in characters.

    Returns:
        List of chunk strings; empty list for blank input.
    """
    chunks: list[str] = []
    buffer: list[str] = []
    used = 0
    for raw in re.split(r"\n\s*\n", text):
        para = raw.strip()
        if not para:
            continue
        # Start a new chunk only when the buffer is non-empty AND full;
        # an oversized first paragraph is accepted as-is.
        if buffer and used + len(para) > max_chars:
            chunks.append("\n\n".join(buffer))
            buffer, used = [para], len(para)
        else:
            buffer.append(para)
            used += len(para)
    if buffer:
        chunks.append("\n\n".join(buffer))
    return chunks
# -------------------------------------------------------------------
# BM25 Index
# -------------------------------------------------------------------
@dataclass
class Chunk:
    """One retrievable unit: a piece of text plus its source document name."""
    # Name of the markdown document (key in MARKDOWN_DOCS) the chunk came from.
    doc: str
    # The chunk's markdown content, as produced by chunk_markdown().
    text: str
class MarkdownBM25Index:
    """In-memory BM25 index over the embedded markdown documents.

    Call build() once before search(); build() chunks every document in
    MARKDOWN_DOCS and indexes the chunks with bm25s.
    """

    def __init__(self) -> None:
        # Chunk i in this list corresponds to corpus row i in the retriever,
        # so retrieve() result ids index directly into self.chunks.
        self.chunks: list[Chunk] = []
        self.retriever = bm25s.BM25()

    def build(self) -> None:
        """Chunk all documents in MARKDOWN_DOCS and index them."""
        corpus: list[str] = []
        for name, doc in MARKDOWN_DOCS.items():
            for piece in chunk_markdown(doc):
                self.chunks.append(Chunk(name, piece))
                corpus.append(piece)
        tokens = bm25s.tokenize(corpus, stopwords="en")
        self.retriever.index(tokens)

    def search(self, query: str, k: int = 2) -> list[Chunk]:
        """Return up to *k* best-matching chunks for *query*.

        Args:
            query: Free-text search query.
            k: Maximum number of results; clamped to the corpus size
               because bm25s.retrieve raises when asked for more results
               than there are indexed documents.

        Returns:
            Matching chunks, best first; empty list if the index is empty.
        """
        if not self.chunks:
            # Guard: retrieving from an empty index would crash.
            return []
        k = min(k, len(self.chunks))
        q = bm25s.tokenize(query, stopwords="en")
        results, _scores = self.retriever.retrieve(q, k=k)
        ids = results[0].tolist()
        return [self.chunks[i] for i in ids]
# Build the index eagerly at import time so the web chat can answer its
# first query without a warm-up step.
index = MarkdownBM25Index()
index.build()
# -------------------------------------------------------------------
# Agent
# -------------------------------------------------------------------
# Single agent backed by an OpenAI model. The instructions force a call to
# the search_markdown tool so answers are grounded in retrieved snippets
# rather than in the model's parametric memory.
agent = Agent(
    "openai:gpt-5.2",
    instructions=(
        "You answer questions about a markdown knowledge base. "
        "Always call the search_markdown tool before answering. "
        "Base your answer on the retrieved snippets."
    ),
)
@agent.tool_plain
def search_markdown(query: str) -> str:
    """Search markdown documents using BM25."""
    # NOTE: the docstring above doubles as the tool description sent to the
    # model, so it is kept exactly as authored.
    hits = index.search(query)
    if not hits:
        return "No results found."
    # Number the snippets and join them with a visual separator so the model
    # can cite individual sources.
    snippets = [
        f"[{position}] Source: {hit.doc}\n{hit.text}"
        for position, hit in enumerate(hits, start=1)
    ]
    return "\n\n---\n\n".join(snippets)
# -------------------------------------------------------------------
# Web Chat UI
# -------------------------------------------------------------------
# ASGI app exposing Pydantic AI's built-in development chat UI for the agent.
app = agent.to_web()
# -------------------------------------------------------------------
# Start server
# -------------------------------------------------------------------
if __name__ == "__main__":
    # Import uvicorn lazily so merely importing this module (e.g. by an
    # external ASGI server) does not require starting our own server.
    import uvicorn

    print("\n🚀 Starting BM25 + Pydantic-AI demo")
    print("Open: http://127.0.0.1:8000\n")
    # Serves on uvicorn's default host (127.0.0.1) at port 8000.
    uvicorn.run(app, port=8000)
@cast42
Copy link
Author

cast42 commented Mar 11, 2026

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment