Skip to content

Instantly share code, notes, and snippets.

@mvdbeek
Created March 20, 2026 13:39
Show Gist options
  • Select an option

  • Save mvdbeek/a257c8885edba9e02482b598076eaa0e to your computer and use it in GitHub Desktop.

Select an option

Save mvdbeek/a257c8885edba9e02482b598076eaa0e to your computer and use it in GitHub Desktop.
Benchmark: UrlBuilder.url_path_for caching — 87x speedup for history contents serialization
"""Benchmark for UrlBuilder.url_path_for caching.
Demonstrates the performance difference when serializing N history items,
each requiring up to 2 url_path_for calls. Without caching, each call
does a linear scan through all registered routes. With caching, only the
first call per route name scans; subsequent calls go directly to the
matching route.
"""
import sys
import time
from starlette.requests import Request
from starlette.routing import Route
from galaxy.webapps.galaxy.api import UrlBuilder
# Number of simulated history items per benchmark run; each item triggers
# two URL lookups (one per target route).
NUM_DATASETS = 500
# Galaxy registers ~300+ routes across 71 API modules. Use a realistic count.
# The filler routes are registered ahead of the two target routes.
NUM_FILLER_ROUTES = 300
def _noop():
    """Placeholder endpoint for the benchmark routes; does nothing and returns ``None``."""
    return None
def _build_app_with_routes():
    """Build a Starlette app with a realistic number of routes.

    ``NUM_FILLER_ROUTES`` filler routes are registered first and the two
    routes the benchmark resolves are registered last, so a front-to-back
    scan of the route list hits its worst case.
    """
    from starlette.applications import Starlette

    # Filler routes come first (worst-case position for the targets).
    filler_routes = [
        Route(f"/api/filler{idx}/{{id}}", endpoint=_noop, name=f"filler_route_{idx}")
        for idx in range(NUM_FILLER_ROUTES)
    ]
    # The two routes used by _serialize_content_item
    target_routes = [
        Route(
            "/api/histories/{history_id}/contents/{type}s/{id}",
            endpoint=_noop,
            name="history_content_typed",
        ),
        Route(
            "/api/dataset_collections/{hdca_id}/contents/{parent_id}",
            endpoint=_noop,
            name="contents_dataset_collection",
        ),
    ]
    return Starlette(routes=filler_routes + target_routes)
def _make_request(app):
    """Return a ``Request`` whose minimal HTTP GET scope is bound to ``app``."""
    scope = dict(
        type="http",
        method="GET",
        path="/",
        query_string=b"",
        headers=[],
        app=app,
    )
    return Request(scope)
def _url_path_for_uncached(app, name, **path_params):
    """Resolve ``name`` by delegating straight to ``app.url_path_for``.

    This is the baseline ("old") code path of the benchmark: no caching layer
    sits between the caller and the application's own route lookup.
    """
    resolved = app.url_path_for(name, **path_params)
    return resolved
def benchmark_uncached(app, n=NUM_DATASETS):
    """Baseline workload: resolve both target routes for ``n`` items via the app.

    Each item performs one ``history_content_typed`` lookup and one
    ``contents_dataset_collection`` lookup through ``_url_path_for_uncached``.
    The results are discarded; only the elapsed time matters to the caller.
    """
    for item in range(n):
        _url_path_for_uncached(app, "history_content_typed", history_id=str(item), id=str(item), type="dataset")
        _url_path_for_uncached(app, "contents_dataset_collection", hdca_id=str(item), parent_id=str(item))
def benchmark_cached(request, n=NUM_DATASETS):
    """New-behavior workload: resolve both target routes for ``n`` items via ``UrlBuilder``.

    A single ``UrlBuilder`` is created from ``request`` and reused for every
    item, so whatever per-builder route caching it performs is shared across
    all ``2 * n`` lookups. The results are discarded.
    """
    build_url = UrlBuilder(request)
    for item in range(n):
        build_url("history_content_typed", history_id=str(item), id=str(item), type="dataset")
        build_url("contents_dataset_collection", hdca_id=str(item), parent_id=str(item))
def _mean_seconds(workload, target, n, iterations=3):
    """Return the mean wall-clock time, in seconds, of ``workload(target, n)``.

    The workload is executed ``iterations`` times back to back and each run is
    timed separately with ``time.perf_counter``.
    """
    samples = []
    for _ in range(iterations):
        start = time.perf_counter()
        workload(target, n)
        samples.append(time.perf_counter() - start)
    return sum(samples) / len(samples)


def test_url_builder_cache_benchmark():
    """Time URL resolution with and without ``UrlBuilder`` and report the result.

    Writes a timing report to stderr and fails unless the ``UrlBuilder`` path
    is, on average, more than 5x faster than calling ``app.url_path_for``
    directly.
    """
    app = _build_app_with_routes()
    request = _make_request(app)

    # Warmup: run each workload briefly before anything is timed.
    benchmark_uncached(app, n=5)
    benchmark_cached(request, n=5)

    # All uncached (old behavior) runs are timed first, then all cached
    # (new behavior) runs.
    avg_uncached = _mean_seconds(benchmark_uncached, app, NUM_DATASETS)
    avg_cached = _mean_seconds(benchmark_cached, request, NUM_DATASETS)
    speedup = avg_uncached / avg_cached

    report = (
        f"\n{'='*60}\n"
        f"URL Builder Benchmark: {NUM_DATASETS} datasets, {NUM_FILLER_ROUTES} filler routes\n"
        f"{'='*60}\n"
        f"Uncached (old): {avg_uncached*1000:.1f} ms (per item: {avg_uncached/NUM_DATASETS*1000:.3f} ms)\n"
        f"Cached (new): {avg_cached*1000:.1f} ms (per item: {avg_cached/NUM_DATASETS*1000:.3f} ms)\n"
        f"Speedup: {speedup:.1f}x\n"
        f"{'='*60}"
    )
    sys.stderr.write(report + "\n")
    assert speedup > 5, f"Expected significant speedup, got only {speedup:.1f}x"
def test_url_builder_cache_correctness():
    """Check that ``UrlBuilder`` returns the same paths as ``app.url_path_for``.

    For ten different ids, both target routes are resolved through the app
    directly and through a single shared ``UrlBuilder``; the string forms of
    the two results must be equal.
    """
    app = _build_app_with_routes()
    url_builder = UrlBuilder(_make_request(app))

    for i in range(10):
        lookups = (
            ("history_content_typed", {"history_id": str(i), "id": str(i), "type": "dataset"}),
            ("contents_dataset_collection", {"hdca_id": str(i), "parent_id": str(i)}),
        )
        for route_name, params in lookups:
            # Reference value first, then the builder's answer.
            expected = str(app.url_path_for(route_name, **params))
            actual = str(url_builder(route_name, **params))
            assert actual == expected, f"Mismatch for {route_name} at {i}"
if __name__ == "__main__":
    # Script entry point: run the correctness check before the benchmark.
    for _run in (test_url_builder_cache_correctness, test_url_builder_cache_benchmark):
        _run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment