Created
March 20, 2026 13:39
-
-
Save mvdbeek/a257c8885edba9e02482b598076eaa0e to your computer and use it in GitHub Desktop.
Benchmark: UrlBuilder.url_path_for caching — 87x speedup for history contents serialization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Benchmark for UrlBuilder.url_path_for caching.

Demonstrates the performance difference when serializing N history items,
each requiring up to 2 url_path_for calls. Without caching, each call
does a linear scan through all registered routes. With caching, only the
first call per route name scans; subsequent calls go directly to the
matching route.
"""
| import sys | |
| import time | |
| from starlette.requests import Request | |
| from starlette.routing import Route | |
| from galaxy.webapps.galaxy.api import UrlBuilder | |
# Default number of items each benchmark pass builds URLs for.
NUM_DATASETS = 500

# Galaxy registers ~300+ routes across 71 API modules. Use a realistic count.
NUM_FILLER_ROUTES = 300
| def _noop(): | |
| pass | |
def _build_app_with_routes():
    """Assemble a Starlette app carrying a realistic number of routes.

    The two routes the benchmark resolves are registered after all
    NUM_FILLER_ROUTES filler routes, i.e. in the worst-case position for a
    scan that walks the route list from the start.
    """
    from starlette.applications import Starlette

    # Filler routes come first so the target routes end up last in the list.
    filler_routes = [
        Route(f"/api/filler{i}/{{id}}", endpoint=_noop, name=f"filler_route_{i}")
        for i in range(NUM_FILLER_ROUTES)
    ]

    # The two routes used by _serialize_content_item
    history_content_route = Route(
        "/api/histories/{history_id}/contents/{type}s/{id}",
        endpoint=_noop,
        name="history_content_typed",
    )
    collection_contents_route = Route(
        "/api/dataset_collections/{hdca_id}/contents/{parent_id}",
        endpoint=_noop,
        name="contents_dataset_collection",
    )

    return Starlette(
        routes=[*filler_routes, history_content_route, collection_contents_route]
    )
def _make_request(app):
    """Return a real Request whose scope references *app*."""
    asgi_scope = dict(
        type="http",
        method="GET",
        path="/",
        query_string=b"",
        headers=[],
        app=app,
    )
    return Request(asgi_scope)
| def _url_path_for_uncached(app, name, **path_params): | |
| """The old behavior: always scan all routes.""" | |
| return app.url_path_for(name, **path_params) | |
def benchmark_uncached(app, n=NUM_DATASETS):
    """Resolve both benchmark routes for *n* items without caching.

    Simulates the old behavior: each item triggers two direct
    ``app.url_path_for`` lookups (through ``_url_path_for_uncached``).
    """
    for item_id in map(str, range(n)):
        _url_path_for_uncached(
            app, "history_content_typed", history_id=item_id, id=item_id, type="dataset"
        )
        _url_path_for_uncached(
            app, "contents_dataset_collection", hdca_id=item_id, parent_id=item_id
        )
def benchmark_cached(request, n=NUM_DATASETS):
    """Resolve both benchmark routes for *n* items through a UrlBuilder.

    Simulates the new behavior: one UrlBuilder is created for the request and
    reused for every lookup in the pass.
    """
    build_url = UrlBuilder(request)
    for item_id in map(str, range(n)):
        build_url("history_content_typed", history_id=item_id, id=item_id, type="dataset")
        build_url("contents_dataset_collection", hdca_id=item_id, parent_id=item_id)
def test_url_builder_cache_benchmark():
    """Time uncached vs. cached URL building and require a clear speedup.

    Each variant is run ``iterations`` times over NUM_DATASETS items. The mean
    wall-clock times are written to stderr, and the test fails unless the
    cached path is more than 5x faster than the uncached one.
    """
    app = _build_app_with_routes()
    request = _make_request(app)

    # Warmup
    benchmark_uncached(app, n=5)
    benchmark_cached(request, n=5)

    iterations = 3

    def _elapsed(run, target):
        """Return the seconds one full pass of *run* over *target* takes."""
        began = time.perf_counter()
        run(target, NUM_DATASETS)
        return time.perf_counter() - began

    # Benchmark uncached (old behavior)
    uncached_times = [_elapsed(benchmark_uncached, app) for _ in range(iterations)]
    # Benchmark cached (new behavior)
    cached_times = [_elapsed(benchmark_cached, request) for _ in range(iterations)]

    avg_uncached = sum(uncached_times) / len(uncached_times)
    avg_cached = sum(cached_times) / len(cached_times)
    speedup = avg_uncached / avg_cached

    report = (
        f"\n{'='*60}\n"
        f"URL Builder Benchmark: {NUM_DATASETS} datasets, {NUM_FILLER_ROUTES} filler routes\n"
        f"{'='*60}\n"
        f"Uncached (old): {avg_uncached*1000:.1f} ms (per item: {avg_uncached/NUM_DATASETS*1000:.3f} ms)\n"
        f"Cached (new): {avg_cached*1000:.1f} ms (per item: {avg_cached/NUM_DATASETS*1000:.3f} ms)\n"
        f"Speedup: {speedup:.1f}x\n"
        f"{'='*60}"
    )
    sys.stderr.write(report + "\n")

    assert speedup > 5, f"Expected significant speedup, got only {speedup:.1f}x"
def test_url_builder_cache_correctness():
    """Check that UrlBuilder yields the same paths as ``app.url_path_for``.

    For ten ids, both benchmark routes are resolved directly on the app and
    through a single UrlBuilder; the string forms must match.
    """
    app = _build_app_with_routes()
    request = _make_request(app)
    url_builder = UrlBuilder(request)

    for i in range(10):
        item_id = str(i)

        typed_params = {"history_id": item_id, "id": item_id, "type": "dataset"}
        expected_typed = str(app.url_path_for("history_content_typed", **typed_params))
        actual_typed = url_builder("history_content_typed", **typed_params)
        assert str(actual_typed) == expected_typed, f"Mismatch for history_content_typed at {i}"

        collection_params = {"hdca_id": item_id, "parent_id": item_id}
        expected_collection = str(
            app.url_path_for("contents_dataset_collection", **collection_params)
        )
        actual_collection = url_builder("contents_dataset_collection", **collection_params)
        assert str(actual_collection) == expected_collection, f"Mismatch for contents_dataset_collection at {i}"
if __name__ == "__main__":
    # Script entry point: run the correctness check before the timing run.
    test_url_builder_cache_correctness()
    test_url_builder_cache_benchmark()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment