Skip to content

Instantly share code, notes, and snippets.

@LawrenceHwang
Created May 8, 2026 03:51
Show Gist options
  • Select an option

  • Save LawrenceHwang/de7570843faf5b689ba441a113a41e82 to your computer and use it in GitHub Desktop.

Select an option

Save LawrenceHwang/de7570843faf5b689ba441a113a41e82 to your computer and use it in GitHub Desktop.
GitHub Copilot SDK BYOK Demo
"""
BYOK (Bring Your Own Key) demo for the GitHub Copilot SDK.
Sends a single prompt to any OpenAI-compatible endpoint — local Ollama, OpenAI,
Azure OpenAI, or any other compatible API — using the Copilot SDK session API.
Usage
-----
# Local Ollama (default, no key required)
uv run byok.py --prompt "Tell me a joke"
# Pick a specific local model
uv run byok.py --model gemma4:latest --prompt "Write a short story"
# List available models for the configured endpoint
uv run byok.py --list-models
# OpenAI
OPENAI_BASE_URL=https://api.openai.com OPENAI_API_KEY=sk-... \\
uv run byok.py --model gpt-4o --prompt "Hello"
# Any other OpenAI-compatible provider
uv run byok.py --base-url https://your-provider/v1 --api-key <key> --model <model>
Configuration (environment variables)
--------------------------------------
OPENAI_BASE_URL Base URL of the API (default: http://localhost:11434)
OPENAI_API_KEY API key (default: empty, not required for Ollama)
MODEL / OPENAI_MODEL / OLLAMA_MODEL Model to use (auto-selected from discovered list if unset)
PROMPT Default prompt (default: "Which model are you running on?")
COPILOT_REQUEST_TIMEOUT_SECONDS Request timeout (default: 180)
Dependencies
------------
Requires the `copilot` SDK package. Install with:
pip install copilot-sdk # or: uv add copilot-sdk
"""
import argparse
import asyncio
import json
import logging
import os
from urllib.request import Request, urlopen
from copilot import CopilotClient, ProviderConfig
from copilot.generated.session_events import AssistantMessageData
from copilot.session import PermissionHandler
# Module-level logger; configured in main() via logging.basicConfig.
logger = logging.getLogger(__name__)
# Defaults to local Ollama; override with OPENAI_BASE_URL for any OpenAI-compatible API.
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "http://localhost:11434").strip()
# Empty key is allowed: Ollama does not require authentication.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "").strip()
# Default prompt used when neither --prompt nor PROMPT is supplied.
PROMPT = os.environ.get("PROMPT", "Which model are you running on?")
# Per-request timeout in seconds; raises ValueError at import time if the
# environment variable is not a valid float.
REQUEST_TIMEOUT_SECONDS = float(
    os.environ.get("COPILOT_REQUEST_TIMEOUT_SECONDS", "180")
)
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the BYOK sample.

    Every option defaults to ``None`` (or ``False`` for flags) so that main()
    can distinguish "not given on the CLI" from an explicit value and fall
    back to the corresponding environment variable.
    """
    cli = argparse.ArgumentParser(
        description="BYOK Copilot SDK sample for OpenAI-compatible endpoints."
    )
    cli.add_argument(
        "--base-url",
        default=None,
        help="OpenAI-compatible API base URL. Falls back to OPENAI_BASE_URL.",
    )
    cli.add_argument(
        "--api-key",
        default=None,
        help="API key. Falls back to OPENAI_API_KEY.",
    )
    cli.add_argument(
        "--model",
        default=None,
        help=(
            "Model override. Highest priority over MODEL/OPENAI_MODEL/OLLAMA_MODEL "
            "environment variables."
        ),
    )
    cli.add_argument(
        "--prompt",
        default=None,
        help="Prompt override. Falls back to PROMPT environment variable.",
    )
    cli.add_argument(
        "--timeout",
        default=None,
        type=float,
        help="Request timeout in seconds. Falls back to COPILOT_REQUEST_TIMEOUT_SECONDS.",
    )
    cli.add_argument(
        "--list-models",
        action="store_true",
        help="Print available models for the configured endpoint and exit.",
    )
    cli.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable debug logging.",
    )
    return cli.parse_args()
def build_base_url(raw: str | None) -> str:
base = (raw or OPENAI_BASE_URL).strip().rstrip("/")
if not base.endswith("/v1"):
base = f"{base}/v1"
return base
def discover_models(base_url: str, api_key: str) -> list[str]:
    """Best-effort discovery of models from ``{base_url}/models``.

    Returns a de-duplicated list of model ids in server order, or an empty
    list on ANY failure (network error, bad JSON, unexpected payload shape)
    so the caller can fall back to explicit model selection.

    Parameters
    ----------
    base_url : str
        Normalized base URL (already ending in ``/v1``).
    api_key : str
        Bearer token; omitted from the request when empty (e.g. Ollama).
    """
    req = Request(f"{base_url}/models")
    if api_key:
        req.add_header("Authorization", f"Bearer {api_key}")
    try:
        logger.debug("Discovering models from %s/models", base_url)
        with urlopen(req, timeout=15) as response:
            logger.debug("Model discovery response status: %s", response.status)
            payload = json.load(response)
    except Exception as e:
        # Deliberate broad catch: discovery is optional and must never crash.
        logger.warning("Model discovery failed: %s", e)
        return []
    # BUGFIX: parsing used to run unguarded after the try block, so a valid
    # JSON body that was not an object (e.g. a bare list, or an error string)
    # raised AttributeError and crashed instead of degrading gracefully.
    entries = payload.get("data", []) if isinstance(payload, dict) else []
    models: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue  # skip malformed entries rather than raising
        model_id = str(entry.get("id", "")).strip()
        if model_id and model_id not in models:
            models.append(model_id)
    return models
def select_model(cli_model: str | None, discovered: list[str]) -> str:
if cli_model:
return cli_model
for env_var in ("MODEL", "OPENAI_MODEL", "OLLAMA_MODEL"):
val = os.environ.get(env_var, "").strip()
if val:
return val
if discovered:
return discovered[0]
raise RuntimeError(
"Could not determine a model automatically. "
"Set MODEL (or OPENAI_MODEL / OLLAMA_MODEL)."
)
async def main() -> None:
    """Entry point: resolve configuration, optionally list models, then send
    a single prompt through a Copilot SDK session and print the reply.

    Raises RuntimeError if the session yields no assistant message or an
    unexpected payload type.
    """
    args = parse_args()
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s: %(message)s",
    )

    # CLI flags win; otherwise fall back to the environment-derived defaults.
    base_url = build_base_url(args.base_url)
    api_key = OPENAI_API_KEY if args.api_key is None else args.api_key
    prompt = PROMPT if args.prompt is None else args.prompt
    timeout_seconds = (
        REQUEST_TIMEOUT_SECONDS if args.timeout is None else args.timeout
    )

    discovered = discover_models(base_url, api_key)
    if args.list_models:
        if discovered:
            print("\n".join(discovered))
        else:
            print("No models discovered.")
        return

    model_name = select_model(args.model, discovered)

    provider: ProviderConfig = {
        "type": "openai",
        "base_url": base_url,
        "wire_api": "completions",
    }
    if not api_key:
        logger.warning("No API key provided; proceeding without one.")
    else:
        provider["api_key"] = api_key

    logger.info("Provider base URL: %s", base_url)
    if discovered:
        logger.info("Discovered models: %s", ", ".join(discovered[:10]))
    else:
        logger.info("Model discovery unavailable; using explicit model selection.")
    logger.info("Using model: %s", model_name)

    async with CopilotClient() as client:
        session_cm = await client.create_session(
            on_permission_request=PermissionHandler.approve_all,
            model=model_name,
            available_tools=[],
            provider=provider,
        )
        async with session_cm as session:
            reply = await session.send_and_wait(prompt, timeout=timeout_seconds)
            if reply is None:
                raise RuntimeError(
                    "The session completed without an assistant message."
                )
            if not isinstance(reply.data, AssistantMessageData):
                raise RuntimeError(
                    f"Unexpected response payload type: {type(reply.data).__name__}"
                )
            print(reply.data.content)
# Script entry point: run the async main() on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment