Skip to content

Instantly share code, notes, and snippets.

@conspirator
Created February 27, 2025 16:41
Show Gist options
  • Select an option

  • Save conspirator/7df47f68548b4037524bc2df51fd3d2c to your computer and use it in GitHub Desktop.

Select an option

Save conspirator/7df47f68548b4037524bc2df51fd3d2c to your computer and use it in GitHub Desktop.

Revisions

  1. Christopher Webb created this gist Feb 27, 2025.
    134 changes: 134 additions & 0 deletions tts.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,134 @@
    #!/usr/bin/env -S uv run --script
    # /// script
    # requires-python = ">=3.12"
    # dependencies = [
    # "kokoro-onnx",
    # "sounddevice",
    # "soundfile",
    # "requests",
    # ]
    # ///
    """
    Text-to-speech script using kokoro-onnx
    Assumes UV is installed. If not, install it with:
    curl -LsSf https://astral.sh/uv/install.sh | sh
    Usage:
    echo "Hello, world." | ./tts # Play audio directly
    echo "Hello, world." | ./tts --save # Save to default location
    echo "Hello, world." | ./tts --save --path /path/to/file.wav # Save to custom path
    cat test.txt | ./tts --speed 1.5 # Play at faster speed
    """

    import argparse
    import sys
    import sounddevice as sd
    import soundfile as sf
    from kokoro_onnx import Kokoro
    from pathlib import Path
    import requests
    import os
    import time
    from typing import Tuple

    # Model assets live in a per-user cache directory under the home folder.
    CACHE_DIR = Path.home().joinpath(".cache", "kokoro-tts")
    MODEL_PATH = CACHE_DIR.joinpath("kokoro-v1.0.onnx")
    VOICES_PATH = CACHE_DIR.joinpath("voices-v1.0.bin")

    # Release assets of the kokoro-onnx project that populate the cache.
    MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"
    VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"

    # Directory that receives saved audio when no explicit path is supplied
    # (the user's Downloads folder).
    DEFAULT_SAVE_DIR = os.path.expanduser("~/Downloads")

    def download_file(url: str, path: Path) -> None:
        """Download ``url`` to ``path`` unless ``path`` already exists.

        The payload is streamed into a sibling ``<name>.part`` file and only
        renamed onto ``path`` after the transfer has completed. An interrupted
        or failed download therefore never leaves a truncated file at ``path``
        that a later run would mistake for a valid cached copy.

        Args:
            url: HTTP(S) location of the file to fetch.
            path: Destination on disk; missing parent directories are created.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status.
            OSError: If the destination cannot be written.
        """
        if path.exists():
            return

        print(f"Downloading {path.name}...")
        path.parent.mkdir(parents=True, exist_ok=True)
        partial = path.with_name(f"{path.name}.part")
        try:
            # (connect, read) timeouts stop a stalled server from hanging the
            # script forever; the context manager releases the connection.
            with requests.get(url, stream=True, timeout=(10, 60)) as response:
                response.raise_for_status()
                with open(partial, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Rename within the same directory so the finished file shows up
            # at its final location in a single step.
            partial.replace(path)
        finally:
            # No-op after a successful rename; removes leftovers on failure.
            partial.unlink(missing_ok=True)
        print(f"Downloaded {path.name}")

    def ensure_model_files() -> Tuple[Path, Path]:
        """Fetch any missing model assets and return ``(model_path, voices_path)``."""
        # The model file is handled first, then the voices file.
        for url, destination in ((MODEL_URL, MODEL_PATH), (VOICES_URL, VOICES_PATH)):
            download_file(url, destination)
        return MODEL_PATH, VOICES_PATH

    def process_text(text, voice="af_sky", speed=1.0, lang="en-us", save=False, path=None):
        """Synthesize ``text`` with Kokoro, then either play it or write it to disk.

        When ``save`` is false the audio is played through sounddevice and the
        call returns after ``sd.wait()``. When ``save`` is true the audio is
        written to ``path`` if one is given (creating its parent directory),
        otherwise to ``audio_<unix-timestamp>.wav`` inside ``DEFAULT_SAVE_DIR``.
        """
        model_file, voices_file = ensure_model_files()
        engine = Kokoro(str(model_file), str(voices_file))
        samples, sample_rate = engine.create(text, voice=voice, speed=speed, lang=lang)

        # Playback path: nothing is written to disk.
        if not save:
            print(f"Playing: {text}")
            sd.play(samples, sample_rate)
            sd.wait()
            return

        if path:
            # Caller-supplied destination; make sure its folder exists.
            target = path
            parent_dir = os.path.dirname(os.path.abspath(target))
            os.makedirs(parent_dir, exist_ok=True)
        else:
            # Timestamped file name inside the default save directory.
            stamp = int(time.time())
            os.makedirs(DEFAULT_SAVE_DIR, exist_ok=True)
            target = os.path.join(DEFAULT_SAVE_DIR, f"audio_{stamp}.wav")

        sf.write(target, samples, sample_rate)
        print(f"Saved audio to {target}")

    def main():
        """Command-line entry point: read text from stdin and speak or save it.

        Parses ``--voice``, ``--speed``, ``--lang``, ``--save`` and ``--path``,
        reads all of stdin, and hands the stripped text to ``process_text``.

        Exits with status 1, after reporting the problem on stderr, when stdin
        holds no text or when reading, downloading, synthesis, playback or
        saving fails.
        """
        parser = argparse.ArgumentParser(description='Text-to-speech with configurable options')
        parser.add_argument('--voice', type=str, default="af_sky",
                            help='Voice to use (default: af_sky)')
        parser.add_argument('--speed', type=float, default=1.0,
                            help='Speech speed multiplier (default: 1.0)')
        parser.add_argument('--lang', type=str, default="en-us",
                            help='Language code (default: en-us)')
        parser.add_argument('--save', action='store_true',
                            help='Save audio to file instead of playing')
        parser.add_argument('--path', type=str,
                            help='Custom path to save the audio file (used with --save)')
        args = parser.parse_args()

        # Read text from stdin
        try:
            text = sys.stdin.read().strip()
        except (OSError, ValueError) as e:
            # ValueError also covers UnicodeDecodeError from undecodable input.
            print(f"An error occurred: {e}", file=sys.stderr)
            sys.exit(1)

        # Diagnostics go to stderr so they stay visible when stdout is
        # redirected and never mix with regular output.
        if not text:
            print("Error: No input provided. Please pipe some text to the script.", file=sys.stderr)
            sys.exit(1)

        try:
            process_text(
                text,
                voice=args.voice,
                speed=args.speed,
                lang=args.lang,
                save=args.save,
                path=args.path,
            )
        except FileNotFoundError as e:
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)
        except requests.RequestException as e:
            print(f"Error downloading model files: {e}", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            # Top-level boundary: report any other failure and exit non-zero.
            print(f"An error occurred: {e}", file=sys.stderr)
            sys.exit(1)


    if __name__ == "__main__":
        main()