Skip to content

Instantly share code, notes, and snippets.

@greg-randall
Created March 4, 2026 16:19
Show Gist options
  • Select an option

  • Save greg-randall/141de558696991e33ed871f0e96db895 to your computer and use it in GitHub Desktop.

Select an option

Save greg-randall/141de558696991e33ed871f0e96db895 to your computer and use it in GitHub Desktop.

Revisions

  1. greg-randall created this gist Mar 4, 2026.
    52 changes: 52 additions & 0 deletions get-audio.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,52 @@
    import json
    import requests
    import base64
    import wave
    import io

    # Speech-synthesis endpoint that this script POSTs the text to.
    url = 'https://inworld.ai/api/create-speech'

    # Headers sent with the POST. They look like values captured from a
    # browser session (NOTE(review): confirm which of them the endpoint
    # actually requires before trimming any).
    headers = {
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'no-cache',
        'content-type': 'application/json',
        'origin': 'https://inworld.ai',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://inworld.ai/tts?rdt_cid=5537321732037909376&utm_campaign=TTS_Conversion&utm_content=artificial_analysis_speech_arena_updated&utm_medium=paidsocial&utm_source=reddit',
        'sec-ch-ua': '"Not:A-Brand";v="99", "Microsoft Edge";v="145", "Chromium";v="145"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36 Edg/145.0.0.0',
    }

    # JSON request body: the text to speak, the voice and model ids, and the
    # requested audio encoding (LINEAR16) and sample rate (48000 Hz).
    payload_string = '{"text":"The room in which I found myself was very large and lofty. The windows were long, narrow, and pointed, and at so vast a distance from the black oaken floor as to be altogether inaccessible from within. Feeble gleams of encrimsoned light made their way through the trellissed panes, and served to render sufficiently distinct the more prominent objects around; the eye, however, struggled in vain to reach the remoter angles of the chamber, or the recesses of the vaulted and fretted ceiling. Dark draperies hung upon the walls. The general furniture was profuse, comfortless, antique, and tattered. Many books and musical instruments lay scattered about, but failed to give any vitality to the scene. I felt that I breathed an atmosphere of sorrow. An air of stern, deep, and irredeemable gloom hung over and pervaded all.","voiceId":"Dennis","modelId":"inworld-tts-1.5-max","audioConfig":{"audioEncoding":"LINEAR16","sampleRateHertz":48000}}'

    # stream=True lets the body be consumed line by line as it arrives.
    # The (connect, read) timeout keeps a stalled connection from hanging
    # the script forever; the read timeout applies between received bytes,
    # so a long but steadily streaming response is not cut off.
    response = requests.post(
        url,
        headers=headers,
        data=payload_string.encode('utf-8'),
        stream=True,
        timeout=(10, 120),
    )
    # Fail fast on 4xx/5xx instead of feeding an error body into the
    # JSON/WAV parsing that follows.
    response.raise_for_status()

    # Raw audio frames taken from each streamed chunk, in arrival order.
    pure_audio = []

    for raw_line in response.iter_lines():
        # Ignore empty lines yielded by the stream.
        if not raw_line:
            continue

        # Every non-empty line is parsed as a standalone JSON document.
        message = json.loads(raw_line)
        if "result" not in message or "audioContent" not in message["result"]:
            continue

        wav_bytes = base64.b64decode(message["result"]["audioContent"])

        # Each chunk is opened as its own WAV file; only its frames are kept
        # so that per-chunk headers do not end up inside the combined audio.
        with wave.open(io.BytesIO(wav_bytes), 'rb') as chunk_wav:
            pure_audio.append(chunk_wav.readframes(chunk_wav.getnframes()))

    # Combine the collected frames into one WAV file with a single header.
    with wave.open("output.wav", 'wb') as wav_out:
        # Output format: 48000 Hz, 2 bytes per sample (16-bit), 1 channel.
        wav_out.setframerate(48000)
        wav_out.setsampwidth(2)
        wav_out.setnchannels(1)

        # One write of the concatenated chunks produces the same file as
        # writing each chunk in turn.
        wav_out.writeframes(b"".join(pure_audio))

    print("Saved the fixed wave file!")