Skip to content

Instantly share code, notes, and snippets.

@echelon
Created August 14, 2020 02:52
Show Gist options
  • Select an option

  • Save echelon/bae4400d0532fc980a6599dbe4e69b2d to your computer and use it in GitHub Desktop.

Select an option

Save echelon/bae4400d0532fc980a6599dbe4e69b2d to your computer and use it in GitHub Desktop.
# secrets.toml -- placeholder credentials; the script below reads the 'key' and 'url' entries
key = "watsonkey"
url = "watsonurl"
#!/usr/bin/env python
import argparse
import datetime
import os
import requests
import sys
import toml
# Name of the TOML file that main() passes to read_secrets(); it is a relative
# path, so it is resolved against the current working directory.
SECRETS_FILE = 'secrets.toml'
def get_json_results_filename(audio_file_path, save_to_same_dir=False):
    """
    Derive the name of the JSON results file for an audio file.

    With save_to_same_dir the directory part is kept:
        foo/bar/baz.wav -> foo/bar/baz.json
    otherwise only the bare file name is used:
        foo/bar/baz.wav -> baz.json
    """
    if save_to_same_dir:
        source = audio_file_path
    else:
        source = os.path.basename(audio_file_path)
    # Only the last extension is swapped for '.json'.
    stem, _extension = os.path.splitext(source)
    return stem + '.json'
def log(message):
    """Print message to stdout, prefixed with the current local date and time."""
    template = '{}: {}'
    stamp = datetime.datetime.now()
    print(template.format(str(stamp), message))
def read_secrets(filename):
    """
    Read a TOML file and return its parsed contents.

    Args:
        filename: path of the TOML file to read.

    Returns:
        The object produced by toml.loads() for the file's text.

    Raises:
        OSError: if the file cannot be opened.
    """
    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        return toml.loads(f.read())
def request_transcription(audio_filename, secrets):
    """
    POST an audio file to the '/v1/recognize' endpoint and return the response body.

    Args:
        audio_filename: path of the audio file. The Content-Type is chosen from
            the end of the name ('wav', 'flac' or 'mp3'), ignoring case.
        secrets: mapping providing 'key' (sent as the password for the
            'apikey' user) and 'url' (service base URL).

    Returns:
        The response body as text.

    Raises:
        Exception: if the file type is not recognised, or the service answers
            with a status code other than 200.
        OSError: if the audio file cannot be read.
    """
    # Suffix -> Content-Type. Matching is done on the lower-cased name so that
    # 'clip.WAV' is handled the same way as 'clip.wav'.
    content_types = (
        ('wav', 'audio/wav'),
        ('flac', 'audio/flac'),
        ('mp3', 'audio/mpeg'),
    )
    lowered_name = audio_filename.lower()
    filetype = next(
        (mime for suffix, mime in content_types if lowered_name.endswith(suffix)),
        None,
    )
    if filetype is None:
        raise Exception('Unknown file type')

    with open(audio_filename, 'rb') as f:
        file_contents = f.read()

    credentials = ('apikey', secrets['key'])
    headers = {'Content-Type': filetype}

    # other parameters:
    # end_of_phrase_silence_time (default 0.8sec; between 0.0 and 120.0)
    # background_audio_suppression=0.5
    # speech_detector_sensitivity=0.6
    # audio_metrics=true
    # model=(different language models, eg. zh-CN_BroadbandModel)
    # word_confidence=true
    query_string = {
        # Provide timestamps for the words
        'timestamps': 'true',
        # Split results on semantic features of input sentences
        'split_transcript_at_phrase_end': 'true',
    }

    endpoint = '/v1/recognize'
    # Tolerate a trailing slash on the configured base URL so the request path
    # never becomes '//v1/recognize'.
    url = secrets['url'].rstrip('/') + endpoint

    # NOTE(review): no timeout is passed, so this call waits for as long as the
    # service takes to answer -- confirm that is intended for long recordings.
    results = requests.post(
        url,
        auth=credentials,
        params=query_string,
        headers=headers,
        data=file_contents,
    )
    if results.status_code != 200:
        raise Exception('Bad status: {}, {}'.format(results.status_code, results.content))
    return results.text
def main():
    """
    Command-line entry point: transcribe one audio file and save the response.

    Parses the positional 'filename' argument and the optional
    '--save_to_same_dir' flag, derives the output JSON name, and exits early
    (without contacting the service) when that output file already exists.
    Otherwise it loads the secrets file, requests the transcription and writes
    the response text to the output file.

    Raises:
        Exception: if the supplied filename is empty.
    """
    # The text must be passed as `description`; the first positional parameter
    # of ArgumentParser is `prog`, which would put it in the usage line.
    parser = argparse.ArgumentParser(description='Transcribe audio with IBM Watson')
    parser.add_argument('filename', type=str)
    parser.add_argument('--save_to_same_dir', default=False, action='store_true')
    args = parser.parse_args()

    # argparse already requires the positional; this only catches an empty string.
    if not args.filename:
        raise Exception('Must supply an input filename.')

    audio_filename = args.filename
    output_filename = get_json_results_filename(audio_filename, args.save_to_same_dir)
    log('input filename: {}'.format(audio_filename))
    log('output filename will be: {}'.format(output_filename))

    # Never overwrite an earlier result (and skip the service call entirely).
    if os.path.exists(output_filename):
        log("Output file already exists! Exiting early.")
        sys.exit()

    log('reading secrets')
    secrets = read_secrets(SECRETS_FILE)

    log('requesting transcription for file: {}'.format(audio_filename))
    results = request_transcription(audio_filename, secrets)

    log('saving results to file: {}'.format(output_filename))
    # Write UTF-8 explicitly so non-ASCII transcript text does not depend on
    # the platform's default encoding.
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(results)
    log('done')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment