Created
August 14, 2020 02:52
-
-
Save echelon/bae4400d0532fc980a6599dbe4e69b2d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| key = "watsonkey" | |
| url = "watsonurl" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import argparse | |
| import datetime | |
| import os | |
| import requests | |
| import sys | |
| import toml | |
# Path of the TOML credentials file that main() hands to read_secrets().
# The path is relative, so it is resolved against the current working
# directory. request_transcription() reads the 'key' and 'url' entries.
SECRETS_FILE = 'secrets.toml'
def get_json_results_filename(audio_file_path, save_to_same_dir=False):
    """Return the name of the JSON results file for an audio file.

    The audio file's extension is replaced with ``.json``.

    With ``save_to_same_dir`` set, the directory part of the path is kept:
        foo/bar/baz.wav -> foo/bar/baz.json
    Otherwise only the bare file name is used:
        foo/bar/baz.wav -> baz.json
    """
    source = audio_file_path if save_to_same_dir else os.path.basename(audio_file_path)
    stem, _extension = os.path.splitext(source)
    return stem + '.json'
def log(message):
    """Print ``message`` to stdout, prefixed with the current local date and time."""
    timestamp = datetime.datetime.now()
    line = '{}: {}'.format(str(timestamp), message)
    print(line)
def read_secrets(filename):
    """Load the service credentials from a TOML file.

    Args:
        filename: Path of the TOML file to read.

    Returns:
        The parsed document as returned by ``toml.loads`` (a dict). Other
        code in this file reads its ``key`` and ``url`` entries.

    Raises:
        OSError: If the file cannot be opened or read.
        Exception: Whatever ``toml.loads`` raises for malformed input
            (presumably ``toml.TomlDecodeError`` -- confirm against the
            installed ``toml`` version).
    """
    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's locale encoding (e.g. cp1252 on Windows).
    with open(filename, 'r', encoding='utf-8') as f:
        return toml.loads(f.read())
def _audio_content_type(audio_filename):
    """Return the Content-Type header value for a supported audio file name."""
    content_types = {
        '.wav': 'audio/wav',
        '.flac': 'audio/flac',
        '.mp3': 'audio/mpeg',
    }
    # Compare the real extension, case-insensitively: 'clip.WAV' is accepted,
    # while a name that merely ends in the letters 'wav' is not.
    extension = os.path.splitext(audio_filename)[1].lower()
    filetype = content_types.get(extension)
    if filetype is None:
        raise Exception('Unknown file type')
    return filetype


def request_transcription(audio_filename, secrets):
    """Upload an audio file to the Watson ``/v1/recognize`` endpoint.

    Args:
        audio_filename: Path of the audio file. The extension must be
            ``.wav``, ``.flac`` or ``.mp3`` (any letter case).
        secrets: Mapping with a ``key`` entry (the API key, sent as the
            password of HTTP basic auth with user name ``apikey``) and a
            ``url`` entry (the service base URL).

    Returns:
        The response body as text (presumably JSON, since the caller saves
        it to a ``.json`` file -- confirm against the service documentation).

    Raises:
        Exception: If the file type is not recognised, or the service
            answers with a status other than 200.
        OSError: If the audio file cannot be opened.
    """
    # Validate the file type first so an unsupported file fails before any I/O.
    filetype = _audio_content_type(audio_filename)

    credentials = ('apikey', secrets['key'])
    headers = {'Content-Type': filetype}

    # other parameters:
    # end_of_phrase_silence_time (default 0.8sec; between 0.0 and 120.0)
    # background_audio_suppression=0.5
    # speech_detector_sensitivity=0.6
    # audio_metrics=true
    # model=(different language models, eg. zh-CN_BroadbandModel)
    # word_confidence=true
    query_string = {
        # Provide timestamps for the words
        'timestamps': 'true',
        # Split results on semantic features of input sentences
        'split_transcript_at_phrase_end': 'true',
    }

    endpoint = '/v1/recognize'
    # Tolerate a configured base URL that ends with '/', which would
    # otherwise produce '...//v1/recognize'.
    url = secrets['url'].rstrip('/') + endpoint

    # Fail fast if the host is unreachable, but put no limit on the read:
    # how long the service takes to answer is not known from this file.
    connect_timeout_seconds = 30

    # Hand the open file to requests so the audio is streamed from disk
    # instead of being loaded into memory in one piece.
    with open(audio_filename, 'rb') as f:
        results = requests.post(
            url,
            auth=credentials,
            params=query_string,
            headers=headers,
            data=f,
            timeout=(connect_timeout_seconds, None),
        )

    if results.status_code != 200:
        # Use the decoded body so the message is readable text, not a bytes repr.
        raise Exception('Bad status: {}, {}'.format(results.status_code, results.text))
    return results.text
def main():
    """Command-line entry point: transcribe one audio file and save the result.

    Parses the command line (a positional ``filename`` and an optional
    ``--save_to_same_dir`` flag), derives the output ``.json`` file name,
    and exits early if that file already exists. Otherwise it reads the
    credentials from ``SECRETS_FILE``, requests the transcription and
    writes the response text to the output file.

    Raises:
        Exception: If the supplied filename is empty.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put the sentence
    # into the usage line in place of the program name.
    parser = argparse.ArgumentParser(description='Transcribe audio with IBM Watson')
    parser.add_argument('filename', type=str)
    parser.add_argument('--save_to_same_dir', default=False, action='store_true')
    args = parser.parse_args()

    # argparse already enforces presence; this catches an explicit empty
    # string argument.
    if not args.filename:
        raise Exception('Must supply an input filename.')

    audio_filename = args.filename
    output_filename = get_json_results_filename(audio_filename, args.save_to_same_dir)
    log('input filename: {}'.format(audio_filename))
    log('output filename will be: {}'.format(output_filename))

    # Never overwrite an existing result (and skip the service request).
    if os.path.exists(output_filename):
        log("Output file already exists! Exiting early.")
        sys.exit()

    log('reading secrets')
    secrets = read_secrets(SECRETS_FILE)

    log('requesting transcription for file: {}'.format(audio_filename))
    results = request_transcription(audio_filename, secrets)

    log('saving results to file: {}'.format(output_filename))
    # Write UTF-8 explicitly: the transcript may contain non-ASCII text that
    # the platform's default encoding cannot represent.
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(results)
    log('done')


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment