#!/usr/bin/env bash
#
# Transcribe a podcast episode to text with vosk.
#
# Largely followed this post:
#   https://itectec.com/ubuntu/ubuntu-speech-recognition-app-to-convert-mp3-to-text/
#
# Requires: wget, ffmpeg, pip3, git, unzip, python3.
# Outputs:  file.wav in the directory the script is started from, and
#           transcript.txt in vosk-api/python/example.

# Stop at the first failing step so later steps never run on missing inputs.
set -euo pipefail

# Remember where we started: the wav file is created here, but the
# recognizer is run from inside the cloned repository further down.
work_dir=$PWD

readonly EPISODE_URL='https://aphid.fireside.fm/d/1437767933/7296e34a-2697-479a-adfb-ad32329dd0b0/1cbfb286-6182-404e-a59b-e969e60c7a44.mp3'
readonly MODEL_NAME='vosk-model-en-us-daanzu-20200905'
readonly MP3_FILE="${work_dir}/downloaded-mp3-file.mp3"
readonly WAV_FILE="${work_dir}/file.wav"

# Download the show, https://selfhosted.show/49
# -O pins the local name; without it wget saves under the URL's basename,
# which is not the name the ffmpeg step reads.
wget -O "${MP3_FILE}" "${EPISODE_URL}"

# Convert it into the format vosk is expecting (16 kHz sample rate, 1 channel)
ffmpeg -i "${MP3_FILE}" -ar 16000 -ac 1 "${WAV_FILE}"

# Install vosk
pip3 install vosk

# Clone the vosk-api repo to get examples
git clone https://github.com/alphacep/vosk-api
cd vosk-api/python/example

# Get a more advanced open vosk model and move it to the model directory
wget "https://alphacephei.com/vosk/models/${MODEL_NAME}.zip"
unzip "${MODEL_NAME}.zip"
mv "${MODEL_NAME}" model

# The wav file lives in the starting directory, not in the example
# directory we are in now, so it is passed by absolute path.
time python3 ./test_text.py "${WAV_FILE}" >transcript.txt

# I timed it because I was curious
# real 7m21.777s
# user 7m17.250s
# sys  0m3.188s