Skip to content

Instantly share code, notes, and snippets.

View roedoejet's full-sized avatar

Aidan Pine roedoejet

View GitHub Profile
@roedoejet
roedoejet / main.py
Created January 16, 2025 23:00
A CLI for creating a ReadAlong from an ELAN file
import argparse
import os
from pympi.Elan import Eaf
from readalongs.api import Token, convert_prealigned_text_to_offline_html
def elan_to_readalong(eaf_file, audio_file, tiername, output_file):
"""
Convert an ELAN file (.eaf) to a ReadAlong (.html).
import re
from unicodedata import normalize
from nltk.tokenize import RegexpTokenizer
class Tokenizer:
def __init__(self, symbols: list[str]):
# NFC normalize and reverse sort by length
self.symbols = sorted(