This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import os | |
| from pympi.Elan import Eaf | |
| from readalongs.api import Token, convert_prealigned_text_to_offline_html | |
| def elan_to_readalong(eaf_file, audio_file, tiername, output_file): | |
| """ | |
| Convert an ELAN file (.eaf) to a ReadAlong (.html). |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from unicodedata import normalize | |
| from nltk.tokenize import RegexpTokenizer | |
| class Tokenizer: | |
| def __init__(self, symbols: list[str]): | |
| # NFC normalize and reverse sort by length | |
| self.symbols = sorted( |