Created
January 16, 2025 23:00
-
-
Save roedoejet/87d93cbf8d4eb5a25318e8da56dff385 to your computer and use it in GitHub Desktop.
A CLI for creating a ReadAlong from an ELAN file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import os | |
| from pympi.Elan import Eaf | |
| from readalongs.api import Token, convert_prealigned_text_to_offline_html | |
def elan_to_readalong(eaf_file, audio_file, tiername, output_file):
    """Convert one tier of an ELAN file (.eaf) to an offline ReadAlong (.html).

    Each annotation on the selected tier becomes one timed token, followed by
    a single-space non-word token acting as a separator. All tokens are
    handed to readalongs as a single sentence.

    Args:
        eaf_file (str): Path to the ELAN file.
        audio_file (str): Path to the audio file, passed through to
            ``convert_prealigned_text_to_offline_html``.
        tiername (str): Name of the tier whose annotations are converted.
        output_file (str): Path to the output ReadAlong html file.

    Raises:
        KeyError: If ``tiername`` is not a tier of the ELAN file.
    """
    # Load the ELAN file
    eaf = Eaf(eaf_file)

    # Fail early with an actionable message instead of a bare KeyError('name').
    tier_names = [str(name) for name in eaf.get_tier_names()]
    if tiername not in tier_names:
        available = ", ".join(tier_names) or "(none)"
        raise KeyError(
            f"Tier '{tiername}' not found in '{eaf_file}'. "
            f"Available tiers: {available}"
        )

    # Annotations are not guaranteed to come back in chronological order, so
    # sort by (start, end) to keep the tokens in reading order.
    tier_intervals = sorted(
        eaf.get_annotation_data_for_tier(tiername),
        key=lambda annotation: (annotation[0], annotation[1]),
    )

    segments = []
    # Reference-annotation tiers yield a 4th element (the referenced value);
    # `*_` accepts both the 3-tuple and the 4-tuple shape.
    for start_time, end_time, value, *_ in tier_intervals:
        # Annotation times are divided by 1000 (milliseconds -> seconds).
        start = start_time / 1000.0
        end = end_time / 1000.0
        segments.append(Token(text=value, time=start, dur=end - start))
        segments.append(Token(text=" ", is_word=False))

    # Build the ReadAlong; the XML half of the returned pair is not needed here.
    readalong_html, _readalong_xml = convert_prealigned_text_to_offline_html(
        [segments],
        audio_file,
        ["unk"],
        title="ReadAlong generated using EveryVoice",
    )
    with open(output_file, "w", encoding="utf8") as f:
        f.write(readalong_html)
def main():
    """Command-line entry point: parse the arguments and run the conversion.

    Positional arguments are the ELAN file, the audio file, the tier name and
    an optional output path. When no output path is given, the ELAN file's
    path with its extension replaced by ``.html`` is used.

    Raises:
        SystemExit: With an error message (printed to stderr, exit status 1)
            when an input file is missing or the conversion fails.
    """
    parser = argparse.ArgumentParser(
        description="Convert ELAN files (.eaf) to ReadAlong files (.html)."
    )
    parser.add_argument("input", help="Path to the input ELAN file (.eaf).")
    parser.add_argument("input_audio", help="Path to the input audio.")
    parser.add_argument("tiername", help="Name of the tier to extract segments from")
    parser.add_argument(
        "output",
        nargs="?",
        help="Path to the output ReadAlong file (.html). If not provided, the output will have the same name as the input with a .html extension.",
    )
    args = parser.parse_args()

    input_path = args.input
    input_audio_path = args.input_audio
    tiername = args.tiername
    output_path = args.output or os.path.splitext(input_path)[0] + ".html"

    # `raise SystemExit(msg)` writes msg to stderr and exits with status 1.
    # Unlike the `exit()` helper it does not depend on the `site` module,
    # so it also works under `python -S` and in frozen builds.
    if not os.path.isfile(input_path):
        raise SystemExit(f"Error: The input file '{input_path}' does not exist.")
    if not os.path.isfile(input_audio_path):
        raise SystemExit(
            f"Error: The audio file '{input_audio_path}' does not exist."
        )

    try:
        elan_to_readalong(input_path, input_audio_path, tiername, output_path)
    except Exception as e:
        # Top-level boundary: report any conversion failure as a CLI error.
        raise SystemExit(f"Error during conversion: {e}") from e
    print(f"Successfully converted '{input_path}' to '{output_path}'.")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment