Skip to content

Instantly share code, notes, and snippets.

@roedoejet
Created January 16, 2025 23:00
Show Gist options
  • Select an option

  • Save roedoejet/87d93cbf8d4eb5a25318e8da56dff385 to your computer and use it in GitHub Desktop.

Select an option

Save roedoejet/87d93cbf8d4eb5a25318e8da56dff385 to your computer and use it in GitHub Desktop.
A CLI for creating a ReadAlong from an ELAN file
import argparse
import os
from pympi.Elan import Eaf
from readalongs.api import Token, convert_prealigned_text_to_offline_html
def elan_to_readalong(eaf_file, audio_file, tiername, output_file):
    """Convert one tier of an ELAN file (.eaf) to an offline ReadAlong (.html).

    Every annotation of the tier becomes one word token carrying its start
    time and duration, followed by a non-word whitespace token.

    Args:
        eaf_file (str): Path to the ELAN file.
        audio_file (str): Path to the audio file, passed through to
            ``convert_prealigned_text_to_offline_html``.
        tiername (str): Name of the tier to extract the segments from.
        output_file (str): Path to the output ReadAlong html file.

    Raises:
        KeyError: If ``tiername`` is not a tier of the ELAN file.
    """
    # Load the ELAN file
    eaf = Eaf(eaf_file)

    # Fail early with a message that lists the valid choices; the lookup
    # below would otherwise raise a KeyError holding only the tier name.
    tier_names = list(eaf.get_tier_names())
    if tiername not in tier_names:
        available = ", ".join(sorted(tier_names))
        raise KeyError(f"Tier '{tiername}' not found; available tiers: {available}")

    # Collect the segments of the requested tier.
    segments = []
    # Reference tiers yield 4-tuples (start, end, value, ref_value); the
    # starred target absorbs the extra field so both tier kinds unpack.
    for start_time, end_time, value, *_ in eaf.get_annotation_data_for_tier(tiername):
        # ELAN time values are divided by 1000 (milliseconds -> seconds).
        start = start_time / 1000.0
        end = end_time / 1000.0
        segments.append(Token(text=value, time=start, dur=end - start))
        # Separator between consecutive annotations.
        segments.append(Token(text=" ", is_word=False))

    # Build the ReadAlong; all segments are passed as a single inner list.
    # NOTE(review): ["unk"] is presumably the language-code list - confirm
    # against the readalongs API.
    readalong_html, _readalong_xml = convert_prealigned_text_to_offline_html(
        [segments],
        audio_file,
        ["unk"],
        title="ReadAlong generated using EveryVoice",
    )

    # Save the ReadAlong
    with open(output_file, "w", encoding="utf8") as f:
        f.write(readalong_html)
def main(argv=None):
    """Command-line entry point: convert an ELAN file to a ReadAlong.

    Args:
        argv (list[str] | None): Argument list to parse, without the program
            name. When ``None`` (the default), argparse reads ``sys.argv``.

    Raises:
        SystemExit: With the error message as its code (printed to stderr,
            exit status 1) when the input file does not exist or the
            conversion fails. argparse raises its own ``SystemExit`` on
            invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Convert ELAN files (.eaf) to ReadAlong files (.html)."
    )
    parser.add_argument("input", help="Path to the input ELAN file (.eaf).")
    parser.add_argument("input_audio", help="Path to the input audio.")
    parser.add_argument("tiername", help="Name of the tier to extract segments from")
    parser.add_argument(
        "output",
        nargs="?",
        help="Path to the output ReadAlong file (.html). If not provided, the output will have the same name as the input with a .html extension.",
    )
    args = parser.parse_args(argv)

    input_path = args.input
    input_audio_path = args.input_audio
    tiername = args.tiername
    # Default output: the input path with its extension replaced by .html.
    output_path = args.output or os.path.splitext(input_path)[0] + ".html"

    if not os.path.isfile(input_path):
        # SystemExit with a string prints it to stderr and exits with
        # status 1, without relying on the site-provided exit() builtin.
        raise SystemExit(f"Error: The input file '{input_path}' does not exist.")

    try:
        elan_to_readalong(input_path, input_audio_path, tiername, output_path)
    except Exception as e:
        # Top-level boundary: report any conversion failure and exit non-zero.
        raise SystemExit(f"Error during conversion: {e}") from e
    else:
        print(f"Successfully converted '{input_path}' to '{output_path}'.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment