#!/usr/bin/env python
"""Transcribe Japanese speech from a video and translate the subtitles to Chinese.

Pipeline: faster-whisper transcription -> temporary SRT file -> line-by-line
translation through a local Ollama model -> final ``.srt`` next to the input.
"""
import argparse
import gc
import os
import re

import langdetect
import ollama
from faster_whisper import WhisperModel

# Counts lines whose first translation attempt still looked Japanese,
# i.e. the local model failed to translate on the first try.
local_model_translate_failures = 0


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description="Translate Japanese subtitles to Chinese.")
    parser.add_argument("-i", dest="input_file", help="Path to the input SRT file")
    parser.add_argument("-t", dest="vad_threshold", type=float, default=0.5, help="VAD threshold")
    parser.add_argument("--tmp-srt", dest="tmp_srt", default="tmp.srt",
                        help="Path to the temporary SRT file")
    return parser.parse_args()


def translate_to_chinese(text, model="qwen2.5:32b"):
    """Translate Japanese *text* to Chinese using a local Ollama model.

    Args:
        text: The Japanese source text.
        model: Name of the Ollama model to use.

    Returns:
        The translated text with surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": '你是一个专业的日语译简体中文翻译器. \
把下面的文本日文翻译成中文, 保证输入和输出内容的格式一致, 输出文本为最终翻译文本, 固不要输出无关内容, 不要输出任何注释.'},
        {"role": "user", "content": str(text)},
    ]
    # Low temperature keeps the translation deterministic and literal.
    response = ollama.chat(
        model=model,
        messages=messages,
        stream=False,
        options={"temperature": 0.1},
    )
    # NOTE(review): relies on the ollama client returning an object with a
    # `.message.content` attribute (newer client API) — confirm client version.
    return response.message.content.strip()


def is_japanese(text):
    """Return True if *text* is detected as Japanese, False otherwise."""
    try:
        return langdetect.detect(text) == 'ja'
    except langdetect.LangDetectException:
        # Detection raises on empty/featureless input; treat as "not Japanese".
        return False


def parse_srt(file_path):
    """Parse an SRT file.

    Args:
        file_path: Path to the SRT file.

    Returns:
        A list of ``(subtitle_number, time_code, text)`` tuples; multi-line
        subtitle text is collapsed onto a single space-joined line.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    # Subtitle blocks are separated by blank lines.
    subtitle_blocks = re.split(r'\n\n', content.strip())
    subtitles = []
    for block in subtitle_blocks:
        parts = block.split('\n', 2)
        if len(parts) >= 3:
            subtitle_number = parts[0]
            time_code = parts[1]
            text = parts[2].replace('\n', ' ').strip()
            subtitles.append((subtitle_number, time_code, text))
    return subtitles


def translate_srt(input_file, output_file):
    """Translate an SRT file to Chinese and write the result.

    Args:
        input_file: Path of the Japanese SRT file to read.
        output_file: Path of the Chinese SRT file to write.

    Side effects:
        Increments the module-level ``local_model_translate_failures`` counter
        for every line whose first translation still looked Japanese.
    """
    global local_model_translate_failures
    subtitles = parse_srt(input_file)
    # Cache translations so repeated lines stay consistent (and save model calls).
    translated_cache = {}
    translated_subtitles = []
    for subtitle_number, time_code, text in subtitles:
        # Membership test (not .get truthiness) so a cached empty/falsy
        # translation is still treated as a cache hit.
        if text in translated_cache:
            translated_text = translated_cache[text]
        else:
            translated_text = translate_to_chinese(text)
            if is_japanese(translated_text):
                # The model echoed Japanese back; retry once and count the miss.
                translated_text = translate_to_chinese(text)
                local_model_translate_failures += 1
            translated_cache[text] = translated_text
        print(f"{text} => {translated_text}")
        translated_subtitles.append((subtitle_number, time_code, translated_text))
    with open(output_file, 'w', encoding='utf-8') as file:
        for subtitle in translated_subtitles:
            file.write(f"{subtitle[0]}\n{subtitle[1]}\n{subtitle[2]}\n\n")


def format_time(seconds):
    """Convert a duration in seconds to SRT ``HH:MM:SS,mmm`` format."""
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    milliseconds = (seconds - int(seconds)) * 1000
    return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}"


def transcribe_video(args):
    """Transcribe audio from a video file and write a temporary SRT file.

    Args:
        args: Parsed CLI namespace with ``input_file``, ``vad_threshold``
            and ``tmp_srt`` attributes.
    """
    model_size = "large-v3-turbo"
    model = WhisperModel(model_size, device="cuda", compute_type="float16")
    segments, info = model.transcribe(
        args.input_file,
        beam_size=5,
        vad_filter=True,
        vad_parameters={"threshold": args.vad_threshold},
        language='ja',
    )
    print(f"Detected language '{info.language}' with probability {info.language_probability}")
    subtitles = []
    # Segments longer than this are clipped to their trailing portion —
    # presumably to avoid subtitles lingering through long pauses; confirm intent.
    allowed_gap = 5
    for segment in segments:
        duration = segment.end - segment.start
        if duration >= allowed_gap:
            start_time = format_time(segment.end - allowed_gap)
            end_time = format_time(segment.end)
        else:
            start_time = format_time(segment.start)
            end_time = format_time(segment.end)
        text = segment.text
        segment_id = segment.id + 1  # SRT numbering is 1-based
        line_out = f"{segment_id}\n{start_time} --> {end_time}\n{text.lstrip()}\n\n"
        print(line_out)
        subtitles.append(line_out)
    with open(args.tmp_srt, 'w', encoding='utf-8') as srt_file:
        for line in subtitles:
            srt_file.write(line)
            srt_file.flush()
    # Unload the whisper model when finished to release GPU memory.
    del model
    gc.collect()


def main():
    """Main function for the script."""
    args = parse_args()
    transcribe_video(args)
    output_srt = os.path.splitext(args.input_file)[0] + ".srt"
    translate_srt(args.tmp_srt, output_srt)
    print(f"Translation complete. Output saved to {output_srt}")
    print(f"Removing temporary SRT file {args.tmp_srt}")
    os.remove(args.tmp_srt)
    print(f"Local model translation failures: {local_model_translate_failures}")


if __name__ == "__main__":
    main()