Last active
February 18, 2025 02:35
-
-
Save cxfcxf/15ffc741db388d7d8ef73c67c998e13c to your computer and use it in GitHub Desktop.
Use faster_whisper to transcribe Japanese audio and Qwen2.5 32B (via Ollama) to translate the subtitles into Chinese.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import os | |
| import re | |
| import gc | |
| import argparse | |
| import ollama | |
| import langdetect | |
| from faster_whisper import WhisperModel | |
# Running count of first-try translation failures (the local model returned
# text still detected as Japanese and had to be retried); reported at exit.
local_model_translate_failures = 0
def parse_args():
    '''Parse command line arguments.

    Returns:
        argparse.Namespace with input_file, vad_threshold, and tmp_srt.
    '''
    parser = argparse.ArgumentParser(description="Translate Japanese subtitles to Chinese.")
    # required=True: without it a missing input file only surfaces much later
    # as an obscure failure inside the Whisper transcription step.
    # Help text fixed: this argument is the media file fed to Whisper, not an SRT.
    parser.add_argument("-i", dest="input_file", required=True, help="Path to the input video/audio file")
    parser.add_argument("-t", dest="vad_threshold", type=float, default=0.5, help="VAD threshold (default: 0.5)")
    parser.add_argument("--tmp-srt", dest="tmp_srt", default="tmp.srt", help="Path to the temporary SRT file")
    return parser.parse_args()
def translate_to_chinese(text, model="qwen2.5:32b"):
    '''Translate Japanese text to Chinese via a local Ollama model.

    Args:
        text: Japanese source text (coerced to str before sending).
        model: Ollama model tag to query.

    Returns:
        The model's reply content with surrounding whitespace stripped.
    '''
    # System prompt (Chinese), roughly: "You are a professional Japanese-to-
    # Simplified-Chinese translator. Translate the following Japanese text into
    # Chinese, keep the input/output format identical, output only the final
    # translation — no unrelated content, no annotations."
    messages = [
        {"role": "system", "content": '你是一个专业的日语译简体中文翻译器. \
        把下面的文本日文翻译成中文, 保证输入和输出内容的格式一致, 输出文本为最终翻译文本, 固不要输出无关内容, 不要输出任何注释.'},
        {"role": "user", "content": str(text)}
    ]
    # stream=False returns the whole reply in one response object; low
    # temperature keeps translations close to deterministic across retries.
    response = ollama.chat(
        model=model,
        messages=messages,
        stream=False,
        options={"temperature": 0.1}
    )
    return response.message.content.strip()
def is_japanese(text):
    '''Return True if the text is detected as Japanese.

    langdetect raises LangDetectException for text it cannot classify
    (empty strings, pure punctuation/numbers); any detection failure is
    treated as "not Japanese" so the translation loop keeps going.
    '''
    try:
        return langdetect.detect(text) == 'ja'
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; Exception preserves the best-effort behavior without
        # blocking Ctrl-C.
        return False
def parse_srt(file_path):
    '''Read an SRT file and return a list of (number, time_code, text) tuples.

    Multi-line subtitle text is collapsed into a single space-joined line;
    blocks with fewer than three lines are skipped.
    '''
    with open(file_path, 'r', encoding='utf-8') as srt:
        raw = srt.read()

    entries = []
    # Subtitle entries are separated by a blank line.
    for chunk in re.split(r'\n\n', raw.strip()):
        pieces = chunk.split('\n', 2)
        if len(pieces) < 3:
            continue
        number, timing, body = pieces
        entries.append((number, timing, body.replace('\n', ' ').strip()))
    return entries
def translate_srt(input_file, output_file):
    '''Translate an SRT file's subtitle text to Chinese and write a new SRT.

    Args:
        input_file: Path to the source (Japanese) SRT file.
        output_file: Path where the translated SRT is written (UTF-8).

    Side effects:
        Increments the module-level local_model_translate_failures counter
        whenever the model's first attempt still reads as Japanese.
    '''
    global local_model_translate_failures
    subtitles = parse_srt(input_file)
    # Cache keyed on source text keeps repeated lines translated consistently
    # and avoids redundant model calls.
    translated_cache = {}
    translated_subtitles = []
    for subtitle_number, time_code, text in subtitles:
        # Membership test (not .get() truthiness): the original re-queried the
        # model whenever a cached translation was an empty/falsy string.
        if text in translated_cache:
            translated_text = translated_cache[text]
        else:
            translated_text = translate_to_chinese(text)
            # If the model echoed Japanese back, retry once and count it.
            if is_japanese(translated_text):
                translated_text = translate_to_chinese(text)
                local_model_translate_failures += 1
            translated_cache[text] = translated_text
        print(f"{text} => {translated_text}")
        translated_subtitles.append((subtitle_number, time_code, translated_text))
    with open(output_file, 'w', encoding='utf-8') as file:
        for subtitle in translated_subtitles:
            file.write(f"{subtitle[0]}\n{subtitle[1]}\n{subtitle[2]}\n\n")
def format_time(seconds):
    '''Convert a seconds value to an SRT timestamp (HH:MM:SS,mmm).'''
    mins, secs = divmod(seconds, 60)
    hrs, mins = divmod(mins, 60)
    # Milliseconds come from the fractional part; int() truncates toward zero.
    ms = (secs - int(secs)) * 1000
    return "{:02d}:{:02d}:{:02d},{:03d}".format(int(hrs), int(mins), int(secs), int(ms))
def transcribe_video(args):
    '''Transcribe audio from a video file and generate an SRT file.

    Runs faster-whisper over args.input_file with VAD filtering and writes
    numbered SRT entries to args.tmp_srt, then frees the model.
    '''
    model_size = "large-v3-turbo"
    # NOTE(review): hard-coded to CUDA + float16; will fail on CPU-only hosts.
    model = WhisperModel(model_size, device="cuda", compute_type="float16")
    segments, info = model.transcribe(
        args.input_file,
        beam_size=5,
        vad_filter=True,  # drop non-speech via voice-activity detection
        vad_parameters={"threshold": args.vad_threshold},
        language='ja',  # force Japanese; skip auto-detection
    )
    print(f"Detected language '{info.language}' with probability {info.language_probability}")
    subtitles = []
    allowed_gap = 5  # cap on-screen duration of one subtitle, in seconds
    for segment in segments:
        duration = segment.end - segment.start
        if duration >= allowed_gap:
            # Long segment: show only its final `allowed_gap` seconds so the
            # subtitle doesn't appear far ahead of the speech it belongs to.
            start_time = format_time(segment.end - allowed_gap)
            end_time = format_time(segment.end)
        else:
            start_time = format_time(segment.start)
            end_time = format_time(segment.end)
        text = segment.text
        segment_id = segment.id + 1  # SRT numbering starts at 1
        line_out = f"{segment_id}\n{start_time} --> {end_time}\n{text.lstrip()}\n\n"
        print(line_out)
        subtitles.append(line_out)
    with open(args.tmp_srt, 'w', encoding='utf-8') as srt_file:
        for line in subtitles:
            srt_file.write(line)
        srt_file.flush()
    # unload model when finished to release GPU memory before translation
    del model
    gc.collect()
def main():
    '''Entry point: transcribe the video, translate the subtitles, clean up.'''
    args = parse_args()
    transcribe_video(args)
    # Final SRT sits next to the input file, same base name.
    base, _ = os.path.splitext(args.input_file)
    output_srt = base + ".srt"
    translate_srt(args.tmp_srt, output_srt)
    print(f"Translation complete. Output saved to {output_srt}")
    print(f"Removing temporary SRT file {args.tmp_srt}")
    os.remove(args.tmp_srt)
    print(f"Local model translation failures: {local_model_translate_failures}")
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment