YUKI2eN3e · July 17, 2023 23:59 · YUKI2eN3e · Jul 18, 2023
diff --git a/read-transcription.py b/read-transcription.py
 #!/usr/bin/env py
 import argparse
 import json
 import os
 from dataclasses import dataclass
 from typing import Dict, List

 from rich.console import Console, ConsoleOptions, RenderResult
 from rich.table import Column, Table
 from rich.text import Text
 from rich_argparse import RichHelpFormatter

 # Module-wide rich console; run() prints the rendered table through it.
 console = Console()


 @dataclass
 class CliArgs:
    """Typed view of the parsed command-line options.

    The field names equal the argparse ``dest`` names, which lets
    ``get_args`` unpack the parsed namespace directly into this class.
    """

    # value of -i/--input: the file containing the transcription
    input_file: str
    # True when -t/--table was given
    table: bool
    # True when -p/--psv was given
    psv: bool
    # True when --show-blank-audio was given
    show_blank_audio: bool


 def get_args() -> CliArgs:
    """Parse the command line and return the options as a ``CliArgs``.

    ``-i/--input`` is required, exactly one of ``-t/--table`` and
    ``-p/--psv`` must be supplied, and ``--show-blank-audio`` is optional.
    """
    script_name = os.path.basename(__file__)
    parser = argparse.ArgumentParser(
        prog=script_name, formatter_class=RichHelpFormatter
    )

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        dest="input_file",
        help="the file containing the transcription",
    )

    # The output formats exclude each other, but one of them has to be chosen.
    formats = parser.add_mutually_exclusive_group(required=True)
    for short_flag, long_flag, description in (
        ("-t", "--table", "output in console as rich.Table"),
        ("-p", "--psv", "output as psv (pipe separated values)"),
    ):
        formats.add_argument(
            short_flag,
            long_flag,
            action="store_true",
            default=False,
            help=description,
        )

    parser.add_argument(
        "--show-blank-audio",
        action="store_true",
        default=False,
        help="show entries containing [BLANK_AUDIO]",
    )

    namespace = parser.parse_args()
    return CliArgs(**vars(namespace))


 class Transcription:
    """A single transcription entry: two lookup dicts plus its text."""

    # the "from" and "to" keys are read when the entry is rendered
    timestamps: Dict[str, str]
    # stored as given; not read anywhere in the visible code
    offsets: Dict[str, int]
    # text of the entry; "[BLANK_AUDIO]" marks an entry without speech
    text: str

    def __init__(
        self, timestamps: Dict[str, str], offsets: Dict[str, int], text: str
    ) -> None:
        """Keep the three values unchanged on the instance."""
        self.timestamps, self.offsets, self.text = timestamps, offsets, text


 class Transcriptions(List[Transcription]):
    """List of ``Transcription`` entries that renders as a table or as PSV.

    Entries whose stripped text equals ``[BLANK_AUDIO]`` count as blank.
    They are left out of both output formats while ``hide_blank_audio`` is
    true and are shown (dimmed in the table) otherwise.
    """

    # When True, blank-audio entries are omitted from the table and the PSV.
    hide_blank_audio = False

    def __init__(self, json_array: List) -> None:
        """Build the list from decoded JSON objects.

        Args:
            json_array: iterable of dicts; each one is unpacked as the
                keyword arguments of ``Transcription``.
        """
        super().__init__(Transcription(**tr) for tr in json_array)

    @staticmethod
    def _is_blank(tr: Transcription) -> bool:
        """Return True when the entry only carries the blank-audio marker."""
        return tr.text.strip() == "[BLANK_AUDIO]"

    def __rich_console__(
        self, console: Console, options: ConsoleOptions
    ) -> RenderResult:
        """Yield a title line followed by a From/To/Text table."""
        yield Text("Transcriptions")
        table = Table(Column(header="From"), Column(header="To"), Column(header="Text"))

        for tr in self:
            if not self._is_blank(tr):
                table.add_row(
                    Text(tr.timestamps["from"]),
                    Text(tr.timestamps["to"]),
                    Text(tr.text),
                )
            elif not self.hide_blank_audio:
                # add_row, not add_column: a blank entry is one more row of
                # the same three columns. Styling through Text avoids console
                # markup, so the marker's brackets need no escaping.
                table.add_row(
                    Text(tr.timestamps["from"], style="bright_black"),
                    Text(tr.timestamps["to"], style="bright_black"),
                    Text("[BLANK_AUDIO]", style="bright_black"),
                )

        yield table

    def get_psv(self) -> List[str]:
        """Return the entries as pipe-separated lines, header line first.

        Blank-audio entries are skipped while ``hide_blank_audio`` is true.
        """
        psv: List[str] = ["From|To|Text"]
        psv.extend(
            f'{tr.timestamps["from"]}|{tr.timestamps["to"]}|{tr.text}'
            for tr in self
            if not (self.hide_blank_audio and self._is_blank(tr))
        )
        return psv


 def run():
    """Load the transcription file named on the command line and print it.

    The JSON document's ``"transcription"`` array is rendered as a rich
    table or as PSV lines, depending on the chosen output option.
    """
    args = get_args()

    with open(args.input_file, "r", encoding="utf8") as file:
        payload = json.load(file)

    transcriptions = Transcriptions(payload["transcription"])
    # The CLI flag is phrased as "show", the list attribute as "hide".
    transcriptions.hide_blank_audio = not args.show_blank_audio

    if args.table:
        console.print(transcriptions)
        return

    if args.psv:
        for line in transcriptions.get_psv():
            print(line)


 # Start the CLI only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    run()
	#!/usr/bin/env py
	import argparse
	import json
	import os
	from dataclasses import dataclass
	from typing import Dict, List

	from rich.console import Console, ConsoleOptions, RenderResult
	from rich.table import Column, Table
	from rich.text import Text
	from rich_argparse import RichHelpFormatter

	# Module-wide rich console; run() prints the rendered table through it.
	console = Console()


	@dataclass
	class CliArgs:
	    """Typed view of the parsed command-line options.

	    The field names equal the argparse ``dest`` names, so the parsed
	    namespace can be unpacked directly into this class.
	    """

	    # value of -i/--input: the file containing the transcription
	    input_file: str
	    # True when -t/--table was given
	    table: bool
	    # True when -p/--psv was given
	    psv: bool
	    # True when --show-blank-audio was given
	    show_blank_audio: bool


	def get_args() -> CliArgs:
	    """Parse the command line and return the options as a ``CliArgs``.

	    ``-i/--input`` is required, exactly one of ``-t/--table`` and
	    ``-p/--psv`` must be supplied, and ``--show-blank-audio`` is optional.
	    """
	    parser = argparse.ArgumentParser(
	        prog=os.path.basename(__file__), formatter_class=RichHelpFormatter
	    )
	    parser.add_argument(
	        "-i",
	        "--input",
	        dest="input_file",
	        help="the file containing the transcription",
	        required=True,
	    )

	    # The output formats exclude each other, but one of them is mandatory.
	    output_type = parser.add_mutually_exclusive_group(required=True)
	    output_type.add_argument(
	        "-t",
	        "--table",
	        action="store_true",
	        default=False,
	        help="output in console as rich.Table",
	    )
	    output_type.add_argument(
	        "-p",
	        "--psv",
	        action="store_true",
	        default=False,
	        help="output as psv (pipe separated values)",
	    )

	    parser.add_argument(
	        "--show-blank-audio",
	        action="store_true",
	        default=False,
	        help="show entries containing [BLANK_AUDIO]",
	    )

	    return CliArgs(**vars(parser.parse_args()))


	class Transcription:
	    """A single transcription entry: two lookup dicts plus its text."""

	    # the "from" and "to" keys are read when the entry is rendered
	    timestamps: Dict[str, str]
	    # stored as given; not read anywhere in the visible code
	    offsets: Dict[str, int]
	    # text of the entry; "[BLANK_AUDIO]" marks an entry without speech
	    text: str

	    def __init__(
	        self, timestamps: Dict[str, str], offsets: Dict[str, int], text: str
	    ) -> None:
	        """Keep the three values unchanged on the instance."""
	        self.timestamps = timestamps
	        self.offsets = offsets
	        self.text = text


	class Transcriptions(List[Transcription]):
	    """List of ``Transcription`` entries that renders as a table or as PSV.

	    Entries whose stripped text equals ``[BLANK_AUDIO]`` count as blank.
	    They are left out of both output formats while ``hide_blank_audio`` is
	    true and are shown (dimmed in the table) otherwise.
	    """

	    # When True, blank-audio entries are omitted from the table and the PSV.
	    hide_blank_audio = False

	    def __init__(self, json_array: List) -> None:
	        """Build the list from decoded JSON objects.

	        Args:
	            json_array: iterable of dicts; each one is unpacked as the
	                keyword arguments of ``Transcription``.
	        """
	        super().__init__(Transcription(**tr) for tr in json_array)

	    @staticmethod
	    def _is_blank(tr: Transcription) -> bool:
	        """Return True when the entry only carries the blank-audio marker."""
	        return tr.text.strip() == "[BLANK_AUDIO]"

	    def __rich_console__(
	        self, console: Console, options: ConsoleOptions
	    ) -> RenderResult:
	        """Yield a title line followed by a From/To/Text table."""
	        yield Text("Transcriptions")
	        table = Table(Column(header="From"), Column(header="To"), Column(header="Text"))

	        for tr in self:
	            if not self._is_blank(tr):
	                table.add_row(
	                    Text(tr.timestamps["from"]),
	                    Text(tr.timestamps["to"]),
	                    Text(tr.text),
	                )
	            elif not self.hide_blank_audio:
	                # add_row, not add_column: a blank entry is one more row of
	                # the same three columns. Styling through Text avoids console
	                # markup, so the marker's brackets need no escaping.
	                table.add_row(
	                    Text(tr.timestamps["from"], style="bright_black"),
	                    Text(tr.timestamps["to"], style="bright_black"),
	                    Text("[BLANK_AUDIO]", style="bright_black"),
	                )

	        yield table

	    def get_psv(self) -> List[str]:
	        """Return the entries as pipe-separated lines, header line first.

	        Blank-audio entries are skipped while ``hide_blank_audio`` is true.
	        """
	        # Plain "|" separators: a backslash before the pipe would end up
	        # verbatim in the output.
	        psv: List[str] = ["From|To|Text"]
	        psv.extend(
	            f'{tr.timestamps["from"]}|{tr.timestamps["to"]}|{tr.text}'
	            for tr in self
	            if not (self.hide_blank_audio and self._is_blank(tr))
	        )
	        return psv


	def run():
	    """Load the transcription file named on the command line and print it.

	    The JSON document's ``"transcription"`` array is rendered as a rich
	    table or as PSV lines, depending on the chosen output option.
	    """
	    args = get_args()
	    with open(args.input_file, "r", encoding="utf8") as file:
	        transcriptions = Transcriptions(json.load(file)["transcription"])
	    # The CLI flag is phrased as "show", the list attribute as "hide".
	    transcriptions.hide_blank_audio = not args.show_blank_audio

	    if args.table:
	        console.print(transcriptions)
	    elif args.psv:
	        for line in transcriptions.get_psv():
	            print(line)


	# Start the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    run()
No results found