Created
December 18, 2024 12:28
-
-
Save koyfm/b3d986a0a1d8463f14f3ee81aef6c244 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = ">=3.12" | |
| # dependencies = [ | |
| # "ffmpeg-python", | |
| # "tqdm", | |
| # ] | |
| # /// | |
| import shutil | |
| import sqlite3 | |
| from pathlib import Path | |
| from ffmpeg import probe | |
| from tqdm import tqdm | |
def main() -> None:
    """Copy Ogg/Opus media referenced by ``msgstore.db`` into per-chat folders.

    Reads every ``message_media`` row whose ``mime_type`` is
    ``audio/ogg; codecs=opus`` and, for each row that has a ``file_path``
    pointing at an existing file, copies that file into
    ``data/<chat_row_id>/`` (relative to the current working directory).
    The duration reported by ffprobe for the first stream of each copied
    file is accumulated, and a one-line summary is printed at the end.

    Rows with a NULL ``file_path`` are counted as "missing in msgstore";
    rows whose path does not exist on disk are counted as "missing media".
    Neither kind is copied or probed.
    """
    con = sqlite3.connect("msgstore.db")
    try:
        # Materialise all rows up front so tqdm knows the total and the
        # connection can be released before the (slow) copy/probe loop.
        rows = con.execute("""
            SELECT chat_row_id, file_path
            FROM message_media
            WHERE mime_type='audio/ogg; codecs=opus'
        """).fetchall()
    finally:
        # The original never closed the connection; always release it.
        con.close()

    missing_db, missing_media, total_duration = 0, 0, 0.0
    for chat_row_id, file_path in tqdm(rows):
        if file_path is None:
            missing_db += 1
            continue

        source = Path(file_path)
        if not source.is_file():
            missing_media += 1
            continue

        target = Path(f"data/{chat_row_id}")
        # parents=True: on a fresh checkout the top-level "data" directory
        # does not exist yet, and a plain mkdir() would raise
        # FileNotFoundError on the very first file.
        target.mkdir(parents=True, exist_ok=True)
        shutil.copy(source, target)

        # ffprobe reports the duration as a string; take the first stream.
        total_duration += float(probe(source.as_posix())["streams"][0]["duration"])

    print(
        f"Done processing {total_duration:.2f}s of audio "
        f"with {missing_db} missing in msgstore "
        f"and {missing_media} missing media"
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment