Skip to content

Instantly share code, notes, and snippets.

@koyfm
Created December 18, 2024 12:28
Show Gist options
  • Select an option

  • Save koyfm/b3d986a0a1d8463f14f3ee81aef6c244 to your computer and use it in GitHub Desktop.

Select an option

Save koyfm/b3d986a0a1d8463f14f3ee81aef6c244 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "ffmpeg-python",
# "tqdm",
# ]
# ///
import shutil
import sqlite3
from pathlib import Path
from ffmpeg import probe
from tqdm import tqdm
def main() -> None:
    """Copy Opus audio referenced by ``msgstore.db`` into per-chat folders.

    Selects every ``message_media`` row whose MIME type is
    ``audio/ogg; codecs=opus`` from ``msgstore.db`` in the current working
    directory, copies each referenced file into ``data/<chat_row_id>/`` and
    adds up the duration that ``probe`` reports for the file's first stream.
    A one-line summary (total seconds, rows without a path, paths without a
    file on disk) is printed at the end.

    Raises:
        sqlite3.Error: If the database cannot be queried (e.g. the
            ``message_media`` table does not exist).
        KeyError, IndexError: If the probe result has no first stream or that
            stream carries no ``duration`` entry.

    NOTE(review): ``probe`` itself may also raise when ffprobe is missing or
    rejects a file; that is deliberately not swallowed here.
    """
    con = sqlite3.connect("msgstore.db")
    try:
        # Fetch everything up front: tqdm needs the row count for its total,
        # and the connection can be released before the slow file work.
        rows = con.execute("""
            SELECT chat_row_id, file_path
            FROM message_media
            WHERE mime_type='audio/ogg; codecs=opus'
        """).fetchall()
    finally:
        con.close()

    missing_db, missing_media, total_duration = 0, 0, 0.0
    for chat_row_id, file_path in tqdm(rows):
        # Row exists but the database holds no path for its media.
        if file_path is None:
            missing_db += 1
            continue
        source = Path(file_path)
        # Path is recorded but the file is not present on disk.
        if not source.is_file():
            missing_media += 1
            continue
        target = Path("data") / str(chat_row_id)
        # parents=True so the first run also creates the top-level data/ dir.
        target.mkdir(parents=True, exist_ok=True)
        shutil.copy(source, target)
        total_duration += float(probe(source.as_posix())["streams"][0]["duration"])
    print(
        f"Done processing {total_duration:.2f}s of audio "
        f"with {missing_db} missing in msgstore "
        f"and {missing_media} missing media"
    )
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment