Skip to content

Instantly share code, notes, and snippets.

@muness
Last active January 5, 2026 17:27
Show Gist options
  • Select an option

  • Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.

Select an option

Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.
Fetch enhanced lyrics for FLAC, M4A and MP3 files
#!/usr/bin/env python3
import os
import sys
import argparse
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
from mutagen.id3 import USLT, ID3
import re
# Command-line interface: one positional music folder plus an optional verbosity level
parser = argparse.ArgumentParser(
    description="Embed lyrics into FLAC, MP3, and M4A files from LRC files.",
)
parser.add_argument("folder", help="Path to the music folder")
verbosity_option = {
    "type": int,
    "choices": [0, 1, 2, 3],
    "default": 2,
    "help": "Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG",
}
parser.add_argument("--verbosity", **verbosity_option)
args = parser.parse_args()
# Root folder that is walked for audio files
music_directory = args.folder
# Verbosity levels, ordered from silent (0) to most detailed (3)
QUIET, WARN, INFO, DEBUG = range(4)
def log(message, level=INFO):
    """Print *message* unless the configured verbosity is lower than *level*."""
    if args.verbosity < level:
        return
    print(message)
def detect_lyric_type(lyrics):
    """Classify lyrics text by the kind of timestamps it contains.

    Args:
        lyrics: Full lyrics text (typically the content of an .lrc file).

    Returns:
        "enhanced" if a word-level ``<mm:ss.xx>`` tag is present,
        "line-by-line" if only ``[mm:ss.xx]`` line tags are present,
        otherwise "plain".
    """
    # Accept 1-3 fractional digits so millisecond stamps such as
    # [00:12.345] are recognised as well as the centisecond form [00:12.34].
    stamp = r'\d{1,2}:\d{2}\.\d{1,3}'
    if re.search('<' + stamp + '>', lyrics):
        return "enhanced"
    if re.search(r'\[' + stamp + r'\]', lyrics):
        return "line-by-line"
    return "plain"
# Collect every .flac/.mp3/.m4a file found anywhere beneath the music folder
music_files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk(music_directory)
    for name in names
    if name.lower().endswith(('.flac', '.mp3', '.m4a'))
]
log(f"Found {len(music_files)} music files.", INFO)
# Process each audio file: read the sibling .lrc file and embed its text into
# the lyrics tag of the matching container format.
for file_path in music_files:
    try:
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'
        # Skip if no LRC file exists
        if not os.path.exists(lrc_path):
            log(f"Skipping {file_path} (no LRC file found)", INFO)
            continue
        # Read lyrics from LRC file. 'utf-8-sig' decodes plain UTF-8 as well
        # and drops a leading BOM so it is not embedded into the tag.
        with open(lrc_path, 'r', encoding='utf-8-sig') as lrc_file:
            lyrics = lrc_file.read().strip()
        if not lyrics:
            log(f"Skipping {file_path} (LRC file is empty)", WARN)
            continue
        # Detect lyric type (only used for the log message below)
        lyric_type = detect_lyric_type(lyrics)
        # Process audio file based on format
        lowered_path = file_path.lower()
        if lowered_path.endswith(".flac"):
            audio = FLAC(file_path)
            if "LYRICS" in audio and audio["LYRICS"][0] == lyrics:
                log(f"Skipping {file_path} (lyrics already embedded)", INFO)
                continue
            audio["LYRICS"] = lyrics
            audio.save()
        elif lowered_path.endswith(".mp3"):
            audio = MP3(file_path, ID3=ID3)
            # A file without an ID3 header has tags=None; create an empty tag
            # so the lookups and the write below have something to work on.
            if audio.tags is None:
                audio.add_tags()
            # Compare against every stored USLT frame, whatever its language.
            if any(frame.text == lyrics for frame in audio.tags.getall("USLT")):
                log(f"Skipping {file_path} (lyrics already embedded)", INFO)
                continue
            # setall() lives on the ID3 tag object, not on the MP3 file object.
            # Replace all USLT frames with a single explicitly tagged one.
            audio.tags.setall(
                "USLT",
                [USLT(encoding=3, lang="eng", desc="", text=lyrics)],
            )
            audio.save()
        elif lowered_path.endswith(".m4a"):
            audio = MP4(file_path)
            # Untagged MP4 files also load with tags=None.
            if audio.tags is None:
                audio.add_tags()
            if "\xa9lyr" in audio.tags and audio.tags["\xa9lyr"][0] == lyrics:
                log(f"Skipping {file_path} (lyrics already embedded)", INFO)
                continue
            audio.tags["\xa9lyr"] = [lyrics]
            audio.save()
        log(f"Embedded lyrics for {file_path} ({lyric_type})", INFO)
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        log(f"Error embedding lyrics for {file_path}: {e}", WARN)
log("\nLyrics embedding process completed.", INFO)
#!/usr/bin/env python3
# pip install tqdm tenacity langdetect mutagen syncedlyrics
import os
import sys
import argparse
import contextlib
from syncedlyrics import search
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
import re
from langdetect import detect
import tenacity
from tqdm import tqdm
import time
# Verbosity thresholds in ascending order of detail
QUIET = 0
WARN = QUIET + 1
INFO = WARN + 1
DEBUG = INFO + 1
def log(message, level=INFO):
    """Write ``message`` to stdout when ``level`` is within the selected verbosity."""
    should_print = level <= args.verbosity
    if should_print:
        print(message)
@contextlib.contextmanager
def suppress_output():
    """Temporarily point sys.stdout and sys.stderr at the null device.

    Both streams are restored when the ``with`` body finishes, including
    when it raises.
    """
    with open(os.devnull, 'w') as sink, \
            contextlib.redirect_stdout(sink), \
            contextlib.redirect_stderr(sink):
        yield
# Setup command-line arguments.
# The description names all three formats the script actually scans
# (.flac, .mp3 and .m4a), not just FLAC and MP3.
parser = argparse.ArgumentParser(
    description="Fetch enhanced lyrics for FLAC, MP3, and M4A files.",
)
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument(
    "--verbosity",
    type=int,
    choices=[0, 1, 2, 3],
    default=2,
    help="Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG",
)
parser.add_argument(
    "--stats",
    action="store_true",
    help="Display processing statistics at the end",
)
args = parser.parse_args()
# Root folder that is walked for audio files
music_directory = args.folder
# Running tallies reported when --stats is given
skipped_count = failed_count = succeeded_count = 0
# Number of saved lyrics per detected type: enhanced, line-by-line, plain
lyric_type_counts = dict.fromkeys(("enhanced", "line-by-line", "plain"), 0)
def detect_lyric_type(lyrics):
    """Determine the type of lyrics based on timestamp patterns.

    Args:
        lyrics: Full lyrics text, e.g. the content of an .lrc file or a
            search result.

    Returns:
        "enhanced" when word-level ``<mm:ss.xx>`` tags occur,
        "line-by-line" when only ``[mm:ss.xx]`` line tags occur,
        and "plain" when neither is found.
    """
    # The fraction may have 1-3 digits: limiting it to two digits made
    # millisecond stamps ([00:12.345] / <00:12.345>) look like plain text,
    # so files that were already enhanced got fetched again.
    timestamp = r'\d{1,2}:\d{2}\.\d{1,3}'
    word_tag = re.compile('<' + timestamp + '>')
    line_tag = re.compile(r'\[' + timestamp + r'\]')
    if word_tag.search(lyrics):
        return "enhanced"
    if line_tag.search(lyrics):
        return "line-by-line"
    return "plain"
# Gather list of FLAC, MP3 and M4A files below the music folder
music_files = []
for parent, _subdirs, names in os.walk(music_directory):
    music_files.extend(
        os.path.join(parent, name)
        for name in names
        if name.lower().endswith(('.flac', '.mp3', '.m4a'))
    )
log(f"Found {len(music_files)} music files.", INFO)
# Retryable lookup: at most 5 attempts with an exponential wait
# (multiplier=1, min=1, max=10); the final failure is re-raised to the caller.
@tenacity.retry(
    reraise=True,
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_exponential(multiplier=1, min=1, max=10),
)
def attempt_search(term):
    """Query the 'lrclib' provider for enhanced lyrics matching *term*."""
    found = search(term, providers=['lrclib'], enhanced=True)
    return found
# Process each file with a progress bar: read title/artist from the tags,
# search for lyrics and write them to a sibling .lrc file.
for file_path in tqdm(music_files, desc="Processing music files"):
    try:
        lowered_path = file_path.lower()
        if lowered_path.endswith(".flac"):
            audio = FLAC(file_path)
            title = audio.get('title', [None])[0]
            artist = audio.get('artist', [None])[0]
        elif lowered_path.endswith(".mp3"):
            audio = MP3(file_path)
            title = audio.get("TIT2", None)  # MP3 metadata: title
            artist = audio.get("TPE1", None)  # MP3 metadata: artist
            if title:
                title = title.text[0]
            if artist:
                artist = artist.text[0]
        elif lowered_path.endswith(".m4a"):
            audio = MP4(file_path)
            # Use audio.get() like the branches above instead of audio.tags.get():
            # an untagged file has tags=None, which would raise and be counted
            # as a failure rather than skipped for missing metadata.
            title = audio.get("\xa9nam", [None])[0]  # M4A title
            artist = audio.get("\xa9ART", [None])[0]  # M4A artist
        else:
            continue  # Shouldn't happen, since we filter for these
        if not (title and artist):
            # Without both fields there is nothing meaningful to search for.
            log(f"\nSkipping (missing title or artist): {file_path}", DEBUG)
            skipped_count += 1
            continue
        search_term = f"{title} {artist}"
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'
        # Check for existing enhanced lyrics
        if os.path.exists(lrc_path):
            try:
                with open(lrc_path, 'r', encoding='utf-8') as lrc_file:
                    existing_lyrics = lrc_file.read()
                if detect_lyric_type(existing_lyrics) == "enhanced":
                    log(f"\nSkipping (already enhanced): {search_term}", INFO)
                    skipped_count += 1
                    continue
            except Exception as e:
                # An unreadable .lrc is not fatal: fall through and re-fetch.
                log(f"\nError reading existing lyrics for {search_term}: {e}", WARN)
        # Attempt to fetch lyrics
        try:
            if args.verbosity == QUIET:
                # Silence anything the search prints while in quiet mode.
                with suppress_output():
                    lyrics = attempt_search(search_term)
            else:
                lyrics = attempt_search(search_term)
        except Exception as error:
            log(f"\nError processing {search_term}: {error}", WARN)
            failed_count += 1
            continue
        if lyrics:
            try:
                with open(lrc_path, 'w', encoding='utf-8') as lrc_file:
                    lrc_file.write(lyrics)
                log(f"\nLyrics saved for: {search_term}", INFO)
                succeeded_count += 1
                # Track lyric type
                lyric_type_counts[detect_lyric_type(lyrics)] += 1
            except Exception as e:
                log(f"\nError writing lyrics for {search_term}: {e}", WARN)
                failed_count += 1
        else:
            log(f"\nNo suitable lyrics found for: {search_term}", INFO)
            skipped_count += 1
    except Exception as e:
        # Catch-all so one bad file does not abort the whole run.
        log(f"\nUnexpected error processing {file_path}: {e}", WARN)
        failed_count += 1
# Summarise the run when --stats was passed
if args.stats:
    total_files = len(music_files)
    print(
        "\n--- Processing Statistics ---",
        f"Total files processed: {total_files}",
        f"Skipped: {skipped_count}",
        f"Failed: {failed_count}",
        f"Succeeded: {succeeded_count}",
        "Lyric type breakdown (for succeeded files):",
        sep="\n",
    )
    # One line per lyric type, in the dictionary's insertion order
    for lt, count in lyric_type_counts.items():
        print(f" {lt}: {count}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment