Last active
January 5, 2026 17:27
-
-
Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.
Fetch enhanced lyrics for FLAC, M4A and MP3 files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # pip install tqdm tenacity langdetect mutagen syncedlyrics | |
| import os | |
| import sys | |
| import argparse | |
| import contextlib | |
| from syncedlyrics import search | |
| from mutagen.flac import FLAC | |
| from mutagen.mp3 import MP3 | |
| import re | |
| from langdetect import detect | |
| import tenacity | |
| from tqdm import tqdm | |
| import time | |
# Verbosity levels, from silent (0) to most detailed (3)
QUIET, WARN, INFO, DEBUG = range(4)
def log(message, level=INFO):
    """Print *message* if the configured verbosity is at least *level*.

    The threshold is read from the module-level ``args.verbosity``; messages
    whose level exceeds it are dropped.
    """
    if args.verbosity < level:
        return
    print(message)
@contextlib.contextmanager
def suppress_output():
    """Silence stdout and stderr for the duration of the ``with`` body.

    Both streams are pointed at ``os.devnull`` and are restored to their
    previous objects when the body finishes, including when it raises.
    """
    with open(os.devnull, 'w') as sink:
        # The redirect helpers restore the previous stream objects on exit.
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            yield
# Command-line interface: a required music folder plus optional verbosity
# and statistics switches.
parser = argparse.ArgumentParser(
    description="Fetch enhanced lyrics for FLAC and MP3 files."
)
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument(
    "--verbosity",
    type=int,
    choices=[QUIET, WARN, INFO, DEBUG],
    default=INFO,
    help="Set verbosity level: 0=QUIET, 1=WARN, 2=INFO (default), 3=DEBUG",
)
parser.add_argument(
    "--stats",
    action="store_true",
    help="Display processing statistics at the end",
)

# Parsed once at start-up; `args` is read by log() and by the main loop.
args = parser.parse_args()
music_directory = args.folder
# Running tallies for the optional end-of-run statistics report
skipped_count = failed_count = succeeded_count = 0
# Number of saved lyric files per lyric type: enhanced, line-by-line, plain
lyric_type_counts = dict.fromkeys(("enhanced", "line-by-line", "plain"), 0)
def detect_lyric_type(lyrics):
    """Classify lyrics text by the kind of timestamps it contains.

    Args:
        lyrics: Full lyrics text. Empty or ``None`` is treated as plain.

    Returns:
        ``"enhanced"`` if any angle-bracket tag such as ``<mm:ss.xx>`` is
        present, ``"line-by-line"`` if only square-bracket tags such as
        ``[mm:ss.xx]`` are present, otherwise ``"plain"``.
    """
    if not lyrics:
        return "plain"
    # The fractional part may be 1-3 digits: some sources emit hundredths
    # ([00:12.34]) and others milliseconds ([00:12.345]). Accepting only two
    # digits would misclassify millisecond-stamped lyrics as plain.
    if re.search(r'<\d{1,2}:\d{2}\.\d{1,3}>', lyrics):
        return "enhanced"
    if re.search(r'\[\d{1,2}:\d{2}\.\d{1,3}\]', lyrics):
        return "line-by-line"
    return "plain"
# Collect every FLAC and MP3 file under the music folder (recursive walk;
# the extension match is case-insensitive).
music_files = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(music_directory)
    for name in names
    if name.lower().endswith(('.flac', '.mp3'))
]
log(f"Found {len(music_files)} music files.", INFO)
# Lyrics lookup wrapped in a tenacity retry policy
@tenacity.retry(
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
def attempt_search(term):
    """Search for lyrics matching *term*, retrying when the lookup raises.

    The retry policy stops after five attempts, uses exponential back-off
    between attempts, and re-raises the last exception if every attempt
    fails. Returns whatever ``search`` returns.
    """
    lyric_providers = ['lrclib', 'Netease', 'Megalobiz']
    return search(term, enhanced=True, providers=lyric_providers)
# Process each file with a progress bar.
#
# For every music file: read title/artist tags, skip files that already have
# enhanced lyrics next to them, search for lyrics, keep only English results,
# and write them to a sibling .lrc file. The counters defined earlier are
# updated for the optional statistics report.

# Relative quality of each lyric type. Used so that an existing .lrc file is
# never replaced by a lower-quality search result (e.g. synced line-by-line
# lyrics overwritten by plain text when no synced result is available).
lyric_quality = {"plain": 0, "line-by-line": 1, "enhanced": 2}

for file_path in tqdm(music_files, desc="Processing music files"):
    try:
        # Read the title and artist tags for the supported formats.
        if file_path.lower().endswith(".flac"):
            audio = FLAC(file_path)
            title = audio.get('title', [None])[0]
            artist = audio.get('artist', [None])[0]
        elif file_path.lower().endswith(".mp3"):
            audio = MP3(file_path)
            title = audio.get("TIT2", None)  # MP3 metadata: title
            artist = audio.get("TPE1", None)  # MP3 metadata: artist
            if title:
                title = title.text[0]
            if artist:
                artist = artist.text[0]
        else:
            continue  # Shouldn't happen, since we filter for these

        # Both tags are needed to build a meaningful search term.
        if not (title and artist):
            skipped_count += 1
            continue

        search_term = f"{title} {artist}"
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'

        # Inspect any existing lyrics file; remember its type so it can be
        # compared against the search result further down.
        existing_type = None
        if os.path.exists(lrc_path):
            try:
                with open(lrc_path, 'r', encoding='utf-8') as lrc_file:
                    existing_lyrics = lrc_file.read()
                existing_type = detect_lyric_type(existing_lyrics)
            except Exception as e:
                # An unreadable file is treated as "no usable lyrics yet".
                log(f"\nError reading existing lyrics for {search_term}: {e}", WARN)
            if existing_type == "enhanced":
                log(f"\nSkipping (already enhanced): {search_term}", INFO)
                skipped_count += 1
                continue

        # Attempt to fetch lyrics; in quiet mode also silence anything the
        # search itself prints.
        try:
            if args.verbosity == QUIET:
                with suppress_output():
                    lyrics = attempt_search(search_term)
            else:
                lyrics = attempt_search(search_term)
        except Exception as error:
            log(f"\nError processing {search_term}: {error}", WARN)
            failed_count += 1
            continue

        if not lyrics:
            log(f"\nNo suitable lyrics found for: {search_term}", INFO)
            skipped_count += 1
            continue

        try:
            # Ensure lyrics are in English
            if detect(lyrics) != 'en':
                log(f"\nNon-English lyrics for: {search_term}", WARN)
                skipped_count += 1
                continue
        except Exception:
            # Detection is best-effort; a detector failure does not block saving.
            log("\nLanguage detection failed, proceeding with the lyrics anyway.", WARN)

        # Never downgrade: keep the existing file if the result is worse.
        fetched_type = detect_lyric_type(lyrics)
        if (existing_type is not None
                and lyric_quality[fetched_type] < lyric_quality[existing_type]):
            log(f"\nKeeping existing {existing_type} lyrics "
                f"(found only {fetched_type}): {search_term}", INFO)
            skipped_count += 1
            continue

        try:
            with open(lrc_path, 'w', encoding='utf-8') as lrc_file:
                lrc_file.write(lyrics)
            log(f"\nLyrics saved for: {search_term}", INFO)
            succeeded_count += 1
            # Track lyric type
            lyric_type_counts[fetched_type] += 1
        except Exception as e:
            log(f"\nError writing lyrics for {search_term}: {e}", WARN)
            failed_count += 1
    except Exception as e:
        # Per-file boundary: one bad file must not abort the whole run.
        log(f"\nUnexpected error processing {file_path}: {e}", WARN)
        failed_count += 1
# Optional end-of-run report (enabled with --stats)
if args.stats:
    total_files = len(music_files)
    report_lines = [
        "\n--- Processing Statistics ---",
        f"Total files processed: {total_files}",
        f"Skipped: {skipped_count}",
        f"Failed: {failed_count}",
        f"Succeeded: {succeeded_count}",
        "Lyric type breakdown (for succeeded files):",
    ]
    # One indented line per lyric type, in the dictionary's insertion order.
    report_lines.extend(
        f" {lt}: {count}" for lt, count in lyric_type_counts.items()
    )
    print("\n".join(report_lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment