Last active
January 5, 2026 17:27
-
-
Save muness/539d9c6325ff38e07185bb9e2cfae012 to your computer and use it in GitHub Desktop.
Fetch enhanced lyrics for FLAC, M4A and MP3 files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # pip install tqdm tenacity langdetect mutagen syncedlyrics | |
| import os | |
| import sys | |
| import argparse | |
| import contextlib | |
| from syncedlyrics import search | |
| from mutagen.flac import FLAC | |
| from mutagen.mp3 import MP3 | |
| import re | |
| from langdetect import detect | |
| import tenacity | |
| from tqdm import tqdm | |
| import time | |
@contextlib.contextmanager
def suppress_output():
    """Silence ``sys.stdout`` and ``sys.stderr`` while the ``with`` body runs.

    Both streams are pointed at ``os.devnull`` for the duration of the block
    and are put back afterwards, even when the body raises.
    """
    with open(os.devnull, 'w') as sink:
        # The redirect helpers remember the previous streams and restore them
        # on exit, which replaces the manual save/restore bookkeeping.
        with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            yield
# Command-line interface: one positional music folder plus two boolean flags.
parser = argparse.ArgumentParser(
    description="Fetch enhanced lyrics for FLAC and MP3 files.",
)
parser.add_argument("folder", help="Path to the music folder")
parser.add_argument(
    "--quiet",
    action="store_true",
    help="Reduce output noise",
)
parser.add_argument(
    "--stats",
    action="store_true",
    help="Display processing statistics at the end",
)
args = parser.parse_args()
music_directory = args.folder

# Running tallies, updated while files are processed and reported by --stats.
skipped_count = failed_count = succeeded_count = 0

# Number of saved lyric files per detected type: enhanced, line-by-line, plain.
lyric_type_counts = dict.fromkeys(("enhanced", "line-by-line", "plain"), 0)
def detect_lyric_type(lyrics):
    """Classify lyric text by the kind of timestamps it contains.

    Args:
        lyrics: The lyric text to inspect (a string).

    Returns:
        ``"enhanced"`` if the text contains at least one angle-bracket
        timestamp such as ``<01:23.45>``; otherwise ``"line-by-line"`` if it
        contains at least one square-bracket timestamp such as ``[01:23.45]``;
        otherwise ``"plain"``.

    The fractional part may be one to three digits long, so both
    centisecond (``.45``) and millisecond (``.450``) stamps are recognised.
    """
    # Angle-bracket stamps are checked first: enhanced files normally carry
    # square-bracket line stamps as well, so the order decides the result.
    if re.search(r'<\d{1,2}:\d{2}\.\d{1,3}>', lyrics):
        return "enhanced"
    if re.search(r'\[\d{1,2}:\d{2}\.\d{1,3}\]', lyrics):
        return "line-by-line"
    return "plain"
# Collect the path of every FLAC and MP3 file found anywhere below the
# music folder; the extension match is case-insensitive.
music_files = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(music_directory)
    for file in files
    if file.lower().endswith(('.flac', '.mp3'))
]

if not args.quiet:
    print(f"Found {len(music_files)} music files.")
# Lyrics lookup wrapped in a tenacity retry policy: at most 5 attempts,
# exponential back-off bounded to 1-10 seconds, and the final exception is
# re-raised to the caller instead of tenacity's own RetryError.
@tenacity.retry(
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
def attempt_search(term):
    """Look up lyrics for ``term``, requesting the enhanced variant.

    Args:
        term: Free-text search string (the caller passes "title artist").

    Returns:
        Whatever ``syncedlyrics.search`` returns for the query; the caller
        treats a falsy value as "nothing found".
    """
    providers = ['lrclib', 'Netease', 'Megalobiz']
    return search(term, enhanced=True, providers=providers)
# Process each file with a progress bar.
#
# For every music file: read title/artist tags, skip files that already have
# enhanced lyrics next to them, fetch lyrics, keep only English results, and
# write them to a sibling ``.lrc`` file. Each file ends up in exactly one of
# the skipped / failed / succeeded tallies.

# Relative quality of each lyric type. Used so that a fetched result never
# replaces better lyrics that are already on disk.
lyric_quality = {"plain": 0, "line-by-line": 1, "enhanced": 2}

for file_path in tqdm(music_files, desc="Processing music files"):
    try:
        # --- Read the title and artist tags -------------------------------
        lowered_path = file_path.lower()
        if lowered_path.endswith(".flac"):
            audio = FLAC(file_path)
            title = audio.get('title', [None])[0]
            artist = audio.get('artist', [None])[0]
        elif lowered_path.endswith(".mp3"):
            audio = MP3(file_path)
            title_frame = audio.get("TIT2", None)   # MP3 metadata: title
            artist_frame = audio.get("TPE1", None)  # MP3 metadata: artist
            # A frame with an empty text list counts as missing metadata
            # (skipped) rather than raising IndexError (failed).
            title = (
                title_frame.text[0]
                if title_frame is not None and title_frame.text
                else None
            )
            artist = (
                artist_frame.text[0]
                if artist_frame is not None and artist_frame.text
                else None
            )
        else:
            continue  # Shouldn't happen, since we filter for these

        if not (title and artist):
            skipped_count += 1
            continue

        search_term = f"{title} {artist}"
        lrc_path = os.path.splitext(file_path)[0] + '.lrc'

        # --- Inspect lyrics that already exist on disk --------------------
        # existing_type stays None when there is no file or it is unreadable;
        # in that case the fetched lyrics are written unconditionally.
        existing_type = None
        if os.path.exists(lrc_path):
            try:
                with open(lrc_path, 'r', encoding='utf-8') as lrc_file:
                    existing_type = detect_lyric_type(lrc_file.read())
            except Exception as e:
                if not args.quiet:
                    print(f"\nError reading existing lyrics for {search_term}: {e}")
            if existing_type == "enhanced":
                if not args.quiet:
                    print(f"\nSkipping (already enhanced): {search_term}")
                skipped_count += 1
                continue

        # --- Fetch lyrics --------------------------------------------------
        try:
            if args.quiet:
                with suppress_output():
                    lyrics = attempt_search(search_term)
            else:
                lyrics = attempt_search(search_term)
        except Exception as error:
            if not args.quiet:
                print(f"\nError processing {search_term}: {error}")
            failed_count += 1
            continue

        if not lyrics:
            if not args.quiet:
                print(f"\nNo suitable lyrics found for: {search_term}")
            skipped_count += 1
            continue

        # --- Keep English lyrics only --------------------------------------
        try:
            if detect(lyrics) != 'en':
                if not args.quiet:
                    print(f"\nNon-English lyrics for: {search_term}")
                skipped_count += 1
                continue
        except Exception:
            # Best effort: a detector failure must not discard the lyrics.
            if not args.quiet:
                print("\nLanguage detection failed, proceeding with the lyrics anyway.")

        # --- Never downgrade what is already on disk -----------------------
        new_type = detect_lyric_type(lyrics)
        if (
            existing_type is not None
            and lyric_quality.get(new_type, 0) < lyric_quality.get(existing_type, 0)
        ):
            if not args.quiet:
                print(
                    f"\nKeeping existing {existing_type} lyrics "
                    f"(only {new_type} found) for: {search_term}"
                )
            skipped_count += 1
            continue

        # --- Save ----------------------------------------------------------
        try:
            with open(lrc_path, 'w', encoding='utf-8') as lrc_file:
                lrc_file.write(lyrics)
            if not args.quiet:
                print(f"\nLyrics saved for: {search_term}")
            succeeded_count += 1
            # Track lyric type
            lyric_type_counts[new_type] += 1
        except Exception as e:
            if not args.quiet:
                print(f"\nError writing lyrics for {search_term}: {e}")
            failed_count += 1
    except Exception as e:
        # Top-level boundary: one bad file must not abort the whole run.
        if not args.quiet:
            print(f"\nUnexpected error processing {file_path}: {e}")
        failed_count += 1
# With --stats, emit a summary of the run: the overall tallies followed by
# the per-type breakdown of the lyric files that were saved.
if args.stats:
    total_files = len(music_files)
    report = [
        "\n--- Processing Statistics ---",
        f"Total files processed: {total_files}",
        f"Skipped: {skipped_count}",
        f"Failed: {failed_count}",
        f"Succeeded: {succeeded_count}",
        "Lyric type breakdown (for succeeded files):",
    ]
    report += [f" {lt}: {count}" for lt, count in lyric_type_counts.items()]
    print("\n".join(report))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment