Skip to content

Instantly share code, notes, and snippets.

@yodaluca23
Last active March 21, 2025 10:24
Show Gist options
  • Select an option

  • Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.

Select an option

Save yodaluca23/82ab1129e12f39e30c8e760a8c853c1f to your computer and use it in GitHub Desktop.
Fetch .lrc files for all songs in a directory from the Beautiful Lyrics API; supports the A2 extension (Enhanced LRC format).
import os
import requests
import json
import re
from bs4 import BeautifulSoup
# Function to load configuration from config.txt
def load_config():
    """Read the saved settings from ``config.txt`` in the working directory.

    Returns:
        A ``(naming_format, useA2)`` tuple taken from the JSON object stored
        in the file. Either element is ``None`` when its key is absent, and
        ``(None, None)`` is returned when the file is missing, unreadable,
        not valid JSON (for example empty), or does not contain a JSON
        object.
    """
    try:
        with open('config.txt', 'r') as config_file:
            config = json.load(config_file)
    except FileNotFoundError:
        return None, None
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # Treat a damaged file like a missing one instead of crashing.
        print("config.txt could not be read, ignoring it")
        return None, None
    if not isinstance(config, dict):
        # Valid JSON, but not the object that save_config() writes.
        return None, None
    return config.get('naming_format'), config.get('useA2')
# Function to save configuration to config.txt
def save_config(naming_format, useA2):
    """Write the two settings to ``config.txt`` as a JSON object.

    Args:
        naming_format: Stored under the ``naming_format`` key.
        useA2: Stored under the ``useA2`` key.
    """
    settings = {'naming_format': naming_format, 'useA2': useA2}
    with open('config.txt', 'w') as handle:
        json.dump(settings, handle)
# Reuse the settings stored in config.txt when a naming format is present;
# otherwise ask for both settings once and persist the answers.
naming_format, useA2 = load_config()
if not naming_format:
    naming_format = input("Enter the naming format (use %A for artist and %T for title): ")
    _a2_answer = input("Should use A2 extension (Enhanced LRC format) if available? (yes/no): ")
    useA2 = _a2_answer.strip().lower() == 'yes'
    save_config(naming_format, useA2)

# Asked on every run: only an answer of "yes" allows replacing existing files.
_override_answer = input("Do you want to override existing files? (yes/no): ")
override_existing = _override_answer.strip().lower() == 'yes'
# File-name suffixes (leading dot included) that mark a directory entry as an
# audio file to look up lyrics for.
supported_extensions = [
    ".3gp", ".aa", ".aac", ".aax", ".act", ".aiff",
    ".alac", ".amr", ".ape", ".au", ".awb", ".dss",
    ".dvf", ".flac", ".gsm", ".iklax", ".ivs", ".m4a",
    ".m4b", ".m4p", ".mmf", ".movpkg", ".mp3", ".mpc",
    ".msv", ".nmf", ".ogg", ".oga", ".mogg", ".opus",
    ".ra", ".rm", ".raw", ".rf64", ".sln", ".tta",
    ".voc", ".vox", ".wav", ".wma", ".wv", ".webm",
    ".8svx", ".cda",
]
def extract_artist_and_song(filename, naming_format):
    """Split an audio file name into artist and title using *naming_format*.

    Args:
        filename: File name including its extension, e.g.
            ``"Artist - Title.mp3"``.
        naming_format: Template in which ``%A`` stands for the artist and
            ``%T`` for the title, e.g. ``"%A - %T"``. Every other character
            is matched literally.

    Returns:
        An ``(artist, title)`` tuple of stripped strings. A part whose
        placeholder is not in the template is returned as
        ``"unknown_artist"`` / ``"unknown_title"``; both placeholders are
        returned when the name does not match the template at all.
    """
    # Match against the name without its extension so dots inside the artist
    # or title (e.g. "Mr. Brightside") are not mistaken for the extension.
    stem = os.path.splitext(filename)[0]

    placeholders = {
        '%A': '(?P<artist>.+?)',
        '%T': '(?P<title>.+?)',
    }
    escaped_format = re.escape(naming_format)
    for placeholder, group_pattern in placeholders.items():
        # Escape the placeholder too: older Python versions escape "%".
        escaped_format = escaped_format.replace(re.escape(placeholder), group_pattern)

    match = re.fullmatch(escaped_format, stem)
    if match is None:
        print(f"The filename '{stem}' does not match the naming format '{naming_format}'")
        return "unknown_artist", "unknown_title"

    # groupdict() tolerates templates that use only one of the placeholders.
    groups = match.groupdict()
    artist = groups.get('artist') or "unknown_artist"
    title = groups.get('title') or "unknown_title"
    return artist.strip(), title.strip()
def get_bearer_token(timeout=10):
    """Obtain an access token by scraping the Spotify web player page.

    Downloads ``https://open.spotify.com``, finds the element whose id is
    ``session``, parses its text as JSON and returns its ``accessToken``
    entry.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The access token taken from the page.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        RuntimeError: If the page has no element with id ``session``.
    """
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get("https://open.spotify.com", timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    session_element = soup.find(id="session")
    if session_element is None:
        raise RuntimeError(
            "Could not find the session element on https://open.spotify.com"
        )

    tokens = json.loads(session_element.get_text())
    return tokens['accessToken']
def search_spotify(artist, song, token, timeout=10):
    """Look up a track on Spotify and return its track id.

    Args:
        artist: Artist name to search for.
        song: Track title to search for.
        token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The id parsed from the ``href`` of the first search result.

    Raises:
        ValueError: If the search returns no tracks, or the result's
            ``href`` contains no ``tracks/<id>`` segment.
        Exception: If the API answers with a status other than 200.
    """
    # Let requests encode the query so characters such as "&", "#" or "+" in
    # a name cannot break the URL. For plain names this yields the same
    # "artist%3A+<artist>+track%3A+<song>" query string as before.
    params = {
        'query': f'artist: {artist} track: {song}',
        'type': 'track',
        'offset': 0,
        'limit': 1,
    }
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(
        'https://api.spotify.com/v1/search',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"Spotify API request failed with status code {response.status_code}")

    items = response.json()['tracks']['items']
    if not items:
        raise ValueError("No tracks found for the given artist and song.")

    match = re.search(r'tracks/([a-zA-Z0-9]+)', items[0]['href'])
    if not match:
        raise ValueError("Song ID not found in the href.")
    return match.group(1)
def fetch_lyrics(track_id, timeout=10):
    """Download the lyrics payload for a Spotify track id.

    Args:
        track_id: Track id appended to the lyrics endpoint URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body, or ``None`` when the status is not 200 or the
        body is empty or not valid JSON.
    """
    url = f'https://beautiful-lyrics.socalifornian.live/lyrics/{track_id}'
    headers = {
        'authorization': 'Bearer litterallyAnythingCanGoHereItJustTakesItLOL'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None
    # An empty body is not always announced by a "content-length: 0" header
    # (e.g. chunked responses), so check the body itself as well.
    if response.headers.get('content-length') == '0' or not response.content:
        return None
    try:
        return response.json()
    except ValueError:
        # A body that is not JSON counts as "no lyrics".
        return None
def convert_to_lrc_timestamp(timestamp):
    """Format a time in seconds as ``m:ss.xx``.

    Args:
        timestamp: Non-negative number of seconds.

    Returns:
        The whole minutes (not zero-padded), a colon, and the remaining
        seconds zero-padded to two integer digits with two decimals,
        e.g. ``65.5`` -> ``"1:05.50"``.
    """
    minutes = int(timestamp // 60)
    # Round before formatting so a value such as 59.996 carries into the
    # minutes instead of being printed as the invalid "0:60.00".
    seconds = round(timestamp % 60, 2)
    if seconds >= 60:
        minutes += 1
        seconds -= 60
    return f"{minutes}:{seconds:05.2f}"
def _join_plain_syllables(syllables):
    """Concatenate syllable texts, adding a space after each syllable whose
    ``IsPartOfWord`` flag is false."""
    return ''.join(
        f"{syllable['Text']}{' ' if not syllable['IsPartOfWord'] else ''}"
        for syllable in syllables
    )


def _join_a2_syllables(syllables, parenthesize=False):
    """Build the A2 (Enhanced LRC) body for one line of syllables.

    A syllable whose ``IsPartOfWord`` flag is false is emitted as
    `` <m:ss.xx> text``; a flagged syllable is appended without a time tag.
    With *parenthesize*, "(" is put before the first syllable's text and ")"
    after the last one's.
    """
    # NOTE(review): here a flagged syllable is glued to the text before it,
    # while the plain-LRC path only omits the space after it. The two readings
    # of IsPartOfWord disagree; confirm which one the API intends.
    last_index = len(syllables) - 1
    pieces = []
    for index, syllable in enumerate(syllables):
        text = syllable['Text']
        if parenthesize:
            # Two independent checks so a single-syllable line gets both
            # the opening and the closing parenthesis.
            if index == 0:
                text = f"({text}"
            if index == last_index:
                text = f"{text})"
        if syllable['IsPartOfWord']:
            pieces.append(text)
        else:
            stamp = convert_to_lrc_timestamp(syllable['StartTime'])
            pieces.append(f" <{stamp}> {text}")
    return ''.join(pieces)


def _parse_line_content(content, useA2):
    """Return LRC lines for a payload whose ``Type`` is ``'Line'``."""
    if useA2:
        print("The following song is not compatible with A2 extension (Enhanced LRC format), continuing with standard LRC")
    lines = []
    for item in content:
        if item['Type'] != 'Vocal':
            continue
        stamp = convert_to_lrc_timestamp(item['StartTime'])
        lines.append(f"[{stamp}] {item['Text']}".strip())
        if 'Background' in item:
            print("This song has Background with Type Line, I was not able to find this in testing so I don't know the structure, please report this song, so I may add support for it.\n https://gist.github.com/yodaluca23/82ab1129e12f39e30c8e760a8c853c1f")
    return lines


def _parse_syllable_content(content, useA2):
    """Return LRC lines for a payload whose ``Type`` is ``'Syllable'``."""
    lines = []
    for item in content:
        if item['Type'] != 'Vocal':
            continue
        lead = item['Lead']
        # Each background vocal becomes its own parenthesized line, stamped
        # with that background's own start time.
        voices = [(lead, False)]
        voices.extend((bg, True) for bg in item.get('Background') or [])
        for voice, is_background in voices:
            stamp = convert_to_lrc_timestamp(voice['StartTime'])
            if useA2:
                body = _join_a2_syllables(voice['Syllables'], parenthesize=is_background)
                line = f"[{stamp}]{body}"
            else:
                text = _join_plain_syllables(voice['Syllables'])
                if is_background:
                    line = f"[{stamp}] ({text.rstrip()})"
                else:
                    line = f"[{stamp}] {text}"
            lines.append(line.strip())
    return lines


def parse_lyrics(data, useA2):
    """Convert a lyrics payload into a list of LRC text lines.

    Args:
        data: Mapping with a ``Type`` of ``'Line'`` or ``'Syllable'`` and a
            ``Content`` list. Only entries whose ``Type`` is ``'Vocal'`` are
            used. Line entries supply ``Text`` and ``StartTime``; syllable
            entries supply ``Lead`` and optionally ``Background``, each
            holding ``StartTime`` and a ``Syllables`` list of
            ``Text`` / ``StartTime`` / ``IsPartOfWord`` items.
        useA2: When true, syllable payloads are written with per-word
            ``<m:ss.xx>`` tags (A2 / Enhanced LRC). Line payloads have no
            per-word timing, so they always produce standard LRC.

    Returns:
        A list of ``[m:ss.xx] text`` strings, one per lead or background
        line; empty when ``data['Type']`` is neither of the two handled
        values.
    """
    if data['Type'] == 'Line':
        return _parse_line_content(data['Content'], useA2)
    if data['Type'] == 'Syllable':
        return _parse_syllable_content(data['Content'], useA2)
    return []
def save_lyrics(lrc_filename, lyrics_body, is_time_synced, filename):
    """Write lyric lines to an .lrc file and report what was saved.

    Args:
        lrc_filename: Path of the file to create or overwrite.
        lyrics_body: Iterable of text lines; written joined by newlines,
            with no trailing newline.
        is_time_synced: Selects the wording of the confirmation message.
        filename: Name of the audio file; shown without its extension.
    """
    # Lyrics routinely contain non-ASCII text, which the platform default
    # encoding (e.g. cp1252) may be unable to encode; always write UTF-8.
    with open(lrc_filename, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write("\n".join(lyrics_body))

    # splitext drops only the extension, keeping dots that are part of the
    # name ("Mr. Brightside.mp3" -> "Mr. Brightside").
    display_name = os.path.splitext(filename)[0]
    sync_label = "time-synced" if is_time_synced else "non-time-synced"
    print(f"Saved {sync_label} lyrics for {display_name}")
def main():
    """Fetch and save lyrics for every supported audio file in the current
    directory.

    For each entry whose name ends with one of ``supported_extensions``
    (compared case-insensitively), the artist and title are extracted from
    the file name, the track is looked up on Spotify, and the parsed lyrics
    are written to a ``.lrc`` file with the same base name. Existing
    ``.lrc`` files are kept unless ``override_existing`` is set. A failure
    for one file is printed and does not stop the run.
    """
    token = get_bearer_token()
    # str.endswith accepts a tuple; lower-casing both sides means files such
    # as "SONG.MP3" are picked up too.
    audio_suffixes = tuple(ext.lower() for ext in supported_extensions)

    for item in os.listdir('.'):
        if not item.lower().endswith(audio_suffixes):
            continue

        # splitext removes only the extension, so names that contain dots
        # are reported and saved under their full base name.
        base_name = os.path.splitext(item)[0]

        artist, title = extract_artist_and_song(item, naming_format)
        if not (artist and title):
            print(f"Could not extract artist and title from {base_name}")
            continue

        lrc_filename = base_name + '.lrc'
        if not override_existing and os.path.exists(lrc_filename):
            print(f"Lyrics for {base_name} already exist, skipping")
            continue

        try:
            track_id = search_spotify(artist, title, token)
            data = fetch_lyrics(track_id)
            if data:
                lyrics = parse_lyrics(data, useA2)
                save_lyrics(lrc_filename, lyrics, True, item)
            else:
                print(f"No lyrics found for {item}")
        except Exception as e:
            # Per-file boundary: report the problem and go on to the next file.
            print(f"Could not save lyrics for {item}: {e}")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment