Last active
July 16, 2024 20:50
-
-
Save yodaluca23/3958c29c2986841067324dd84258987b to your computer and use it in GitHub Desktop.
Revisions
-
yodaluca23 revised this gist
Jul 15, 2024 . 1 changed file with 39 additions and 3 deletions. There are no files selected for viewing.
# GitHub notice: This file contains hidden or bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To
# review, open the file in an editor that reveals hidden Unicode characters.
# Diff columns: original file line number | diff line number | diff line change
#
# Diff hunk: @@ -1,9 +1,45 @@
import base64
import re

import requests
from bs4 import BeautifulSoup

# Matches a lyric page link such as "/lyrics/12345"; group 1 is the numeric ID.
LYRIC_HREF_RE = re.compile(r'/lyrics/(\d+)')

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def extract_lyric_id(html_content):
    """Return the first LYRICID found in a search-results page.

    Looks for the table with id ``lyrics_list`` and, inside it, the first
    ``<a>`` whose ``href`` contains ``/lyrics/<digits>``.

    Args:
        html_content: HTML text of the search response.

    Returns:
        The ID digits as a string, or None when the table or a matching
        link is not present.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table', id='lyrics_list')
    if table is None:
        return None
    first_link = table.find('a', href=LYRIC_HREF_RE)
    if first_link is None:
        return None
    match = LYRIC_HREF_RE.search(first_link['href'])
    return match.group(1) if match else None


# URL for the POST request
post_url = 'https://petitlyrics.com/search_lyrics'

# Headers for the POST request.
# 'br' is intentionally not advertised: requests only decodes Brotli bodies
# when an optional Brotli package is installed, so asking for it
# unconditionally can yield an undecodable response.
headers = {
    'Accept-Encoding': 'gzip, deflate',
    'Content-Type': 'application/x-www-form-urlencoded',
}

# Ask user for title and artist
title = input("Enter the title of the song: ")
artist = input("Enter the artist: ")

# Data for the POST request
data = {
    'title': title,
    'artist': artist,
}

# Perform the POST request to search for lyrics
response_post = requests.post(
    post_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT
)
response_post.raise_for_status()

# Extract LYRICID from the HTML response
lyrics_id = extract_lyric_id(response_post.text)
if lyrics_id is None:
    # Stop here instead of building a ".../lyrics/None" URL below.
    raise SystemExit("No lyrics found for that title and artist.")
print(f"Extracted LYRICID: {lyrics_id}")

# URL of the site to obtain cookies
initial_url = f'https://petitlyrics.com/lyrics/{lyrics_id}'

# Diff hunk: @@ -50,4 +86,4 @@
# (the lines of the file between the two hunks are not shown in this diff view;
# `lyrics_data` is defined in that hidden part)
print("\nLyrics:\n")
for item in lyrics_data:
    decoded_lyrics = base64.b64decode(item['lyrics']).decode('utf-8')
    print(decoded_lyrics)
yodaluca23 created this gist
Jul 15, 2024 . There are no files selected for viewing.
# GitHub notice: This file contains hidden or bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To
# review, open the file in an editor that reveals hidden Unicode characters.
# Diff columns: original file line number | diff line number | diff line change
#
# Diff hunk: @@ -0,0 +1,53 @@
import base64
import re

import requests

BASE_URL = 'https://petitlyrics.com'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Captures the token value that follows "X-CSRF-Token'," in the site's JS file.
CSRF_TOKEN_RE = re.compile(r"X-CSRF-Token',\s*'([^']+)'")


def extract_csrf_token(js_source):
    """Return the X-CSRF-Token value found in *js_source*, or None if absent."""
    match = CSRF_TOKEN_RE.search(js_source)
    return match.group(1) if match else None


def decode_lyrics(lyrics_data):
    """Decode the base64 ``lyrics`` field of every item in *lyrics_data*.

    Args:
        lyrics_data: Iterable of mappings, each with a base64 ``lyrics`` value.

    Returns:
        A list of the decoded UTF-8 strings, in input order.
    """
    return [
        base64.b64decode(item['lyrics']).decode('utf-8')
        for item in lyrics_data
    ]


def fetch_lyrics(lyrics_id):
    """Fetch and decode the lyrics for *lyrics_id*.

    Performs three requests on one session: the lyrics page (to obtain the
    PLSESSION cookie), the JS file that holds the CSRF token, and the
    ``get_lyrics.ajax`` POST.

    Args:
        lyrics_id: Song ID as entered by the user.

    Returns:
        A list of decoded lyric strings.

    Raises:
        requests.RequestException: On network errors or HTTP error statuses.
        RuntimeError: If the PLSESSION cookie or the CSRF token is missing.
        ValueError: If the response body is not valid JSON or base64/UTF-8.
    """
    # URL of the site to obtain cookies
    initial_url = f'{BASE_URL}/lyrics/{lyrics_id}'
    # URL of the file to fetch CSRF Token
    csrf_url = f'{BASE_URL}/lib/pl-lib.js'
    # URL for the POST request
    post_url = f'{BASE_URL}/com/get_lyrics.ajax'

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        # Initial request to the site to get cookies
        response = session.get(initial_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        plsession_cookie = session.cookies.get('PLSESSION')
        if plsession_cookie is None:
            # Otherwise the literal string "PLSESSION=None" would be sent.
            raise RuntimeError("PLSESSION cookie was not set by the site")

        # Request the CSRF token file using the session (with cookies)
        response_js = session.get(csrf_url, timeout=REQUEST_TIMEOUT)
        response_js.raise_for_status()

        csrf_token = extract_csrf_token(response_js.text)
        if csrf_token is None:
            raise RuntimeError("X-CSRF-Token not found in pl-lib.js")

        # 'br' is intentionally not advertised: requests only decodes Brotli
        # bodies when an optional Brotli package is installed.
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Cookie': f'PLSESSION={plsession_cookie}',
            'X-CSRF-Token': csrf_token,
            'X-Requested-With': 'XMLHttpRequest',
        }
        data = {'lyrics_id': lyrics_id}

        response_post = session.post(
            post_url, headers=headers, data=data, timeout=REQUEST_TIMEOUT
        )
        response_post.raise_for_status()

        # Parse the JSON response and decode each base64 entry
        return decode_lyrics(response_post.json())


def main():
    """Prompt for a song ID, then print its lyrics one entry per line."""
    # Ask the user for the LYRICSID
    lyrics_id = input("Please enter the Song ID: ").strip()
    if not lyrics_id:
        raise SystemExit("No Song ID entered.")

    try:
        lyrics = fetch_lyrics(lyrics_id)
    except (requests.RequestException, RuntimeError, ValueError, KeyError) as err:
        raise SystemExit(f"Failed to fetch lyrics: {err}") from err

    print("\nLyrics:\n")
    for decoded_lyrics in lyrics:
        print(decoded_lyrics)


if __name__ == "__main__":
    main()