"""Scrape a Bandcamp user's purchase history into ``bandcamp_purchases.csv``.

Logs in with Selenium (reusing pickled session cookies when available),
expands the full purchase list on the purchases page, parses the rendered
HTML with BeautifulSoup, and writes title/artist/date/price/link rows to CSV.
"""

import csv
import getpass
import os
import pickle
import time
from os.path import exists

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

login_url = 'https://bandcamp.com/login'
# Update this variable to use your own Bandcamp username
username = 'USERNAME'
purchase_url = f'https://bandcamp.com/{username}/purchases'
cookies_file = 'bandcamp_cookies.pkl'


# Function to log in and get session using Selenium
def login_to_bandcamp_selenium(email, password, driver):
    """Log in to Bandcamp through the login form.

    On success, pickles the session cookies to ``cookies_file`` and returns
    *driver*; on failure, quits the driver and returns ``None``.
    """
    driver.get(login_url)

    # Enter email and password and submit the form
    email_field = driver.find_element(By.ID, 'username-field')
    password_field = driver.find_element(By.ID, 'password-field')
    email_field.send_keys(email)
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)

    # Wait for login to complete (adjust the sleep time as needed)
    time.sleep(5)

    # Verify the login by navigating to the purchases page: an
    # unauthenticated session is redirected away from it.
    driver.get(purchase_url)
    if f"{username}/purchases" in driver.current_url:
        print("Logged in successfully.")
        # Save cookies to a file so later runs can skip the login form
        with open(cookies_file, 'wb') as f:
            pickle.dump(driver.get_cookies(), f)
        return driver

    print("Failed to log in. Please check your credentials or handle any CAPTCHA.")
    driver.quit()
    return None


# Function to load cookies and start session
def load_cookies(driver):
    """Try to resume a previous session from pickled cookies.

    Returns True when the cookie file exists and still authenticates
    against the purchases page, False otherwise.
    """
    if exists(cookies_file):
        # Cookies can only be added for the domain currently loaded,
        # so visit the site first.
        driver.get(login_url)
        with open(cookies_file, 'rb') as f:
            cookies = pickle.load(f)
        for cookie in cookies:
            driver.add_cookie(cookie)
        driver.get(purchase_url)
        time.sleep(5)  # Wait for session to load
        if f"{username}/purchases" in driver.current_url:
            print("Logged in with cookies successfully.")
            return True
    return False


# Function to load all purchases by scrolling
def load_all_purchases(driver):
    """Expand the purchases page so every purchase is present in the DOM.

    Clicks the "view all" button when present, then scrolls to the bottom
    repeatedly until the page height stops growing (no more lazy loading).
    """
    try:
        view_all_button = driver.find_element(By.CLASS_NAME, 'view-all-button')
        view_all_button.click()
        time.sleep(2)
    except Exception as e:
        # Best-effort: the button is absent when the list is already short.
        print(f"View all button not found or could not be clicked: {e}")

    # Scroll to the bottom of the page to load all purchases
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # Wait for new purchases to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


# Function to scrape purchases
def scrape_purchases_selenium(driver):
    """Parse the fully-loaded purchases page into a list of rows.

    Returns a list of ``[title, artist, purchase_date, price, album_link]``
    lists; missing fields fall back to ``'Unknown'`` (or ``None`` for the
    link).
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    purchases = []

    # Locate each purchase item
    for item in soup.find_all('div', class_='purchases-item'):
        # Extract the title, artist information, and album link
        title_artist_div = item.find('a', class_='purchases-item-title')
        if title_artist_div:
            title_strong = title_artist_div.find('strong')
            title = title_strong.get_text(strip=True) if title_strong else 'Unknown'
            # The anchor text reads "<title> by <artist>"
            anchor_text = title_artist_div.get_text(strip=True)
            artist = anchor_text.split('by ', 1)[-1] if 'by ' in anchor_text else 'Unknown'
            album_link = title_artist_div.get('href', None)
        else:
            title, artist, album_link = 'Unknown', 'Unknown', None

        # Extract the purchase date
        purchase_date_div = item.find('div', class_='purchases-item-date')
        purchase_date = purchase_date_div.get_text(strip=True) if purchase_date_div else 'Unknown'

        # Extract the total price (last <strong> in the total div holds it)
        price_div = item.find('div', class_='purchases-item-total')
        if price_div and price_div.find_all('strong'):
            price = price_div.find_all('strong')[-1].contents[0].strip()
        else:
            price = 'Unknown'

        # Add the purchase details to the list
        purchases.append([title, artist, purchase_date, price, album_link])

    return purchases


# Write purchases to CSV
def save_purchases_to_csv(purchases):
    """Write the scraped purchase rows to ``bandcamp_purchases.csv``."""
    with open('bandcamp_purchases.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Title', 'Artist', 'Purchase Date', 'Price', 'Link'])
        for title, artist, purchase_date, price, album_link in purchases:
            link = album_link if album_link else 'No Link'
            writer.writerow([title, artist, purchase_date, price, link])
    print("CSV file created successfully.")


def main():
    """Entry point: authenticate, scrape all purchases, and export the CSV."""
    driver = webdriver.Chrome()  # Update with your ChromeDriver path if needed

    # Try to load cookies first to avoid logging in every run
    if not load_cookies(driver):
        # If cookies are not available or invalid, ask for credentials and
        # log in manually; environment variables take precedence.
        email = os.getenv("BANDCAMP_EMAIL")
        password = os.getenv("BANDCAMP_PASSWORD")
        if not email or not password:
            email = input("Enter your Bandcamp email: ")
            password = getpass.getpass("Enter your Bandcamp password: ")
        driver = login_to_bandcamp_selenium(email, password, driver)

    if driver:
        load_all_purchases(driver)
        purchases = scrape_purchases_selenium(driver)
        if purchases:
            save_purchases_to_csv(purchases)
        else:
            print("No purchases found.")
        driver.quit()
    else:
        print("Script terminated due to login failure.")


# Main function
if __name__ == "__main__":
    main()