Skip to content

Instantly share code, notes, and snippets.

@antonl-dev
Created August 21, 2025 18:54
Show Gist options
  • Select an option

  • Save antonl-dev/1f1bc392945c16e64f7603086da68793 to your computer and use it in GitHub Desktop.

Select an option

Save antonl-dev/1f1bc392945c16e64f7603086da68793 to your computer and use it in GitHub Desktop.
import requests
import re
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# --- Configuration ---
# Root of the site being scraped; all relative hrefs are resolved against it.
BASE_URL = "http://www.radcap.ru/"
# Genre index page that links to every genre listing.
START_PAGE = urljoin(BASE_URL, "index-d.html")
# Combined log + results file; overwritten on every run (opened with 'w').
OUTPUT_FILENAME = "radcap-all-streams-0022-aug22-2025.txt"
# Set a user-agent to mimic a browser, which can help avoid being blocked.
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
def log_and_print(message, file_handle):
    """Emit *message* on stdout and append the same line to *file_handle*."""
    line = f"{message}\n"
    print(message)
    file_handle.write(line)
def get_soup(url, log_file):
    """Download *url* and parse it into a BeautifulSoup tree.

    On any request failure the error is logged via log_and_print and
    None is returned instead of a soup object.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        log_and_print(f"Error fetching {url}: {exc}", log_file)
        return None
    # Force UTF-8 before touching .text so Cyrillic pages decode correctly.
    resp.encoding = 'utf-8'
    return BeautifulSoup(resp.text, 'html.parser')
# Matches one {"title": "...", "file": "..."} entry in the Playerjs config;
# the site sometimes omits the quotes around the keys. Compiled once here
# instead of once per station page.
_STREAM_ENTRY_RE = re.compile(
    r'\{\s*"?title"?\s*:\s*"([^"]+)"\s*,\s*"?file"?\s*:\s*"([^"]+)"\s*\}'
)


def _log_station_streams(station_name, station_url, output_file):
    """Fetch one station page and log its stream URLs titled "1" and "3"."""
    log_and_print(f" Station: {station_name}", output_file)
    station_soup = get_soup(station_url, output_file)
    if not station_soup:
        return
    # The stream list lives in an inline <script> that instantiates Playerjs.
    script_tag = station_soup.find('script', string=re.compile(r'new Playerjs'))
    if not script_tag or not script_tag.string:
        log_and_print(f" Could not find player script on {station_url}", output_file)
        return
    matches = _STREAM_ENTRY_RE.findall(script_tag.string)
    if not matches:
        log_and_print(" Could not extract stream data from script.", output_file)
        return
    found_any = False
    for title, file_url in matches:
        if title in ('1', '3'):
            # Protocol-relative URLs ("//host/...") need an explicit scheme.
            if file_url.startswith('//'):
                file_url = 'http:' + file_url
            log_and_print(f" Stream {title}: {file_url}", output_file)
            found_any = True
    if not found_any:
        log_and_print(" Streams 1 and 3 not found.", output_file)


def _process_genre(genre_link, output_file):
    """Fetch one genre page and process every station linked from it."""
    # First child node is the genre's display text; guard against an empty
    # tag instead of crashing with IndexError.
    genre_name = str(genre_link.contents[0]).strip() if genre_link.contents else ''
    genre_href = genre_link.get('href')
    if not genre_href:
        return
    genre_url = urljoin(BASE_URL, genre_href)
    log_and_print(f"\n--- GENRE: {genre_name} ---", output_file)
    genre_soup = get_soup(genre_url, output_file)
    if not genre_soup:
        return
    station_links = genre_soup.select('a.genres220, a.genres')
    if not station_links:
        log_and_print(f" No station links found on {genre_url}", output_file)
        return
    for station_link in station_links:
        # find() returns None when the link has no direct text node; the
        # original unconditional .strip() raised AttributeError there.
        name_node = station_link.find(string=True, recursive=False)
        station_name = name_node.strip() if name_node else ''
        station_href = station_link.get('href')
        if not station_name or not station_href:
            continue
        _log_station_streams(station_name, urljoin(BASE_URL, station_href), output_file)


def main():
    """Scrape radcap.ru genre/station pages and record stream URLs "1" and "3".

    All progress messages, errors, and extracted stream URLs are echoed to
    the console and written to OUTPUT_FILENAME (overwritten each run).
    """
    # 'w' mode truncates any previous output; 'with' guarantees the file is
    # closed even if a page fetch raises.
    with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as output_file:
        log_and_print(f"Fetching main genre page: {START_PAGE}", output_file)
        main_soup = get_soup(START_PAGE, output_file)
        if not main_soup:
            log_and_print("Could not fetch the main page. Exiting.", output_file)
            return
        genre_links = main_soup.select('table.genres-d a.genres-big.genres-index-d')
        if not genre_links:
            log_and_print("No genre links found on the main page.", output_file)
            return
        log_and_print(f"Found {len(genre_links)} genres.", output_file)
        for genre_link in genre_links:
            _process_genre(genre_link, output_file)
        log_and_print(f"\nScraping complete. Output saved to {OUTPUT_FILENAME}", output_file)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment