Skip to content

Instantly share code, notes, and snippets.

@scottcowan
Created January 14, 2026 02:44
Show Gist options
  • Select an option

  • Save scottcowan/0092021be55a77c2194285074298fe10 to your computer and use it in GitHub Desktop.

Select an option

Save scottcowan/0092021be55a77c2194285074298fe10 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Extract magnet link from audiobookbay.lu pages.
Usage:
python3 extract-magnet.py <url>
python3 extract-magnet.py "https://audiobookbay.lu/abss/dungeonj-crawler-carl-book-2-carls-doomsday-scenario-matt-dinniman/"
Or make it executable:
chmod +x extract-magnet.py
./extract-magnet.py <url>
"""
import html
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
def extract_magnet_link(url):
    """Fetch an audiobookbay.lu page and build a magnet link from it.

    Args:
        url: Full URL of an audiobook detail page.

    Returns:
        Tuple of (magnet_uri, info_hash, title, trackers).

    Exits:
        With status 1 if the page cannot be fetched or the page has no
        recognizable Info Hash row.
    """
    try:
        # Spoof a browser User-Agent: the site rejects the default
        # urllib agent string.
        req = urllib.request.Request(
            url,
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
        )
        with urllib.request.urlopen(req) as response:
            # errors='replace' keeps this best-effort scrape alive on
            # pages with stray non-UTF-8 bytes instead of raising
            # UnicodeDecodeError.
            html_text = response.read().decode('utf-8', errors='replace')
    except urllib.error.URLError as e:
        print(f"Error fetching URL: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Info hash: a 40-char hex string in the page's details table.
    hash_match = re.search(r'Info Hash:</td>\s*<td>([a-fA-F0-9]{40})</td>', html_text)
    if not hash_match:
        print("Error: Could not find Info Hash on the page.", file=sys.stderr)
        sys.exit(1)
    hash_value = hash_match.group(1)

    # Title: prefer the <h1> heading, fall back to the <title> tag.
    title_match = re.search(r'<h1[^>]*>(.*?)</h1>', html_text, re.DOTALL)
    if title_match:
        # Strip nested tags first, then decode entities. html.unescape
        # decodes every entity in a single pass; the previous chained
        # str.replace approach double-decoded '&amp;lt;' into '<' and
        # missed entities it didn't list.
        title = re.sub(r'<[^>]+>', '', title_match.group(1))
        title = html.unescape(title).strip()
    else:
        title_match = re.search(r'<title>(.*?)</title>', html_text)
        if title_match:
            title = title_match.group(1).split(' - ')[0].strip()
        else:
            title = "Audiobook"

    # Trackers: collect both "Announce URL" and "Tracker" rows,
    # deduplicated while preserving page order.
    trackers = []
    for match in re.finditer(r'(?:Announce URL:|Tracker):</td>\s*<td>(.*?)</td>', html_text, re.DOTALL):
        tracker = re.sub(r'<[^>]+>', '', match.group(1))
        # Decode entities too (e.g. '&amp;' in announce URLs) — the old
        # code claimed to clean entities but never actually did.
        tracker = html.unescape(tracker).strip()
        if tracker and tracker not in trackers:
            trackers.append(tracker)
    if not trackers:
        print("Warning: No trackers found. Magnet link may not work properly.", file=sys.stderr)

    # Assemble the magnet URI: exact-topic hash, display name, trackers.
    magnet = f'magnet:?xt=urn:btih:{hash_value}&dn={urllib.parse.quote(title)}'
    for tracker in trackers:
        magnet += f'&tr={urllib.parse.quote(tracker)}'
    return magnet, hash_value, title, trackers
def main():
    """Command-line entry point: validate arguments, print the magnet link."""
    if len(sys.argv) != 2:
        print(__doc__, file=sys.stderr)
        print(f"\nUsage: {sys.argv[0]} <url>", file=sys.stderr)
        sys.exit(1)

    url = sys.argv[1]
    # Only absolute http(s) URLs are accepted.
    if not url.startswith(('http://', 'https://')):
        print("Error: URL must start with http:// or https://", file=sys.stderr)
        sys.exit(1)

    try:
        magnet, info_hash, name, tracker_list = extract_magnet_link(url)
        # The magnet link goes to stdout so it can be piped cleanly.
        print(magnet)
        # Human-readable summary goes to stderr, and only when stdout
        # is an interactive terminal.
        if sys.stdout.isatty():
            print(f"\n# Info Hash: {info_hash}", file=sys.stderr)
            print(f"# Title: {name}", file=sys.stderr)
            print(f"# Trackers: {len(tracker_list)}", file=sys.stderr)
    except KeyboardInterrupt:
        print("\nInterrupted by user.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment