Skip to content

Instantly share code, notes, and snippets.

@bantmen
Created December 31, 2015 16:36
Show Gist options
  • Select an option

  • Save bantmen/34ae3131b394fb72c46b to your computer and use it in GitHub Desktop.

Select an option

Save bantmen/34ae3131b394fb72c46b to your computer and use it in GitHub Desktop.
import urllib2
from bs4 import BeautifulSoup
from time import sleep
# URL template for a Reddit user's page; "{0}" is filled with the username
# by get_subreddits() via str.format.
base = "https://www.reddit.com/user/{0}/"
def get_subreddits(username):
    """Collect the subreddits linked from every page of a user's listing.

    Fetches ``base.format(username)`` and then walks the listing page by
    page, passing the last ``thing_id`` input found on each page as the
    ``after`` cursor of the next request.

    Args:
        username: Reddit user name substituted into ``base``.

    Returns:
        The set of subreddit names gathered by ``collect_current`` across
        every page that could be fetched.

    Raises:
        Whatever ``get_next`` raises for the first page. A failure on any
        later page ends the crawl and returns what was collected so far.
    """
    url = base.format(username)
    soup = get_next(url)
    subreddits = collect_current(soup)
    # Seconds to wait before the next request; grows ~20% per page
    # (truncated to whole seconds) to stay polite on long listings.
    delay = 5
    while True:
        input_tags = soup.findAll("input", {"name": "thing_id"})
        if not input_tags:
            # No cursor candidates on this page: nothing left to follow.
            return subreddits
        last_thing_id = input_tags[-1].get("value")
        if not last_thing_id:
            # Without a cursor get_next() would request the first page
            # again and this loop would never terminate.
            return subreddits
        # Sleep only when another request is actually going to be made.
        sleep(delay)
        try:
            soup = get_next(url, last_thing_id)
        except Exception:
            # Best effort: keep the partial result on a fetch/parse error,
            # but let KeyboardInterrupt/SystemExit propagate.
            return subreddits
        # set.union() returns a new set and leaves the receiver untouched;
        # update() merges the new page's names in place.
        subreddits.update(collect_current(soup))
        delay = int(delay * 1.2)
def collect_current(soup):
    """Return the set of subreddit names linked from one parsed page.

    Args:
        soup: Parsed page exposing ``findAll``; each ``<a class="subreddit">``
            tag it returns contributes its ``text``.

    Returns:
        A set of names with surrounding whitespace, a leading ``/r/``
        prefix and any trailing slashes removed.
    """
    prefix = "/r/"
    names = set()
    for tag in soup.findAll("a", {"class": "subreddit"}):
        name = tag.text.strip()
        # str.strip("/r/") would treat its argument as a character set and
        # eat every leading/trailing '/' and 'r' ("/r/rust" -> "ust"), so
        # remove the literal prefix by slicing instead.
        if name.startswith(prefix):
            name = name[len(prefix):]
        names.add(name.rstrip("/"))
    return names
def get_next(url, after_id=None):
    """Fetch a listing page and return it parsed with BeautifulSoup.

    Args:
        url: Page URL to request.
        after_id: Optional pagination cursor; when truthy it is appended
            to ``url`` as the ``after`` query parameter.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` is a
            subclass and is raised for HTTP error statuses).
    """
    if after_id:
        url += "?after={0}".format(after_id)
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        # Name the parser explicitly so the result does not depend on
        # which optional parsers happen to be installed.
        return BeautifulSoup(response, "html.parser")
    finally:
        # The body has been consumed by the parser; release the socket.
        response.close()
# get_subreddits("some_username")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment