Skip to content

Instantly share code, notes, and snippets.

@dmitriiivashko
Created January 23, 2014 18:26
Show Gist options
  • Select an option

  • Save dmitriiivashko/8584067 to your computer and use it in GitHub Desktop.

Select an option

Save dmitriiivashko/8584067 to your computer and use it in GitHub Desktop.
# IMPORTS
import requests
import re
import os
import sys
# CREDENTIALS
# Account used to sign in; replace the placeholders before running.
login = "xxx@xxx.com"
password = "xxx"

# CONSTANTS
# Listing pages 1..total_pages are scanned for episodes.
total_pages = 6
domain = "https://badukmovies.com"
# Path of the sign-in form, appended to `domain`.
login_url = "/users/sign_in"
# Episode listing URL; the page number is appended to it.
page_url = domain + "/episodes?page="
# Suffix appended to an episode / SGF URL to request the file itself.
download_modifier = "/download"
# Everything is saved in the directory that contains this script.
download_directory = os.path.dirname(__file__)

# Download enablers
# Set a flag to False to skip that kind of file.
download_video_allowed = download_sgfs_allowed = download_bonus_sgfs_allowed = True
############################################
############################################
############################################
def create_dir(new_dir):
    """Make sure the directory *new_dir* exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched. Letting ``os.makedirs`` handle the
    "already exists" case avoids the race between checking for the directory
    and creating it.

    Raises:
        OSError: if the directory cannot be created, e.g. because a regular
            file already occupies the path.
    """
    os.makedirs(new_dir, exist_ok=True)
def download_file(s, file_from, file_to):
    """Download *file_from* to the local path *file_to* unless it already exists.

    The body is streamed into ``file_to + ".part"`` and only renamed to
    *file_to* once the transfer has finished, so an interrupted download is
    never mistaken for a complete file on the next run.

    Args:
        s: session-like object whose ``get(url, stream=True)`` returns a
            response offering ``raise_for_status()``, ``iter_content()`` and
            ``close()`` (e.g. a ``requests.Session``).
        file_from: URL to fetch.
        file_to: destination path on disk.

    Returns:
        True if the file was downloaded, False if *file_to* already existed
        (in which case no request is made).

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status
        (``requests.HTTPError`` for a real session), plus any network or
        file-system error hit while transferring. No partial file is left
        behind in either case.
    """
    if os.path.exists(file_to):
        return False

    partial_path = file_to + ".part"
    response = s.get(file_from, stream=True)
    try:
        # Fail loudly instead of saving an error page as if it were the file.
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
        # Atomic rename: file_to only ever appears fully written.
        os.replace(partial_path, file_to)
    except BaseException:
        # Clean up the incomplete download (also on Ctrl+C), then re-raise.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    finally:
        response.close()
    return True
############################################
############################################
############################################
# STARTING HTTP SESSION
# One session object is used for the login and for every later request.
s = requests.Session()

# GETTING CSRF PROTECTION TOKEN
# The sign-in form must be submitted together with the token embedded in the
# login page's <meta name="csrf-token"> tag.
r = s.get(domain + login_url)
r.raise_for_status()
token_match = re.search('<meta content="(.+?)" name="csrf-token" />', r.text)
if token_match is None:
    # Stop with a readable message instead of an AttributeError on None.
    sys.exit(">>> Could not find the CSRF token on the login page. Aborting.")
token = token_match.group(1)

# LOGIN
payload = {
    "utf8": "✓",
    "authenticity_token": token,
    "user[email]": login,
    "user[password]": password,
}
r = s.post(domain + login_url, data=payload)
r.raise_for_status()
# NOTE(review): a successful HTTP status does not prove the credentials were
# accepted; the response body is not inspected here.
# READING EPISODES FROM PAGES
def _slugify(text):
    """Reduce *text* to ASCII letters and digits joined by single hyphens."""
    return re.sub(r"[^0-9a-zA-Z]+", "-", text).strip("-")


def _parse_episode(episode_html):
    """Extract one episode's fields from its listing-page HTML fragment.

    Returns a dict with the keys ``id``, ``slug``, ``url``, ``title``,
    ``watch_url`` and ``read_url``; ``read_url`` is None when the fragment
    contains no SGF link. Raises AttributeError if one of the other fields
    cannot be found in the fragment.
    """
    url = re.search("^\n<a.+?href=\"(.+?)\">", episode_html).group(1)
    sgf_match = re.search("(https://badukmovies.com/episode_sgfs/[0-9]+)", episode_html)
    return {
        "id": re.search("Episode #([0-9]+)", episode_html).group(1),
        "slug": re.search("^\n<a.+?href=\"/episodes/(.+?)\">", episode_html).group(1),
        "url": url,
        "title": re.search("<h4>\n(.+?)\n</h4>", episode_html).group(1),
        "watch_url": url + "?play=true",
        "read_url": sgf_match.group(1) if sgf_match else None,
    }


for page_index in range(total_pages):
    single_page = str(page_index + 1)  # listing pages are numbered from 1
    print("\n>>> Parsing episodes from page #" + single_page)
    r = s.get(page_url + single_page)
    episodes = re.findall("<div class='episode'>(.+?)\n</div>\n</div>\n</div>", r.text, re.DOTALL)
    print(">>> Found " + str(len(episodes)) + " episodes!\n")
    parsed_episodes = [_parse_episode(episode_html) for episode_html in episodes]

    # Debug filtering. Uncomment to test on a specific episode
    # parsed_episodes = parsed_episodes[11:12]

    for episode in parsed_episodes:
        print(">>> Processing episode #" + episode["id"] + "...")
        # Each episode gets its own "<id>_-_<slug>" directory.
        episode_dirname = os.path.join(download_directory, episode["id"] + "_-_" + episode["slug"])
        create_dir(episode_dirname)

        if download_video_allowed:
            print(">>> Downloading episode #" + episode["id"] + "...")
            episode_filename = os.path.join(episode_dirname, episode["slug"] + ".mp4")
            if download_file(s, domain + episode["url"] + download_modifier, episode_filename):
                print(">>> Downloaded episode video #" + episode["id"] + "!")
            else:
                print(">>> Episode video #" + episode["id"] + " is already downloaded. I recommend skipping...")

        # Not every episode links to an SGF; read_url is None in that case.
        if download_sgfs_allowed and episode["read_url"]:
            sgf_filename = os.path.join(episode_dirname, episode["slug"] + ".sgf")
            if download_file(s, episode["read_url"] + download_modifier, sgf_filename):
                print(">>> Downloaded episode SGF #" + episode["id"] + "!")
            else:
                print(">>> Episode SGF #" + episode["id"] + " is already downloaded. I recommend skipping...")

        if download_bonus_sgfs_allowed:
            # Bonus SGFs are only listed on the episode's own page.
            r = s.get(domain + episode["url"])
            # Each match is a (title, download href) pair.
            bonuses = re.findall('<p>\n(.+?)\n<a class.+?</a>\n<a class="btn btn-mini" href="(.+?)"><i class=\'icon-download icon-black\'></i> Download sgf</a>\n</p>', r.text)
            if bonuses:
                bonus_dir_path = os.path.join(episode_dirname, "bonus")
                create_dir(bonus_dir_path)
                print(">>> Bonuses detected!")
                for bonus_number, (bonus_title, bonus_href) in enumerate(bonuses, 1):
                    bonus_slug = _slugify(bonus_title)
                    # The running number keeps bonuses with equal slugs apart.
                    bonus_filename = os.path.join(bonus_dir_path, str(bonus_number) + "__" + bonus_slug + ".sgf")
                    # NOTE(review): the href is used as-is, so it is assumed
                    # to be an absolute URL - confirm against the site markup.
                    if download_file(s, bonus_href, bonus_filename):
                        print(">>> Bonus " + bonus_slug + " downloaded!")
                    else:
                        print(">>> Bonus " + bonus_slug + " is already downloaded. I recommend skipping...")

        print(">>> Processed episode #" + episode["id"] + "!\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment