Skip to content

Instantly share code, notes, and snippets.

@3rogue
Last active December 27, 2016 08:34
Show Gist options
  • Select an option

  • Save 3rogue/f86157e01b5fc0aee4e0 to your computer and use it in GitHub Desktop.

Select an option

Save 3rogue/f86157e01b5fc0aee4e0 to your computer and use it in GitHub Desktop.
Download pictures from avmp.pw using multithreading.
#coding=utf-8
import requests, re, os, time
from multiprocessing.dummy import Pool as ThreadPool
def get_pagenum(url):
    """Return how many consecutive listing pages exist under ``url``.

    ``url`` itself is requested first; after that ``url/currentPage/2``,
    ``url/currentPage/3``, ... are requested one by one until a response
    comes back with a status other than OK.  The return value is the number
    of requests that succeeded (0 when even ``url`` is not OK).
    """
    ok_pages = 0
    response = requests.get(url)
    while response.status_code == requests.codes.ok:
        ok_pages += 1
        # Probe the page that follows the last one known to exist.
        next_url = url + '/currentPage/%d' % (ok_pages + 1)
        response = requests.get(next_url)
    return ok_pages
# Shared list of (image URL, destination path) tuples: get_img_url() appends
# to it, and the script's main section later maps write_img() over it.
img_title_queue = []
def get_img_url(url):
    """Collect (image URL, save path) pairs from one listing page.

    Requests ``url``; when the response status is OK, every image URL and
    every title/date block is extracted from the page body with regular
    expressions, and the resulting ``(image_url, local_path)`` tuples are
    appended to the module-level ``img_title_queue``.  Nothing is appended
    for a non-OK response.

    The local path is built from the module-level ``dirname`` plus the
    title text (spaces removed) and the contents of the two ``<date>``
    elements that follow it, with a ``.png`` extension.  ``dirname`` must
    therefore be defined before this function is called.

    Returns None; the result is delivered through ``img_title_queue``.
    """
    page = requests.get(url)
    if page.status_code != requests.codes.ok:
        return

    html = page.content

    # Swap the trailing "s.jpg" for "l.jpg" (presumably thumbnail -> large
    # image).  The dot is escaped and the pattern anchored to the end so that
    # unrelated runs such as "sdj" or "s.j" elsewhere in the URL are no
    # longer rewritten.
    img_queue = [
        re.sub(r's\.jpg$', 'l.jpg', thumb_url)
        for thumb_url in re.findall('http://.*?video.*?jpg', html)
    ]

    # Groups: 0 = title text, 1 = optional <i> element (unused),
    # 2 and 3 = contents of the two <date> elements.
    title_list = re.findall(
        '<span>(.*?)(<i.*?</i>)?<br><date>(.*?)</date>.*?<date>(.*?)</date>',
        html)
    path_queue = [
        './' + dirname + '/' + title[0].decode('utf-8').replace(' ', '')
        + '_' + title[2] + '_' + title[3] + '.png'
        for title in title_list
    ]

    # zip() pairs entries positionally and stops at the shorter list, so a
    # page where the two patterns match a different number of times no
    # longer raises IndexError.
    img_title_queue.extend(zip(img_queue, path_queue))
def write_img(imginfo):
    """Download one image and save it to disk, logging failures.

    ``imginfo`` is an ``(image_url, destination_path)`` pair.  The
    destination path is printed, the image is downloaded, and its bytes are
    written to the destination.  This is best-effort: any ordinary error
    (network failure, HTTP error status, unwritable path, ...) is not
    re-raised; instead the path and the URL are appended to
    ``error_list.txt`` in the current directory, one per line.

    Returns None.
    """
    img_url = imginfo[0]
    img_path = imginfo[1]
    try:
        print(img_path)
        # Download before opening the destination so a failed request does
        # not leave an empty or truncated image file behind.
        response = requests.get(img_url)
        # Treat HTTP error statuses as failures rather than saving the
        # error page body under an image file name.
        response.raise_for_status()
        with open(img_path, 'wb') as f:
            f.write(response.content)
    except Exception:
        # "except Exception" (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the script can still be interrupted.
        with open('error_list.txt', 'a') as f:
            # The path is encoded as UTF-8 before writing.  Both lines go out
            # in a single write() call to reduce interleaving between
            # worker threads.
            f.write(img_path.encode('utf-8') + '\n' + img_url + '\n')
if __name__ == '__main__':
    # Script entry point: ask for a target directory and a "star" key,
    # gather every (image URL, path) pair from the listing pages, download
    # all images with a thread pool, and report the elapsed time.
    dirname = raw_input('input the dirname:')
    urlkey = raw_input('input the key afer the "star". e.g "http://www.javfee.com/cn/star/9oz" input 9oz:')
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

    start = time.time()
    url = "http://www.javfee.com/cn/star/%s" % urlkey
    pagenum = get_pagenum(url)
    urls = [url + '/currentPage/%i' % i for i in range(1, pagenum + 1)]

    # One pool of 15 worker threads serves both phases.  map() blocks until
    # every task has finished, so img_title_queue is fully populated before
    # the download phase starts.
    pool = ThreadPool(15)
    try:
        pool.map(get_img_url, urls)
        pool.map(write_img, img_title_queue)
    finally:
        # Always release the worker threads, even if a phase raised.
        pool.close()
        pool.join()

    end = time.time()
    # The message below reads "Elapsed time ... N seconds".
    print("耗时.......%d秒" % (end - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment