"""Scan a numeric range of jb51.net book ids and log the names that were found.

Run as a script with two integer arguments, BEG and END; every id in
``range(BEG, END)`` is fetched and the successful hits are written to
``book.log``.
"""
import re
import sys
import urllib

import requests

# Seconds to wait for the server before giving up on a single book page.
# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Compiled once because it is reused for every page in the scan.
# NOTE(review): as written this captures the text between two pairs of
# newlines. It looks like surrounding HTML tags may have been lost from this
# literal -- confirm against the real page markup before relying on it.
BOOK_NAME_PATTERN = re.compile("\n\n(.*?)\n\n")


def getInfo(book_id):
    """Fetch the page for ``book_id`` and extract the book name.

    Args:
        book_id: Identifier interpolated into the book page URL.

    Returns:
        A dict ``{"id": book_id, "name": name}``. ``name`` is the first
        capture of ``BOOK_NAME_PATTERN`` in the page text, or ``""`` when
        the pattern does not match or the request fails.
    """
    url = "https://www.jb51.net/books/" + str(book_id) + ".html"
    try:
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # One unreachable page must not abort the scan and lose every result
        # gathered so far; report it and let the caller count it as a miss.
        print("[warning] request failed for " + url + ": " + str(err),
              file=sys.stderr)
        return {"id": book_id, "name": ""}

    # The response is decoded as GBK regardless of what the server declares.
    res.encoding = "GBK"
    match = BOOK_NAME_PATTERN.search(res.text)
    return {"id": book_id, "name": match.group(1) if match else ""}


def scan(beg, end):
    """Look up every id in ``range(beg, end)`` and collect the hits.

    Progress is printed to stdout, one line per id.

    Args:
        beg: First id to fetch (inclusive); converted with ``int()``.
        end: Id at which to stop (exclusive); converted with ``int()``.

    Returns:
        A list of the ``getInfo`` dicts whose ``"name"`` is non-empty, in
        id order.
    """
    # Normalise once so the progress arithmetic and the range agree.
    beg = int(beg)
    end = int(end)
    total = end - beg

    found = []
    for idx in range(beg, end):
        info = getInfo(idx)
        progress = "(" + str(idx - beg) + "/" + str(total) + "): "
        if info["name"] != "":
            found.append(info)
            print("[success]" + progress + info["name"])
        else:
            print("[failed_]" + progress + "[x]")
    return found


def logoutput(res):
    """Write the scan results to ``book.log`` as ``<id> -> <name>`` lines.

    The file is opened in write mode, so any previous content is replaced.

    Args:
        res: Iterable of dicts with ``"id"`` and ``"name"`` keys, as
            returned by ``scan``.
    """
    lines = [str(elem["id"]) + " -> " + elem["name"] + "\n" for elem in res]
    with open("book.log", "w", encoding="utf-8") as f:
        f.writelines(lines)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Fail with a readable hint instead of a bare IndexError.
        sys.exit("usage: " + sys.argv[0] + " BEG END")
    beg = int(sys.argv[1])
    end = int(sys.argv[2])
    print("Begin to scan:")
    res = scan(beg, end)
    logoutput(res)