#!/usr/bin/env python3
"""Download the weekly list posts indexed below and split them into items.

For every week N in the index (numbered by position, starting at 1) the
script writes:

* ``weeks/weekN.html`` -- the response body exactly as fetched, and
* ``weeks/N/N-M``      -- the text content of the M-th ``<li>`` matched by
  ``ITEM_XPATH`` on that page, one file per item.
"""
import os

import urllib3
from lxml import html

# "Week N: URL" index. The labels are only used as separators while parsing;
# week numbers used for output paths come from the position in this list.
WEEK_INDEX = """\
Week 1: https://goo.gl/KWlyOO
Week 2: https://goo.gl/Pn7MFs
Week 3: https://goo.gl/CZwxsX
Week 4: https://goo.gl/JhwuON
Week 5: https://goo.gl/TGM6x8
Week 6: https://goo.gl/uhyjxe
Week 7: https://goo.gl/bMdhTG
Week 8: https://goo.gl/89MW8h
Week 9: https://goo.gl/ekv9wE
Week 10: https://goo.gl/RETyH1
Week 11: https://goo.gl/6cs0tt
Week 12: https://goo.gl/bRMx5o
Week 13: https://goo.gl/xzdjyY
Week 14: https://goo.gl/1HwN4H
Week 15: https://goo.gl/JxVghS
Week 16: https://goo.gl/sZpudG
Week 17: https://goo.gl/QymWmJ
Week 18: https://goo.gl/Ri6E9N
Week 19: https://goo.gl/L6JOSV
Week 20: https://goo.gl/PvlahQ
Week 21: https://goo.gl/XDjZFw
Week 22: https://goo.gl/AQydgj
Week 23: https://goo.gl/FlWS69
Week 24: https://goo.gl/O2c7Th
Week 25: https://goo.gl/bJi31u
Week 26: https://medium.com/@Amy_Siskind/week-26-experts-in-authoritarianism-advise-to-keep-a-list-of-things-subtly-changing-around-you-so-61ba023aee54
Week 27: https://goo.gl/6Kgby0
Week 28: https://goo.gl/teZ4i4
Week 29: https://goo.gl/HKaBbO
Week 30: https://goo.gl/FJqUBe
Week 31: https://goo.gl/17cXD4
Week 32: https://medium.com/@Amy_Siskind/week-32-experts-in-authoritarianism-advise-to-keep-a-list-of-things-subtly-changing-around-you-so-252680d31be6
"""

# Selects the individual list entries inside the post body.
ITEM_XPATH = '//div[@class="section-content"]//ol[@class="postList"]/li'


def _parse_week_urls(index_text):
    """Return the URLs of all ``Week N: URL`` entries, in order of appearance.

    The text is split on the ``"Week "`` label rather than on line breaks, so
    entries may be separated by any whitespace. Whatever precedes the first
    label is discarded.
    """
    entries = index_text.split("Week ")[1:]
    return [entry.strip().split(": ", 1)[1] for entry in entries]


def _save_week(http, week_number, url):
    """Fetch one week's page and write its raw HTML and per-item text files.

    Args:
        http: a ``urllib3.PoolManager`` used to issue the GET request.
        week_number: 1-based week number used in all output paths.
        url: address of the page to download.
    """
    response = http.request('GET', url)

    # Create the output directories *before* the first write: makedirs also
    # creates the parent "weeks" directory that the raw HTML file lives in.
    weekpath = "weeks/%d" % week_number
    os.makedirs(weekpath, exist_ok=True)

    with open("weeks/week%d.html" % week_number, "wb") as html_file:
        html_file.write(response.data)

    tree = html.fromstring(response.data)
    items = tree.xpath(ITEM_XPATH)
    for item_number, item in enumerate(items, start=1):
        item_path = "%s/%d-%d" % (weekpath, week_number, item_number)
        # Explicit UTF-8: scraped text may contain non-ASCII characters that
        # the platform's default encoding cannot represent.
        with open(item_path, "w", encoding="utf-8") as item_file:
            print(''.join(item.itertext()).strip(), file=item_file)


def main():
    """Print the parsed URL list, then download and split every week."""
    print(lists)
    http = urllib3.PoolManager()
    for week_number, url in enumerate(lists, start=1):
        _save_week(http, week_number, url)


# Kept under its original module-level name.
lists = _parse_week_urls(WEEK_INDEX)

if __name__ == "__main__":
    main()