Last active
September 23, 2022 10:10
-
-
Save umangahuja1/195dedc5d8069859f62c5e25d9c219a9 to your computer and use it in GitHub Desktop.
Revisions
-
umangahuja1 revised this gist
Oct 19, 2018 · 1 changed file with 0 additions and 19 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,19 +0,0 @@ -
umangahuja1 created this gist
Oct 19, 2018 .There are no files selected for viewing
"""Scrape quotes.toscrape.com pages 1-10, printing each response status.

Two modes are provided: a parallel run using a process pool (the default
when executed as a script) and a sequential fallback.
"""
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup  # noqa: F401 -- kept from original; unused here

# Base URL; page number 1..10 is appended to form each target URL.
base_url = 'http://quotes.toscrape.com/page/'
all_urls = list()


def generate_urls():
    """Populate the module-level ``all_urls`` with pages 1 through 10."""
    for i in range(1, 11):
        all_urls.append(base_url + str(i))


def scrape(url):
    """GET ``url`` and print the response status code and final URL."""
    res = requests.get(url)
    print(res.status_code, res.url)


def scrape_sequential():
    """Fallback: scrape every collected URL one after another."""
    for url in all_urls:
        scrape(url)


if __name__ == '__main__':
    generate_urls()
    # The __main__ guard is mandatory with multiprocessing: on spawn-start
    # platforms (Windows, macOS on Python 3.8+) each worker re-imports this
    # module, and an unguarded Pool(...) would recursively spawn new pools.
    #
    # The original called p.terminate() *before* p.join(), killing workers
    # abruptly. Pool's context manager waits for map() to finish and then
    # tears the pool down cleanly.
    with Pool(10) as p:
        p.map(scrape, all_urls)