@umangahuja1
Last active September 23, 2022 10:10
Revisions

  1. umangahuja1 revised this gist Oct 19, 2018. 1 changed file with 0 additions and 19 deletions.
    19 changes: 0 additions & 19 deletions simple.py
    @@ -1,19 +0,0 @@
    import requests
    from bs4 import BeautifulSoup
    from time import sleep

    base_url = 'http://quotes.toscrape.com/page/'

    all_urls = list()

    def generate_urls():
        for i in range(1, 11):
            all_urls.append(base_url + str(i))

    def scrape(url):
        res = requests.get(url)
        print(res.status_code, res.url)

    generate_urls()
    for url in all_urls:
        scrape(url)
  2. umangahuja1 created this gist Oct 19, 2018.
    22 changes: 22 additions & 0 deletions parallel.py
    @@ -0,0 +1,22 @@
    from multiprocessing import Pool
    import requests
    from bs4 import BeautifulSoup

    base_url = 'http://quotes.toscrape.com/page/'

    all_urls = list()

    def generate_urls():
        for i in range(1, 11):
            all_urls.append(base_url + str(i))

    def scrape(url):
        res = requests.get(url)
        print(res.status_code, res.url)

    generate_urls()

    p = Pool(10)
    p.map(scrape, all_urls)
    p.terminate()
    p.join()
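
    Note: parallel.py runs as-is where Pool workers are forked (Linux), but on platforms that spawn fresh interpreters (Windows, and macOS since Python 3.8) the pool must be created under an if __name__ == '__main__': guard, or each child re-imports the script and Python raises a RuntimeError. A minimal sketch of a guarded variant, with close()/join() in place of terminate() so workers exit cleanly; the guard and the list comprehension are additions, not part of the original gist:

    # Hedged sketch, not from the gist: a portable variant of parallel.py.
    from multiprocessing import Pool
    import requests

    base_url = 'http://quotes.toscrape.com/page/'

    def scrape(url):
        res = requests.get(url)
        print(res.status_code, res.url)

    if __name__ == '__main__':
        urls = [base_url + str(i) for i in range(1, 11)]
        p = Pool(10)
        p.map(scrape, urls)   # blocks until every page has been fetched
        p.close()             # no more tasks; let workers finish normally
        p.join()              # wait for all workers to exit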
    19 changes: 19 additions & 0 deletions simple.py
    @@ -0,0 +1,19 @@
    import requests
    from bs4 import BeautifulSoup
    from time import sleep

    base_url = 'http://quotes.toscrape.com/page/'

    all_urls = list()

    def generate_urls():
        for i in range(1, 11):
            all_urls.append(base_url + str(i))

    def scrape(url):
        res = requests.get(url)
        print(res.status_code, res.url)

    generate_urls()
    for url in all_urls:
        scrape(url)
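
    simple.py and parallel.py issue the same ten requests; the only difference is sequential versus pooled execution. A minimal timing sketch to compare the two (not part of the gist; the perf_counter timing and the list comprehension are my assumptions, and any measured speedup depends on network latency):

    # Hedged sketch: time the sequential loop against the ten-process pool.
    from multiprocessing import Pool
    from time import perf_counter
    import requests

    base_url = 'http://quotes.toscrape.com/page/'
    urls = [base_url + str(i) for i in range(1, 11)]

    def scrape(url):
        res = requests.get(url)
        return res.status_code

    if __name__ == '__main__':
        t0 = perf_counter()
        for url in urls:
            scrape(url)
        print('sequential:', perf_counter() - t0, 'seconds')

        t0 = perf_counter()
        with Pool(10) as p:      # the with-block calls terminate() on exit,
            p.map(scrape, urls)  # safe here because map() has already returned
        print('parallel:', perf_counter() - t0, 'seconds')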