Last active
August 4, 2016 03:59
-
-
Save rchacon/1ab506bd70eee3f76cfd7c5985b3de4b to your computer and use it in GitHub Desktop.
Revisions
-
rchacon1 revised this gist
Aug 4, 2016 . 1 changed file with 3 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""
Copy paste source from http://www.thechapelsf.com/music/ and save to markup.html

Usage:
    $ py.test tests.py
"""
import pytest
-
rchacon1 revised this gist
Aug 4, 2016 . 1 changed file with 3 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Copy paste source from http://www.thechapelsf.com/music/ and save to markup.html
"""
import pytest
from chapelsf import parse_shows
-
rchacon1 revised this gist
Aug 4, 2016 . 1 changed file with 0 additions and 2556 deletions. There are no files selected for viewing
-
rchacon1 revised this gist
Aug 4, 2016 . 2 changed files with 2577 additions and 0 deletions. There are no files selected for viewing
"""Tests for ``chapelsf.parse_shows``.

The tests read a saved copy of the listings page from ``markup.html`` in the
current working directory.
"""
import pytest

from chapelsf import parse_shows


@pytest.fixture(scope='module')
def markup():
    """Return the text of ``markup.html``; evaluated once per test module."""
    with open('markup.html') as html_file:
        return html_file.read()


def test_parse_shows(markup):
    """Check the first two parsed listings against their known values."""
    shows = parse_shows(markup)

    expected_first = {
        'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
        'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/',
        'artist': 'Emily King',
        'sold_out': True,
        'ticket_price': '\n$20 adv / $22 door\t ',
    }
    assert expected_first == shows[0]

    expected_second = {
        'show_date': 'Thu 8/04 5:00 pm / ',
        'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/',
        'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
        'sold_out': False,
        'ticket_price': None,
    }
    assert expected_second == shows[1]
rchacon1 created this gist
Aug 4, 2016 . There are no files selected for viewing
"""
Scrape shows from http://www.thechapelsf.com/music/

Usage:
    $ python chapelsf.py

Sample output:
[   {   'artist': 'Emily King',
        'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
        'sold_out': True,
        'ticket_price': '\n$20 adv / $22 door\t ',
        'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/'},
    {   'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
        'show_date': 'Thu 8/04 5:00 pm / ',
        'sold_out': False,
        'ticket_price': None,
        'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/'},
    ...]
"""
import sys

from bs4 import BeautifulSoup
import requests


URL = 'http://www.thechapelsf.com/music/'

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10


def get_markup(timeout=REQUEST_TIMEOUT):
    """Download the listings page at ``URL`` and return its HTML as text.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The response body decoded by requests (``resp.text``).

    Raises:
        SystemExit: If the response status is not OK; the exit message
            contains the HTTP status code.
        requests.exceptions.RequestException: Connection failures and
            timeouts are not caught here and propagate to the caller.
    """
    resp = requests.get(URL, timeout=timeout)
    if not resp.ok:
        sys.exit('HTTP status received {}'.format(resp.status_code))

    return resp.text


def parse_shows(markup):
    """Extract show details from the listings page HTML.

    Every element with class ``list-view-item`` is treated as one show.

    Args:
        markup: HTML source of the listings page.

    Returns:
        A list with one dict per listing, in document order, with keys:
            artist: Text of the link inside the ``headliners`` element.
            show_date: ``'<dates text> <first times span> / <second times span>'``.
            ticket_price: Unstripped text of the ``price-range`` element,
                or None when the listing has none.
            sold_out: True when the listing has a ``sold-out`` element.
            ticket_url: ``URL`` followed by the headliner link's ``href``.
    """
    soup = BeautifulSoup(markup, 'html.parser')

    shows = []
    for listing in soup.find_all(attrs={'class': 'list-view-item'}):
        # Each container is looked up once and reused for all of its fields.
        headliner_link = listing.find(attrs={'class': 'headliners'}).find('a')
        time_spans = listing.find(attrs={'class': 'times'}).find_all('span')
        ticket_info = listing.find(attrs={'class': 'ticket-price'})

        show_date = listing.find(attrs={'class': 'dates'}).text
        door_time = time_spans[0].text
        show_time = time_spans[1].text

        # A listing without a ticket-price container is reported the same
        # way as one whose container is empty: no price, not sold out.
        price_tag = None
        sold_out_tag = None
        if ticket_info is not None:
            price_tag = ticket_info.find(attrs={'class': 'price-range'})
            sold_out_tag = ticket_info.find(attrs={'class': 'sold-out'})

        shows.append({
            'artist': headliner_link.text,
            'show_date': '{} {} / {}'.format(show_date, door_time, show_time),
            'ticket_price': price_tag.text if price_tag is not None else None,
            'sold_out': sold_out_tag is not None,
            # NOTE(review): plain concatenation; URL ends with '/' and the
            # hrefs in the sample output start with '/', which yields the
            # 'music//event/...' form shown in the module docstring. Kept
            # as-is because existing consumers compare against that form.
            'ticket_url': '{}{}'.format(URL, headliner_link['href']),
        })

    return shows


def main():
    """Fetch the live listings page and return the parsed shows."""
    return parse_shows(get_markup())


if __name__ == '__main__':
    import pprint

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(main())