Skip to content

Instantly share code, notes, and snippets.

@rchacon
Last active August 4, 2016 03:59
Show Gist options
  • Select an option

  • Save rchacon/1ab506bd70eee3f76cfd7c5985b3de4b to your computer and use it in GitHub Desktop.

Select an option

Save rchacon/1ab506bd70eee3f76cfd7c5985b3de4b to your computer and use it in GitHub Desktop.

Revisions

  1. @rchacon1 rchacon1 revised this gist Aug 4, 2016. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions tests.py
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,8 @@
    """
    Copy the page source of http://www.thechapelsf.com/music/ and save it as markup.html
    Usage:
    $ py.test tests.py
    """
    import pytest

  2. @rchacon1 rchacon1 revised this gist Aug 4, 2016. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions tests.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,6 @@
    """
    Copy the page source of http://www.thechapelsf.com/music/ and save it as markup.html
    """
    import pytest

    from chapelsf import parse_shows
  3. @rchacon1 rchacon1 revised this gist Aug 4, 2016. 1 changed file with 0 additions and 2556 deletions.
    2,556 changes: 0 additions & 2,556 deletions markup.html
    0 additions, 2,556 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  4. @rchacon1 rchacon1 revised this gist Aug 4, 2016. 2 changed files with 2577 additions and 0 deletions.
    2,556 changes: 2,556 additions & 0 deletions markup.html
    2,556 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    21 changes: 21 additions & 0 deletions tests.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,21 @@
    import pytest

    from chapelsf import parse_shows


    @pytest.fixture(scope='module')
    def markup():
        """Return the text of markup.html, read once per test module.

        The file is opened relative to the current working directory.
        """
        with open('markup.html') as source:
            return source.read()


    def test_parse_shows(markup):
        """Check the first two shows parsed from the saved markup."""
        shows = parse_shows(markup)

        expected_first = {
            'artist': 'Emily King',
            'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
            'sold_out': True,
            'ticket_price': '\n$20 adv / $22 door\t ',
            'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/',
        }
        assert expected_first == shows[0]

        # This listing has no price range and an empty second time span.
        expected_second = {
            'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
            'show_date': 'Thu 8/04 5:00 pm / ',
            'sold_out': False,
            'ticket_price': None,
            'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/',
        }
        assert expected_second == shows[1]
  5. @rchacon1 rchacon1 created this gist Aug 4, 2016.
    81 changes: 81 additions & 0 deletions chapelsf.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,81 @@
    """
    Scrape shows from http://www.thechapelsf.com/music/
    Usage:
    $ python chapelsf.py
    Sample output:
    [ { 'artist': 'Emily King',
    'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
    'sold_out': True,
    'ticket_price': '\n$20 adv / $22 door\t ',
    'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/'},
    { 'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
    'show_date': 'Thu 8/04 5:00 pm / ',
    'sold_out': False,
    'ticket_price': None,
    'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/'},
    ...]
    """
    import sys

    from bs4 import BeautifulSoup
    import requests


    # Listings page fetched by get_markup(); parse_shows() also prepends it to
    # each show's href to build 'ticket_url'.
    URL = 'http://www.thechapelsf.com/music/'


    def get_markup(timeout=10):
        """Download the listings page at URL and return its HTML as text.

        timeout is the number of seconds to wait for the server; it is passed
        straight to requests.get. Without it, requests waits indefinitely on
        an unresponsive server and the script would hang.

        Exits the process (SystemExit) with a message if the response status
        is not OK. Network errors, including a timeout, propagate as requests
        exceptions.
        """
        resp = requests.get(URL, timeout=timeout)
        if not resp.ok:
            sys.exit('HTTP status received {}'.format(resp.status_code))

        return resp.text


    def _parse_listing(listing):
        """Build the show dict for a single 'list-view-item' element."""
        headliner_link = listing.find(attrs={'class': 'headliners'}).find('a')
        artist = headliner_link.text

        day = listing.find(attrs={'class': 'dates'}).text

        # The first span is used as the door time, the second as the show time.
        times = listing.find(attrs={'class': 'times'})
        doors = times.find('span').text
        starts = times.find_all('span')[1].text

        pricing = listing.find(attrs={'class': 'ticket-price'})
        price_tag = pricing.find(attrs={'class': 'price-range'})
        sold_out_tag = pricing.find(attrs={'class': 'sold-out'})

        href = headliner_link['href']

        return {
            'artist': artist,
            'show_date': '{} {} / {}'.format(day, doors, starts),
            # The raw text is kept as-is, including surrounding whitespace.
            'ticket_price': price_tag.text if price_tag else None,
            'sold_out': bool(sold_out_tag),
            # NOTE: plain concatenation; the documented sample output and
            # tests.py both show the resulting 'music//event' double slash.
            'ticket_url': '{}{}'.format(URL, href),
        }


    def parse_shows(markup):
        """Parse the listings page HTML into a list of show dicts.

        Each dict has the keys 'artist', 'show_date', 'ticket_price' (None when
        the listing has no price range), 'sold_out' and 'ticket_url'.
        """
        soup = BeautifulSoup(markup, 'html.parser')
        listings = soup.find_all(attrs={'class': 'list-view-item'})
        return [_parse_listing(listing) for listing in listings]


    def main():
        """Fetch the live listings page and return the parsed shows."""
        return parse_shows(get_markup())


    if __name__ == '__main__':
        # When run as a script, pretty-print the scraped shows.
        from pprint import PrettyPrinter
        PrettyPrinter(indent=4).pprint(main())