Skip to content

Instantly share code, notes, and snippets.

@RobinL
Created February 9, 2019 16:16
Show Gist options
  • Select an option

  • Save RobinL/9fc189d22777e6f7a9968d5ce5adf950 to your computer and use it in GitHub Desktop.

Select an option

Save RobinL/9fc189d22777e6f7a9968d5ce5adf950 to your computer and use it in GitHub Desktop.

Revisions

  1. RobinL created this gist Feb 9, 2019.
    44 changes: 44 additions & 0 deletions get_data.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,44 @@
    import requests
    import json
    import pandas as pd
    from io import StringIO
    import time

    # Filter-creation endpoint; the query string carries submitted=true.
    url = "https://api.beta.ons.gov.uk/v1/filters?submitted=true"

    # Dataset, edition and version the filter is run against.
    _dataset = {"id": "mid-year-pop-est", "edition": "time-series", "version": 4}

    # Restrict the geography dimension to a single area code.
    _geography = {"name": "geography", "options": ["E06000002"]}

    # Request body for the filter: one dataset reference plus the list of
    # dimension restrictions.
    post = {"dataset": _dataset, "dimensions": [_geography]}

    # Submit the filter request; the response links to the filter output resource.
    post_data = json.dumps(post)
    r = requests.post(url=url, data=post_data)
    r.raise_for_status()  # surface an HTTP error here rather than as a later KeyError
    filter_meta = r.json()
    output_url = filter_meta["links"]["filter_output"]["href"]

    # The filter output occasionally comes back with empty download entries
    # ({'csv': {}, 'xls': {}}), so poll it until the CSV href is present instead
    # of indexing ['downloads']['csv']['href'] on the first response.
    # NOTE(review): presumably the file is still being generated at that point;
    # the attempt count and delay below are arbitrary - tune as needed.
    csv_location = None
    for _ in range(30):
        r = requests.get(output_url)
        r.raise_for_status()
        filter_output_meta = r.json()
        downloads = filter_output_meta.get("downloads") or {}
        csv_location = (downloads.get("csv") or {}).get("href")
        if csv_location:
            break
        time.sleep(1)
    else:
        # Every attempt returned a filter output without a CSV link.
        raise RuntimeError("No CSV download link published at " + output_url)

    # Download the CSV, retrying while the server still answers with its
    # "resource not found" body.
    for _ in range(100):
        time.sleep(1)  # pause before every attempt so retries do not fire back-to-back
        r = requests.get(csv_location)
        text = r.text  # body of this attempt; compared below and parsed after the loop
        if text != "resource not found\n":
            break
    else:
        # All attempts returned the placeholder body; do not parse it as data.
        raise RuntimeError("CSV still not available at " + csv_location)

    # Parse only once real content has been received.
    df = pd.read_csv(StringIO(text))

    # Occasionally the 'downloads' entry of the filter output comes back with empty
    # 'csv' and 'xls' objects (i.e. no 'href' yet), like so:
    # {'dataset': {'edition': 'time-series', 'id': 'mid-year-pop-est', 'version': 4},
    #  'dimensions': [{'name': 'geography', 'options': ['E06000002']}],
    #  'downloads': {'csv': {}, 'xls': {}},
    #  ...}