Created
December 15, 2018 19:13
-
-
Save zdenulo/bb11b5e5450e85347fa6bcd7cac87cf6 to your computer and use it in GitHub Desktop.
Goal is to find out which tables are regularly updated.
Script fetches info about public datasets / tables in BigQuery and writes into file sorted by most recent modified
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| from google.cloud import bigquery | |
| client = bigquery.Client(project='bigquery-public-data') | |
| datasets = client.list_datasets() | |
| data = [] | |
| for dataset in datasets: | |
| dataset_id = dataset.dataset_id | |
| dataset_ref = client.get_dataset(dataset_id) | |
| tables = client.list_tables(dataset_id) | |
| for table in tables: | |
| full_table_id = table.full_table_id.replace(':', '.') | |
| table_ref = client.get_table(full_table_id) | |
| d = {'dataset': dataset_id, | |
| 'table': table_ref.table_id, | |
| 'dataset_description': dataset_ref.description, | |
| 'modified': table_ref.modified.strftime("%Y-%m-%d %H:%M:%S"), | |
| 'created': table_ref.created.strftime("%Y-%m-%d %H:%M:%S"), | |
| 'table_description': table_ref.description | |
| } | |
| data.append(d) | |
| data = sorted(data, key=lambda x: x['modified'], reverse=True) # sort by latest | |
| # write to file | |
| with open('bq_public.csv', 'w') as f: | |
| csv_writer = csv.DictWriter(f, fieldnames=('dataset', 'dataset_description', 'table', | |
| 'table_description', 'created', 'modified')) | |
| csv_writer.writeheader() | |
| for item in data: | |
| csv_writer.writerow(item) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment