Last active
October 7, 2024 18:55
-
-
Save rixx/422392d2aa580b5d286e585418bf6915 to your computer and use it in GitHub Desktop.
Revisions
-
rixx revised this gist
Mar 5, 2022 . 1 changed file with 8 additions and 14 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,9 +3,10 @@ from datetime import datetime import requests from tqdm import tqdm BASE_URL = "https://code.djangoproject.com/jsonrpc" DJANGO_MAX_TICKET = 33562 def get_ticket(*, ticket_id, session): @@ -115,15 +116,6 @@ def create_db(*, connection): connection.commit() def collect_data(*, connection, start=None, end=None, total=None): cursor = connection.cursor() start = start or 1 @@ -132,12 +124,11 @@ def collect_data(*, connection, start=None, end=None, total=None): if not end: end = DJANGO_MAX_TICKET with requests.Session() as session: for ticket_id in tqdm(range(start, end + 1)): data = get_ticket(ticket_id=ticket_id, session=session) if data: try: store_ticket(data=data, cursor=cursor) except Exception as e: print("FAILED " + str(ticket_id)) print(e) @@ -147,8 +138,11 @@ def collect_data(*, connection, start=None, end=None, total=None): def main(): connection = sqlite3.connect("django_tickets.db") create_db(connection=connection) cursor = connection.cursor() result = cursor.execute("SELECT id FROM tickets ORDER BY id DESC LIMIT 1").fetchone() start = result[0] if result else 0 collect_data(connection=connection, start=start) if __name__ == "__main__": main() -
rixx created this gist
Jun 25, 2019 . There are no files selected for viewing
"""Mirror Django's Trac tickets into a local SQLite database.

Each ticket is fetched through the Trac JSON-RPC endpoint (``ticket.get``)
and stored as one row of the ``tickets`` table in ``django_tickets.db``.
"""
import json
import sqlite3
from datetime import datetime

import requests

BASE_URL = "https://code.djangoproject.com/jsonrpc"
DJANGO_MAX_TICKET = 30588

# Columns copied from the ticket attribute dict into the ``tickets`` table.
INSERT_KEYS = (
    "changetime",
    "owner",
    "keywords",
    "severity",
    "needs_tests",
    "version",
    "easy",
    "type",
    "status",
    "description",
    "reporter",
    "component",
    "has_patch",
    "stage",
    "needs_better_patch",
    "summary",
    "created",
    "needs_docs",
    "ui_ux",
    "resolution",
)
# Attributes whose value is a ``__jsonclass__`` wrapped timestamp.
DATETIME_KEYS = frozenset({"changetime", "created"})
# Attributes converted with ``bool(int(value))``.
BOOLEAN_KEYS = frozenset(
    {
        "needs_tests",
        "easy",
        "has_patch",
        "needs_better_patch",
        "needs_docs",
        "ui_ux",
    }
)
PROGRESS_BAR_WIDTH = 100


def get_ticket(*, ticket_id, session):
    """Fetch a single ticket via JSON-RPC.

    Args:
        ticket_id: Numeric id of the ticket to request.
        session: ``requests.Session`` used to send the request, so the
            connection is reused across tickets.

    Returns:
        The decoded JSON response, or ``None`` when the response carries an
        error with code 404.

    Raises:
        requests.HTTPError: If the HTTP status indicates a failure.
    """
    response = session.post(
        BASE_URL,
        data=json.dumps(
            {
                "method": "ticket.get",
                "params": [ticket_id],
                "id": ticket_id,  # Could also be 23 every time
            }
        ),
        headers={"Content-Type": "application/json"},
    )
    response.raise_for_status()
    result = response.json()
    if result and result.get("error") and result["error"].get("code") == 404:
        return None
    return result


def store_ticket(*, cursor, data):
    """Insert one fetched ticket into the ``tickets`` table.

    Args:
        cursor: SQLite cursor used for the INSERT; the caller commits.
        data: JSON-RPC response as returned by ``get_ticket``. The first
            element of ``data["result"]`` is used as the ticket id and the
            last element as the attribute dict.

    Raises:
        KeyError: If an expected attribute is missing from the ticket.
        sqlite3.Error: If the INSERT fails (e.g. the id already exists).
    """
    ticket_id = data["result"][0]
    ticket_data = data["result"][-1]
    # The table column is called "created"; the attribute dict calls it "time".
    ticket_data["created"] = ticket_data["time"]
    insert_data = {
        key: get_value_from_data(key=key, value=ticket_data[key])
        for key in INSERT_KEYS
    }
    insert_data["last_pulled_from_trac"] = datetime.now()
    insert_data["id"] = int(ticket_id)
    # Column names come from the constant key list above, never from the
    # response, so interpolating them into the statement is safe; the values
    # themselves are bound as parameters.
    columns = ", ".join(insert_data)
    placeholders = ", ".join("?" for _ in insert_data)
    cursor.execute(
        f"""
        INSERT INTO tickets ({columns})
        VALUES ({placeholders})
        """,
        list(insert_data.values()),
    )


def get_value_from_data(*, key, value):
    """Convert a raw ticket attribute into the value stored in SQLite.

    Args:
        key: Attribute name; selects the conversion.
        value: Raw value taken from the ticket attribute dict.

    Returns:
        ``None`` for any falsy value, a ``datetime`` for timestamp keys,
        a ``bool`` for flag keys, and the value unchanged otherwise.
    """
    if not value:
        return None
    if key in DATETIME_KEYS:
        return datetime.strptime(value["__jsonclass__"][-1], "%Y-%m-%dT%H:%M:%S")
    if key in BOOLEAN_KEYS:
        return bool(int(value))
    return value


def create_db(*, connection):
    """Create the ``tickets`` table if it does not exist and commit."""
    cursor = connection.cursor()
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS tickets (
            id int primary key,
            created datetime,
            changetime datetime,
            last_pulled_from_trac datetime,
            stage text,
            status text,
            component text,
            type text,
            severity text,
            version text,
            resolution text,
            summary text,
            description text,
            owner text,
            reporter text,
            keywords text,
            easy boolean,
            has_patch boolean,
            needs_better_patch boolean,
            needs_tests boolean,
            needs_docs boolean,
            ui_ux boolean
        )"""
    )
    connection.commit()


def print_progress_bar(current, total):
    """Redraw a one-line progress bar on stdout.

    Args:
        current: Number of items processed so far.
        total: Number of items overall. Nothing is drawn when this is not
            positive, which avoids a division by zero.
    """
    if total <= 0:
        return
    percent = ("{0:1.1f}").format(100 * (current / total))
    filled = int(PROGRESS_BAR_WIDTH * current // total)
    bar = "█" * filled + "-" * (PROGRESS_BAR_WIDTH - filled)
    print("\r |{}| {}%".format(bar, percent), end="\r")
    if current == total:
        print()


def collect_data(*, connection, start=None, end=None, total=None):
    """Fetch and store every ticket in an inclusive id range.

    Args:
        connection: Open SQLite connection holding the ``tickets`` table.
        start: First ticket id; defaults to 1.
        end: Last ticket id (inclusive). Takes precedence over ``total``.
        total: Number of tickets to fetch when ``end`` is not given.
            If neither is given the range ends at ``DJANGO_MAX_TICKET``.

    Raises:
        requests.RequestException: If a request fails; tickets stored before
            the failure are still committed.
    """
    cursor = connection.cursor()
    start = start or 1
    if not end and total:
        end = start + total - 1
    if not end:
        end = DJANGO_MAX_TICKET
    ticket_count = end - start + 1
    try:
        with requests.Session() as session:
            for done, ticket_id in enumerate(range(start, end + 1), start=1):
                data = get_ticket(ticket_id=ticket_id, session=session)
                if data:
                    try:
                        store_ticket(data=data, cursor=cursor)
                    except Exception as e:
                        # Best effort: report the ticket and keep going.
                        print("FAILED " + str(ticket_id))
                        print(e)
                # Advance for every ticket (stored, missing or failed) so the
                # bar always reaches 100% at the end of the range.
                print_progress_bar(done, ticket_count)
    finally:
        # Keep whatever was stored even if a request raised midway.
        connection.commit()


def main():
    """Create the database if needed and pull tickets starting at 28369."""
    connection = sqlite3.connect("django_tickets.db")
    try:
        create_db(connection=connection)
        collect_data(connection=connection, start=28369)
    finally:
        connection.close()


if __name__ == "__main__":
    main()