Created
February 4, 2022 17:14
-
-
Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.
Revisions
-
qb20nh created this gist
Feb 4, 2022 .There are no files selected for viewing
"""Demo: locate a position in a huge JSON file by binary-searching byte offsets.

Each probe seeks to a byte offset, prints a short preview of the file at that
offset and then (in this demo) simulates a one-second database lookup to decide
which half of the file to continue in.
"""
import os
from time import sleep, time

# Binary file handle read by find_json_property(); bound by the script entry
# point at the bottom of this file.
file = None


def binary_search(condition, low, high):
    """Binary-search the inclusive integer range [low, high].

    Args:
        condition: Callable taking a candidate index and returning
            0 if the index is a match, a negative number if the match lies
            at a lower index, or a positive number if it lies at a higher one.
        low: Smallest index to consider (inclusive).
        high: Largest index to consider (inclusive).

    Returns:
        The matching index, or None when no index in the range matches or
        when ``condition`` is not callable.
    """
    if not callable(condition):
        return None

    # `<=` so that a range narrowed down to a single index is still probed.
    while low <= high:
        # Integer floor division: no float round-trip, exact for big offsets.
        mid = (low + high) // 2
        res = condition(mid)
        if res == 0:
            return mid
        if res < 0:
            high = mid - 1
        else:
            low = mid + 1
    return None


def find_json_property(idx):
    """Probe byte offset ``idx`` of the module-level ``file``.

    Prints a preview of up to 1024 bytes starting at ``idx``, waits one second
    to simulate a database query, then compares ``idx`` against a hard-coded
    target offset.

    Args:
        idx: Byte offset into ``file`` to probe.

    Returns:
        0 when ``idx`` is within 1024 bytes of the target, -1 when ``idx`` is
        past the target (search lower), 1 otherwise (search higher).
    """
    print(f'seeking to {idx}')
    preview_bytes = 1024

    file.seek(idx)
    # An arbitrary offset can land in the middle of a multi-byte UTF-8
    # character, and the fixed-size window can cut one off at the end.
    # errors="ignore" drops those partial sequences in a single read instead
    # of re-reading the window byte by byte, and lets real I/O errors surface.
    read_content = file.read(preview_bytes).decode("utf-8", errors="ignore")
    print(f'content: {read_content}')

    target = 2500000000
    print('querying DB for property songID')
    # query db here
    sleep(1)  # assume every query takes about 1 second

    if abs(idx - target) <= preview_bytes:  # change this to json property match result
        return 0
    if idx > target:
        return -1
    return 1


file_path = "C:/json/song_data_file.json"  # replace this with your file location

if __name__ == "__main__":
    start_time = time()
    size_in_bytes = os.path.getsize(file_path)
    print(f'file size: {size_in_bytes}')
    # `file` is rebound at module level here, which is where
    # find_json_property() looks it up; `with` guarantees it is closed.
    with open(file_path, 'rb') as file:
        result = binary_search(find_json_property, 0, size_in_bytes)
    end_time = time()
    print(f'found at {result}')
    print(f'elapsed {end_time-start_time}')