Skip to content

Instantly share code, notes, and snippets.

@qb20nh
Created February 4, 2022 17:14
Show Gist options
  • Select an option

  • Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.

Select an option

Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.

Revisions

  1. qb20nh created this gist Feb 4, 2022.
    61 changes: 61 additions & 0 deletions .\read_file_skip_bytes.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,61 @@
    from time import sleep, time
    import os


    def binary_search(condition, low, high):
        """Binary-search the inclusive integer range [low, high] with a probe callback.

        ``condition(mid)`` is called with a candidate position and must return
        ``0`` when ``mid`` is the answer, a negative number when the answer lies
        below ``mid``, and a positive number when it lies above ``mid``.

        Args:
            condition: Callable taking one int and returning the direction hint
                described above.
            low: Smallest candidate position (inclusive).
            high: Largest candidate position (inclusive).

        Returns:
            The first probed position for which ``condition`` returned ``0``,
            or ``None`` when ``condition`` is not callable or the range is
            exhausted without a match.
        """
        if not callable(condition):
            return None

        # Both bounds are inclusive, so ``low == high`` is still a one-element
        # range that has to be probed; stopping at ``high > low`` would skip it.
        while low <= high:
            # Floor division keeps the midpoint an exact int; going through
            # float division loses precision for very large offsets.
            mid = (low + high) // 2

            res = condition(mid)
            if res == 0:
                return mid
            if res < 0:
                high = mid - 1
            else:
                low = mid + 1

        return None


    def find_json_property(idx):
        """Probe callback for ``binary_search``: inspect the file near byte ``idx``.

        Reads a UTF-8 preview from the module-level binary file handle ``file``,
        prints it, simulates a one-second database lookup, and reports which way
        the search should move next.

        Args:
            idx: Byte offset into ``file`` to inspect.

        Returns:
            ``0`` when ``idx`` is within ``preview_bytes`` of the placeholder
            target offset, ``-1`` when ``idx`` is past the target, and ``1``
            when ``idx`` is before it.
        """
        print(f'seeking to {idx}')

        preview_bytes = 1024
        read_content = None
        offset = 0
        # An arbitrary byte offset can land inside a multi-byte UTF-8 sequence,
        # so slide the window forward one byte at a time until it decodes.
        # At end of file the read is empty and decodes to "", ending the loop.
        while read_content is None:
            file.seek(idx + offset)
            raw = file.read(preview_bytes)
            try:
                read_content = raw.decode("utf-8")
            except UnicodeDecodeError:
                # Only decoding failures are retried; I/O errors or a closed
                # handle propagate instead of being retried forever.
                offset += 1

        print(f'content: {read_content}')
        target = 2500000000  # placeholder position standing in for a real match
        print('querying DB for property songID')  # query db here
        sleep(1)  # assume every query takes about 1 second
        if abs(idx - target) <= preview_bytes:  # change this to json property match result
            return 0
        elif idx > target:
            return -1
        else:
            return 1


    file_path = "C:/json/song_data_file.json"  # replace this with your file location

    start_time = time()

    size_in_bytes = os.path.getsize(file_path)
    print(f'file size: {size_in_bytes}')
    # ``file`` must remain a module-level name because find_json_property reads
    # from it; the ``with`` block guarantees the handle is closed even if the
    # search raises.
    with open(file_path, 'rb') as file:
        result = binary_search(find_json_property, 0, size_in_bytes)

    end_time = time()

    print(f'found at {result}')
    print(f'elapsed {end_time-start_time}')