"""
This script fetches the ETag of an AWS S3 object. It then computes
the ETag locally to see if it matches.

From https://teppen.io/2018/10/23/aws_s3_verify_etags/
"""

from typing import Optional
from hashlib import md5

import boto3


ONE_MEGABYTE = 1048576

# Part-size alignments (in megabytes) tried when the real part size is unknown.
_PART_SIZE_MULTIPLIERS = (1, 2, 5, 8, 16)

# Read size used when hashing an object that was uploaded in a single request.
_READ_CHUNK_SIZE = 8 * ONE_MEGABYTE


def _whole_object_etag(buffer) -> str:
    """Return the hex MD5 of everything left in ``buffer``, read in chunks."""
    digest = md5()
    for chunk in iter(lambda: buffer.read(_READ_CHUNK_SIZE), b""):
        digest.update(chunk)
    return digest.hexdigest()


def _multipart_etag(buffer, part_size: int) -> str:
    """Return the ``<md5-of-part-md5s>-<part count>`` ETag for ``part_size``."""
    part_digests = [
        md5(chunk).digest() for chunk in iter(lambda: buffer.read(part_size), b"")
    ]
    return "{}-{}".format(md5(b"".join(part_digests)).hexdigest(), len(part_digests))


def _candidate_part_sizes(content_length: int, number_parts: int):
    """Yield each distinct part size (in bytes) that is worth trying, once."""
    already_tried = set()
    for multiplier in _PART_SIZE_MULTIPLIERS:
        alignment = multiplier * ONE_MEGABYTE
        whole, remainder = divmod(content_length, number_parts * alignment)
        if remainder:
            # Average part size is not aligned: round up to the next boundary.
            sizes = ((whole + 1) * alignment,)
        else:
            # Average part size is already aligned, so it is the most likely
            # answer. The next boundary up is kept as a second guess because
            # a larger part size can still produce the same number of parts.
            sizes = (whole * alignment, (whole + 1) * alignment)
        for part_size in sizes:
            if part_size > 0 and part_size not in already_tried:
                already_tried.add(part_size)
                yield part_size


def etag_matches_buffer(
    buffer, client, bucket: str, key: str, version_id: Optional[str] = None
) -> bool:
    """Check whether local data matches the ETag of an S3 object.

    The remote ETag is fetched with ``client.head_object``. An ETag without a
    ``-`` is compared against the plain MD5 of the data. An ETag of the form
    ``<hex>-<N>`` comes from a multipart upload; the part size is not
    recorded, so several common part sizes are tried until one reproduces it.

    NOTE(review): this assumes the ETag is MD5-based. Per the S3
    documentation that is not the case for some server-side encryption modes
    (e.g. SSE-KMS) -- confirm for your bucket.

    Args:
        buffer: Seekable binary file-like object, read from its current
            position to the end. Its position is restored before returning.
        client: Object exposing a boto3-style ``head_object`` method.
        bucket: Name of the bucket that holds the object.
        key: Key of the object inside the bucket.
        version_id: Optional object version; omitted from the request when
            ``None``.

    Returns:
        ``True`` if a locally computed ETag equals the remote one, else
        ``False``.

    Raises:
        ValueError: If the part count after the ``-`` is not an integer.
    """
    request = {"Bucket": bucket, "Key": key}
    if version_id is not None:
        request["VersionId"] = version_id
    response = client.head_object(**request)
    remote_etag = response["ETag"].strip('"')

    # Remember where the caller left the buffer so it can be handed back
    # untouched, whether we match, fail to match, or raise.
    start = buffer.tell()
    try:
        if "-" not in remote_etag:
            # e.g. "bb01862f54c5347bc6be623f237836d5": single-request upload,
            # the ETag is simply the MD5 of the content.
            return _whole_object_etag(buffer) == remote_etag

        # e.g. "bb01862f54c5347bc6be623f237836d5-2": multipart upload.
        number_parts = int(remote_etag.rpartition("-")[2])
        content_length = int(response["ContentLength"])

        for part_size in _candidate_part_sizes(content_length, number_parts):
            buffer.seek(start)
            if _multipart_etag(buffer, part_size) == remote_etag:
                return True
        return False
    finally:
        buffer.seek(start)


# Example run: compare a local file with the copy stored in S3.
client = boto3.client("s3")
with open("my_file.txt", mode="rb") as file:
    matches = etag_matches_buffer(
        buffer=file,
        client=client,
        bucket="my_bucket",
        key="path/to/my_file.txt",
    )
print(matches)  # False/True