"""
Check whether local data matches the ETag of an AWS S3 object.

The script fetches the object's ETag from S3 and then recomputes the ETag
locally (trying several plausible multipart part sizes) to see if it matches.

Adapted from https://teppen.io/2018/10/23/aws_s3_verify_etags/
"""
from hashlib import md5
from typing import BinaryIO, List, Optional

ONE_MEGABYTE = 1048576

# Uploading clients usually align multipart part sizes to 1MB, 2MB, 5MB, ...
# We do not know which one was used, so each alignment is tried in turn.
_ALIGNMENTS_MB = (1, 2, 5, 8, 16)

# Data is hashed in blocks of this size so a whole part never has to be held
# in memory at once.
_READ_BLOCK = ONE_MEGABYTE


def _plain_md5(buffer: BinaryIO) -> str:
    """Return the hex MD5 of everything from the current position to EOF."""
    hasher = md5()
    for block in iter(lambda: buffer.read(_READ_BLOCK), b""):
        hasher.update(block)
    return hasher.hexdigest()


def _md5_of_next_part(buffer: BinaryIO, part_size: int) -> Optional[bytes]:
    """Return the MD5 digest of the next (up to) ``part_size`` bytes.

    Returns ``None`` when the buffer is already exhausted.
    """
    hasher = md5()
    remaining = part_size
    consumed = False
    while remaining > 0:
        block = buffer.read(min(remaining, _READ_BLOCK))
        if not block:
            break
        hasher.update(block)
        remaining -= len(block)
        consumed = True
    return hasher.digest() if consumed else None


def _multipart_etag(buffer: BinaryIO, part_size: int) -> str:
    """Compute a multipart-style ETag for the rest of ``buffer``.

    The result is the MD5 of the concatenated per-part MD5 digests, followed
    by ``-<number of parts>``.
    """
    digests = list(iter(lambda: _md5_of_next_part(buffer, part_size), None))
    return md5(b"".join(digests)).hexdigest() + "-" + str(len(digests))


def _candidate_part_sizes(content_length: int, number_parts: int) -> List[int]:
    """Return the distinct part sizes worth trying, in order of preference."""
    candidates: List[int] = []
    for megabytes in _ALIGNMENTS_MB:
        step = megabytes * ONE_MEGABYTE
        whole_steps, remainder = divmod(content_length, number_parts * step)
        if remainder == 0:
            # The average part size is already aligned: try it as-is first,
            # then the next multiple (the only size the previous formula
            # considered in this situation).
            options = (whole_steps * step, (whole_steps + 1) * step)
        else:
            # Round the average part size up to the next multiple of step.
            options = ((whole_steps + 1) * step,)
        for size in options:
            if size <= 0 or size in candidates:
                continue
            # A part size that cannot split content_length bytes into exactly
            # number_parts parts would produce a different "-N" suffix, so
            # hashing with it could never match.
            if -(-content_length // size) != number_parts:
                continue
            candidates.append(size)
    return candidates


def etag_matches_buffer(
    buffer: BinaryIO,
    client,
    bucket: str,
    key: str,
    version_id: Optional[str] = None,
) -> bool:
    """Return True if ``buffer`` hashes to the ETag of the given S3 object.

    Args:
        buffer: Seekable binary stream; it is read from its current position
            to EOF, and that position is restored before returning.
        client: Object exposing ``head_object(Bucket=..., Key=...,
            [VersionId=...])`` (e.g. a boto3 S3 client) whose response
            contains ``"ETag"`` and ``"ContentLength"``.
        bucket: Name of the bucket holding the object.
        key: Key of the object within the bucket.
        version_id: Optional object version; only sent when not ``None``.

    Returns:
        ``True`` when the locally computed ETag equals the remote one. For
        multipart ETags this means it matched for at least one candidate
        part size. Objects whose ETag is not MD5-based (AWS documents this
        for some server-side-encryption modes) will simply not match.

    Raises:
        ValueError: If the remote ETag has a non-numeric part-count suffix.
    """
    # Get the remote ETag.
    extra = {} if version_id is None else {"VersionId": version_id}
    response = client.head_object(Bucket=bucket, Key=key, **extra)
    remote_etag = response["ETag"].strip('"')

    # Remember where the caller left the buffer so it can be restored on
    # every exit path (match, mismatch or error).
    start = buffer.tell()
    try:
        if "-" not in remote_etag:
            # e.g. "bb01862f54c5347bc6be623f237836d5": a single-part upload,
            # whose ETag is the plain MD5 of the content (no "-N" suffix).
            return _plain_md5(buffer) == remote_etag

        # e.g. "bb01862f54c5347bc6be623f237836d5-2": a multipart upload.
        number_parts = int(remote_etag.rpartition("-")[2])
        if number_parts < 1:
            return False

        content_length = int(response["ContentLength"])
        for part_size in _candidate_part_sizes(content_length, number_parts):
            buffer.seek(start)
            if _multipart_etag(buffer, part_size) == remote_etag:
                return True
        return False
    finally:
        buffer.seek(start)


def main() -> None:
    """Example: compare a local file with an object stored in S3."""
    # Imported here so the helper above can be imported (and tested) without
    # boto3 being installed; only this example needs a real S3 client.
    import boto3

    client = boto3.client("s3")
    with open("my_file.txt", "rb") as file:
        matches = etag_matches_buffer(
            file,
            client,
            "my_bucket",
            "path/to/my_file.txt",
        )
    print(matches)  # False/True


if __name__ == "__main__":
    main()