Skip to content

Instantly share code, notes, and snippets.

@rolandog
Last active April 23, 2018 16:40
Show Gist options
  • Select an option

  • Save rolandog/50f1ae4e1900b62922f34b3ae278d451 to your computer and use it in GitHub Desktop.

Select an option

Save rolandog/50f1ae4e1900b62922f34b3ae278d451 to your computer and use it in GitHub Desktop.
Creates a SHA-256 digest of files in a directory
# -*- coding: utf-8 -*-
"""
Script to create a SHA-256 digest of all files in a directory (the current directory by default)
Based on an answer by Richard Neumann on Code Review
https://codereview.stackexchange.com/a/147191
Based on the recommendation that file digests be created nowadays
https://en.wikipedia.org/wiki/File_verification#File_formats
Created on Fri Apr 20 12:07:41 2018
@author: rolandog
"""
from os import getcwd, listdir
from os.path import join, isfile
from time import strftime
from hashlib import sha256
def list_files(basedir=None):
    """Yield ``(path, name)`` for each regular file directly inside *basedir*.

    Names are visited in sorted order so that repeated runs over the same
    directory yield the same sequence; ``listdir`` itself returns entries in
    arbitrary order, which would make the resulting digest file differ from
    run to run.

    Args:
        basedir: Directory to scan. The current working directory is used
            when this is None.

    Yields:
        Tuples ``(path, name)`` where ``path`` is ``join(basedir, name)``.
        Entries that are not regular files are omitted, and so is any entry
        whose name contains ``"sha256-digest"``.
    """
    if basedir is None:
        basedir = getcwd()
    for item in sorted(listdir(basedir)):
        # skip digest files so we never list a hash of our own hash digest
        if "sha256-digest" in item:
            continue
        path = join(basedir, item)
        if isfile(path):
            # callers need both the full path (to read) and the name (to report)
            yield (path, item)
def sha256sum(file_name, block_size=None):
    """Compute the SHA-256 hex digest of a file, reading it in chunks.

    Args:
        file_name: Path of the file to hash; it is opened in binary mode.
        block_size: Number of bytes requested per read. 4096 is used when
            this is None.

    Returns:
        The hexadecimal digest string of the file contents.
    """
    chunk_size = 4096 if block_size is None else block_size
    digest = sha256()
    with open(file_name, "rb") as stream:
        # iter() with a sentinel stops at the first empty read
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def sha256sums(basedir=None, block_size=None):
    """Generate one ``(<sha256sum>, <file_name>)`` tuple per file in basedir.

    Both arguments are forwarded unchanged: ``basedir`` to ``list_files``
    and ``block_size`` to ``sha256sum``.
    """
    for path, name in list_files(basedir=basedir):
        digest = sha256sum(path, block_size=block_size)
        yield digest, name
def create_sha256_digest(basedir=None, block_size=None, outputdir=None):
    """Write a timestamped sha256-digest file covering the files in basedir.

    The file is named ``sha256-digest_<YYYYmmdd-HHMMSS>`` and is created in
    ``outputdir`` (the current working directory when None). Each line has
    the form ``<hexdigest> *<file_name>``.

    Args:
        basedir: Directory whose files are hashed; passed to ``sha256sums``.
        block_size: Read size in bytes; passed to ``sha256sums``.
        outputdir: Directory in which the digest file is written.
    """
    digest_name = strftime("sha256-digest_%Y%m%d-%H%M%S")
    target_dir = getcwd() if outputdir is None else outputdir
    digest_path = join(target_dir, digest_name)
    with open(digest_path, "w") as out:
        out.writelines(
            " *".join(entry) + "\n"
            for entry in sha256sums(basedir, block_size)
        )
if __name__ == "__main__":
    import argparse

    # Command-line entry point. Every option is optional and is left as None
    # when omitted (argparse's defaults), so create_sha256_digest applies its
    # own fallbacks.
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        "-d", "--directory", type=str,
        help="Path, str, to the directory of the files")
    PARSER.add_argument(
        "-b", "--blocksize", type=int,
        help="Block size, int, in bytes to read from files")
    PARSER.add_argument(
        "-o", "--outputdir", type=str,
        help="Output directory, str, for sha256 digest")
    ARGUMENTS = PARSER.parse_args()
    create_sha256_digest(
        basedir=ARGUMENTS.directory,
        block_size=ARGUMENTS.blocksize,
        outputdir=ARGUMENTS.outputdir,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment