Skip to content

Instantly share code, notes, and snippets.

@tensorturtle
Last active May 27, 2021 11:26
Show Gist options
  • Select an option

  • Save tensorturtle/5f22081c3d5147a59b707a06234bf9bf to your computer and use it in GitHub Desktop.

Select an option

Save tensorturtle/5f22081c3d5147a59b707a06234bf9bf to your computer and use it in GitHub Desktop.
This script uses scikit-image to test a folder of images for corrupted files. Any corrupted files are moved to a new directory, and you can optionally delete them.
import os
import sys
import logging
import subprocess
import argparse
from skimage import io
from tqdm import tqdm
def validate_and_delete(path):
    """Move unreadable images out of ``path`` and offer to delete them.

    Every regular file directly inside ``path`` is read with
    ``skimage.io.imread``. A file whose read raises ``IOError``, ``OSError``
    or ``ValueError`` is moved into a sibling directory named
    ``<path>_invalid``. If that directory is non-empty afterwards, it is
    opened in the Nautilus file browser (when that program can be launched)
    and the user is asked whether its contents should be deleted.

    Args:
        path: Directory containing the images to check.
    """
    entries = os.listdir(path)
    # The quarantine directory lives next to ``path`` as ``<path>_invalid``.
    invalid_path = os.path.abspath(path) + '_invalid'
    print(f"Checking files in {path}...\nAny corrupt files will be moved to {invalid_path}\n")
    if not os.path.isdir(invalid_path):
        os.mkdir(invalid_path)

    with tqdm(total=len(entries), file=sys.stdout) as pbar:
        for counter, filename in enumerate(entries, start=1):
            image_path = os.path.join(path, filename)
            invalid_image_path = os.path.join(invalid_path, filename)
            pbar.update(1)
            if not os.path.isfile(image_path):
                # Sub-directories and other non-regular entries are not
                # images. Quarantining one would also break the cleanup
                # below, because os.remove() cannot delete a directory.
                logging.info(f" {counter}-th entry: not a regular file, skipped.")
                continue
            try:
                # Only success or failure of the read matters; the decoded
                # pixel data is discarded.
                io.imread(image_path)
            except (IOError, OSError, ValueError):
                os.rename(image_path, invalid_image_path)
                logging.warning(f" {counter}-th image: INVALID IMAGE DETECTED! Renamed {image_path} to {invalid_image_path}")
            else:
                logging.info(f" {counter}-th image: verified.")

    if os.listdir(invalid_path):
        try:
            subprocess.Popen(["nautilus", invalid_path])
        except OSError:
            # Nautilus is a convenience only; without it the user can still
            # inspect the directory by hand and answer the prompt.
            logging.warning(f" Could not launch nautilus; inspect {invalid_path} manually.")
        delete_images = yes_or_no(f"Invalid images have been moved to {invalid_path}. \nPlease check them in the file browser.\nDelete them now?")
        if delete_images:
            for filename in os.listdir(invalid_path):
                os.remove(os.path.join(invalid_path, filename))
            os.rmdir(invalid_path)
    else:
        logging.info("No images to delete")
        # Nothing was quarantined, so do not leave an empty directory behind.
        os.rmdir(invalid_path)
def yes_or_no(question):
    """Ask a yes/no question on standard input until a usable reply arrives.

    The first prompt is ``question`` followed by ``(y/n): ``. After an empty
    or unrecognised reply the prompt becomes ``Please enter an answer(y/n): ``.

    Args:
        question: Text shown to the user on the first prompt.

    Returns:
        True if the reply starts with "y", False if it starts with "n".
        Replies are lower-cased and stripped of surrounding whitespace first.
    """
    prompt = question
    # Loop instead of recursing: a long run of invalid replies must not grow
    # the call stack and eventually raise RecursionError.
    while True:
        reply = str(input(prompt + '(y/n): ')).lower().strip()
        # Slicing keeps the empty-reply case safe without a separate length check.
        first = reply[:1]
        if first == 'y':
            return True
        if first == 'n':
            return False
        prompt = "Please enter an answer"
if __name__ == "__main__":
    # Command-line entry point: parse the target folder and run the check.
    parser = argparse.ArgumentParser(
        description="Check that all image files directory are valid; delete any that aren't.")
    # --path is mandatory. Without it args.path would be None and the run
    # would fail with a TypeError rather than an argparse usage message.
    parser.add_argument('--path', type=str, required=True, help='Path to folder containing images')
    args = parser.parse_args()
    validate_and_delete(args.path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment