Last active
May 27, 2021 11:26
-
-
Save tensorturtle/5f22081c3d5147a59b707a06234bf9bf to your computer and use it in GitHub Desktop.
This script uses scikit-image to test a folder of images for corrupted files. Any corrupted files are moved to a new directory, and you can optionally delete them.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import sys | |
| import logging | |
| import subprocess | |
| import argparse | |
| from skimage import io | |
| from tqdm import tqdm | |
def validate_and_delete(path):
    """Move unreadable images out of ``path`` and offer to delete them.

    Every regular file directly inside ``path`` is opened with
    ``skimage.io.imread``. Files that fail to load are moved into a sibling
    folder named ``<abspath(path)>_invalid``. If that folder is non-empty
    afterwards, it is opened in the Nautilus file browser (when available)
    and the user is asked whether to delete its contents.

    Args:
        path: Directory containing the images to check.
    """
    # Only regular files are candidates. Calling imread() on a sub-directory
    # raises OSError, which would get the directory moved to the invalid
    # folder and later make os.remove() fail during deletion.
    filenames = [
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    ]

    # The folder for invalid files sits next to ``path``, not inside it.
    invalid_path = os.path.abspath(path) + '_invalid'
    print(f"Checking files in {path}...\nAny corrupt files will be moved to {invalid_path}\n")
    if not os.path.isdir(invalid_path):
        os.mkdir(invalid_path)

    with tqdm(total=len(filenames), file=sys.stdout) as pbar:
        for counter, filename in enumerate(filenames, start=1):
            image_path = os.path.join(path, filename)
            invalid_image_path = os.path.join(invalid_path, filename)
            pbar.update(1)
            try:
                # The decoded image is discarded; only success or failure
                # of the load matters here.
                io.imread(image_path)
            except (OSError, ValueError):
                os.rename(image_path, invalid_image_path)
                logging.warning(f" {counter}-th image: INVALID IMAGE DETECTED! Renamed {image_path} to {invalid_image_path}")
            else:
                logging.info(f" {counter}-th image: verified.")

    if os.listdir(invalid_path):
        try:
            subprocess.Popen(["nautilus", invalid_path])
        except OSError:
            # Nautilus is not installed or could not be started; the user
            # can still inspect the folder manually before answering.
            logging.warning("Could not open a file browser for %s", invalid_path)

        delete_images = yes_or_no(f"Invalid images have been moved to {invalid_path}. \nPlease check them in the file browser.\nDelete them now?")
        if delete_images:
            # Everything currently in the folder is removed, including any
            # files that were already there before this run.
            for filename in os.listdir(invalid_path):
                os.remove(os.path.join(invalid_path, filename))
            os.rmdir(invalid_path)
    else:
        logging.info("No images to delete")
def yes_or_no(question):
    """Ask a yes/no question on the console until a usable answer is given.

    The prompt shown is ``question`` followed by ``(y/n): ``. The reply is
    lower-cased and stripped; only its first character is examined. After
    an empty or unrecognised reply the prompt becomes
    ``Please enter an answer(y/n): `` and the user is asked again.

    Args:
        question: Text displayed before the ``(y/n): `` suffix.

    Returns:
        True if the reply starts with ``y``, False if it starts with ``n``.
    """
    prompt = question
    # Loop instead of recursing so that any number of invalid replies can
    # be handled without hitting the interpreter's recursion limit.
    while True:
        reply = input(prompt + '(y/n): ').lower().strip()
        first = reply[:1]  # empty string when nothing was typed
        if first == 'y':
            return True
        if first == 'n':
            return False
        prompt = "Please enter an answer"
if __name__ == "__main__":
    # Command-line entry point: parse the target folder and run the check.
    parser = argparse.ArgumentParser(
        description="Check that all image files directory are valid; delete any that aren't.")
    # --path is mandatory: without it args.path would be None, and the
    # check cannot run on a missing folder. argparse now reports a usage
    # error instead of failing later inside validate_and_delete.
    parser.add_argument('--path', type=str, required=True,
                        help='Path to folder containing images')
    args = parser.parse_args()
    validate_and_delete(args.path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment