Skip to content

Instantly share code, notes, and snippets.

@mchesterkadwell
Created April 16, 2017 21:16
Show Gist options
  • Select an option

  • Save mchesterkadwell/9c8173d11d2a821f3d4a222fd032dd5e to your computer and use it in GitHub Desktop.

Select an option

Save mchesterkadwell/9c8173d11d2a821f3d4a222fd032dd5e to your computer and use it in GitHub Desktop.
Python 3 face detector with comments & changes
#!/usr/bin/env python
from SPARQLWrapper import SPARQLWrapper, JSON
import urllib.request, urllib.parse, urllib.error
import os
from PIL import Image
import subprocess
import cv2
import argparse
import time
### Add import math for ceiling function
import math
# Retrieve images from British Museum Research Space and perform montage and facial recognition
# Daniel Pett 21/3/2017
# British Museum content is under a CC-BY-SA-NC license
### Tested on Python 3.5.1 by Mary Chester-Kadwell
__author__ = "Daniel Pett"
__credits__ = ["Richard Wareham", "Ben O'Steen", "Matthew Vincent"]
__license__ = 'MIT'
__version__ = "1.0.1"
__maintainer__ = "Daniel Pett"
__email__ = "dpett@britishmuseum.org"
def make_executable(path):
"""
Make the file executable
:param path:
:return:
"""
mode = os.stat(path).st_mode
mode |= (mode & 0o444) >> 2 # copy R bits to X
os.chmod(path, mode)
# Function defined for resize and crop of an image
def resize_and_crop(img_path, modified_path, size, crop_type='top'):
"""
Resize and crop an image to fit the specified size.
args:
img_path: path for the image to resize.
modified_path: path to save the modified image.
size: (width, height) a tuple. Eg (300,300)
crop_type: 3 options 'top', 'middle' or 'bottom'
raises:
Exception: if this script cannot open the file provided by img_path, then the image will not save
ValueError: thrown if an invalid `crop_type` is provided as an argument
"""
# Resizing is done in this order: if height is higher than width resize is vertical, default is horizontal resize
img = Image.open(img_path)
# Get current and desired ratio for the images
### Add conversion to int as Python 3 division operator '/' expects numbers
### Remove conversion to float as Python 3 division operator '/' is 'true' division and produces a float
img_ratio = int(img.size[0]) / int(img.size[1])
ratio = int(size[0]) / int(size[1])
# As mentioned above, the image is scaled and cropped vertically or
# horizontally depending on the ratio.
### Change division operator '/' to Python 3 floor division '//' throughout as Wand (ImageMagick) functions expect int not float
try:
if ratio > img_ratio:
img = img.resize((size[0], size[0] * int(img.size[1]) // int(img.size[0])),
Image.ANTIALIAS)
# Switch for position of crop
if crop_type == 'top':
box = (0, 0, img.size[0], size[1])
elif crop_type == 'middle':
box = (0, (img.size[1] - size[1]) // 2,
img.size[0], (img.size[1] + size[1]) // 2)
elif crop_type == 'bottom':
box = (0, img.size[1] - size[1], img.size[0], img.size[1])
else:
raise ValueError('Error detected: Option is not valid for crop type')
img = img.crop(box)
elif ratio < img_ratio:
img = img.resize((size[1] * img.size[0] // img.size[1], size[1]),
Image.ANTIALIAS)
# Switch for position of crop
if crop_type == 'top':
box = (0, 0, size[0], img.size[1])
elif crop_type == 'middle':
box = ((img.size[0] - size[0]) // 2, 0,
(img.size[0] + size[0]) // 2, img.size[1])
elif crop_type == 'bottom':
box = (img.size[0] - size[0], 0, img.size[0], img.size[1])
else:
raise ValueError('Error detected: Option is not valid for crop type')
img = img.crop(box)
else:
img = img.resize((size[0], size[1]),
Image.ANTIALIAS)
except Exception as ex:
template = "An exception of type {0} occurred. Arguments:\n{1!r}"
message = template.format(type(ex).__name__, ex.args)
print(message)
img.save(modified_path)
def count_files(path, extension):
"""
Count number of files of a specific extension
:param path:
:param extension:
:return:
"""
list_dir = []
list_dir = os.listdir(path)
count = 0
for fn in list_dir:
if fn.endswith(extension):
# eg: '.jpg'
count += 1
return count
### Using the file containing the list of resized images appears to be unnecessary ?
### Refactored to just iterate through the images available in the directory
### This also has the benefit of solving another bug whereby the script looks for files.txt in the base directory
### even though it has actually been written into the 'resized' directory
def create_montage():
"""
Create the montage if at least one resized image exists with a try catch block
:return:
"""
if (count_files(paths[args.resized], ".jpg") > 0):
print("Resized images exist")
try:
# This will produce multiple tiles for large results
print("Now creating image montage of all retrieved (resized) images")
subprocess.call("montage -border 0 -geometry 660x -tile 10x10 " +
args.resized + "/* " +
"/".join([args.montages, args.output]) + ".jpg",
shell=True)
print("Now resizing image montage of all retrieved (resized) images")
subprocess.call("convert " +
"/".join([args.montages, args.output]) + ".jpg -resize 750 " +
"/".join([args.montages, args.output]) + "_montage_750w.jpg",
shell=True)
# This call makes a montage of the faces detected
print("Now creating image montage of all faces detected in images")
subprocess.call("montage -border 0 -geometry 660x -tile " + dims + " " +
args.faces + "/* " +
"/".join([args.montages, args.output]) + "Faces.jpg",
shell=True)
print("Now resizing image montage of all faces detected in images")
subprocess.call("convert " +
"/".join([args.montages, args.output]) + "Faces.jpg -resize 750 " +
"/".join([args.montages, args.output]) + "Faces_montage_750w.jpg",
shell=True)
except:
# The process failed
raise ValueError("Montage generation failed")
else:
print("No resized images exist")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='A script for retrieving images from British Museum Research Space and '
'perform montage and facial recognition')
parser.add_argument('-p', '--path', help='The path in which to run this script', required=True)
# An example would be: --path '/Users/danielpett/githubProjects/scripts/'
parser.add_argument('-d', '--download', help='The path in which to place downloaded original images', required=True)
# An example would be: --download '/Users/danielpett/githubProjects/scripts/bmImages/'
parser.add_argument('-r', '--resized', help='The path in which to place resized original images', required=True)
# An example would be: --resized '/Users/danielpett/githubProjects/scripts/bmImagesResized/'
parser.add_argument('-f', '--faces', help='The path in which to place images of detected faces', required=True)
# An example would be: --faces '/Users/danielpett/githubProjects/scripts/facesDetected/'
### Add extra arg to specify folder for highlights otherwise it always goes into the base directory
parser.add_argument('-i', '--highlights', help='The path in which to place original images with detected faces highlighted', required=True)
# An example would be: --highlights '/Users/danielpett/githubProjects/scripts/bmImagesFaceHighlights/'
parser.add_argument('-m', '--montages', help='The path in which to place montage images', required=True)
# An example would be: --montages '/Users/danielpett/githubProjects/scripts/montages/'
### Add type=int as Wand (ImageMagick) functions must have type int
parser.add_argument('-s', '--size', help='The resize dimensions', type=int, required=False, default=300)
# An example would be: --resized '/Users/danielpett/githubProjects/scripts/bmimagesResized/'
parser.add_argument('-o', '--output', help='The file name output for image magick', required=False,
default='britishMuseumImages')
# An example would be 'britishMuseumPortraits'
parser.add_argument('-t', '--template', help='The spaqrl query template to use', required=False, default='default')
# An example would be 'default' as this is concatenated to default.txt
parser.add_argument('-q', '--query', help='The spaqrl query string to use', required=False, default='bust')
# An example would be 'bust'
parser.add_argument('-e', '--endpoint', help='The spaqrl endpoint to use', required=False,
default='http://collection.britishmuseum.org/sparql')
# Parse arguments
args = parser.parse_args()
# Set base path
basePath = args.path
# Define the base directories
### Add 'args.highlights' for highlights directory
paths = {x: os.path.join(basePath, x) for x in
[args.download, args.resized, args.montages, args.faces, 'opencv', args.highlights]}
# Create them if they don't already exist
for path in list(paths.values()):
if not os.path.exists(path):
os.makedirs(path)
# Set up your sparql endpoint
sparql = SPARQLWrapper(args.endpoint)
# Read text file sparql query
with open("sparql/" + args.template + ".txt", "r") as sparqlQuery:
# Format the query string retrieved from the text file with simple replacement
query = sparqlQuery.read().format(string=args.query)
# Return the query for the user to see
print("Your sparql query reads as: \n" + query)
# Set your query
sparql.setQuery(query)
# Return the JSON triples
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
### file.txt seems unnecessary ?
### so commented out
# # Open the file for writing urls (this is for image magick)
# listImages = open(os.path.join(paths[args.resized], "files.txt"), 'w')
# Iterate over the results
for result in results["results"]["bindings"]:
image = result["image"]["value"]
if os.path.isfile(os.path.join(paths[args.download], os.path.basename(image))):
print("File already exists")
else:
path = os.path.join(paths[args.download], os.path.basename(image))
urllib.request.urlretrieve(image, path)
print("Image " + os.path.basename(image) + " downloaded")
# ### Writing the resized image paths to file seems unnecessary ?
# ### so commented out
# for fn in os.listdir(paths[args.download]):
# if not fn.startswith('.'):
# listImages.write(os.path.join(paths[args.resized], os.path.basename(fn)) + "\n")
# print("Image path written to file")
# Resize and crop the 'download' files into the 'resized' directory
# Iterate through files and crop as required
for fn in os.listdir(paths[args.download]):
# Make sure file is not a hidden one etc
if not fn.startswith('.') and os.path.isfile(os.path.join(paths[args.download], fn)):
# Open the file checking if it is valid or not. It fails otherwise :-(
try:
if not os.path.exists(os.path.join(paths[args.resized], fn)):
resize_and_crop(os.path.join(paths[args.download], fn),
os.path.join(paths[args.resized], fn),
(args.size, args.size))
print(fn + " resized")
else:
print("Resized file exists")
except:
pass
# Amended to be relevant to the base path?
cascPath = os.path.join(paths["opencv"], "haarcascade_frontalface_default.xml")
# Check you have this file, if not get it
if not os.path.isfile(cascPath):
haar = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"
urllib.request.urlretrieve(haar, cascPath)
# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)
start = time.time()
for fn in os.listdir(paths[args.download]):
if not fn.startswith('.'):
print("Detecting faces in " + os.path.join(paths[args.download], fn))
image = cv2.imread(os.path.join(paths[args.download], fn))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=5,
minSize=(150, 150),
flags=cv2.CASCADE_SCALE_IMAGE
)
left = 10
right = 10
top = 10
bottom = 10
print("Found {0} faces within the image".format(len(faces)))
if len(faces) > 0:
for (x, y, w, h) in faces:
cv2.rectangle(image, (x, y), (x + w, y + h), (102, 0, 204), 2)
### Add 'args.highlights' for highlights directory
rectName = os.path.join(paths[args.highlights], "high_{1}_{0}".format(str(fn), str(x)))
cv2.imwrite(rectName, image)
print("Written highlighted face " + rectName)
### Image is acquired again from 'download' directory so that crop does not include rectangle
### as drawn in the previous step
### Side effect of this is to prevent crops happening off the edge of the image if there is more than
### one face detected - previously this had been creating 'bad' empty files that needed to be
### handled below by checking for filesize
### Further side effect of this is previously unrealised detected faces have surfaced from the
### demonstration query (1 of these is a new face, 1 is a duplicate face, 2 are not faces)
image = cv2.imread(os.path.join(paths[args.download], fn))
image = image[y - top:y + h + bottom, x - left:x + w + right]
filename = os.path.join(paths[args.faces], "cropped_{1}_{0}".format(str(fn), str(x)))
### Move 'print("Written cropped face " + filename)' as the file is cropped close to the face below
if not os.path.exists(filename):
cv2.imwrite(filename, image)
### Move line from above here as this is where the image is actually cropped close to the face
print("Written cropped face " + filename)
filesize = os.stat(filename).st_size
try:
if not filesize == 0:
resize_and_crop(filename, filename, (args.size, args.size), crop_type='middle')
else:
print(filename + " is likely to be broken.")
os.remove(filename)
print(filename + " has therefore been removed.")
except:
pass
end = time.time()
print("The time taken to process face detection was: " + "--- %s seconds ---" % (end - start))
# ### Apparently duplicated and therefore unnecessary code from above ?
# ### commented out
# #Iterate through files and crop as required
# for fn in os.listdir(paths[args.faces]):
# # Make sure file is not a hidden one etc
# if not fn.startswith('.') and os.path.isfile(os.path.join(paths[args.faces], fn)):
# # Open the file checking if it is valid or not. It fails otherwise :-(
# try:
# if not os.path.exists(os.path.join(paths[args.faces], fn)):
# resize_and_crop(os.path.join(paths[args.faces], fn), os.path.join(paths[args.faces], fn), (args.size, args.size))
# print(fn + " resized")
# else:
# print("Resized file " + fn + " exists")
# except:
# pass
a = count_files(paths[args.faces], ".jpg")
print(str(a) + " faces were identified in total")
### Equation 'a/10' does not provide sufficient rows unless number of images is an exact multiple of 10
### Change equation to add math.ceil(), which returns smallest int >= input
dims = "10x" + str(math.ceil(a / 10))
print("The dimensions of the montage are " + dims)
create_montage()
print("Facial detection complete")
@portableant
Copy link

Cool fixes. Re files.txt - when tiling large numbers of files with imagick, this can sometimes be the best method. So what did you make?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment