Skip to content

Instantly share code, notes, and snippets.

@sebleblanc
Created April 3, 2015 00:55
Show Gist options
  • Select an option

  • Save sebleblanc/da93244228cb5acd7789 to your computer and use it in GitHub Desktop.

Select an option

Save sebleblanc/da93244228cb5acd7789 to your computer and use it in GitHub Desktop.
Extract images from a multipart email message.
"""
Extract images from a multipart email message.

This script opens an email file named "email.txt" and extracts every
part whose content type matches 'image/.*', saving each one in a
folder called "images".

The email must be in the raw format that Gmail shows when you choose
"Show Original".
"""
from email import message_from_binary_file
from re import match
import os
import posixpath
# Path separators used by the host OS other than "/". A filename containing
# one of these could smuggle in extra path components that the POSIX-style
# normalisation in safe_join() does not see.
_os_alt_seps = [sep for sep in (os.path.sep, os.path.altsep)
                if sep not in (None, '/')]


def safe_join(directory, filename):
    """Join *filename* onto *directory* without letting it escape.

    The filename is normalised with POSIX rules first, so sequences such
    as ``a/../b`` are collapsed before any check is made.

    :param directory: the trusted base directory.
    :param filename: the untrusted, "/"-separated relative path.
    :returns: the joined path, or ``None`` when *filename* is absolute,
        contains an OS-specific alternative separator, or would resolve
        to a location outside *directory*.
    """
    # From werkzeug.security
    filename = posixpath.normpath(filename)
    if any(sep in filename for sep in _os_alt_seps):
        return None
    # After normalisation any upward traversal is at the front of the path:
    # either the whole path is ".." or it starts with "../". Checking only
    # the "../" prefix would let a bare ".." through.
    if (os.path.isabs(filename)
            or filename == '..'
            or filename.startswith('../')):
        return None
    return os.path.join(directory, filename)
def main():
    """Extract every image part of "email.txt" into the "images" folder.

    The message is read from "email.txt" in the current directory. Each
    part whose content type starts with ``image/`` is decoded according
    to its Content-Transfer-Encoding and written, as raw bytes, to a file
    inside "images" named after the part's filename.

    Parts that carry no filename, whose filename is rejected by
    ``safe_join`` (it would land outside the output folder), or that have
    no decodable payload are skipped with a message instead of aborting
    the whole run.
    """
    output_folder = "images"
    pattern = r'^image/'

    # Parse inside a context manager so the input file is closed as soon
    # as the message has been read.
    with open("email.txt", 'rb') as source:
        message = message_from_binary_file(source)

    images = [part for part in message.walk()
              if match(pattern, part.get_content_type())]

    os.makedirs(output_folder, exist_ok=True)

    for image in images:
        name = image.get_filename()
        if not name:
            # Inline images may have no filename at all.
            print("Skipping image part without a filename")
            continue

        filename = safe_join(output_folder, name)
        if filename is None:
            print("Skipping unsafe filename " + repr(name))
            continue

        # decode=True undoes the transfer encoding (e.g. base64) and
        # yields bytes; without it the still-encoded text is returned.
        payload = image.get_payload(decode=True)
        if payload is None:
            print("Skipping " + repr(name) + ": no decodable payload")
            continue

        # The attachment name may contain sub-directories.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, "wb") as fp:
            print("Writing to " + filename)
            fp.write(payload)
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment