Created
April 3, 2015 00:55
-
-
Save sebleblanc/da93244228cb5acd7789 to your computer and use it in GitHub Desktop.
Extract images from a multipart email message.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Extract images from a multipart email message. | |
| This script will open an email file named "email.txt" and | |
| extract any file that matches the content type 'image/.*'. | |
| It will save it in a folder called "images". | |
| The format of the email is the same as the format you get when | |
| you choose "Show Original" in Gmail. | |
| """ | |
| from email import message_from_binary_file | |
| from re import match | |
| import os | |
| import posixpath | |
# Path separators, other than "/", that the current platform understands
# (for example "\\" on Windows).  A filename containing one of these could
# slip past the POSIX-style normalisation done in safe_join().
_os_alt_seps = [sep for sep in (os.path.sep, os.path.altsep)
                if sep not in (None, '/')]


def safe_join(directory, filename):
    """Join *filename* onto *directory* without letting it escape.

    Adapted from ``werkzeug.security.safe_join``.

    :param directory: the trusted base directory.
    :param filename: an untrusted, "/"-separated relative path.
    :return: the joined path, or ``None`` when *filename* is absolute,
        contains a platform-specific separator, or would climb out of
        *directory*.
    """
    filename = posixpath.normpath(filename)

    # Reject platform-specific separators that normpath() did not touch.
    if any(sep in filename for sep in _os_alt_seps):
        return None

    # After normalisation any remaining ".." components are leading ones,
    # so the path escapes exactly when it is ".." or starts with "../".
    # The explicit "/" test covers platforms where os.path.isabs() does
    # not consider a bare leading slash to be absolute.
    if (os.path.isabs(filename)
            or filename.startswith('/')
            or filename == '..'
            or filename.startswith('../')):
        return None

    return os.path.join(directory, filename)
def main():
    """Extract every image part of "email.txt" into the "images" folder.

    The message is read from the file "email.txt" in the current working
    directory.  Each MIME part whose content type starts with "image/" is
    decoded according to its Content-Transfer-Encoding and written, as raw
    bytes, to "images/<attachment filename>".

    Parts without a filename, or whose filename would resolve outside the
    output folder, are skipped with a message instead of aborting the run.
    """
    output_folder = "images"
    pattern = r'^image/'

    # Parse inside a context manager so the input file is always closed.
    with open("email.txt", 'rb') as email_file:
        message = message_from_binary_file(email_file)

    os.makedirs(output_folder, exist_ok=True)

    for part in message.walk():
        if not match(pattern, part.get_content_type()):
            continue

        # Inline images frequently carry no filename at all.
        name = part.get_filename()
        if not name:
            print("Skipping an image part that has no filename")
            continue

        # The filename comes from the (untrusted) email; safe_join returns
        # None when it would point outside of the output folder.
        filename = safe_join(output_folder, name)
        if filename is None:
            print("Skipping unsafe filename " + repr(name))
            continue

        # decode=True undoes the transfer encoding (usually base64) and
        # yields bytes; it yields None for a multipart container.
        data = part.get_payload(decode=True)
        if data is None:
            print("Skipping " + repr(name) + ": no decodable payload")
            continue

        with open(filename, "wb") as fp:
            print("Writing to " + filename)
            fp.write(data)
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment