Last active
December 11, 2019 20:16
-
-
Save eczajk1/5c835805a27746ca02f2c4925f69147e to your computer and use it in GitHub Desktop.
Dump e-mail attachments from a folder of e-mail (.eml) files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const fs = require('fs') | |
| const Promise = require('bluebird'); | |
| const moment = require('moment'); | |
| const { simpleParser } = require('mailparser'); | |
/**
 * Dumps the PDF attachment of every e-mail file found in a folder.
 *
 * Every entry of `inputFolder` is parsed with mailparser's `simpleParser`.
 * A message is used only when it carries exactly one attachment and that
 * attachment's filename ends in `.pdf` (case-insensitive). The attachment
 * content is then written to `<outputFolder>/<YYYY-MM-DD>.pdf`, where the
 * date is the parsed message date formatted with moment.
 *
 * @param {Object} params
 * @param {string} params.inputFolder - Folder holding the e-mail (.eml) files.
 * @param {string} params.outputFolder - Folder receiving the extracted PDFs;
 *   created when it does not exist yet.
 * @returns {Promise<void>} Settles after every file has been processed;
 *   rejects if reading, parsing or writing any file fails.
 */
const run = async (params) => {
  const { inputFolder, outputFolder } = params;

  // writeFileSync does not create missing directories, so make sure the
  // destination exists before the first attachment is written.
  fs.mkdirSync(outputFolder, { recursive: true });

  // make a list of e-mails (.eml files)
  const files = fs.readdirSync(inputFolder);

  // Process the files one after another. The series must be awaited:
  // otherwise 'Done!' is printed before any file is handled and a failure
  // inside the callback becomes an unhandled rejection.
  await Promise.mapSeries(files, async (filename) => {
    // Give the parser the raw bytes; decoding the whole message as UTF-8
    // first could mangle parts that use another charset or 8-bit content.
    const eml = fs.readFileSync(`${inputFolder}/${filename}`);
    const parsed = await simpleParser(eml, {});

    // Only messages with exactly one attachment are considered.
    if (parsed.attachments.length !== 1) {
      return;
    }

    // only do PDFs; an attachment without a filename is skipped instead of
    // crashing on `.split`.
    const [attachment] = parsed.attachments;
    const ext = (attachment.filename || '').split('.').pop().toLowerCase();
    if (ext !== 'pdf') {
      return;
    }

    // dump attachment to a separate file in output folder, named after the
    // message date. NOTE: two messages with the same date map to the same
    // path, so the later one overwrites the earlier one.
    const parsedDate = moment(parsed.date).format('YYYY-MM-DD');
    const attachmentFile = `${outputFolder}/${parsedDate}.${ext}`;
    fs.writeFileSync(attachmentFile, attachment.content);
  });

  console.info('Done!');
};
// Entry point: read e-mails from ./paycheck-emails and write the extracted
// PDFs to ./paychecks (both relative to this script). `run` returns a
// promise, so failures are reported and reflected in the exit code instead
// of being left as an unhandled rejection.
run({
  inputFolder: `${__dirname}/paycheck-emails`,
  outputFolder: `${__dirname}/paychecks`,
}).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const fs = require('fs'); | |
| const hummus = require('hummus'); | |
/**
 * Re-writes every PDF of a folder through `hummus.recrypt`.
 *
 * Each file of `inputFolder` whose name ends in `.pdf` (case-insensitive) is
 * passed to `hummus.recrypt` together with `{ password }` and written under
 * the same name into `outputFolder`. No new user/owner password is supplied
 * to `recrypt`.
 *
 * @param {Object} params
 * @param {string} params.inputFolder - Folder holding the source PDFs.
 * @param {string} params.outputFolder - Folder receiving the re-written PDFs;
 *   created when it does not exist yet.
 * @param {string} [params.password='mypw'] - Password handed to
 *   `hummus.recrypt` for the source files.
 * @returns {Promise<void>} Resolves when all files are processed; rejects if
 *   listing the folder or re-writing a file throws.
 */
const run = async (params) => {
  const { inputFolder, outputFolder, password = 'mypw' } = params;

  // Make sure the destination exists before the first file is written.
  fs.mkdirSync(outputFolder, { recursive: true });

  // Match on the extension only: a substring test would also pick up names
  // such as `report.pdf.bak` and would miss `REPORT.PDF`.
  const pdfFiles = fs
    .readdirSync(inputFolder)
    .filter((filename) => filename.toLowerCase().endsWith('.pdf'));

  for (const filename of pdfFiles) {
    const inputfile = `${inputFolder}/${filename}`;
    const outputfile = `${outputFolder}/${filename}`;
    hummus.recrypt(inputfile, outputfile, { password });
  }
};
// Entry point: re-write the PDFs of ./paychecks into ./paychecks-output
// (both relative to this script). `run` is async, so any error surfaces as a
// rejection; report it and set a failing exit code instead of leaving the
// promise unhandled.
run({
  inputFolder: `${__dirname}/paychecks`,
  outputFolder: `${__dirname}/paychecks-output`,
}).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For this to work, I first needed to save into a folder all of the e-mail messages that I suspected would contain a PDF I wished to extract. I did that part manually.
dump-eml-attachments dumps selected attachments into another folder.
recrypt-pdf-files was used to remove password-protection from those files.