Last active
May 13, 2019 22:09
-
-
Save ewencp/6573961 to your computer and use it in GitHub Desktop.
Revisions
-
ewencp revised this gist
Sep 21, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -18,7 +18,7 @@ } /* Otherwise, continue on next block. We need to make sure we get rid of the boundary in the process */ var new_start_idx = extracted_html.indexOf('\n', next_boundary); extracted_html = extracted_html.substr(new_start_idx+1); } -
ewencp revised this gist
Sep 15, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -51,7 +51,7 @@ treated as a prefix so we can match generic sets of tags. Finally, we also have list of globally explicitly attributes that should always be stripped. */ var global_attributes = ['accesskey', 'contenteditable', 'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden', 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang', 'spellcheck', 'style', 'tabindex', 'title']; -
ewencp created this gist
Sep 15, 2013 .There are no files selected for viewing
/*
 * Cleans up the current page so it approximates what a webmail client would
 * render for an HTML email:
 *
 *   1. If the page is the raw text of a multipart email, extract the HTML
 *      part and replace the page contents with it.
 *   2. Remove entire tags that must never be rendered (head, style, link).
 *   3. Strip attributes that are not known to be safe.
 *
 * Runs immediately and returns nothing; its only effect is mutating
 * `document`.
 */
(function() {
    /* Nothing to clean up when there is no page (e.g. loaded outside a
       browser); bail out instead of throwing a ReferenceError. */
    if (typeof document === 'undefined') return;

    /* Prefix of the MIME boundary lines that separate the parts of the
       multipart email. */
    var boundary_pattern = '--===============';

    /* Tags that are removed outright, together with their contents. */
    var excluded_tags = ['head', 'style', 'link'];

    /* Attributes that are always permitted on tags we filter. Each entry is
       treated as a prefix so generic families such as 'data-' can be
       matched. 'class' is deliberately absent: it is in the always-strip
       list below. */
    var global_attributes = ['accesskey', 'contenteditable', 'contextmenu',
                             'data-', 'dir', 'draggable', 'dropzone', 'hidden',
                             'itemid', 'itemprop', 'itemref', 'itemscope',
                             'itemtype', 'lang', 'spellcheck', 'style',
                             'tabindex', 'title'];

    /* Per-tag whitelist (exact names). We don't have a complete list, so
       attributes are only filtered on tags that have an entry here; tags
       without an entry keep everything except the always-strip attributes. */
    var valid_attributes = {
        'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
                  'frame', 'rules', 'width'],
        'tbody': ['align', 'bgcolor', 'valign'],
        'tr': ['align', 'bgcolor', 'valign'],
        'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],
        'img': ['align', 'alt', 'border', 'height', 'src', 'width']
    };

    /* Attributes removed from every element, whatever its tag. */
    var always_strip_attributes = ['id', 'class'];

    /* Returns the HTML part ('<html' ... '/html>') of a raw multipart email,
       or null if raw_text contains no boundary-delimited block with HTML. */
    function extract_html_part(raw_text) {
        var remaining = raw_text;
        var next_boundary = remaining.indexOf(boundary_pattern);
        while (next_boundary !== -1) {
            var next_block = remaining.substr(0, next_boundary);

            /* If this block contains the html, use it. */
            var html_pos = next_block.indexOf('<html');
            if (html_pos !== -1) {
                /* Look for the closing tag only after the opening one. */
                var html_end_pos = next_block.indexOf('/html>', html_pos);
                if (html_end_pos === -1) {
                    /* Unterminated document: take the rest of the block
                       rather than computing a bogus length from -1. */
                    return next_block.substr(html_pos);
                }
                return next_block.substring(html_pos, html_end_pos + '/html>'.length);
            }

            /* Otherwise continue with the next block. Skip past the end of
               the boundary line itself, so the search has to start at the
               boundary, not at the beginning of the text. */
            var new_start_idx = remaining.indexOf('\n', next_boundary);
            if (new_start_idx === -1) {
                /* Boundary is the last line: nothing left to scan. Without
                   this check the text would never shrink and we would loop
                   forever. */
                break;
            }
            remaining = remaining.substr(new_start_idx + 1);
            next_boundary = remaining.indexOf(boundary_pattern);
        }
        return null;
    }

    /* True if name starts with any of the given prefixes. */
    function matches_prefix(prefixes, name) {
        for (var p_i = 0; p_i < prefixes.length; p_i++) {
            if (name.indexOf(prefixes[p_i]) === 0) return true;
        }
        return false;
    }

    /* Returns the names of the attributes of elem that must be removed. */
    function find_attributes_to_remove(elem) {
        var tag_name = elem.tagName.toLowerCase();
        /* Own-property check so tag names such as 'constructor' do not pick
           up members inherited from Object.prototype. */
        var tag_valid_attributes =
            Object.prototype.hasOwnProperty.call(valid_attributes, tag_name) ?
            valid_attributes[tag_name] : null;

        var attribs_to_remove = [];
        for (var i = 0; i < elem.attributes.length; i++) {
            var attrib = elem.attributes[i];
            if (!attrib.specified) continue;

            /* First check if it's in the "always strip" list. */
            if (always_strip_attributes.indexOf(attrib.name) !== -1) {
                attribs_to_remove.push(attrib.name);
                continue;
            }
            /* No explicit list for this tag: let everything else pass. */
            if (!tag_valid_attributes) continue;
            /* Valid global attributes pass. */
            if (matches_prefix(global_attributes, attrib.name)) continue;
            /* Attributes explicitly valid for this tag pass. */
            if (tag_valid_attributes.indexOf(attrib.name) !== -1) continue;

            /* Not in any safe list. */
            attribs_to_remove.push(attrib.name);
        }
        return attribs_to_remove;
    }

    /* First try to handle pages which are actually the raw text of the
       email: extract the HTML part and replace the page with it. If no HTML
       part is found the document is left as it is. */
    var orig_html = document.getElementsByTagName('html')[0].textContent;
    var extracted_html = extract_html_part(orig_html);
    if (extracted_html !== null && extracted_html !== orig_html) {
        document.write(extracted_html);
        /* Writing to an already loaded document implicitly opens it; close
           it so the browser finishes parsing before we walk the DOM. */
        document.close();
    }

    /* Now run through the document clearing out data we shouldn't have.
       Ideally this would match the process that email clients follow.
       Something like GMail or Yahoo Mail, where the data is embedded directly
       in another page, needs to do the most aggressive filtering, so we want
       to match something like that. Our first step is removing entire tags. */
    for (var ex_i = 0; ex_i < excluded_tags.length; ex_i++) {
        var ex_elems = document.getElementsByTagName(excluded_tags[ex_i]);
        /* The collection is live: every removal shrinks it, so keep taking
           the first element instead of advancing an index (which would skip
           every other match). */
        while (ex_elems.length > 0) {
            var node = ex_elems[0];
            node.parentNode.removeChild(node);
        }
    }

    /* And remove attributes that we can't verify. Names are collected first
       and removed afterwards so elem.attributes is not modified while it is
       being iterated. */
    var all_elems = document.getElementsByTagName('*');
    for (var elem_i = 0; elem_i < all_elems.length; elem_i++) {
        var elem = all_elems[elem_i];
        var attribs_to_remove = find_attributes_to_remove(elem);
        for (var ai = 0; ai < attribs_to_remove.length; ai++) {
            elem.removeAttribute(attribs_to_remove[ai]);
        }
    }

    /* TODO: restricted styles still need to be removed; none of that is done
       yet. */
})();