ewencp · May 13, 2019 22:09 · Sep 21, 2013 · Sep 15, 2013 · Sep 15, 2013
diff --git a/html_email_preview.js b/html_email_preview.js
@@ -18,7 +18,7 @@
         }
         /* Otherwise, continue on next block. We need to make sure we get rid of
            the boundary in the process */
-        var new_start_idx = extracted_html.indexOf('\n', boundary_pattern);
+        var new_start_idx = extracted_html.indexOf('\n', next_boundary);
         extracted_html = extracted_html.substr(new_start_idx+1);
     }
 

diff --git a/html_email_preview.js b/html_email_preview.js
@@ -51,7 +51,7 @@
       treated as a prefix so we can match generic sets of tags. Finally, we also
       have list of globally explicitly  attributes that should always be
       stripped. */
-    var global_attributes = ['accesskey', 'class', 'contenteditable',
+    var global_attributes = ['accesskey', 'contenteditable',
       'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
       'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
       'spellcheck', 'style', 'tabindex', 'title'];

diff --git a/html_email_preview.js b/html_email_preview.js
@@ -0,0 +1,126 @@
+(function() {
+    /* First handle pages whose content is actually the raw text of an email:
+       extract the HTML part and replace the page with it. */
+    var orig_html = document.getElementsByTagName('html')[0].textContent;
+    var extracted_html = orig_html;
+    /* If the text contains multipart boundary markers, walk it block by block looking for the
+       HTML part. Otherwise extracted_html stays equal to orig_html and the page is left as-is. */
+    var boundary_pattern = '--===============';
+    while (extracted_html.indexOf(boundary_pattern) != -1) {
+        var next_boundary = extracted_html.indexOf(boundary_pattern);
+        var next_block = extracted_html.substr(0, next_boundary);
+        /* If this block contains '<html', keep the text from '<html' through '/html>' (the +6 covers '/html>') and stop searching. */
+        var html_pos = next_block.indexOf('<html');
+        if (html_pos != -1) {
+            var html_end_pos = next_block.indexOf('/html>');
+            extracted_html = next_block.substr(html_pos, html_end_pos-html_pos+6);
+            break;
+        }
+        /* Otherwise, continue on next block. We need to make sure we get rid of
+           the boundary in the process */
+        var new_start_idx = extracted_html.indexOf('\n', boundary_pattern);
+        extracted_html = extracted_html.substr(new_start_idx+1);
+    }
+
+    /* Write the extracted HTML into the document, but only if extraction changed something. */
+    if (extracted_html != orig_html) {
+        document.write(extracted_html);
+    }
+
+    /* Now run through the document clearing out data we shouldn't have. Ideally
+       this would match the process that email clients follow. Webmail such as GMail
+       or Yahoo Mail, which embeds the message directly in another page, needs the
+       most aggressive filtering, so that is what we aim to match. First step: remove entire tags.
+       NOTE(review): getElementsByTagName returns a live collection, so removing while indexing forward may skip matches - confirm. */
+    var excluded_tags = ['head', 'style', 'link'];
+    for(var ex_i = 0; ex_i < excluded_tags.length; ex_i++) {
+        var ex_elems = document.getElementsByTagName(excluded_tags[ex_i]);
+        for (var exe_i = 0; exe_i < ex_elems.length; exe_i++) {
+            var node = ex_elems[exe_i];
+            node.parentNode.removeChild(node);
+        }
+    }
+
+    /* Next, remove attributes that we can't verify. We don't have a complete
+      list, so, apart from always_strip_attributes, attributes are only filtered
+      on tags that have an explicit entry in valid_attributes. A blacklist of
+      attributes would be nice, but since the list of possible tags is ever growing
+      and people generate non-conforming HTML for emails, we can't do that.
+
+      Some global attributes are always permitted. Each attribute is
+      treated as a prefix so we can match generic sets of tags. Finally, we also
+      have list of globally explicitly  attributes that should always be
+      stripped. */
+    var global_attributes = ['accesskey', 'class', 'contenteditable',
+      'contextmenu', 'data-', 'dir', 'draggable', 'dropzone', 'hidden',
+      'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype', 'lang',
+      'spellcheck', 'style', 'tabindex', 'title'];
+    var valid_attributes = {
+        'table': ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing',
+                  'frame', 'rules', 'width'],
+        'tbody': ['align', 'bgcolor', 'valign'],
+        'tr': ['align', 'bgcolor', 'valign'],
+        'td': ['align', 'bgcolor', 'colspan', 'rowspan', 'valign'],
+
+        'img': ['align', 'alt', 'border', 'height', 'src', 'width'],
+    };
+    var always_strip_attributes = ['id', 'class'];
+
+    var all_elems = document.getElementsByTagName('*');
+    for(var elem_i = 0; elem_i < all_elems.length; elem_i++) {
+        var elem = all_elems[elem_i];
+        var attribs_to_remove = [];
+        for(var i = 0; i < elem.attributes.length; i++) {
+            var attrib = elem.attributes[i];
+            var done = false;
+            if (!attrib.specified)
+                continue;
+            /* First: anything in the "always strip" list is removed, whatever the tag (this runs first, so 'class' is stripped even though it is also in global_attributes). */
+            for(var ai = 0; ai < always_strip_attributes.length; ai++) {
+                if (always_strip_attributes[ai] == attrib.name) {
+                    attribs_to_remove.push(attrib.name);
+                    done = true;
+                    break;
+                }
+            }
+            if (done) continue;
+
+            /* Tags without an entry in valid_attributes are not filtered any further.
+               Otherwise, an attribute whose name starts with a global attribute prefix passes. */
+            var tag_valid_attributes = valid_attributes[elem.tagName.toLowerCase()];
+            if (!tag_valid_attributes) continue;
+            for(var ai = 0; ai < global_attributes.length; ai++) {
+                var global_attrib_prefix = global_attributes[ai];
+                if (attrib.name.indexOf(global_attrib_prefix) == 0) {
+                    /* Marking it done without queueing it for removal
+                       keeps the attribute. */
+                    done = true;
+                    break;
+                }
+            }
+            if (done) continue;
+
+            /* Finally, keep the attribute if its name exactly matches one of the
+               attributes allowed for this tag. */
+            for(var ai = 0; ai < tag_valid_attributes.length; ai++) {
+                var valid_attrib = tag_valid_attributes[ai];
+                if (valid_attrib == attrib.name) {
+                    done = true;
+                    break;
+                }
+            }
+            if (done) continue;
+            /* Reaching this point means none of the lists allowed the attribute,
+               so queue it for removal. */
+            attribs_to_remove.push(attrib.name);
+        }
+
+        /* Removal happens after the scan, so elem.attributes is never modified
+           while it is being indexed. */
+        for(var ai = 0; ai < attribs_to_remove.length; ai++)
+            elem.removeAttribute(attribs_to_remove[ai]);
+    }
+
+    /* TODO: restricted styles still need to be removed; none of that is implemented yet. */
+
+})();
No results found