Skip to content

Instantly share code, notes, and snippets.

@huxley
Created August 18, 2010 01:52
Show Gist options
  • Select an option

  • Save huxley/533050 to your computer and use it in GitHub Desktop.

Select an option

Save huxley/533050 to your computer and use it in GitHub Desktop.
whitelist + XSS protection example
# http://djangosnippets.org/snippets/1655/
from django import template
from BeautifulSoup import BeautifulSoup, Comment
import re
# Template library object on which this module's filters are registered.
register = template.Library()
def sanitize(value, allowed_tags):
    """Whitelist-filter an HTML fragment, removing tags, attributes and comments.

    ``allowed_tags`` should be in the form
    ``'tag1:attr1:attr2 tag2:attr1 tag3'``, where tags are the allowed HTML
    tags and attrs are the allowed attributes for that tag.

    Processing steps:

    * every HTML comment node is extracted from the tree;
    * tags that are not whitelisted are marked ``hidden`` (they are not
      extracted, so their children are still visited by the loop);
    * whitelisted tags keep only the attributes listed for them, and any
      ``javascript`` token -- in any letter case, optionally interleaved
      with whitespace or ``&#x...`` fragments -- is deleted from the
      remaining attribute values.

    Returns the re-rendered markup decoded from UTF-8.
    """
    # Matches "javascript" even when whitespace or "&#x..." fragments are
    # wedged between its letters. URL schemes are case-insensitive, so the
    # match must be too; otherwise "JavaScript:" slips past the filter.
    js_regex = re.compile(
        r'[\s]*(&#x.{1,7})?'.join('javascript'), re.IGNORECASE)

    # Build {tag_name: [allowed_attr, ...]} from the whitespace-separated spec.
    whitelist = {}
    for spec in allowed_tags.split():
        parts = spec.split(':')
        whitelist[parts[0]] = parts[1:]

    soup = BeautifulSoup(value)

    # Drop HTML comments entirely.
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name not in whitelist:
            # NOTE(review): relies on BeautifulSoup 3 omitting the markup of
            # a hidden tag while still rendering its contents -- confirm.
            tag.hidden = True
        else:
            permitted = whitelist[tag.name]
            # tag.attrs is treated as a list of (name, value) pairs.
            tag.attrs = [(attr, js_regex.sub('', val))
                         for attr, val in tag.attrs
                         if attr in permitted]

    return soup.renderContents().decode('utf8')
# Register sanitize as a filter on the template library.
register.filter(sanitize)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment