Created
August 18, 2010 01:52
-
-
Save huxley/533050 to your computer and use it in GitHub Desktop.
whitelist + XSS protection example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # http://djangosnippets.org/snippets/1655/ | |
| from django import template | |
| from BeautifulSoup import BeautifulSoup, Comment | |
| import re | |
| # Library instance on which this module's template filters are registered. | |
| register = template.Library() | |
def sanitize(value, allowed_tags):
    """Whitelist-filter an HTML fragment and scrub "javascript" from attributes.

    ``allowed_tags`` should be in the form 'tag1:attr1:attr2 tag2:attr1 tag3',
    where tags are the allowed HTML tags, and attrs are the allowed attributes
    for that tag.

    Comments are removed, tags that are not listed are marked hidden,
    attributes that are not listed for their tag are dropped, and every
    (possibly obfuscated) spelling of "javascript" is deleted from the values
    of the attributes that are kept.

    Returns the rendered soup contents decoded as UTF-8.
    """
    # Matches "javascript" with optional whitespace and/or one hex character
    # reference between consecutive letters.  IGNORECASE is required because
    # mixed-case spellings such as "JaVaScRiPt:" would otherwise slip through.
    js_regex = re.compile(
        r'[\s]*(&#x.{1,7})?'.join('javascript'), re.IGNORECASE)

    def strip_js(text):
        # A single pass is not enough: removing the inner match from
        # "javajavascriptscript:" re-forms "javascript:".  Repeat until the
        # value is stable; every effective pass shortens the string, so the
        # loop always terminates.
        while True:
            cleaned = js_regex.sub('', text)
            if cleaned == text:
                return cleaned
            text = cleaned

    # 'a:href:title b' -> {'a': ['href', 'title'], 'b': []}
    specs = [spec.split(':') for spec in allowed_tags.split()]
    allowed_tags = dict((spec[0], spec[1:]) for spec in specs)

    soup = BeautifulSoup(value)

    # Drop HTML comments entirely.
    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name not in allowed_tags:
            # NOTE(review): BeautifulSoup 3 is expected to omit a hidden
            # tag's own markup while still rendering its children - confirm.
            tag.hidden = True
        else:
            permitted = allowed_tags[tag.name]
            tag.attrs = [(attr, strip_js(val)) for attr, val in tag.attrs
                         if attr in permitted]

    return soup.renderContents().decode('utf8')
| # Register sanitize as a filter on the template Library created above. | |
| register.filter(sanitize) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment