# 集成富文本编辑器 XSS 预防过滤措施 (XSS filtering for an integrated rich-text editor)
import re
import copy
from html.parser import HTMLParser
class XSSHtml(HTMLParser):
    """Whitelist-based HTML sanitizer for rich-text editor content.

    The parser walks the input and re-emits only tags listed in
    ``allow_tags`` with attributes listed in ``common_attrs`` or in the
    per-tag ``tags_own_attrs`` table.  Text and attribute values are
    HTML-escaped on output; everything else (unknown tags, event-handler
    attributes, comments, declarations) is dropped.

    Typical use::

        with XSSHtml() as parser:
            safe_html = parser.clean(untrusted_html)
    """

    # Tags that may appear in the output.
    allow_tags = ['a', 'img', 'br', 'strong', 'b', 'code', 'pre',
                  'p', 'div', 'em', 'span', 'h1', 'h2', 'h3', 'h4',
                  'h5', 'h6', 'blockquote', 'ul', 'ol', 'tr', 'th', 'td',
                  'hr', 'li', 'u', 'embed', 's', 'table', 'thead', 'tbody',
                  'caption', 'small', 'q', 'sup', 'sub', 'font']
    # Attributes accepted on every allowed tag.
    common_attrs = ["style", "class", "name"]
    # Void tags: rendered as "<tag ... />" and never pushed on the open-tag stack.
    nonend_tags = ["img", "hr", "br", "embed"]
    # Extra attributes accepted per tag, in addition to ``common_attrs``.
    tags_own_attrs = {
        "img": ["src", "width", "height", "alt", "align"],
        "a": ["href", "target", "rel", "title"],
        "embed": ["src", "width", "height", "type", "allowfullscreen", "loop", "play", "wmode", "menu"],
        "table": ["border", "cellpadding", "cellspacing"],
        "font": ["color"]
    }

    # Compiled once; these are applied to every link / style attribute.
    _URL_SCHEME_RE = re.compile(r"^(http|https|ftp)://.+", re.I | re.S)
    _STYLE_TOKEN_RE = re.compile(r"(\\|&#|/\*|\*/)")
    # Case-insensitive so "EXPRESSION(...)" is neutralised as well.
    _STYLE_EXPRESSION_RE = re.compile(r"e.*x.*p.*r.*e.*s.*s.*i.*o.*n", re.I | re.S)

    def __init__(self, allows=None):
        """Create a sanitizer.

        :param allows: optional list of tag names that replaces the default
            ``allow_tags`` whitelist; an empty or missing value keeps the
            class default.
        """
        # HTMLParser.__init__ calls self.reset(), which creates the buffers.
        super().__init__()
        self.allow_tags = allows if allows else self.allow_tags

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        super().close()

    def reset(self):
        """Reset the parser state and discard any collected output."""
        super().reset()
        self.result = []  # sanitized output fragments, in document order
        self.start = []   # stack of currently open (non-void) allowed tags
        self.data = []    # fragments kept by the last get_html() call

    def clean(self, content):
        """Sanitize ``content`` and return the safe HTML string.

        State from any previous call is discarded first, so one parser
        instance can be reused for several documents.
        """
        self.reset()
        self.feed(content)
        # HTMLParser buffers trailing text until close(); without this the
        # text after the last tag would be missing from the output.
        self.close()
        return self.get_html()

    def get_html(self):
        """
        Get the safe html code

        Fragments consisting only of newlines are dropped; all other
        fragments are joined unchanged.  The list is rebuilt on every call,
        so calling this twice returns the same string.
        """
        self.data = [piece for piece in self.result if piece.strip('\n')]
        return ''.join(self.data)

    def handle_startendtag(self, tag, attrs):
        # Self-closing syntax ("<br />") is treated like a plain start tag.
        self.handle_starttag(tag, attrs)

    def handle_starttag(self, tag, attrs):
        """Emit ``tag`` with its whitelisted, escaped attributes."""
        if tag not in self.allow_tags:
            return
        end_diagonal = ' /' if tag in self.nonend_tags else ''
        if not end_diagonal:
            self.start.append(tag)
        # Valueless attributes (e.g. "<p class>") arrive as None; treat them
        # as empty strings so the string helpers below do not crash.
        attdict = {name: ('' if value is None else value) for name, value in attrs}
        attdict = self._wash_attr(attdict, tag)
        # A tag-specific hook "node_<tag>" wins over the generic one.
        node_handler = getattr(self, "node_%s" % tag, self.node_default)
        attdict = node_handler(attdict)
        rendered = ''.join(
            ' %s="%s"' % (key, self._htmlspecialchars(value))
            for key, value in attdict.items()
        )
        self.result.append('<' + tag + rendered + end_diagonal + '>')

    def handle_endtag(self, tag):
        # Only close the innermost open tag; stray or mismatched end tags
        # are dropped.
        if self.start and tag == self.start[-1]:
            self.result.append('</' + tag + '>')
            self.start.pop()

    def handle_data(self, data):
        self.result.append(self._htmlspecialchars(data))

    def handle_entityref(self, name):
        # Called only when the parser runs with convert_charrefs=False.
        if name.isalpha():
            self.result.append("&%s;" % name)

    def handle_charref(self, name):
        # Called only when the parser runs with convert_charrefs=False.
        if name.isdigit():
            self.result.append("&#%s;" % name)

    def node_default(self, attrs):
        """Attribute hook for tags without a dedicated ``node_<tag>`` method."""
        attrs = self._common_attr(attrs)
        return attrs

    def node_a(self, attrs):
        """Attribute hook for ``<a>``: normalise href and restrict target."""
        attrs = self._common_attr(attrs)
        attrs = self._get_link(attrs, "href")
        attrs = self._set_attr_default(attrs, "target", "_blank")
        attrs = self._limit_attr(attrs, {
            "target": ["_blank", "_self"]
        })
        return attrs

    def node_embed(self, attrs):
        """Attribute hook for ``<embed>``: normalise src and lock down scripting."""
        attrs = self._common_attr(attrs)
        attrs = self._get_link(attrs, "src")
        attrs = self._limit_attr(attrs, {
            "type": ["application/x-shockwave-flash"],
            "wmode": ["transparent", "window", "opaque"],
            "play": ["true", "false"],
            "loop": ["true", "false"],
            "menu": ["true", "false"],
            "allowfullscreen": ["true", "false"]
        })
        # Always forced, regardless of what the input asked for.
        attrs["allowscriptaccess"] = "never"
        attrs["allownetworking"] = "none"
        return attrs

    def _true_url(self, url):
        """Return ``url`` if it is http/https/ftp, else prefix it with ``http://``.

        The prefix turns scheme-based payloads such as ``javascript:...``
        into a harmless (if broken) http URL.
        """
        if self._URL_SCHEME_RE.match(url):
            return url
        return "http://%s" % url

    def _true_style(self, style):
        """Replace escape/comment tokens and ``expression`` in a style value with ``_``."""
        if style:
            style = self._STYLE_TOKEN_RE.sub("_", style)
            style = self._STYLE_EXPRESSION_RE.sub("_", style)
        return style

    def _get_style(self, attrs):
        if "style" in attrs:
            attrs["style"] = self._true_style(attrs.get("style"))
        return attrs

    def _get_link(self, attrs, name):
        if name in attrs:
            attrs[name] = self._true_url(attrs[name])
        return attrs

    def _wash_attr(self, attrs, tag):
        """Delete, in place, every attribute not whitelisted for ``tag``."""
        allowed = set(self.common_attrs)
        allowed.update(self.tags_own_attrs.get(tag, ()))
        # Iterate over a snapshot of the keys because the dict is mutated.
        for key in list(attrs):
            if key not in allowed:
                del attrs[key]
        return attrs

    def _common_attr(self, attrs):
        attrs = self._get_style(attrs)
        return attrs

    def _set_attr_default(self, attrs, name, default=''):
        if name not in attrs:
            attrs[name] = default
        return attrs

    def _limit_attr(self, attrs, limit=None):
        """Drop attributes whose value is not in the allowed list for their name."""
        for key, allowed_values in (limit or {}).items():
            if key in attrs and attrs[key] not in allowed_values:
                del attrs[key]
        return attrs

    def _htmlspecialchars(self, html):
        """Escape ``&``, ``<``, ``>``, ``"`` and ``'`` for safe HTML output."""
        # "&" must be replaced first so the entities produced below are not
        # escaped a second time.
        return (
            html.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
            .replace("'", "&#039;")
        )
if "__main__" == __name__:
    # Demo: run the sanitizer over a sample containing event handlers,
    # javascript: URLs, a disallowed <svg> tag and an <embed>, then print
    # the cleaned markup.
    with XSSHtml() as parser:
        ret = parser.clean("""<p><img src=1 onerror=alert(/xss/)></p><div class="left">
<a href='javascript:prompt(1)'><br />hehe</a></div>
<p id="test" onmouseover="alert(1)">>M<svg>
<a href="www.baidu" target="self">MM</a></p>
<embed src='javascript:alert(/hehe/)' allowscriptaccess=always />
<img onerror=alert(1) src=#>""")
        print(ret)
try:
    from urllib.parse import urlparse  # Python 3
except ImportError:  # Python 2 fallback
    from urlparse import urlparse

import bleach


class XSSFilter(object):
    """Sanitize rich-text HTML by delegating to ``bleach.clean``.

    ``tags`` is the tag whitelist, ``styles`` the CSS property whitelist and
    ``allowed_src`` the per-attribute filter callback passed to bleach.
    """

    tags = ['p', 'div', 'img', 'br', 'span', 'pre', 'code', 'blockquote', 'ol', 'ul', 'li']
    styles = [
        'max-width', 'color', 'margin', 'line-height', 'display', 'padding', 'background-color',
        'display', 'border-left', 'font-family', 'white-space', 'font-size'
    ]

    @staticmethod
    def allowed_src(tag, name, value):
        """Attribute filter: return True to keep attribute ``name`` on ``tag``."""
        if name in ('style', 'src', 'alt', 'data-w-e'):
            return True
        # NOTE(review): 'src' is already accepted by the tuple above, so this
        # branch is unreachable and the URL check below never runs. Confirm
        # whether every src should really be trusted before changing it.
        if name == 'src':
            p = urlparse(value)
            return XSSFilter._trusted_url(p)
        return False

    @classmethod
    def clean(cls, html):
        """Return ``html`` sanitized with the class whitelists."""
        # NOTE(review): newer bleach releases replace the ``styles`` argument
        # with ``css_sanitizer`` - confirm against the installed version.
        return bleach.clean(html, tags=cls.tags, attributes=cls.allowed_src, styles=cls.styles)

    @classmethod
    def _trusted_url(cls, url):
        """Return True for a parsed URL with no host, or whose path contains 'static/gif'."""
        return url.netloc == '' or 'static/gif' in url.path

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。