2014-02-13 11:18:03 +08:00
|
|
|
|
/**
 * Default configuration
 *
 * @author 老雷<leizongmin@gmail.com>
 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Default whitelist.
// Keys are HTML tag names; each value is the list of attribute names
// associated with that tag (an empty list means no attributes are listed).
// The code that consumes this table lives outside this file.
var whiteList = {
  a: ['target', 'href', 'title'],
  abbr: ['title'],
  address: [],
  area: ['shape', 'coords', 'href', 'alt'],
  article: [],
  aside: [],
  audio: ['autoplay', 'controls', 'loop', 'preload', 'src'],
  b: [],
  bdi: ['dir'],
  bdo: ['dir'],
  big: [],
  blockquote: ['cite'],
  br: [],
  caption: [],
  center: [],
  cite: [],
  code: [],
  col: ['align', 'valign', 'span', 'width'],
  colgroup: ['align', 'valign', 'span', 'width'],
  dd: [],
  del: ['datetime'],
  details: ['open'],
  div: [],
  dl: [],
  dt: [],
  em: [],
  font: ['color', 'size', 'face'],
  footer: [],
  h1: [],
  h2: [],
  h3: [],
  h4: [],
  h5: [],
  h6: [],
  header: [],
  hr: [],
  i: [],
  img: ['src', 'alt', 'title', 'width', 'height'],
  ins: ['datetime'],
  li: [],
  mark: [],
  nav: [],
  ol: [],
  p: [],
  pre: [],
  s: [],
  section: [],
  small: [],
  span: [],
  sub: [],
  sup: [],
  strong: [],
  table: ['width', 'border', 'align', 'valign'],
  tbody: ['align', 'valign'],
  // `rowspan` belongs to table cells; it is listed here alongside `colspan`.
  td: ['width', 'rowspan', 'colspan', 'align', 'valign'],
  tfoot: ['align', 'valign'],
  th: ['width', 'rowspan', 'colspan', 'align', 'valign'],
  thead: ['align', 'valign'],
  // `rowspan` is kept on `tr` for backward compatibility.
  tr: ['rowspan', 'align', 'valign'],
  tt: [],
  u: [],
  ul: [],
  video: ['autoplay', 'controls', 'loop', 'preload', 'src', 'height', 'width']
};
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Default handler for a matched tag.
 *
 * This default implementation is a no-op: it ignores all of its arguments
 * and returns undefined.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {String|undefined} the default implementation returns undefined
 */
function onTag (tag, html, options) {
  // do nothing
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Default handler for a matched tag that is not on the whitelist.
 *
 * This default implementation is a no-op: it ignores all of its arguments
 * and returns undefined.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {String|undefined} the default implementation returns undefined
 */
function onIgnoreTag (tag, html, options) {
  // do nothing
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Default handler for a matched tag attribute.
 *
 * This default implementation is a no-op: it ignores all of its arguments
 * and returns undefined.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {String|undefined} the default implementation returns undefined
 */
function onTagAttr (tag, name, value) {
  // do nothing
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Default handler for a matched tag attribute that is not on the whitelist.
 *
 * This default implementation is a no-op: it ignores all of its arguments
 * and returns undefined.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {String|undefined} the default implementation returns undefined
 */
function onIgnoreTagAttr (tag, name, value) {
  // do nothing
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes HTML angle brackets.
 *
 * Every match of REGEXP_LT is replaced by the entity `&lt;` and every match
 * of REGEXP_GT by `&gt;`. No other character is touched.
 *
 * @param {String} html
 * @return {String}
 */
function escapeHtml (html) {
  // The replacements must be the entities themselves; substituting the raw
  // characters would leave the input unchanged.
  return html.replace(REGEXP_LT, '&lt;').replace(REGEXP_GT, '&gt;');
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Returns a safe version of a tag attribute value.
 *
 * The value is first normalised with friendlyAttrValue() and then checked
 * according to the attribute name:
 *
 * - `href` / `src`: the value is trimmed; `#` is returned as is; any other
 *   non-empty value that does not match REGEXP_DEFAULT_ON_TAG_ATTR_1
 *   (a leading `/`, `http://` or `https://`) yields an empty string.
 * - `background`: a value matching REGEXP_DEFAULT_ON_TAG_ATTR_4
 *   (`javascript:`, `vbscript:`, `livescript:` or `mocha:`) yields an empty
 *   string.
 * - `style`: a value containing a comment delimiter, an `expression(` call,
 *   or a `url(` together with one of the script schemes above yields an
 *   empty string.
 *
 * A value that passes is returned through escapeAttrValue(). The `tag`
 * argument is not used by this implementation.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {String}
 */
function safeAttrValue (tag, name, value) {
  // Convert to a friendly (decoded) value before running any check.
  value = friendlyAttrValue(value);

  if (name === 'href' || name === 'src') {
    // Only addresses starting with http:// | https:// | / are allowed.
    value = value.trim();
    if (value === '#') return '#';
    if (value && !REGEXP_DEFAULT_ON_TAG_ATTR_1.test(value)) {
      return '';
    }
  } else if (name === 'background') {
    // Filter the background attribute (an old XSS vector that may no longer
    // apply): reject script-like schemes such as "javascript:".
    // The regexp is global, so lastIndex is reset before each test().
    REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
      return '';
    }
  } else if (name === 'style') {
    // Reject comment delimiters: /* ... */
    REGEXP_DEFAULT_ON_TAG_ATTR_3.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_3.test(value)) {
      return '';
    }
    // Reject expression(...)
    REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) {
      return '';
    }
    // url(...) is rejected only when the value also holds a script scheme.
    REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) {
      REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
      if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
        return '';
      }
    }
  }

  // Escape <, > and " for output.
  value = escapeAttrValue(value);
  return value;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Regular expressions shared by the helpers in this file.
// Several of them carry the global flag, so code that calls test() on them
// has to reset lastIndex first.

// Characters escaped on output.
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
var REGEXP_QUOTE = /"/g;
// The quote entity, turned back into a literal double quote for analysis.
var REGEXP_QUOTE_2 = /&quot;/g;
// Numeric character references such as &#106; or &#x6A; (semicolon optional).
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/img;
// HTML5 named references that can hide a colon or a line break. The pattern
// must name the entity itself: a bare /:?/ also matches the empty string and
// would match at every position of the input.
var REGEXP_ATTR_VALUE_COLON = /&colon;?/img;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/img;
// Allowed href/src prefixes: "/", "http://" or "https://".
var REGEXP_DEFAULT_ON_TAG_ATTR_1 = /^((https?:\/)?\/)/;
// CSS comment delimiters.
var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//mg;
// Script schemes (javascript:, vbscript:, livescript:, mocha:), tolerating
// whitespace between the letters.
var REGEXP_DEFAULT_ON_TAG_ATTR_4 = /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/ig;
// A leading "data:" scheme, and a leading "data: image/" scheme.
var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/ig;
var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//ig;
// CSS expression(...) and url(...).
var REGEXP_DEFAULT_ON_TAG_ATTR_7 = /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/ig;
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/ig;
|
2014-02-13 11:18:03 +08:00
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes double quotes.
 *
 * Every match of REGEXP_QUOTE is replaced by the entity `&quot;`.
 *
 * @param {String} str
 * @return {String} str
 */
function escapeQuote (str) {
  // `&quot;` is the entity for a double quote; `&quote;` is not an entity.
  return str.replace(REGEXP_QUOTE, '&quot;');
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Unescapes double quotes.
 *
 * Every match of REGEXP_QUOTE_2 is replaced by a literal double quote.
 *
 * @param {String} str
 * @return {String} str
 */
function unescapeQuote (str) {
  return str.replace(REGEXP_QUOTE_2, '"');
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts numeric HTML character references into the characters they
 * encode.
 *
 * Each match of REGEXP_ATTR_VALUE_1 is replaced by
 * String.fromCharCode() of its code: a code starting with `x` or `X` is
 * parsed as hexadecimal, anything else as decimal. A code that does not
 * parse (for example the empty code in `&#;`) gives NaN, which
 * String.fromCharCode() turns into U+0000.
 *
 * @param {String} str
 * @return {String}
 */
function escapeHtmlEntities (str) {
  return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode (match, code) {
    var first = code.charAt(0);
    var isHex = (first === 'x' || first === 'X');
    var charCode = isHex ? parseInt(code.slice(1), 16) : parseInt(code, 10);
    return String.fromCharCode(charCode);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts the dangerous named character references added by HTML5.
 *
 * Every match of REGEXP_ATTR_VALUE_COLON is replaced by `:` and every match
 * of REGEXP_ATTR_VALUE_NEWLINE by a single space.
 *
 * @param {String} str
 * @return {String}
 */
function escapeDangerHtml5Entities (str) {
  var withColons = str.replace(REGEXP_ATTR_VALUE_COLON, ':');
  return withColons.replace(REGEXP_ATTR_VALUE_NEWLINE, ' ');
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Clears non-printable characters.
 *
 * Every character whose code is below 32 is replaced by a space, then the
 * result is trimmed.
 *
 * @param {String} str
 * @return {String}
 */
function clearNonPrintableCharacter (str) {
  // A single pass over the string; the class covers char codes 0 to 31.
  return str.replace(/[\u0000-\u001F]/g, ' ').trim();
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a tag attribute value into plain characters so that it is easier
 * to analyse.
 *
 * The value goes through four steps, in this order: unescapeQuote(),
 * escapeHtmlEntities(), escapeDangerHtml5Entities() and
 * clearNonPrintableCharacter().
 *
 * @param {String} str
 * @return {String}
 */
function friendlyAttrValue (str) {
  str = unescapeQuote(str);               // double quotes
  str = escapeHtmlEntities(str);          // numeric HTML character references
  str = escapeDangerHtml5Entities(str);   // dangerous HTML5 named references
  str = clearNonPrintableCharacter(str);  // non-printable characters
  return str;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes a tag attribute value for output.
 *
 * The value is passed through escapeQuote() first and escapeHtml() second.
 *
 * @param {String} str
 * @return {String}
 */
function escapeAttrValue (str) {
  return escapeHtml(escapeQuote(str));
}
|
|
|
|
|
|
|
2014-02-13 16:27:49 +08:00
|
|
|
|
/**
 * onIgnoreTag handler that strips tags which are not on the whitelist.
 *
 * It takes no arguments into account and always returns an empty string.
 *
 * @return {String} always ''
 */
function onIgnoreTagStripAll () {
  return '';
}
|
|
|
|
|
|
|
2014-02-13 17:55:43 +08:00
|
|
|
|
/**
 * Removes the body of tags.
 *
 * Returns an object with two functions that share state:
 *
 * - `onIgnoreTag(tag, html, options)`: for a tag to remove, returns the
 *   marker `[removed]` (opening tag) or `[/removed]` (closing tag) and
 *   records the range to delete, based on `options.position` and
 *   `options.isClosing`. Any other tag is delegated to `next`.
 * - `remove(html)`: returns `html` with every recorded range cut out.
 *
 * NOTE(review): `options.position` is presumably the offset of the marker in
 * the string later given to `remove` — confirm against the caller.
 *
 * @param {array} tags list of tags to remove; when this is not an array,
 *                     every tag is removed
 * @param {function} next handler for tags that are not in the list, optional
 */
function StripTagBody (tags, next) {
  if (typeof(next) !== 'function') {
    next = function () {};
  }

  var isRemoveAllTag = !Array.isArray(tags);
  function isRemoveTag (tag) {
    if (isRemoveAllTag) return true;
    return (tags.indexOf(tag) !== -1);
  }

  var removeList = [];   // list of [start, end] ranges to remove
  var posStart = false;  // start position of the current tag, false if none

  return {
    onIgnoreTag: function (tag, html, options) {
      if (!isRemoveTag(tag)) {
        return next(tag, html, options);
      }

      if (options.isClosing) {
        var ret = '[/removed]';
        var end = options.position + ret.length;
        // A closing tag without a recorded opening tag removes only itself.
        removeList.push([posStart !== false ? posStart : options.position, end]);
        posStart = false;
        return ret;
      }

      // Compare against false explicitly: 0 is a valid start position and
      // must not be overwritten by a later opening tag.
      if (posStart === false) {
        posStart = options.position;
      }
      return '[removed]';
    },
    remove: function (html) {
      var rethtml = '';
      var lastPos = 0;
      removeList.forEach(function (pos) {
        rethtml += html.slice(lastPos, pos[0]);
        lastPos = pos[1];
      });
      rethtml += html.slice(lastPos);
      return rethtml;
    }
  };
}
|
|
|
|
|
|
|
|
|
|
|
|
// Matches an HTML comment from `<!--` up to the nearest `-->`, across lines.
var STRIP_COMMENT_TAG_REGEXP = /<!--[\s\S]*?-->/g;

/**
 * Strips HTML comment tags.
 *
 * Every match of STRIP_COMMENT_TAG_REGEXP is removed. A comment that is
 * never closed does not match and is left in place.
 *
 * @param {String} html
 * @return {String}
 */
function stripCommentTag (html) {
  return html.replace(STRIP_COMMENT_TAG_REGEXP, '');
}
|
2014-02-13 17:55:43 +08:00
|
|
|
|
|
2014-02-13 11:18:03 +08:00
|
|
|
|
|
|
|
|
|
|
// Public interface of this module.
exports.whiteList = whiteList;
exports.onTag = onTag;
exports.onIgnoreTag = onIgnoreTag;
exports.onTagAttr = onTagAttr;
exports.onIgnoreTagAttr = onIgnoreTagAttr;
exports.safeAttrValue = safeAttrValue;
exports.escapeHtml = escapeHtml;
exports.escapeQuote = escapeQuote;
exports.unescapeQuote = unescapeQuote;
exports.escapeHtmlEntities = escapeHtmlEntities;
exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
exports.friendlyAttrValue = friendlyAttrValue;
exports.escapeAttrValue = escapeAttrValue;
exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
exports.StripTagBody = StripTagBody;
exports.stripCommentTag = stripCommentTag;
|