/** * 默认配置 * * @author 老雷 */ // 默认白名单 var whiteList = { a: ['target', 'href', 'title'], abbr: ['title'], address: [], area: ['shape', 'coords', 'href', 'alt'], article: [], aside: [], audio: ['autoplay', 'controls', 'loop', 'preload', 'src'], b: [], bdi: ['dir'], bdo: ['dir'], big: [], blockquote: ['cite'], br: [], caption: [], center: [], cite: [], code: [], col: ['align', 'valign', 'span', 'width'], colgroup: ['align', 'valign', 'span', 'width'], dd: [], del: ['datetime'], details: ['open'], div: [], dl: [], dt: [], em: [], font: ['color', 'size', 'face'], footer: [], h1: [], h2: [], h3: [], h4: [], h5: [], h6: [], header: [], hr: [], i: [], img: ['src', 'alt', 'title', 'width', 'height'], ins: ['datetime'], li: [], mark: [], nav: [], ol: [], p: [], pre: [], s: [], section:[], small: [], span: [], sub: [], sup: [], strong: [], table: ['width', 'border', 'align', 'valign'], tbody: ['align', 'valign'], td: ['width', 'colspan', 'align', 'valign'], tfoot: ['align', 'valign'], th: ['width', 'colspan', 'align', 'valign'], thead: ['align', 'valign'], tr: ['rowspan', 'align', 'valign'], tt: [], u: [], ul: [], video: ['autoplay', 'controls', 'loop', 'preload', 'src', 'height', 'width'] }; /** * 匹配到标签时的处理方法 * * @param {String} tag * @param {String} html * @param {Object} options * @return {String} */ function onTag (tag, html, options) { // do nothing } /** * 匹配到不在白名单上的标签时的处理方法 * * @param {String} tag * @param {String} html * @param {Object} options * @return {String} */ function onIgnoreTag (tag, html, options) { // do nothing } /** * 匹配到标签属性时的处理方法 * * @param {String} tag * @param {String} name * @param {String} value * @return {String} */ function onTagAttr (tag, name, value) { // do nothing } /** * 匹配到不在白名单上的标签属性时的处理方法 * * @param {String} tag * @param {String} name * @param {String} value * @return {String} */ function onIgnoreTagAttr (tag, name, value) { // do nothing } /** * HTML转义 * * @param {String} html */ function escapeHtml (html) { return html.replace(REGEXP_LT, '<').replace(REGEXP_GT, '>'); } /** * 安全的标签属性值 * * @param {String} tag * @param {String} name * @param {String} value * @return {String} */ function safeAttrValue (tag, name, value) { // 转换为友好的属性值,再做判断 value = friendlyAttrValue(value); if (name === 'href' || name === 'src') { // 过滤 href 和 src 属性 // 仅允许 http:// | https:// | mailto: | / 开头的地址 value = value.trim(); if (value === '#') return '#'; if (!(value.substr(0, 7) === 'http://' || value.substr(0, 8) === 'https://' || value.substr(0, 7) === 'mailto:' || value[0] === '/')) { return ''; } } else if (name === 'background') { // 过滤 background 属性 (这个xss漏洞较老了,可能已经不适用) // javascript: REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0; if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) { return ''; } } else if (name === 'style') { // /*注释*/ REGEXP_DEFAULT_ON_TAG_ATTR_3.lastIndex = 0; if (REGEXP_DEFAULT_ON_TAG_ATTR_3.test(value)) { return ''; } // expression() REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0; if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) { return ''; } // url() REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0; if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) { REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0; if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) { return ''; } } } // 输出时需要转义<>" value = escapeAttrValue(value); return value; } // 正则表达式 var REGEXP_LT = //g; var REGEXP_QUOTE = /"/g; var REGEXP_QUOTE_2 = /"/g; var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/img; var REGEXP_ATTR_VALUE_COLON = /:?/img; var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/img; var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//mg; var REGEXP_DEFAULT_ON_TAG_ATTR_4 = /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/ig; var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/ig; var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//ig; var REGEXP_DEFAULT_ON_TAG_ATTR_7 = /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/ig; var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/ig; /** * 对双引号进行转义 * * @param {String} str * @return {String} str */ function escapeQuote (str) { return str.replace(REGEXP_QUOTE, '"'); } /** * 对双引号进行转义 * * @param {String} str * @return {String} str */ function unescapeQuote (str) { return str.replace(REGEXP_QUOTE_2, '"'); } /** * 对html实体编码进行转义 * * @param {String} str * @return {String} */ function escapeHtmlEntities (str) { return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode (str, code) { return (code[0] === 'x' || code[0] === 'X') ? String.fromCharCode(parseInt(code.substr(1), 16)) : String.fromCharCode(parseInt(code, 10)); }); } /** * 对html5新增的危险实体编码进行转义 * * @param {String} str * @return {String} */ function escapeDangerHtml5Entities (str) { return str.replace(REGEXP_ATTR_VALUE_COLON, ':') .replace(REGEXP_ATTR_VALUE_NEWLINE, ' '); } /** * 清除不可见字符 * * @param {String} str * @return {String} */ function clearNonPrintableCharacter (str) { var str2 = ''; for (var i = 0, len = str.length; i < len; i++) { str2 += str.charCodeAt(i) < 32 ? ' ' : str.charAt(i); } return str2.trim(); } /** * 将标签的属性值转换成一般字符,便于分析 * * @param {String} str * @return {String} */ function friendlyAttrValue (str) { str = unescapeQuote(str); // 双引号 str = escapeHtmlEntities(str); // 转换HTML实体编码 str = escapeDangerHtml5Entities(str); // 转换危险的HTML5新增实体编码 str = clearNonPrintableCharacter(str); // 清除不可见字符 return str; } /** * 转义用于输出的标签属性值 * * @param {String} str * @return {String} */ function escapeAttrValue (str) { str = escapeQuote(str); str = escapeHtml(str); return str; } /** * 去掉不在白名单中的标签onIgnoreTag处理方法 */ function onIgnoreTagStripAll () { return ''; } /** * 删除标签体 * * @param {array} tags 要删除的标签列表 * @param {function} next 对不在列表中的标签的处理函数,可选 */ function StripTagBody (tags, next) { if (typeof(next) !== 'function') { next = function () {}; } var isRemoveAllTag = !Array.isArray(tags); function isRemoveTag (tag) { if (isRemoveAllTag) return true; return (tags.indexOf(tag) !== -1); } var removeList = []; // 要删除的位置范围列表 var posStart = false; // 当前标签开始位置 return { onIgnoreTag: function (tag, html, options) { if (isRemoveTag(tag)) { if (options.isClosing) { var ret = '[/removed]'; var end = options.position + ret.length; removeList.push([posStart !== false ? posStart : options.position, end]); posStart = false; return ret; } else { if (!posStart) { posStart = options.position; } return '[removed]'; } } else { return next(tag, html, options); } }, remove: function (html) { var rethtml = ''; var lastPos = 0; removeList.forEach(function (pos) { rethtml += html.slice(lastPos, pos[0]); lastPos = pos[1]; }); rethtml += html.slice(lastPos); return rethtml; } }; } /** * 去除备注标签 * * @param {String} html * @return {String} */ function stripCommentTag (html) { return html.replace(STRIP_COMMENT_TAG_REGEXP, ''); } var STRIP_COMMENT_TAG_REGEXP = //g; /** * 去除不可见字符 * * @param {String} html * @return {String} */ function stripBlankChar (html) { var chars = html.split(''); chars = chars.filter(function (char) { var c = char.charCodeAt(0); if (c === 127) return false; if (c <= 31) { if (c === 10 || c === 13) return true; return false; } return true; }); return chars.join(''); } exports.whiteList = whiteList; exports.onTag = onTag; exports.onIgnoreTag = onIgnoreTag; exports.onTagAttr = onTagAttr; exports.onIgnoreTagAttr = onIgnoreTagAttr; exports.safeAttrValue = safeAttrValue; exports.escapeHtml = escapeHtml; exports.escapeQuote = escapeQuote; exports.unescapeQuote = unescapeQuote; exports.escapeHtmlEntities = escapeHtmlEntities; exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities; exports.clearNonPrintableCharacter = clearNonPrintableCharacter; exports.friendlyAttrValue = friendlyAttrValue; exports.escapeAttrValue = escapeAttrValue; exports.onIgnoreTagStripAll = onIgnoreTagStripAll; exports.StripTagBody = StripTagBody; exports.stripCommentTag = stripCommentTag; exports.stripBlankChar = stripBlankChar;