Files
js-xss/lib/index.js
Lei Zongmin 4a2428dc1a 优化性能
2013-05-07 13:46:49 +08:00

355 lines
9.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 过滤XSS攻击
*
* @author 老雷<leizongmin@gmail.com>
*/
/*
* 默认HTML标签白名单
* 标签名=>属性列表
*/
var defaultWhiteList = {
h1: ['style', 'class'],
h2: ['style', 'class'],
h3: ['style', 'class'],
h4: ['style', 'class'],
h5: ['style', 'class'],
h6: ['style', 'class'],
hr: ['style', 'class'],
span: ['style', 'class'],
strong: ['style', 'class'],
b: ['style', 'class'],
i: ['style', 'class'],
br: [],
p: ['style', 'class'],
pre: ['style', 'class'],
code: ['style', 'class'],
a: ['style', 'class', 'target', 'href', 'title'],
img: ['style', 'class', 'src', 'alt', 'title'],
div: ['style', 'class'],
table: ['style', 'class', 'width', 'border'],
tr: ['style', 'class'],
td: ['style', 'class', 'width', 'colspan'],
th: ['style', 'class', 'width', 'colspan'],
tbody: ['style', 'class'],
ul: ['style', 'class'],
li: ['style', 'class'],
ol: ['style', 'class'],
dl: ['style', 'class'],
dt: ['style', 'class'],
em: ['style'],
cite: ['style'],
section:['style', 'class'],
header: ['style', 'class'],
footer: ['style', 'class'],
blockquote: ['style', 'class'],
audio: ['autoplay', 'controls', 'loop', 'preload', 'src'],
video: ['autoplay', 'controls', 'loop', 'preload', 'src', 'height', 'width'],
};
// 正则表达式
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
var REGEXP_QUOTE = /"/g;
var REGEXP_ATTR_NAME = /[^a-zA-Z0-9_:\.\-]/img;
var REGEXP_ATTR_VALUE = /&#([a-zA-Z0-9]*);?/img;
var REGEXP_DEFAULT_ON_TAG_ATTR_1 = /\/\*|\*\//mg;
var REGEXP_DEFAULT_ON_TAG_ATTR_2 = /^[\s"'`]*((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/ig;
var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//mg;
var REGEXP_DEFAULT_ON_TAG_ATTR_4 = /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/ig;
/**
* 过滤属性值
*
* @param {String} tag 标签名
* @param {String} attr 属性名
* @param {String} value 属性值
* @return {String} 若不需要修改属性值,不返回任何值
*/
function defaultOnTagAttr (tag, attr, value) {
if (attr === 'href' || attr === 'src') {
REGEXP_DEFAULT_ON_TAG_ATTR_1.lastIndex = 0;
if (REGEXP_DEFAULT_ON_TAG_ATTR_1.test(value)) {
return '#';
}
REGEXP_DEFAULT_ON_TAG_ATTR_2.lastIndex = 0;
if (REGEXP_DEFAULT_ON_TAG_ATTR_2.test(value)) {
return '#';
}
} else if (attr === 'style') {
REGEXP_DEFAULT_ON_TAG_ATTR_3.lastIndex = 0;
if (REGEXP_DEFAULT_ON_TAG_ATTR_3.test(value)) {
return '#';
}
REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
return '';
}
}
}
/**
* 过滤非白名单的标签
*
* @param {String} tag 标签名
* @param {String} html 标签HTML代码包括属性值
* @param {Object} options 更多属性:
* position在返回的HTML代码中的开始位置
* originalPosition在原HTML代码中的开始位置
* isClosing是否为闭合标签如</a>
* @return {String} 若不返回任何值,则默认替换<>为&lt;&gt;
*/
function defaultOnIgnoreTag (tag, html, options) {
return noTag(html);
}
/**
* 转换<>为&lt; &gt
*
* @param {String} text
* @return {String}
*/
function noTag (text) {
return text.replace(REGEXP_LT, '&lt;').replace(REGEXP_GT, '&gt;');
}
/**
* XSS过滤对象
*
* @param {Object} options 选项whiteList, onTagAttr, onIgnoreTag
*/
function FilterXSS (options) {
'use strict';
this.options = options = options || {};
this.whiteList = options.whiteList || exports.whiteList;
this.onTagAttr = options.onTagAttr || exports.onTagAttr;
this.onIgnoreTag = options.onIgnoreTag || exports.onIgnoreTag;
}
/**
* 过滤不合法的属性
*
* @param {String} tagName 标签名称
* @param {String} attrs 标签属性部分
* @return {String}
*/
FilterXSS.prototype.filterAttributes = function (tagName, attrs) {
'use strict';
tagName = tagName.toLowerCase();
var me = this;
var whites = this.whiteList[tagName];
var lastPos = 0;
var _attrs = '';
var tmpName = false;
var hasSprit = false;
var addAttr = function (name, value) {
name = name.trim();
if (!hasSprit && name === '/') {
hasSprit = true;
return;
};
name = name.replace(REGEXP_ATTR_NAME, '').toLowerCase();
if (name.length < 1) return;
if (whites.indexOf(name) !== -1) {
if (value) {
value = value.trim().replace(REGEXP_QUOTE, '&quote;');
// 转换unicode字符 及过滤不可见字符
value = value.replace(REGEXP_ATTR_VALUE, function (str, code) {
code = parseInt(code);
return String.fromCharCode(code);
});
var _value = '';
for (var i = 0, len = value.length; i < len; i++) {
_value += value.charCodeAt(i) < 32 ? ' ' : value[i];
}
value = _value.trim();
var newValue = me.onTagAttr(tagName, name, value);
if (typeof(newValue) !== 'undefined') {
value = newValue;
}
}
_attrs += name + (value ? '="' + value + '"' : '') + ' ';
}
};
for (var i = 0, len = attrs.length; i < len; i++) {
var c = attrs[i];
if (tmpName === false && c === '=') {
tmpName = attrs.slice(lastPos, i);
lastPos = i + 1;
continue;
}
if (tmpName !== false) {
if (i === lastPos && (c === '"' || c === "'")) {
var j = attrs.indexOf(c, i + 1);
if (j === -1) {
break;
} else {
var v = attrs.slice(lastPos + 1, j).trim();
addAttr(tmpName, v);
tmpName = false;
i = j;
lastPos = i + 1;
continue;
}
}
}
if (c === ' ') {
var v = attrs.slice(lastPos, i).trim();
if (tmpName === false) {
addAttr(v);
} else {
addAttr(tmpName, v);
}
tmpName = false;
lastPos = i + 1;
continue;
}
}
if (lastPos < attrs.length) {
if (tmpName === false) {
addAttr(attrs.slice(lastPos));
} else {
addAttr(tmpName, attrs.slice(lastPos));
}
}
if (hasSprit) _attrs += '/';
return _attrs.trim();
};
/**
* 检查标签是否合法
*
* @param {String} tag 标签文本,如“<a”
* @param {Number} currentPos 原HTML的当前位置
* @param {Number} targetPos 生成的HTML的当前位置
*/
FilterXSS.prototype.addNewTag = function (tag, currentPos, targetPos) {
'use strict';
var rethtml = '';
var spos = tag.slice(0, 2) === '</' ? 2 : 1;
var i = tag.indexOf(' ');
if (i === -1) {
var tagName = tag.slice(spos, tag.length - 1).trim();
} else {
var tagName = tag.slice(spos, i + 1).trim();
}
tagName = tagName.toLowerCase();
if (tagName in this.whiteList) {
// 过滤不合法的属性
if (i === -1) {
rethtml += tag.slice(0, spos) + tagName + '>';
} else {
var attrs = this.filterAttributes(tagName, tag.slice(i + 1, tag.length - 1).trim());
rethtml += tag.slice(0, spos) + tagName + (attrs.length > 0 ? ' ' + attrs : '') + '>';
}
} else {
// 过滤不合法的标签
var options = {
isClosing: (spos === 2),
position: targetPos,
originalPosition: currentPos - tag.length + 1
};
var tagHtml = this.onIgnoreTag(tagName, tag, options);
if (typeof(tagHtml) === 'undefined') {
tagHtml = noTag(tag);
}
rethtml += tagHtml;
}
return rethtml;
};
/**
* 开始处理
*
* @param {String} html
* @return {String}
*/
FilterXSS.prototype.process = function (html) {
'use strict';
var rethtml = '';
var lastPos = 0;
var tagStart = false;
var quoteStart = false;
var currentPos = 0;
// 逐个分析字符
for (var currentPos = 0, len = html.length; currentPos < len; currentPos++) {
var c = html[currentPos];
if (tagStart === false) {
if (c === '<') {
tagStart = currentPos;
continue;
}
} else {
if (quoteStart === false) {
if (c === '<') {
rethtml += noTag(html.slice(lastPos, currentPos));
tagStart = currentPos;
lastPos = currentPos;
continue;
}
if (c === '>') {
rethtml += noTag(html.slice(lastPos, tagStart));
rethtml += this.addNewTag(html.slice(tagStart, currentPos + 1), currentPos, rethtml.length);
lastPos = currentPos + 1;
tagStart = false;
continue;
}
if (c === '"' || c === "'") {
quoteStart = c;
continue;
}
} else {
if (c === quoteStart) {
quoteStart = false;
continue;
}
}
}
}
if (lastPos < html.length) {
rethtml += noTag(html.substr(lastPos));
}
return rethtml;
};
/**
* XSS过滤
*
* @param {String} html 要过滤的HTML代码
* @param {Object} options 选项whiteList, onTagAttr, onIgnoreTag
* @return {String}
*/
function filterXSS (html, options) {
var xss = new FilterXSS(options);
return xss.process(html);
};
// 默认配置
exports = module.exports = filterXSS;
exports.FilterXSS = FilterXSS;
exports.whiteList = defaultWhiteList;
exports.onTagAttr = defaultOnTagAttr;
exports.onIgnoreTag = defaultOnIgnoreTag;
// 工具函数
exports.utils = require('./utils');
// 在浏览器端使用
if (typeof window !== 'undefined') {
window.filterXSS = module.exports;
}