// File: js-xss/index.js (226 lines, 6.0 KiB, JavaScript)
// Captured from a repository "blame" view; first revision shown: 2012-09-18 23:23:16 +08:00
/**
* 过滤XSS攻击
*
* @author 老雷<leizongmin@gmail.com>
*/
/*
 * Default HTML tag whitelist.
 *
 * Maps a lower-case tag name to the list of attribute names that are kept
 * on that tag. Tags that are not listed here are escaped by the filter, and
 * attributes that are not listed for a tag are dropped.
 */
var defaultWhiteList = {
  h1: ['style', 'class'],
  h2: ['style', 'class'],
  h3: ['style', 'class'],
  h4: ['style', 'class'],
  h5: ['style', 'class'],
  hr: ['style', 'class'],
  span: ['style', 'class'],
  strong: ['style', 'class'],
  b: ['style', 'class'],
  i: ['style', 'class'],
  br: [],
  p: ['style', 'class'],
  pre: ['style', 'class'],
  code: ['style', 'class'],
  a: ['style', 'class', 'target', 'href', 'title'],
  img: ['style', 'class', 'src', 'alt', 'title'],
  div: ['style', 'class'],
  table: ['style', 'class', 'width', 'border'],
  tr: ['style', 'class'],
  td: ['style', 'class', 'width', 'colspan'],
  th: ['style', 'class', 'width', 'colspan'],
  tbody: ['style', 'class']
};
/**
 * Default attribute-value filter.
 *
 * For `href` and `src` attributes, replaces a value that starts with a
 * `javascript:` scheme with `#`. The match is case-insensitive and tolerates
 * leading whitespace/quote characters as well as whitespace between the
 * letters of the scheme.
 *
 * @param {string} tag   tag name (not used by this default implementation)
 * @param {string} attr  attribute name
 * @param {string} value attribute value
 * @return {string|undefined} `'#'` when the value must be replaced,
 *                            otherwise nothing (the value is kept)
 */
var defaultOnTagAttr = function (tag, attr, value) {
  if (attr === 'href' || attr === 'src') {
    // No `g` flag: a global regex keeps `lastIndex` state between `.test()`
    // calls, which is never wanted for a one-shot prefix check.
    if (/^[\s"'`]*j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*:/i.test(value)) {
      return '#';
    }
  }
};
/**
 * Filter an HTML string against a tag/attribute whitelist.
 *
 * Tags whose name is an own key of the whitelist are kept, with only their
 * whitelisted attributes; every other `<...>` sequence, and any stray `<` or
 * `>`, is escaped to `&lt;` / `&gt;`.
 *
 * Attribute values are handled as follows:
 *   1. numeric character references (`&#106;`, `&#x6A;`) and the named
 *      references `&colon;`, `&Tab;`, `&NewLine;` are decoded once;
 *   2. characters below U+0020 are replaced by a space and the value is trimmed;
 *   3. `onTagAttr(tagName, attrName, value)` is called with that text and may
 *      return a replacement value;
 *   4. the final value is written inside double quotes with `"` escaped as
 *      `&quot;`, and with any `&` that would start one of the references from
 *      step 1 escaped as `&amp;`, so the browser cannot decode it a second time.
 *
 * @param {string} html        HTML to filter (`null`/`undefined` is treated as '')
 * @param {object} [whiteList] map of lower-case tag name to array of allowed
 *                             attribute names; defaults to `exports.whiteList`.
 *                             A function passed here is taken as `onTagAttr`.
 * @param {function} [onTagAttr] `function (tagName, attrName, attrValue)`;
 *                             return a new value to replace the attribute
 *                             value, or return nothing to keep it. Defaults to
 *                             `exports.onTagAttr`.
 * @return {string} the filtered HTML
 */
var xss = module.exports = function (html, whiteList, onTagAttr) {
  'use strict';

  if (typeof whiteList === 'function') {
    onTagAttr = whiteList;
    whiteList = exports.whiteList;
  } else {
    whiteList = whiteList || exports.whiteList;
    onTagAttr = onTagAttr || exports.onTagAttr;
  }
  html = (html === null || html === undefined) ? '' : String(html);

  var hasOwn = Object.prototype.hasOwnProperty;
  var NAMED_REFS = { colon: ':', Tab: '\t', NewLine: '\n' };

  var rethtml = '';
  var lastPos = 0;       // index in `html` of the first character not yet emitted
  var tagStart = false;  // index of the '<' of the tag being scanned, or false
  var quoteStart = false; // quote character currently open inside a tag, or false

  /**
   * Escape `<` and `>` so the text cannot open or close a tag.
   */
  var noTag = function (text) {
    return text.replace(/</g, '&lt;').replace(/>/g, '&gt;');
  };

  /**
   * Decode, in a single pass, decimal/hexadecimal character references and
   * the named references listed in NAMED_REFS.
   */
  var decodeCharRefs = function (value) {
    return value.replace(
      /&(?:#[xX]([0-9a-fA-F]+);?|#([0-9]+);?|(colon|Tab|NewLine);)/g,
      function (str, hex, dec, named) {
        if (named) {
          return NAMED_REFS[named];
        }
        var code = hex ? parseInt(hex, 16) : parseInt(dec, 10);
        // Out-of-range values and lone surrogates are not valid code points.
        if (!(code <= 0x10FFFF) || (code >= 0xD800 && code <= 0xDFFF)) {
          return '\uFFFD';
        }
        if (code > 0xFFFF) {
          // Build the UTF-16 surrogate pair by hand (ES5 has no fromCodePoint).
          code -= 0x10000;
          return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        }
        return String.fromCharCode(code);
      }
    );
  };

  /**
   * Make a value safe to write between double quotes. The value is literal
   * text at this point, so an `&` that happens to start a reference handled
   * by decodeCharRefs must be escaped or the browser would decode it again.
   */
  var escapeAttrValue = function (value) {
    return String(value)
      .replace(/&(?=#|(?:colon|Tab|NewLine);)/g, '&amp;')
      .replace(/"/g, '&quot;');
  };

  /**
   * Parse the attribute part of a tag and return only the whitelisted
   * attributes, re-serialized and joined by single spaces.
   */
  var filterAttributes = function (tagName, attrs) {
    tagName = tagName.toLowerCase();
    var whites = whiteList[tagName];
    if (!Array.isArray(whites)) {
      // A non-array whitelist entry allows the tag but none of its attributes.
      whites = [];
    }
    var lastPos = 0;
    var _attrs = [];
    var tmpName = false; // name of the attribute whose value is being read
    var v;

    function addAttr(name, value) {
      name = name.replace(/[^a-zA-Z0-9_:\.\-]/g, '').toLowerCase();
      if (name.length < 1) {
        return;
      }
      if (whites.indexOf(name) === -1) {
        return;
      }
      if (value) {
        // Decode first so the callback inspects the text a browser would see.
        value = decodeCharRefs(value.trim());
        // Replace control characters (below U+0020) with spaces.
        value = value.replace(/[\x00-\x1f]/g, ' ').trim();
        var newValue = onTagAttr(tagName, name, value);
        if (typeof newValue !== 'undefined') {
          value = newValue;
        }
      }
      _attrs.push(name + (value ? '="' + escapeAttrValue(value) + '"' : ''));
    }

    for (var i = 0, len = attrs.length; i < len; i++) {
      var c = attrs[i];
      if (tmpName === false && c === '=') {
        tmpName = attrs.slice(lastPos, i);
        lastPos = i + 1;
        continue;
      }
      if (tmpName !== false) {
        // A quote directly after '=' starts a quoted value.
        if (i === lastPos && (c === '"' || c === "'")) {
          var j = attrs.indexOf(c, i + 1);
          if (j === -1) {
            break;
          }
          v = attrs.slice(lastPos + 1, j).trim();
          addAttr(tmpName, v);
          tmpName = false;
          i = j;
          lastPos = i + 1;
          continue;
        }
      }
      if (c === ' ') {
        v = attrs.slice(lastPos, i).trim();
        if (tmpName === false) {
          addAttr(v);
        } else {
          addAttr(tmpName, v);
        }
        tmpName = false;
        lastPos = i + 1;
        continue;
      }
    }
    // Flush whatever follows the last separator.
    if (lastPos < attrs.length) {
      if (tmpName === false) {
        addAttr(attrs.slice(lastPos));
      } else {
        addAttr(tmpName, attrs.slice(lastPos));
      }
    }
    return _attrs.join(' ');
  };

  /**
   * Emit the text preceding a complete tag, then the tag itself: filtered if
   * its name is whitelisted, escaped otherwise.
   */
  var addNewTag = function (tag, end) {
    rethtml += noTag(html.slice(lastPos, tagStart));
    lastPos = end + 1;
    var spos = tag.slice(0, 2) === '</' ? 2 : 1;
    var i = tag.indexOf(' ');
    var tagName;
    if (i === -1) {
      tagName = tag.slice(spos, tag.length - 1).trim();
    } else {
      tagName = tag.slice(spos, i + 1).trim();
    }
    tagName = tagName.toLowerCase();
    // Own-property check: `in` would also accept inherited keys such as
    // "constructor" or "__proto__".
    if (hasOwn.call(whiteList, tagName)) {
      if (i === -1) {
        rethtml += tag.slice(0, spos) + tagName + '>';
      } else {
        var attrs = filterAttributes(tagName, tag.slice(i + 1, tag.length - 1).trim());
        rethtml += tag.slice(0, spos) + tagName + (attrs.length > 0 ? ' ' + attrs : '') + '>';
      }
    } else {
      // Not whitelisted: escape the whole tag.
      rethtml += noTag(tag);
    }
  };

  // Scan the input character by character.
  for (var i = 0, len = html.length; i < len; i++) {
    var c = html[i];
    if (tagStart === false) {
      if (c === '<') {
        tagStart = i;
        continue;
      }
    } else {
      if (quoteStart === false) {
        if (c === '<') {
          // A new '<' before the previous one was closed: the earlier text
          // is plain text, and the tag restarts here.
          rethtml += noTag(html.slice(lastPos, i));
          tagStart = i;
          lastPos = i;
          continue;
        }
        if (c === '>') {
          addNewTag(html.slice(tagStart, i + 1), i);
          tagStart = false;
          continue;
        }
        if (c === '"' || c === "'") {
          quoteStart = c;
          continue;
        }
      } else {
        if (c === quoteStart) {
          quoteStart = false;
          continue;
        }
      }
    }
  }
  // Anything left (plain text or an unterminated tag) is escaped.
  if (lastPos < html.length) {
    rethtml += noTag(html.slice(lastPos));
  }
  return rethtml;
};
// `module.exports` was replaced by the filter function above, so the original
// `exports` object is no longer what consumers receive. Re-point `exports` at
// the function so the defaults are visible (and overridable) from outside.
exports = module.exports;
exports.whiteList = defaultWhiteList;
exports.onTagAttr = defaultOnTagAttr;