* Figure https://developer.mozilla.org/en-US/docs/Web/HTML/Element/figure * Figcaption https://developer.mozilla.org/en-US/docs/Web/HTML/Element/figcaption Most RSS feeds use these tags to wrap media content. I propose adding them to the default whitelist because they do not require any attributes and do not open any XSS vulnerability.
422 lines
9.8 KiB
JavaScript
422 lines
9.8 KiB
JavaScript
/**
|
|
* default settings
|
|
*
|
|
* @author Zongmin Lei<leizongmin@gmail.com>
|
|
*/
|
|
|
|
var FilterCSS = require("cssfilter").FilterCSS;
|
|
var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList;
|
|
var _ = require("./util");
|
|
|
|
/**
 * Build the default tag whitelist.
 *
 * Each key is an allowed tag name and each value is the list of attribute
 * names allowed on that tag (an empty list means no attributes are allowed).
 * A new object is created on every call, so callers may mutate the result.
 *
 * @return {Object} map of tag name to array of allowed attribute names
 */
function getDefaultWhiteList() {
  return {
    a: ["target", "href", "title"],
    abbr: ["title"],
    address: [],
    area: ["shape", "coords", "href", "alt"],
    article: [],
    aside: [],
    audio: ["autoplay", "controls", "loop", "preload", "src"],
    b: [],
    bdi: ["dir"],
    bdo: ["dir"],
    big: [],
    blockquote: ["cite"],
    br: [],
    caption: [],
    center: [],
    cite: [],
    code: [],
    col: ["align", "valign", "span", "width"],
    colgroup: ["align", "valign", "span", "width"],
    dd: [],
    del: ["datetime"],
    details: ["open"],
    div: [],
    dl: [],
    dt: [],
    em: [],
    figcaption: [],
    figure: [],
    font: ["color", "size", "face"],
    footer: [],
    h1: [],
    h2: [],
    h3: [],
    h4: [],
    h5: [],
    h6: [],
    header: [],
    hr: [],
    i: [],
    img: ["src", "alt", "title", "width", "height"],
    ins: ["datetime"],
    li: [],
    mark: [],
    nav: [],
    ol: [],
    p: [],
    pre: [],
    s: [],
    section: [],
    small: [],
    span: [],
    sub: [],
    sup: [],
    strong: [],
    table: ["width", "border", "align", "valign"],
    tbody: ["align", "valign"],
    td: ["width", "rowspan", "colspan", "align", "valign"],
    tfoot: ["align", "valign"],
    th: ["width", "rowspan", "colspan", "align", "valign"],
    thead: ["align", "valign"],
    tr: ["rowspan", "align", "valign"],
    tt: [],
    u: [],
    ul: [],
    video: ["autoplay", "controls", "loop", "preload", "src", "height", "width"]
  };
}
|
|
|
|
// Shared CSS filter instance, constructed with no arguments. safeAttrValue()
// falls back to it when no cssFilter is passed, and it is exported as `cssFilter`.
var defaultCSSFilter = new FilterCSS();
|
|
|
|
/**
 * default onTag function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to apply
 * its own default handling - confirm against the caller.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onTag(tag, html, options) {
  // do nothing
}
|
|
|
|
/**
 * default onIgnoreTag function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to apply
 * its own default handling - confirm against the caller.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onIgnoreTag(tag, html, options) {
  // do nothing
}
|
|
|
|
/**
 * default onTagAttr function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to apply
 * its own default handling - confirm against the caller.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onTagAttr(tag, name, value) {
  // do nothing
}
|
|
|
|
/**
 * default onIgnoreTagAttr function
 *
 * Intentionally empty: it ignores its arguments and returns `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to apply
 * its own default handling - confirm against the caller.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onIgnoreTagAttr(tag, name, value) {
  // do nothing
}
|
|
|
|
/**
 * default escapeHtml function
 *
 * Replaces every `<` with `&lt;` and every `>` with `&gt;` so the text can no
 * longer open or close a tag. Other characters are left untouched.
 *
 * @param {String} html
 * @return {String}
 */
function escapeHtml(html) {
  return html.replace(REGEXP_LT, "&lt;").replace(REGEXP_GT, "&gt;");
}
|
|
|
|
/**
 * default safeAttrValue function
 *
 * Normalises an attribute value with friendlyAttrValue(), applies
 * attribute-specific checks, and finally escapes the result with
 * escapeAttrValue(). An empty string is returned when a check rejects
 * the value.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @param {Object} cssFilter object with a `process(value)` method used for
 *   `style` values; pass `false` to skip CSS filtering, or omit it to use the
 *   module-level default filter
 * @return {String}
 */
function safeAttrValue(tag, name, value, cssFilter) {
  // unescape attribute value firstly
  value = friendlyAttrValue(value);

  if (name === "href" || name === "src") {
    // filter `href` and `src` attribute
    // only allow values that start with `http://` | `https://` | `mailto:` |
    // `tel:` | `data:image/` | `ftp://` | `./` | `../` | `#` | `/`
    value = _.trim(value);
    if (value === "#") return "#";
    if (
      !(
        value.substr(0, 7) === "http://" ||
        value.substr(0, 8) === "https://" ||
        value.substr(0, 7) === "mailto:" ||
        value.substr(0, 4) === "tel:" ||
        value.substr(0, 11) === "data:image/" ||
        value.substr(0, 6) === "ftp://" ||
        value.substr(0, 2) === "./" ||
        value.substr(0, 3) === "../" ||
        value[0] === "#" ||
        value[0] === "/"
      )
    ) {
      return "";
    }
  } else if (name === "background") {
    // filter `background` attribute (maybe no use)
    // `javascript:`
    // the regexp is global, so reset lastIndex before every test()
    REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
      return "";
    }
  } else if (name === "style") {
    // `expression()`
    REGEXP_DEFAULT_ON_TAG_ATTR_7.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_7.test(value)) {
      return "";
    }
    // `url()` combined with a script-like scheme
    REGEXP_DEFAULT_ON_TAG_ATTR_8.lastIndex = 0;
    if (REGEXP_DEFAULT_ON_TAG_ATTR_8.test(value)) {
      REGEXP_DEFAULT_ON_TAG_ATTR_4.lastIndex = 0;
      if (REGEXP_DEFAULT_ON_TAG_ATTR_4.test(value)) {
        return "";
      }
    }
    if (cssFilter !== false) {
      cssFilter = cssFilter || defaultCSSFilter;
      value = cssFilter.process(value);
    }
  }

  // escape `<>"` before returns
  value = escapeAttrValue(value);
  return value;
}
|
|
|
|
// RegExp list
// All patterns are global; callers that use test() must reset lastIndex first.
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
var REGEXP_QUOTE = /"/g;
// matches the escaped form of a double quote (the inverse of REGEXP_QUOTE)
var REGEXP_QUOTE_2 = /&quot;/g;
// numeric character references, e.g. `&#106;` or `&#x6A;` (semicolon optional)
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
// HTML5 named references for `:` and newline (semicolon optional)
var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
// script-like URL schemes, tolerating whitespace between the letters
var REGEXP_DEFAULT_ON_TAG_ATTR_4 = /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/gi;
var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
// CSS `expression(` and `url(`
var REGEXP_DEFAULT_ON_TAG_ATTR_7 = /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;
|
|
|
|
/**
 * escape double quote
 *
 * Replaces every `"` with the entity `&quot;`.
 *
 * @param {String} str
 * @return {String} str
 */
function escapeQuote(str) {
  return str.replace(REGEXP_QUOTE, "&quot;");
}
|
|
|
|
/**
 * unescape double quote
 *
 * Replaces every match of REGEXP_QUOTE_2 with a literal `"`.
 *
 * @param {String} str
 * @return {String} str
 */
function unescapeQuote(str) {
  return str.replace(REGEXP_QUOTE_2, '"');
}
|
|
|
|
/**
 * decode numeric html entities
 *
 * Despite its name, this function decodes: every match of
 * REGEXP_ATTR_VALUE_1 is replaced by the character whose code is captured in
 * the match. A capture starting with `x`/`X` is parsed as hexadecimal,
 * anything else as decimal.
 *
 * @param {String} str
 * @return {String}
 */
function escapeHtmlEntities(str) {
  return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode(match, code) {
    if (code[0] === "x" || code[0] === "X") {
      return String.fromCharCode(parseInt(code.substr(1), 16));
    }
    return String.fromCharCode(parseInt(code, 10));
  });
}
|
|
|
|
/**
 * decode html5 new danger entities
 *
 * Replaces every match of REGEXP_ATTR_VALUE_COLON with `:` and every match of
 * REGEXP_ATTR_VALUE_NEWLINE with a single space.
 *
 * @param {String} str
 * @return {String}
 */
function escapeDangerHtml5Entities(str) {
  var withColons = str.replace(REGEXP_ATTR_VALUE_COLON, ":");
  return withColons.replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
}
|
|
|
|
/**
 * clear nonprintable characters
 *
 * Replaces every character whose code unit is below 32 with a space, then
 * trims the result with `_.trim`.
 *
 * @param {String} str
 * @return {String}
 */
function clearNonPrintableCharacter(str) {
  var cleaned = "";
  for (var i = 0, len = str.length; i < len; i++) {
    cleaned += str.charCodeAt(i) < 32 ? " " : str.charAt(i);
  }
  return _.trim(cleaned);
}
|
|
|
|
/**
 * get friendly attribute value
 *
 * Runs the value through, in order: unescapeQuote, escapeHtmlEntities,
 * escapeDangerHtml5Entities and clearNonPrintableCharacter.
 *
 * @param {String} str
 * @return {String}
 */
function friendlyAttrValue(str) {
  str = unescapeQuote(str);
  str = escapeHtmlEntities(str);
  str = escapeDangerHtml5Entities(str);
  str = clearNonPrintableCharacter(str);
  return str;
}
|
|
|
|
/**
 * escape attribute value
 *
 * Applies escapeQuote and then escapeHtml to the value.
 *
 * @param {String} str
 * @return {String}
 */
function escapeAttrValue(str) {
  str = escapeQuote(str);
  str = escapeHtml(str);
  return str;
}
|
|
|
|
/**
 * `onIgnoreTag` function for removing all the tags that are not in whitelist
 *
 * Ignores its arguments and always returns an empty string.
 *
 * @return {String} always `""`
 */
function onIgnoreTagStripAll() {
  return "";
}
|
|
|
|
/**
 * remove tag body
 * specify a `tags` list, if the tag is not in the `tags` list then process by the specify function (optional)
 *
 * Returns a pair of cooperating functions. `onIgnoreTag` replaces matching
 * opening/closing tags with `[removed]` / `[/removed]` placeholders and
 * records the span from the first opening placeholder to the end of the
 * closing placeholder. `remove` then cuts every recorded span out of the
 * given html.
 *
 * @param {array} tags tag names whose body is removed; any non-array value
 *   means "every tag"
 * @param {function} next handler for tags that are not in `tags`
 * @return {Object} `{ onIgnoreTag, remove }`
 */
function StripTagBody(tags, next) {
  if (typeof next !== "function") {
    next = function() {};
  }

  var isRemoveAllTag = !Array.isArray(tags);
  function isRemoveTag(tag) {
    if (isRemoveAllTag) return true;
    return _.indexOf(tags, tag) !== -1;
  }

  // [start, end) spans to cut, in the order their closing tags were seen
  var removeList = [];
  // start of the span currently open, or `false` when none is open
  var posStart = false;

  return {
    onIgnoreTag: function(tag, html, options) {
      if (!isRemoveTag(tag)) {
        return next(tag, html, options);
      }

      if (options.isClosing) {
        var ret = "[/removed]";
        var end = options.position + ret.length;
        removeList.push([
          posStart !== false ? posStart : options.position,
          end
        ]);
        posStart = false;
        return ret;
      }

      // Compare against `false` explicitly: position 0 is a valid start and
      // must not be overwritten by a later (nested) opening tag.
      if (posStart === false) {
        posStart = options.position;
      }
      return "[removed]";
    },
    remove: function(html) {
      var rethtml = "";
      var lastPos = 0;
      _.forEach(removeList, function(pos) {
        rethtml += html.slice(lastPos, pos[0]);
        lastPos = pos[1];
      });
      rethtml += html.slice(lastPos);
      return rethtml;
    }
  };
}
|
|
|
|
/**
 * remove html comments
 *
 * Deletes every `<!-- ... -->` span, including comments that span several
 * lines. The match is non-greedy, so each comment ends at its first `-->`.
 *
 * @param {String} html
 * @return {String}
 */
function stripCommentTag(html) {
  return html.replace(STRIP_COMMENT_TAG_REGEXP, "");
}
var STRIP_COMMENT_TAG_REGEXP = /<!--[\s\S]*?-->/g;
|
|
|
|
/**
 * remove invisible characters
 *
 * Drops DEL (code 127) and every character with a code of 31 or lower,
 * except line feed (10) and carriage return (13), which are kept.
 *
 * @param {String} html
 * @return {String}
 */
function stripBlankChar(html) {
  var chars = html.split("");
  chars = chars.filter(function(ch) {
    var c = ch.charCodeAt(0);
    if (c === 127) return false;
    if (c <= 31) {
      // of the control characters, only line breaks are kept
      return c === 10 || c === 13;
    }
    return true;
  });
  return chars.join("");
}
|
|
|
|
// Public API of this module.
// `whiteList` is built once at load time; `getDefaultWhiteList` builds a
// fresh copy on every call.
exports.whiteList = getDefaultWhiteList();
exports.getDefaultWhiteList = getDefaultWhiteList;
exports.onTag = onTag;
exports.onIgnoreTag = onIgnoreTag;
exports.onTagAttr = onTagAttr;
exports.onIgnoreTagAttr = onIgnoreTagAttr;
exports.safeAttrValue = safeAttrValue;
exports.escapeHtml = escapeHtml;
exports.escapeQuote = escapeQuote;
exports.unescapeQuote = unescapeQuote;
exports.escapeHtmlEntities = escapeHtmlEntities;
exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
exports.friendlyAttrValue = friendlyAttrValue;
exports.escapeAttrValue = escapeAttrValue;
exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
exports.StripTagBody = StripTagBody;
exports.stripCommentTag = stripCommentTag;
exports.stripBlankChar = stripBlankChar;
exports.cssFilter = defaultCSSFilter;
exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList;
|