// File: js-xss/lib/parser.js

/**
 * Simple HTML Parser
 *
 * @author Zongmin Lei<leizongmin@gmail.com>
 */
var _ = require("./util");
/**
 * Get the tag name from the source text of a single tag.
 *
 * The name is lower-cased, and one leading "/" (closing tag) and one
 * trailing "/" (self-closing tag) are removed.
 *
 * @param {String} html e.g. '<a href="#">'
 * @return {String} e.g. 'a'
 */
function getTagName(html) {
  // NOTE(review): `_.spaceIndex` lives in ./util; presumably it returns the
  // index of the first whitespace character, or -1 when there is none.
  var i = _.spaceIndex(html);
  // Without whitespace the name is everything between "<" and ">";
  // otherwise it is everything between "<" and the first whitespace.
  var tagName = i === -1 ? html.slice(1, -1) : html.slice(1, i + 1);
  tagName = _.trim(tagName).toLowerCase();
  if (tagName.slice(0, 1) === "/") tagName = tagName.slice(1);
  if (tagName.slice(-1) === "/") tagName = tagName.slice(0, -1);
  return tagName;
}
/**
 * Is this the source text of a closing tag, i.e. does it start with "</"?
 *
 * @param {String} html e.g. '</a>'
 * @return {Boolean}
 */
function isClosing(html) {
  return html.slice(0, 2) === "</";
}
/**
 * Is the quote character at `quotePos` the start of an attribute value,
 * i.e. is the nearest preceding non-whitespace character an "="?
 *
 * The backward scan stops at index 0 instead of relying on an earlier "<"
 * to terminate it.
 *
 * @param {String} html
 * @param {Number} quotePos index of the quote character
 * @return {Boolean}
 */
function isQuoteAfterEqual(html, quotePos) {
  for (var k = quotePos - 1; k >= 0; k--) {
    var ch = html.charAt(k);
    if (ch === "=") return true;
    if (ch.trim() !== "") return false;
  }
  return false;
}

/**
 * Parse input html and return the processed html.
 *
 * Every "<...>" span found is handed to `onTag` and replaced by its return
 * value; all text outside such spans is passed through `escapeHtml`.
 * A ">" inside a quoted attribute value does not end the tag.
 *
 * @param {String} html
 * @param {Function} onTag called as
 *   `onTag(sourcePosition, position, tag, html, isClosing)` where
 *   `sourcePosition` is the index of "<" in the input, `position` is the
 *   length of the output built so far, `tag` is the tag name, `html` is the
 *   tag's source text and `isClosing` tells whether it starts with "</"
 * @param {Function} escapeHtml called with each run of non-tag text
 * @return {String}
 */
function parseTag(html, onTag, escapeHtml) {
  "use strict";

  var rethtml = "";
  var lastPos = 0; // start of the input not yet copied to the output
  var tagStart = false; // index of the pending "<", or false outside a tag
  var quoteStart = false; // the open quote character, or false
  var currentPos = 0;
  var len = html.length;
  var currentTagName = "";
  var currentHtml = "";

  for (currentPos = 0; currentPos < len; currentPos++) {
    var c = html.charAt(currentPos);

    // Outside a tag: only "<" is interesting.
    if (tagStart === false) {
      if (c === "<") tagStart = currentPos;
      continue;
    }

    // Inside a quoted attribute value: wait for the matching quote.
    if (quoteStart !== false) {
      if (c === quoteStart) quoteStart = false;
      continue;
    }

    if (c === "<") {
      // A second "<" before any ">": the earlier one was plain text.
      rethtml += escapeHtml(html.slice(lastPos, currentPos));
      tagStart = currentPos;
      lastPos = currentPos;
      continue;
    }

    if (c === ">") {
      rethtml += escapeHtml(html.slice(lastPos, tagStart));
      currentHtml = html.slice(tagStart, currentPos + 1);
      currentTagName = getTagName(currentHtml);
      rethtml += onTag(
        tagStart,
        rethtml.length,
        currentTagName,
        currentHtml,
        isClosing(currentHtml)
      );
      lastPos = currentPos + 1;
      tagStart = false;
      continue;
    }

    // A quote only opens a value when it follows "=" (whitespace allowed).
    if ((c === '"' || c === "'") && isQuoteAfterEqual(html, currentPos)) {
      quoteStart = c;
    }
  }

  // Whatever is left (including an unterminated tag) is plain text.
  if (lastPos < len) {
    rethtml += escapeHtml(html.slice(lastPos));
  }

  return rethtml;
}
// Matches every character that is NOT an ASCII letter, a digit, a backslash,
// "_", ":", "." or "-". parseAttr's addAttr() removes all such characters
// from an attribute name before the name is handed to the callback.
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9\\_:.-]/gim;
/**
 * Parse input attributes and return the processed attributes.
 *
 * Each attribute found is reported to `onAttr`; the truthy return values
 * are collected and joined with a single space.
 *
 * @param {String} html e.g. `href="#" target="_blank"`
 * @param {Function} onAttr e.g. `function (name, value)`; `name` is trimmed,
 *   stripped of illegal characters and lower-cased, `value` is "" when the
 *   attribute has none; a falsy return value drops the attribute
 * @return {String}
 */
function parseAttr(html, onAttr) {
  "use strict";

  var lastPos = 0; // start of the text not yet consumed
  var lastMarkPos = 0; // index of the quote that opens the current value
  var retAttrs = [];
  var tmpName = false; // pending attribute name, or false when none
  var len = html.length;
  var whitespaceNormalized = false;
  var v, j;

  function addAttr(name, value) {
    name = _.trim(name);
    name = name.replace(REGEXP_ILLEGAL_ATTR_NAME, "").toLowerCase();
    if (name.length < 1) return;
    var ret = onAttr(name, value || "");
    if (ret) retAttrs.push(ret);
  }

  // Analyse the input character by character.
  for (var i = 0; i < len; i++) {
    var c = html.charAt(i);

    if (tmpName === false && c === "=") {
      tmpName = html.slice(lastPos, i);
      lastPos = i + 1;
      // NOTE(review): this lookup can run before whitespace has been
      // normalised, and the helper only skips " ", so a tab or newline
      // between "=" and the quote is not skipped here.
      lastMarkPos =
        html.charAt(lastPos) === '"' || html.charAt(lastPos) === "'"
          ? lastPos
          : findNextQuotationMark(html, i + 1);
      continue;
    }

    if (tmpName !== false && i === lastMarkPos) {
      // Quoted value: jump straight to the matching closing quote.
      j = html.indexOf(c, i + 1);
      if (j === -1) break;
      v = _.trim(html.slice(lastMarkPos + 1, j));
      addAttr(tmpName, v);
      tmpName = false;
      i = j;
      lastPos = i + 1;
      continue;
    }

    if (/\s/.test(c)) {
      // The helpers below only understand " ", so turn every whitespace
      // character into a space. The replacement is idempotent and keeps the
      // length, so doing it once (at the first unquoted whitespace) gives
      // the same string as repeating it on every whitespace character.
      if (!whitespaceNormalized) {
        html = html.replace(/\s/g, " ");
        whitespaceNormalized = true;
      }

      if (tmpName === false) {
        j = findNextEqual(html, i);
        if (typeof j !== "number") {
          // The helper ran off the end of the string: only spaces remain.
          // Stop scanning; the tail handling below emits the pending name.
          break;
        }
        if (j === -1) {
          // Name without a value.
          v = _.trim(html.slice(lastPos, i));
          addAttr(v);
          tmpName = false;
          lastPos = i + 1;
        } else {
          // Spaces before "=": resume at the "=" itself.
          i = j - 1;
        }
        continue;
      }

      j = findBeforeEqual(html, i - 1);
      if (j === -1) {
        // Whitespace ends an unquoted value.
        v = _.trim(html.slice(lastPos, i));
        v = stripQuoteWrap(v);
        addAttr(tmpName, v);
        tmpName = false;
        lastPos = i + 1;
      }
      // Otherwise these are spaces right after "=": keep scanning.
      continue;
    }
  }

  // Flush whatever follows the last consumed attribute.
  if (lastPos < html.length) {
    if (tmpName === false) {
      addAttr(html.slice(lastPos));
    } else {
      addAttr(tmpName, stripQuoteWrap(_.trim(html.slice(lastPos))));
    }
  }

  return _.trim(retAttrs.join(" "));
}
/**
 * Starting at index `i`, skip space characters and report where the next
 * "=" is.
 *
 * @param {String} str
 * @param {Number} i index to start scanning from
 * @return {Number} index of the "=" when it is the first non-space
 *   character at or after `i`; -1 when another character comes first or
 *   the string ends
 */
function findNextEqual(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Only spaces (or nothing) left: report "not found" explicitly.
  return -1;
}
/**
 * Starting at index `i`, skip space characters and report where the next
 * quotation mark (' or ") is.
 *
 * @param {String} str
 * @param {Number} i index to start scanning from
 * @return {Number} index of the quotation mark when it is the first
 *   non-space character at or after `i`; -1 when another character comes
 *   first or the string ends
 */
function findNextQuotationMark(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    return c === "'" || c === '"' ? i : -1;
  }
  // Only spaces (or nothing) left: report "not found" explicitly.
  return -1;
}
/**
 * Starting at index `i` and walking backwards, skip space characters and
 * report where the preceding "=" is.
 *
 * @param {String} str
 * @param {Number} i index to start scanning from (inclusive)
 * @return {Number} index of the "=" when it is the first non-space
 *   character at or before `i`; -1 when another character comes first or
 *   the start of the string is reached
 */
function findBeforeEqual(str, i) {
  // `i >= 0` so that the character at index 0 is inspected as well.
  for (; i >= 0; i--) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Only spaces (or nothing) before `i`: report "not found" explicitly.
  return -1;
}
/**
 * Does `text` start and end with the same quotation mark (" or ')?
 *
 * A string consisting of a single quotation mark also counts, because its
 * first and last character are the same character.
 *
 * @param {String} text
 * @return {Boolean}
 */
function isQuoteWrapString(text) {
  var first = text[0];
  var last = text[text.length - 1];
  return (first === '"' && last === '"') || (first === "'" && last === "'");
}
/**
 * Remove one pair of surrounding quotation marks from `text`, if present.
 *
 * @param {String} text
 * @return {String} `text` without its first and last character when
 *   `isQuoteWrapString(text)` is true, otherwise `text` unchanged
 */
function stripQuoteWrap(text) {
  if (!isQuoteWrapString(text)) return text;
  // slice(1, -1) drops the first and the last character.
  return text.slice(1, -1);
}
// Public API of this module: the tag-level and the attribute-level parser.
exports.parseTag = parseTag;
exports.parseAttr = parseAttr;