Added method SantizeDom that returns the parsed and cleaned IHtmlDocument. This exposes the result for further DOM manipulation without requiring another parse.

Also added a PostProcessDom event that runs after PostProcessNode. This also allows the whole DOM to be further manipulated without executing on every node or re-parsing the document.
This commit is contained in:
Levi Baker
2017-09-22 10:37:38 -07:00
parent f3d23f8b25
commit 919a71dba2
3 changed files with 71 additions and 5 deletions

View File

@@ -7,6 +7,26 @@ using System.ComponentModel;
namespace Ganss.XSS
{
/// <summary>
/// Provides data for the <see cref="HtmlSanitizer.PostProcessDom"/> event.
/// </summary>
public class PostProcessDomEventArgs : EventArgs
{
    /// <summary>
    /// Gets or sets the document that is handed to the event handlers.
    /// </summary>
    /// <value>
    /// The document.
    /// </value>
    public IHtmlDocument Document { get; set; }

    /// <summary>
    /// Initializes a new instance of the <see cref="PostProcessDomEventArgs"/> class.
    /// <see cref="Document"/> is left unset and is expected to be assigned by the caller.
    /// </summary>
    public PostProcessDomEventArgs()
    { }
}
/// <summary>
/// Provides data for the <see cref="HtmlSanitizer.PostProcessNode"/> event.
/// </summary>

View File

@@ -294,6 +294,10 @@ namespace Ganss.XSS
/// </value>
public ISet<string> AllowedCssClasses { get; private set; }
/// <summary>
/// Occurs after sanitizing the document and post processing nodes.
/// The event data exposes the document through <see cref="PostProcessDomEventArgs.Document"/>,
/// so a handler can work on the DOM as a whole.
/// </summary>
public event EventHandler<PostProcessDomEventArgs> PostProcessDom;
/// <summary>
/// Occurs for every node after sanitizing.
/// </summary>
@@ -323,6 +327,15 @@ namespace Ganss.XSS
/// </summary>
public event EventHandler<RemovingCssClassEventArgs> RemovingCssClass;
/// <summary>
/// Notifies the subscribers of the <see cref="E:PostProcessDom" /> event, if there are any.
/// </summary>
/// <param name="e">The <see cref="PostProcessDomEventArgs"/> passed to every handler.</param>
protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
{
    // Take a snapshot of the delegate so the null check and the call see the same value.
    var handler = PostProcessDom;
    if (handler != null)
    {
        handler(this, e);
    }
}
/// <summary>
/// Raises the <see cref="E:PostProcessNode" /> event.
/// </summary>
@@ -418,6 +431,19 @@ namespace Ganss.XSS
/// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
/// <returns>The sanitized HTML body fragment.</returns>
public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
{
    // Sanitize first; the formatter is resolved only afterwards.
    var sanitizedDocument = SantizeDom(html, baseUrl);

    // Use the instance-level formatter when the caller did not pass one.
    var formatter = outputFormatter ?? OutputFormatter;

    // Render the child nodes of the body element.
    return sanitizedDocument.Body.ChildNodes.ToHtml(formatter);
}
/// <summary>
/// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
/// </summary>
/// <param name="html">The HTML body fragment to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <returns>The sanitized HTML Document.</returns>
public IHtmlDocument SantizeDom(string html, string baseUrl = "")
{
var parser = HtmlParserFactory();
var dom = parser.Parse("<html><body></body></html>");
@@ -425,11 +451,9 @@ namespace Ganss.XSS
DoSanitize(dom, dom.Body, baseUrl);
var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);
return output;
return dom;
}
/// <summary>
/// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
/// </summary>
@@ -630,6 +654,12 @@ namespace Ganss.XSS
}
}
}
if (PostProcessDom != null)
{
var e = new PostProcessDomEventArgs { Document = dom };
OnPostProcessDom(e);
}
}
/// <summary>

View File

@@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>";
}
[Fact]
public void PostProcessTest()
public void PostProcessNodeTest()
{
var sanitizer = new HtmlSanitizer();
sanitizer.PostProcessNode += (s, e) =>
@@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>";
Assert.Equal(@"<div class=""test"">Hallo<b>Test</b></div>", sanitized, ignoreCase: true);
}
[Fact]
public void PostProcessDomTest()
{
    // Arrange: a handler that appends a <p>World</p> element to the body of the document.
    var sanitizer = new HtmlSanitizer();
    sanitizer.PostProcessDom += (sender, args) =>
    {
        var document = args.Document;
        var paragraph = document.CreateElement("p");
        paragraph.TextContent = "World";
        document.Body.AppendChild(paragraph);
    };

    // Act
    var actual = sanitizer.Sanitize(@"<div>Hallo</div>");

    // Assert: the appended paragraph shows up after the original markup.
    Assert.Equal(@"<div>Hallo</div><p>World</p>", actual, ignoreCase: true);
}
[Fact]
public void AutoLinkTest()
{