Merge pull request #128 from leniency/master

Added methods and event to expose the parsed and cleaned IHtmlDocument.
This commit is contained in:
Michael Ganss
2017-09-24 21:15:04 +02:00
committed by GitHub
4 changed files with 95 additions and 5 deletions

View File

@@ -7,6 +7,26 @@ using System.ComponentModel;
namespace Ganss.XSS namespace Ganss.XSS
{ {
/// <summary>
/// Event data handed to subscribers of the <see cref="HtmlSanitizer.PostProcessDom"/> event.
/// </summary>
public class PostProcessDomEventArgs : EventArgs
{
    /// <summary>
    /// Initializes a new instance of the <see cref="PostProcessDomEventArgs"/> class.
    /// </summary>
    public PostProcessDomEventArgs()
    {
    }

    /// <summary>
    /// Gets or sets the document.
    /// </summary>
    /// <value>
    /// The document carried by this event.
    /// </value>
    public IHtmlDocument Document { get; set; }
}
/// <summary> /// <summary>
/// Provides data for the <see cref="HtmlSanitizer.PostProcessNode"/> event. /// Provides data for the <see cref="HtmlSanitizer.PostProcessNode"/> event.
/// </summary> /// </summary>

View File

@@ -294,6 +294,10 @@ namespace Ganss.XSS
/// </value> /// </value>
public ISet<string> AllowedCssClasses { get; private set; } public ISet<string> AllowedCssClasses { get; private set; }
/// <summary>
/// Occurs after sanitizing the document and post processing nodes.
/// </summary>
public event EventHandler<PostProcessDomEventArgs> PostProcessDom;
/// <summary> /// <summary>
/// Occurs for every node after sanitizing. /// Occurs for every node after sanitizing.
/// </summary> /// </summary>
@@ -323,6 +327,15 @@ namespace Ganss.XSS
/// </summary> /// </summary>
public event EventHandler<RemovingCssClassEventArgs> RemovingCssClass; public event EventHandler<RemovingCssClassEventArgs> RemovingCssClass;
/// <summary>
/// Raises the <see cref="E:PostProcessDom" /> event.
/// </summary>
/// <param name="e">The <see cref="PostProcessDomEventArgs"/> instance containing the event data.</param>
protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
{
    // Take a local copy of the delegate; nothing is invoked when no handler is attached.
    var handler = PostProcessDom;
    if (handler != null)
    {
        handler(this, e);
    }
}
/// <summary> /// <summary>
/// Raises the <see cref="E:PostProcessNode" /> event. /// Raises the <see cref="E:PostProcessNode" /> event.
/// </summary> /// </summary>
@@ -418,6 +431,20 @@ namespace Ganss.XSS
/// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param> /// <param name="outputFormatter">The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null.</param>
/// <returns>The sanitized HTML body fragment.</returns> /// <returns>The sanitized HTML body fragment.</returns>
public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
{
    // Build the sanitized DOM first, then pick the formatter: the caller's one
    // when supplied, otherwise the sanitizer's OutputFormatter.
    var document = SanitizeDom(html, baseUrl);
    var formatter = outputFormatter ?? OutputFormatter;

    // Only the children of <body> are rendered into the returned markup.
    return document.Body.ChildNodes.ToHtml(formatter);
}
/// <summary>
/// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
/// </summary>
/// <param name="html">The HTML body fragment to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <returns>The sanitized HTML Document.</returns>
public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
{ {
var parser = HtmlParserFactory(); var parser = HtmlParserFactory();
var dom = parser.Parse("<html><body></body></html>"); var dom = parser.Parse("<html><body></body></html>");
@@ -425,9 +452,7 @@ namespace Ganss.XSS
DoSanitize(dom, dom.Body, baseUrl); DoSanitize(dom, dom.Body, baseUrl);
var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); return dom;
return output;
} }
/// <summary> /// <summary>
@@ -630,6 +655,12 @@ namespace Ganss.XSS
} }
} }
} }
if (PostProcessDom != null)
{
var e = new PostProcessDomEventArgs { Document = dom };
OnPostProcessDom(e);
}
} }
/// <summary> /// <summary>

View File

@@ -1,5 +1,6 @@
using AngleSharp; using AngleSharp;
using AngleSharp.Dom.Css; using AngleSharp.Dom.Css;
using AngleSharp.Dom.Html;
using AngleSharp.Parser.Html; using AngleSharp.Parser.Html;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@@ -96,6 +97,11 @@ namespace Ganss.XSS
/// </value> /// </value>
ISet<string> AllowedCssClasses { get; } ISet<string> AllowedCssClasses { get; }
/// <summary>
/// Occurs after sanitizing the document and post processing nodes.
/// </summary>
event EventHandler<PostProcessDomEventArgs> PostProcessDom;
/// <summary> /// <summary>
/// Occurs for every node after sanitizing. /// Occurs for every node after sanitizing.
/// </summary> /// </summary>
@@ -139,5 +145,22 @@ namespace Ganss.XSS
/// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param> /// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param>
/// <returns>The sanitized HTML.</returns> /// <returns>The sanitized HTML.</returns>
string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null); string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
/// <summary>
/// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
/// </summary>
/// <param name="html">The HTML body fragment to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <returns>The sanitized HTML Document.</returns>
IHtmlDocument SanitizeDom(string html, string baseUrl = "");
/// <summary>
/// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
/// </summary>
/// <param name="html">The HTML document to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param>
/// <returns>The sanitized HTML document.</returns>
string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
} }
} }

View File

@@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>";
} }
[Fact] [Fact]
public void PostProcessTest() public void PostProcessNodeTest()
{ {
var sanitizer = new HtmlSanitizer(); var sanitizer = new HtmlSanitizer();
sanitizer.PostProcessNode += (s, e) => sanitizer.PostProcessNode += (s, e) =>
@@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>";
Assert.Equal(@"<div class=""test"">Hallo<b>Test</b></div>", sanitized, ignoreCase: true); Assert.Equal(@"<div class=""test"">Hallo<b>Test</b></div>", sanitized, ignoreCase: true);
} }
[Fact]
public void PostProcessDomTest()
{
    // Arrange: a handler that appends an extra paragraph to the body of the
    // document it receives through the PostProcessDom event.
    var sanitizer = new HtmlSanitizer();
    sanitizer.PostProcessDom += (sender, args) =>
    {
        var paragraph = args.Document.CreateElement("p");
        paragraph.TextContent = "World";
        args.Document.Body.AppendChild(paragraph);
    };

    // Act
    var actual = sanitizer.Sanitize(@"<div>Hallo</div>");

    // Assert: the appended paragraph shows up in the rendered output.
    Assert.Equal(@"<div>Hallo</div><p>World</p>", actual, ignoreCase: true);
}
[Fact] [Fact]
public void AutoLinkTest() public void AutoLinkTest()
{ {