diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs index 497fd43..dace7ae 100644 --- a/src/HtmlSanitizer/EventArgs.cs +++ b/src/HtmlSanitizer/EventArgs.cs @@ -7,6 +7,26 @@ using System.ComponentModel; namespace Ganss.XSS { + /// + /// Provides data for the event. + /// + public class PostProcessDomEventArgs : EventArgs + { + /// + /// Gets or sets the document. + /// + /// + /// The document. + /// + public IHtmlDocument Document { get; set; } + + /// + /// Initializes a new instance of the class. + /// + public PostProcessDomEventArgs() + { } + } + /// /// Provides data for the event. /// diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs index d379afe..f004c79 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.cs +++ b/src/HtmlSanitizer/HtmlSanitizer.cs @@ -294,6 +294,10 @@ namespace Ganss.XSS /// public ISet AllowedCssClasses { get; private set; } + /// + /// Occurs after sanitizing the document and post processing nodes. + /// + public event EventHandler PostProcessDom; /// /// Occurs for every node after sanitizing. /// @@ -323,6 +327,15 @@ namespace Ganss.XSS /// public event EventHandler RemovingCssClass; + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnPostProcessDom(PostProcessDomEventArgs e) + { + PostProcessDom?.Invoke(this, e); + } + /// /// Raises the event. /// @@ -418,6 +431,20 @@ namespace Ganss.XSS /// The formatter used to render the DOM. Using the if null. /// The sanitized HTML body fragment. public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null) + { + var dom = SanitizeDom(html, baseUrl); + var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); + return output; + } + + + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML Document. + public IHtmlDocument SanitizeDom(string html, string baseUrl = "") { var parser = HtmlParserFactory(); var dom = parser.Parse(""); @@ -425,11 +452,9 @@ namespace Ganss.XSS DoSanitize(dom, dom.Body, baseUrl); - var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); - - return output; + return dom; } - + /// /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. /// @@ -630,6 +655,12 @@ namespace Ganss.XSS } } } + + if (PostProcessDom != null) + { + var e = new PostProcessDomEventArgs { Document = dom }; + OnPostProcessDom(e); + } } /// diff --git a/src/HtmlSanitizer/IHtmlSanitizer.cs b/src/HtmlSanitizer/IHtmlSanitizer.cs index 0b3ee79..ab39b0d 100644 --- a/src/HtmlSanitizer/IHtmlSanitizer.cs +++ b/src/HtmlSanitizer/IHtmlSanitizer.cs @@ -1,5 +1,6 @@ using AngleSharp; using AngleSharp.Dom.Css; +using AngleSharp.Dom.Html; using AngleSharp.Parser.Html; using System; using System.Collections.Generic; @@ -96,6 +97,11 @@ namespace Ganss.XSS /// ISet AllowedCssClasses { get; } + /// + /// Occurs after sanitizing the document and post processing nodes. + /// + event EventHandler PostProcessDom; + /// /// Occurs for every node after sanitizing. /// @@ -139,5 +145,22 @@ namespace Ganss.XSS /// The formatter used to render the DOM. Using the default formatter if null. /// The sanitized HTML. string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null); + + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML Document. + IHtmlDocument SanitizeDom(string html, string baseUrl = ""); + + /// + /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. + /// + /// The HTML document to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the if null. + /// The sanitized HTML document. + string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null); } } \ No newline at end of file diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs index 1135542..10f95a8 100644 --- a/test/HtmlSanitizer.Tests/Tests.cs +++ b/test/HtmlSanitizer.Tests/Tests.cs @@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>"; } [Fact] - public void PostProcessTest() + public void PostProcessNodeTest() { var sanitizer = new HtmlSanitizer(); sanitizer.PostProcessNode += (s, e) => @@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>"; Assert.Equal(@"
HalloTest
", sanitized, ignoreCase: true); } + [Fact] + public void PostProcessDomTest() + { + var sanitizer = new HtmlSanitizer(); + sanitizer.PostProcessDom += (s, e) => + { + var p = e.Document.CreateElement("p"); + p.TextContent = "World"; + e.Document.Body.AppendChild(p); + }; + + var html = @"
Hallo
"; + var sanitized = sanitizer.Sanitize(html); + Assert.Equal(@"
Hallo

World

", sanitized, ignoreCase: true); + } + [Fact] public void AutoLinkTest() {