diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs
index 497fd43..dace7ae 100644
--- a/src/HtmlSanitizer/EventArgs.cs
+++ b/src/HtmlSanitizer/EventArgs.cs
@@ -7,6 +7,26 @@ using System.ComponentModel;
namespace Ganss.XSS
{
+ /// <summary>
+ /// Provides data for the <see cref="HtmlSanitizer.PostProcessDom"/> event.
+ /// </summary>
+ public class PostProcessDomEventArgs : EventArgs
+ {
+ /// <summary>
+ /// Gets or sets the document handed to the event handlers.
+ /// </summary>
+ /// <value>
+ /// The document.
+ /// </value>
+ public IHtmlDocument Document { get; set; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="PostProcessDomEventArgs"/> class.
+ /// </summary>
+ public PostProcessDomEventArgs()
+ { }
+ }
+
///
/// Provides data for the event.
///
diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs
index d379afe..f004c79 100644
--- a/src/HtmlSanitizer/HtmlSanitizer.cs
+++ b/src/HtmlSanitizer/HtmlSanitizer.cs
@@ -294,6 +294,10 @@ namespace Ganss.XSS
///
public ISet AllowedCssClasses { get; private set; }
+ /// <summary>
+ /// Occurs after sanitizing the document and post processing nodes.
+ /// </summary>
+ public event EventHandler<PostProcessDomEventArgs> PostProcessDom;
///
/// Occurs for every node after sanitizing.
///
@@ -323,6 +327,15 @@ namespace Ganss.XSS
///
public event EventHandler RemovingCssClass;
+ /// <summary>
+ /// Raises the <see cref="PostProcessDom"/> event.
+ /// </summary>
+ /// <param name="e">The <see cref="PostProcessDomEventArgs"/> instance containing the event data.</param>
+ protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
+ {
+ PostProcessDom?.Invoke(this, e);
+ }
+
///
/// Raises the event.
///
@@ -418,6 +431,20 @@ namespace Ganss.XSS
/// The formatter used to render the DOM. Using the if null.
/// The sanitized HTML body fragment.
public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
+ {
+ var sanitizedDom = SanitizeDom(html, baseUrl); // parse and sanitize into a DOM first
+ var formatter = outputFormatter ?? OutputFormatter; // explicit argument wins over the instance formatter
+ return sanitizedDom.Body.ChildNodes.ToHtml(formatter); // serialize only the body's child nodes
+ }
+
+
+ /// <summary>
+ /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
+ /// </summary>
+ /// <param name="html">The HTML body fragment to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <returns>The sanitized HTML document.</returns>
+ public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
{
var parser = HtmlParserFactory();
var dom = parser.Parse("");
@@ -425,11 +452,9 @@ namespace Ganss.XSS
DoSanitize(dom, dom.Body, baseUrl);
- var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);
-
- return output;
+ return dom;
}
-
+
///
/// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
///
@@ -630,6 +655,12 @@ namespace Ganss.XSS
}
}
}
+
+ if (PostProcessDom != null)
+ {
+ var e = new PostProcessDomEventArgs { Document = dom };
+ OnPostProcessDom(e);
+ }
}
///
diff --git a/src/HtmlSanitizer/IHtmlSanitizer.cs b/src/HtmlSanitizer/IHtmlSanitizer.cs
index 0b3ee79..ab39b0d 100644
--- a/src/HtmlSanitizer/IHtmlSanitizer.cs
+++ b/src/HtmlSanitizer/IHtmlSanitizer.cs
@@ -1,5 +1,6 @@
using AngleSharp;
using AngleSharp.Dom.Css;
+using AngleSharp.Dom.Html;
using AngleSharp.Parser.Html;
using System;
using System.Collections.Generic;
@@ -96,6 +97,11 @@ namespace Ganss.XSS
///
ISet AllowedCssClasses { get; }
+ /// <summary>
+ /// Occurs after sanitizing the document and post processing nodes.
+ /// </summary>
+ event EventHandler<PostProcessDomEventArgs> PostProcessDom;
+
///
/// Occurs for every node after sanitizing.
///
@@ -139,5 +145,22 @@ namespace Ganss.XSS
/// The formatter used to render the DOM. Using the default formatter if null.
/// The sanitized HTML.
string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
+
+ /// <summary>
+ /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned.
+ /// </summary>
+ /// <param name="html">The HTML body fragment to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <returns>The sanitized HTML document.</returns>
+ IHtmlDocument SanitizeDom(string html, string baseUrl = "");
+
+ /// <summary>
+ /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
+ /// </summary>
+ /// <param name="html">The HTML document to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param>
+ /// <returns>The sanitized HTML document.</returns>
+ string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
}
}
\ No newline at end of file
diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs
index 1135542..10f95a8 100644
--- a/test/HtmlSanitizer.Tests/Tests.cs
+++ b/test/HtmlSanitizer.Tests/Tests.cs
@@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>";
}
[Fact]
- public void PostProcessTest()
+ public void PostProcessNodeTest()
{
var sanitizer = new HtmlSanitizer();
sanitizer.PostProcessNode += (s, e) =>
@@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>";
Assert.Equal(@"HalloTest
", sanitized, ignoreCase: true);
}
+ [Fact]
+ public void PostProcessDomTest()
+ {
+ var sanitizer = new HtmlSanitizer();
+ sanitizer.PostProcessDom += (s, e) =>
+ {
+ var p = e.Document.CreateElement("p");
+ p.TextContent = "World";
+ e.Document.Body.AppendChild(p);
+ };
+
+ var html = @"Hallo
";
+ var sanitized = sanitizer.Sanitize(html);
+ Assert.Equal(@"Hallo
World
", sanitized, ignoreCase: true);
+ }
+
[Fact]
public void AutoLinkTest()
{