diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs
index 9ff4c54..e57d946 100644
--- a/src/HtmlSanitizer/EventArgs.cs
+++ b/src/HtmlSanitizer/EventArgs.cs
@@ -7,6 +7,26 @@ using System.ComponentModel;
namespace Ganss.XSS
{
+ /// <summary>
+ /// Provides data for the <see cref="HtmlSanitizer.PostProcessDom"/> event.
+ /// </summary>
+ public class PostProcessDomEventArgs : EventArgs
+ {
+ /// <summary>
+ /// Gets or sets the sanitized document; handlers may modify it in place.
+ /// </summary>
+ /// <value>
+ /// The document.
+ /// </value>
+ public IHtmlDocument Document { get; set; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="PostProcessDomEventArgs"/> class.
+ /// </summary>
+ public PostProcessDomEventArgs()
+ { }
+ }
+
///
/// Provides data for the event.
///
@@ -162,4 +182,34 @@ namespace Ganss.XSS
///
public IComment Comment { get; set; }
}
+
+ /// <summary>
+ /// Provides data for the <see cref="HtmlSanitizer.RemovingCssClass"/> event. Setting Cancel to true keeps the class.
+ /// </summary>
+ public class RemovingCssClassEventArgs : CancelEventArgs
+ {
+ /// <summary>
+ /// Gets or sets the tag containing the CSS class to be removed.
+ /// </summary>
+ /// <value>
+ /// The tag.
+ /// </value>
+ public IElement Tag { get; set; }
+
+ /// <summary>
+ /// Gets or sets the CSS class to be removed.
+ /// </summary>
+ /// <value>
+ /// The CSS class name.
+ /// </value>
+ public string CssClass { get; set; }
+
+ /// <summary>
+ /// Gets or sets the reason why the CSS class will be removed.
+ /// </summary>
+ /// <value>
+ /// The reason.
+ /// </value>
+ public RemoveReason Reason { get; set; }
+ }
}
diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs
index 1cf8445..1cecc27 100644
--- a/src/HtmlSanitizer/HtmlSanitizer.cs
+++ b/src/HtmlSanitizer/HtmlSanitizer.cs
@@ -53,12 +53,13 @@ namespace Ganss.XSS
/// Initializes a new instance of the class.
///
/// The allowed tag names such as "a" and "div". When null, uses
- /// The allowed HTTP schemes such as "http" and "https". When null, uses
+ /// The allowed HTTP schemes such as "http" and "https". When null, uses
/// The allowed HTML attributes such as "href" and "alt". When null, uses
- /// the HTML attributes that can contain a URI such as "href". When null, uses
- /// the allowed CSS properties such as "font" and "margin". When null, uses
+ /// The HTML attributes that can contain a URI such as "href". When null, uses
+ /// The allowed CSS properties such as "font" and "margin". When null, uses
+ /// CSS class names which are allowed in the value of a class attribute. When null, any class names are allowed.
public HtmlSanitizer(IEnumerable allowedTags = null, IEnumerable allowedSchemes = null,
- IEnumerable allowedAttributes = null, IEnumerable uriAttributes = null, IEnumerable allowedCssProperties = null)
+ IEnumerable allowedAttributes = null, IEnumerable uriAttributes = null, IEnumerable allowedCssProperties = null, IEnumerable allowedCssClasses = null)
{
AllowedTags = new HashSet(allowedTags ?? DefaultAllowedTags, StringComparer.OrdinalIgnoreCase);
AllowedSchemes = new HashSet(allowedSchemes ?? DefaultAllowedSchemes, StringComparer.OrdinalIgnoreCase);
@@ -66,6 +67,7 @@ namespace Ganss.XSS
UriAttributes = new HashSet(uriAttributes ?? DefaultUriAttributes, StringComparer.OrdinalIgnoreCase);
AllowedCssProperties = new HashSet(allowedCssProperties ?? DefaultAllowedCssProperties, StringComparer.OrdinalIgnoreCase);
AllowedAtRules = new HashSet(DefaultAllowedAtRules);
+ AllowedCssClasses = allowedCssClasses != null ? new HashSet(allowedCssClasses) : null;
}
///
@@ -282,6 +284,18 @@ namespace Ganss.XSS
set { _disallowedCssPropertyValue = value; }
}
+ /// <summary>
+ /// Gets the CSS class names allowed in class attributes. When null, class names are not filtered.
+ /// </summary>
+ /// <value>
+ /// The allowed CSS classes, or null if any class name is allowed.
+ /// </value>
+ public ISet AllowedCssClasses { get; private set; }
+
+ /// <summary>
+ /// Occurs after sanitizing the document and post processing nodes; handlers receive the document.
+ /// </summary>
+ public event EventHandler PostProcessDom;
///
/// Occurs for every node after sanitizing.
///
@@ -306,6 +320,19 @@ namespace Ganss.XSS
/// Occurs before a comment is removed.
///
public event EventHandler RemovingComment;
+ /// <summary>
+ /// Occurs before a CSS class is removed; a handler can cancel the removal.
+ /// </summary>
+ public event EventHandler RemovingCssClass;
+
+ /// <summary>
+ /// Raises the <see cref="PostProcessDom"/> event.
+ /// </summary>
+ /// <param name="e">The <see cref="PostProcessDomEventArgs"/> instance containing the event data.</param>
+ protected virtual void OnPostProcessDom(PostProcessDomEventArgs e)
+ {
+ PostProcessDom?.Invoke(this, e); // no-op when no handler is subscribed
+ }
///
/// Raises the event.
@@ -366,6 +393,15 @@ namespace Ganss.XSS
///
public static readonly Regex DefaultDisallowedCssPropertyValue = new Regex(@"[<>]", RegexOptions.Compiled);
+ /// <summary>
+ /// Raises the <see cref="RemovingCssClass"/> event.
+ /// </summary>
+ /// <param name="e">The <see cref="RemovingCssClassEventArgs"/> instance containing the event data.</param>
+ protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e)
+ {
+ RemovingCssClass?.Invoke(this, e); // no-op when no handler is subscribed
+ }
+
///
/// Return all nested subnodes of a node.
///
@@ -393,6 +429,20 @@ namespace Ganss.XSS
/// The formatter used to render the DOM. Using the if null.
/// The sanitized HTML body fragment.
public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null)
+ {
+ var dom = SanitizeDom(html, baseUrl); // sanitizing itself is delegated to SanitizeDom
+ var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); // only the body's children are serialized
+ return output;
+ }
+
+
+ /// <summary>
+ /// Sanitizes the specified HTML body fragment and returns the resulting document. If a document is given, only its body part is kept.
+ /// </summary>
+ /// <param name="html">The HTML body fragment to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <returns>The document whose body was sanitized.</returns>
+ public IHtmlDocument SanitizeDom(string html, string baseUrl = "")
{
var parser = HtmlParserFactory();
var dom = parser.ParseDocument("");
@@ -400,11 +450,9 @@ namespace Ganss.XSS
DoSanitize(dom, dom.Body, baseUrl);
- var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter);
-
- return output;
+ return dom;
}
-
+
///
/// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
///
@@ -486,15 +534,35 @@ namespace Ganss.XSS
// sanitize the style attribute
SanitizeStyle(tag, baseUrl);
+ var checkClasses = AllowedCssClasses != null; // a null whitelist means class names are not filtered
+ var allowedClasses = AllowedCssClasses?.ToArray() ?? new string[0];
+
// sanitize the value of the attributes
foreach (var attribute in tag.Attributes.ToList())
{
// The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
// (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
if (attribute.Value.Contains("&{"))
+ {
RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue);
+ }
else
- tag.SetAttribute(attribute.Name, attribute.Value);
+ {
+ if (checkClasses && attribute.Name == "class")
+ {
+ var removedClasses = tag.ClassList.Except(allowedClasses).ToArray(); // snapshot: RemoveCssClass mutates tag.ClassList
+
+ foreach(var removedClass in removedClasses)
+ RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass);
+
+ if (!tag.ClassList.Any()) // drop the attribute once no class names remain
+ RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty);
+ }
+ else
+ {
+ tag.SetAttribute(attribute.Name, attribute.Value);
+ }
+ }
}
}
@@ -584,6 +652,12 @@ namespace Ganss.XSS
}
}
}
+
+ if (PostProcessDom != null) // build the event args only when at least one handler is subscribed
+ {
+ var e = new PostProcessDomEventArgs { Document = dom };
+ OnPostProcessDom(e);
+ }
}
///
@@ -831,5 +905,18 @@ namespace Ganss.XSS
OnRemovingAtRule(e);
return !e.Cancel;
}
+
+ /// <summary>
+ /// Raises <see cref="RemovingCssClass"/> and removes the CSS class from the tag's class list unless a handler cancels.
+ /// </summary>
+ /// <param name="tag">Tag whose class list contains the class</param>
+ /// <param name="cssClass">CSS class to be removed</param>
+ /// <param name="reason">Reason why the class is being removed</param>
+ private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason)
+ {
+ var e = new RemovingCssClassEventArgs { Tag = tag, CssClass = cssClass, Reason = reason };
+ OnRemovingCssClass(e);
+ if (!e.Cancel) tag.ClassList.Remove(cssClass); // a handler that sets Cancel keeps the class
+ }
}
}
diff --git a/src/HtmlSanitizer/HtmlSanitizer.csproj b/src/HtmlSanitizer/HtmlSanitizer.csproj
index 4bfe186..f7cae6e 100644
--- a/src/HtmlSanitizer/HtmlSanitizer.csproj
+++ b/src/HtmlSanitizer/HtmlSanitizer.csproj
@@ -19,9 +19,9 @@
git://github.com/mganss/HtmlSanitizer
$(PackageTargetFallback);dotnet
false
- app.net40.config
- false
- Ganss.XSS
+ app.net40.config
+ false
+ Ganss.XSS
@@ -53,8 +53,7 @@
-
-
+
diff --git a/src/HtmlSanitizer/IHtmlSanitizer.cs b/src/HtmlSanitizer/IHtmlSanitizer.cs
index 1da7619..ab39b0d 100644
--- a/src/HtmlSanitizer/IHtmlSanitizer.cs
+++ b/src/HtmlSanitizer/IHtmlSanitizer.cs
@@ -1,4 +1,7 @@
using AngleSharp;
+using AngleSharp.Dom.Css;
+using AngleSharp.Dom.Html;
+using AngleSharp.Parser.Html;
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
@@ -11,6 +14,29 @@ namespace Ganss.XSS
///
public interface IHtmlSanitizer
{
+ ///
+ /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is .
+ ///
+ bool KeepChildNodes { get; set; }
+
+ /// <summary>
+ /// Gets or sets the factory that creates the parser used for parsing the input.
+ /// </summary>
+ Func HtmlParserFactory { get; set; }
+
+ ///
+ /// Gets or sets the object used for generating output. Default is .
+ ///
+ IMarkupFormatter OutputFormatter { get; set; }
+
+ /// <summary>
+ /// Gets the allowed CSS at-rules such as "@media" and "@font-face".
+ /// </summary>
+ /// <value>
+ /// The allowed CSS at-rules.
+ /// </value>
+ ISet AllowedAtRules { get; }
+
///
/// Gets or sets the allowed HTTP schemes such as "http" and "https".
///
@@ -64,6 +90,18 @@ namespace Ganss.XSS
///
Regex DisallowCssPropertyValue { get; set; }
+ /// <summary>Gets the allowed CSS classes. When null, any class names are allowed.
+ /// </summary>
+ /// <value>
+ /// The allowed CSS classes.
+ /// </value>
+ ISet AllowedCssClasses { get; }
+
+ /// <summary>
+ /// Occurs after sanitizing the document and post processing nodes; handlers receive the document.
+ /// </summary>
+ event EventHandler PostProcessDom;
+
///
/// Occurs for every node after sanitizing.
///
@@ -84,6 +122,21 @@ namespace Ganss.XSS
///
event EventHandler RemovingStyle;
+ /// <summary>
+ /// Occurs before an at-rule is removed.
+ /// </summary>
+ event EventHandler RemovingAtRule;
+
+ /// <summary>
+ /// Occurs before a comment is removed.
+ /// </summary>
+ event EventHandler RemovingComment;
+
+ /// <summary>
+ /// Occurs before a CSS class is removed.
+ /// </summary>
+ event EventHandler RemovingCssClass;
+
///
/// Sanitizes the specified HTML.
///
@@ -92,5 +145,22 @@ namespace Ganss.XSS
/// The formatter used to render the DOM. Using the default formatter if null.
/// The sanitized HTML.
string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
+
+ /// <summary>
+ /// Sanitizes the specified HTML body fragment and returns the resulting document. If a document is given, only its body part is kept.
+ /// </summary>
+ /// <param name="html">The HTML body fragment to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <returns>The sanitized HTML document.</returns>
+ IHtmlDocument SanitizeDom(string html, string baseUrl = "");
+
+ /// <summary>
+ /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned.
+ /// </summary>
+ /// <param name="html">The HTML document to sanitize.</param>
+ /// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
+ /// <param name="outputFormatter">The formatter used to render the DOM. Using the default formatter if null.</param>
+ /// <returns>The sanitized HTML document.</returns>
+ string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null);
}
}
\ No newline at end of file
diff --git a/src/HtmlSanitizer/RemoveReason.cs b/src/HtmlSanitizer/RemoveReason.cs
index 78ee2e1..d562291 100644
--- a/src/HtmlSanitizer/RemoveReason.cs
+++ b/src/HtmlSanitizer/RemoveReason.cs
@@ -25,5 +25,13 @@
/// Value is not allowed or harmful
///
NotAllowedValue,
+ /// <summary>
+ /// CSS class is not in the list of allowed CSS classes
+ /// </summary>
+ NotAllowedCssClass,
+ /// <summary>
+ /// The class attribute contains no class names (for example after disallowed classes were removed)
+ /// </summary>
+ ClassAttributeEmpty
}
}
diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs
index 007fdd6..2efb75a 100644
--- a/test/HtmlSanitizer.Tests/Tests.cs
+++ b/test/HtmlSanitizer.Tests/Tests.cs
@@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>";
}
[Fact]
- public void PostProcessTest()
+ public void PostProcessNodeTest()
{
var sanitizer = new HtmlSanitizer();
sanitizer.PostProcessNode += (s, e) =>
@@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>";
Assert.Equal(@"HalloTest
", sanitized, ignoreCase: true);
}
+ [Fact]
+ public void PostProcessDomTest() // a PostProcessDom handler can append nodes that show up in the output
+ {
+ var sanitizer = new HtmlSanitizer();
+ sanitizer.PostProcessDom += (s, e) =>
+ {
+ var p = e.Document.CreateElement("p");
+ p.TextContent = "World";
+ e.Document.Body.AppendChild(p); // appended after sanitizing, so it is not filtered again
+ };
+
+ var html = @"Hallo
";
+ var sanitized = sanitizer.Sanitize(html);
+ Assert.Equal(@"Hallo
World
", sanitized, ignoreCase: true);
+ }
+
[Fact]
public void AutoLinkTest()
{
@@ -2499,6 +2515,44 @@ rl(javascript:alert(""foo""))'>";
Assert.Equal(RemoveReason.NotAllowedTag, actual);
}
+ [Fact]
+ public void RemoveEventForNotAllowedCssClass() // RemovingCssClass must report the class name and NotAllowedCssClass
+ {
+ RemoveReason? reason = null;
+ string removedClass = null;
+
+ var s = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "good" }); // only "good" is allowed
+ s.RemovingCssClass += (sender, args) =>
+ {
+ reason = args.Reason;
+ removedClass = args.CssClass;
+ };
+
+ s.Sanitize(@"Test
");
+
+ Assert.Equal("bad", removedClass);
+ Assert.Equal(RemoveReason.NotAllowedCssClass, reason);
+ }
+
+ [Fact]
+ public void RemoveEventForEmptyClassAttributeAfterClassRemoval() // RemovingAttribute must fire for "class" with ClassAttributeEmpty
+ {
+ RemoveReason? reason = null;
+ string attributeName = null;
+
+ var s = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "other" }); // only "other" is allowed
+ s.RemovingAttribute += (sender, args) =>
+ {
+ attributeName = args.Attribute.Name;
+ reason = args.Reason;
+ };
+
+ s.Sanitize(@"Test
");
+
+ Assert.Equal("class", attributeName);
+ Assert.Equal(RemoveReason.ClassAttributeEmpty, reason);
+ }
+
[Fact]
public void DocumentTest()
{
@@ -2844,9 +2898,31 @@ zqy1QY1kkPOuMvKWvvmFIwClI2393jVVcp91eda4+J+fIYDbfJa7RY5YcNrZhTuV//9k="">
Assert.Equal(0, failures);
}
}
+
+ [Fact]
+ public void AllowAllClassesByDefaultTest() // without allowedCssClasses no class filtering is configured
+ {
+ var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" });
+
+ var html = @"Test
";
+ var actual = sanitizer.Sanitize(html);
+
+ Assert.Equal(@"Test
", actual);
+ }
[Fact]
public void AllowClassesTest()
+ {
+ var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "good" }); // only "good" is allowed
+
+ var html = @"Test
";
+ var actual = sanitizer.Sanitize(html);
+
+ Assert.Equal(@"Test
", actual);
+ }
+
+ [Fact]
+ public void AllowClassesUsingEventTest()
{
var sanitizer = new HtmlSanitizer();
sanitizer.RemovingAttribute += (s, e) =>
@@ -2864,6 +2940,29 @@ zqy1QY1kkPOuMvKWvvmFIwClI2393jVVcp91eda4+J+fIYDbfJa7RY5YcNrZhTuV//9k="">
Assert.Equal(@"Test
", actual);
}
+ [Fact]
+ public void RemoveClassAttributeIfNoAllowedClassesTest() // an empty (non-null) whitelist enables filtering with nothing allowed
+ {
+ // Empty array for allowed classes = no classes allowed
+ var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new string[0]);
+
+ var html = @"Test
";
+ var actual = sanitizer.Sanitize(html);
+
+ Assert.Equal(@"Test
", actual);
+ }
+
+ [Fact]
+ public void RemoveClassAttributeIfEmptyTest() // the sanitizer is configured to allow only "other"
+ {
+ var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "other" });
+
+ var html = @"Test
";
+ var actual = sanitizer.Sanitize(html);
+
+ Assert.Equal(@"Test
", actual);
+ }
+
[Fact]
public void TextTest()
{