diff --git a/src/HtmlSanitizer/EventArgs.cs b/src/HtmlSanitizer/EventArgs.cs index 9ff4c54..e57d946 100644 --- a/src/HtmlSanitizer/EventArgs.cs +++ b/src/HtmlSanitizer/EventArgs.cs @@ -7,6 +7,26 @@ using System.ComponentModel; namespace Ganss.XSS { + /// + /// Provides data for the event. + /// + public class PostProcessDomEventArgs : EventArgs + { + /// + /// Gets or sets the document. + /// + /// + /// The document. + /// + public IHtmlDocument Document { get; set; } + + /// + /// Initializes a new instance of the class. + /// + public PostProcessDomEventArgs() + { } + } + /// /// Provides data for the event. /// @@ -162,4 +182,34 @@ namespace Ganss.XSS /// public IComment Comment { get; set; } } + + /// + /// Provides data for the event. + /// + public class RemovingCssClassEventArgs : CancelEventArgs + { + /// + /// Gets or sets the tag containing the CSS class to be removed. + /// + /// + /// The tag. + /// + public IElement Tag { get; set; } + + /// + /// Gets or sets the CSS class to be removed. + /// + /// + /// The CSS class. + /// + public string CssClass { get; set; } + + /// + /// Gets or sets the reason why the CSS class will be removed. + /// + /// + /// The reason. + /// + public RemoveReason Reason { get; set; } + } } diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs index 1cf8445..1cecc27 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.cs +++ b/src/HtmlSanitizer/HtmlSanitizer.cs @@ -53,12 +53,13 @@ namespace Ganss.XSS /// Initializes a new instance of the class. /// /// The allowed tag names such as "a" and "div". When null, uses - /// The allowed HTTP schemes such as "http" and "https". When null, uses + /// The allowed HTTP schemes such as "http" and "https". When null, uses /// The allowed HTML attributes such as "href" and "alt". When null, uses - /// the HTML attributes that can contain a URI such as "href". When null, uses - /// the allowed CSS properties such as "font" and "margin". 
When null, uses + /// The HTML attributes that can contain a URI such as "href". When null, uses + /// The allowed CSS properties such as "font" and "margin". When null, uses + /// CSS class names which are allowed in the value of a class attribute. When null, any class names are allowed. public HtmlSanitizer(IEnumerable allowedTags = null, IEnumerable allowedSchemes = null, - IEnumerable allowedAttributes = null, IEnumerable uriAttributes = null, IEnumerable allowedCssProperties = null) + IEnumerable allowedAttributes = null, IEnumerable uriAttributes = null, IEnumerable allowedCssProperties = null, IEnumerable allowedCssClasses = null) { AllowedTags = new HashSet(allowedTags ?? DefaultAllowedTags, StringComparer.OrdinalIgnoreCase); AllowedSchemes = new HashSet(allowedSchemes ?? DefaultAllowedSchemes, StringComparer.OrdinalIgnoreCase); @@ -66,6 +67,7 @@ namespace Ganss.XSS UriAttributes = new HashSet(uriAttributes ?? DefaultUriAttributes, StringComparer.OrdinalIgnoreCase); AllowedCssProperties = new HashSet(allowedCssProperties ?? DefaultAllowedCssProperties, StringComparer.OrdinalIgnoreCase); AllowedAtRules = new HashSet(DefaultAllowedAtRules); + AllowedCssClasses = allowedCssClasses != null ? new HashSet(allowedCssClasses) : null; } /// @@ -282,6 +284,18 @@ namespace Ganss.XSS set { _disallowedCssPropertyValue = value; } } + /// + /// Gets or sets the allowed CSS classes. + /// + /// + /// The allowed CSS classes. + /// + public ISet AllowedCssClasses { get; private set; } + + /// + /// Occurs after sanitizing the document and post processing nodes. + /// + public event EventHandler PostProcessDom; /// /// Occurs for every node after sanitizing. /// @@ -306,6 +320,19 @@ namespace Ganss.XSS /// Occurs before a comment is removed. /// public event EventHandler RemovingComment; + /// + /// Occurs before a CSS class is removed. + /// + public event EventHandler RemovingCssClass; + + /// + /// Raises the event. + /// + /// The instance containing the event data. 
+ protected virtual void OnPostProcessDom(PostProcessDomEventArgs e) + { + PostProcessDom?.Invoke(this, e); + } /// /// Raises the event. @@ -366,6 +393,15 @@ namespace Ganss.XSS /// public static readonly Regex DefaultDisallowedCssPropertyValue = new Regex(@"[<>]", RegexOptions.Compiled); + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingCssClass(RemovingCssClassEventArgs e) + { + RemovingCssClass?.Invoke(this, e); + } + /// /// Return all nested subnodes of a node. /// @@ -393,6 +429,20 @@ namespace Ganss.XSS /// The formatter used to render the DOM. Using the if null. /// The sanitized HTML body fragment. public string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null) + { + var dom = SanitizeDom(html, baseUrl); + var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); + return output; + } + + + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML Document. + public IHtmlDocument SanitizeDom(string html, string baseUrl = "") { var parser = HtmlParserFactory(); var dom = parser.ParseDocument(""); @@ -400,11 +450,9 @@ namespace Ganss.XSS DoSanitize(dom, dom.Body, baseUrl); - var output = dom.Body.ChildNodes.ToHtml(outputFormatter ?? OutputFormatter); - - return output; + return dom; } - + /// /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. /// @@ -486,15 +534,35 @@ namespace Ganss.XSS // sanitize the style attribute SanitizeStyle(tag, baseUrl); + var checkClasses = AllowedCssClasses != null; + var allowedTags = AllowedCssClasses?.ToArray() ?? 
new string[0]; + // sanitize the value of the attributes foreach (var attribute in tag.Attributes.ToList()) { // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS. // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes) if (attribute.Value.Contains("&{")) + { RemoveAttribute(tag, attribute, RemoveReason.NotAllowedValue); + } else - tag.SetAttribute(attribute.Name, attribute.Value); + { + if (checkClasses && attribute.Name == "class") + { + var removedClasses = tag.ClassList.Except(allowedTags).ToArray(); + + foreach(var removedClass in removedClasses) + RemoveCssClass(tag, removedClass, RemoveReason.NotAllowedCssClass); + + if (!tag.ClassList.Any()) + RemoveAttribute(tag, attribute, RemoveReason.ClassAttributeEmpty); + } + else + { + tag.SetAttribute(attribute.Name, attribute.Value); + } + } } } @@ -584,6 +652,12 @@ namespace Ganss.XSS } } } + + if (PostProcessDom != null) + { + var e = new PostProcessDomEventArgs { Document = dom }; + OnPostProcessDom(e); + } } /// @@ -831,5 +905,18 @@ namespace Ganss.XSS OnRemovingAtRule(e); return !e.Cancel; } + + /// + /// Removes a CSS class from a class attribute. + /// + /// Tag the class attribute belongs to + /// CSS class to be removed + /// Reason why the CSS class is being removed 
+ private void RemoveCssClass(IElement tag, string cssClass, RemoveReason reason) + { + var e = new RemovingCssClassEventArgs { Tag = tag, CssClass = cssClass, Reason = reason }; + OnRemovingCssClass(e); + if (!e.Cancel) tag.ClassList.Remove(cssClass); + } } } diff --git a/src/HtmlSanitizer/HtmlSanitizer.csproj b/src/HtmlSanitizer/HtmlSanitizer.csproj index 4bfe186..f7cae6e 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.csproj +++ b/src/HtmlSanitizer/HtmlSanitizer.csproj @@ -19,9 +19,9 @@ git://github.com/mganss/HtmlSanitizer $(PackageTargetFallback);dotnet false - app.net40.config - false - Ganss.XSS + app.net40.config + false + Ganss.XSS @@ -53,8 +53,7 @@ - - + diff --git a/src/HtmlSanitizer/IHtmlSanitizer.cs b/src/HtmlSanitizer/IHtmlSanitizer.cs index 1da7619..ab39b0d 100644 --- a/src/HtmlSanitizer/IHtmlSanitizer.cs +++ b/src/HtmlSanitizer/IHtmlSanitizer.cs @@ -1,4 +1,7 @@ using AngleSharp; +using AngleSharp.Dom.Css; +using AngleSharp.Dom.Html; +using AngleSharp.Parser.Html; using System; using System.Collections.Generic; using System.Text.RegularExpressions; @@ -11,6 +14,29 @@ namespace Ganss.XSS /// public interface IHtmlSanitizer { + /// + /// Gets or sets a value indicating whether to keep child nodes of elements that are removed. Default is . + /// + bool KeepChildNodes { get; set; } + + /// + /// Gets or sets the object that creates the parser used for parsing the input. + /// + Func HtmlParserFactory { get; set; } + + /// + /// Gets or sets the object used for generating output. Default is . + /// + IMarkupFormatter OutputFormatter { get; set; } + + /// + /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face". + /// + /// + /// The allowed CSS at-rules. + /// + ISet AllowedAtRules { get; } + /// /// Gets or sets the allowed HTTP schemes such as "http" and "https". /// @@ -64,6 +90,18 @@ namespace Ganss.XSS /// Regex DisallowCssPropertyValue { get; set; } + /// Gets or sets the allowed CSS classes. 
+ /// + /// + /// The allowed CSS classes. + /// + ISet AllowedCssClasses { get; } + + /// + /// Occurs after sanitizing the document and post processing nodes. + /// + event EventHandler PostProcessDom; + /// /// Occurs for every node after sanitizing. /// @@ -84,6 +122,21 @@ namespace Ganss.XSS /// event EventHandler RemovingStyle; + /// + /// Occurs before an at-rule is removed. + /// + event EventHandler RemovingAtRule; + + /// + /// Occurs before a comment is removed. + /// + event EventHandler RemovingComment; + + /// + /// Occurs before a CSS class is removed. + /// + event EventHandler RemovingCssClass; + /// /// Sanitizes the specified HTML. /// @@ -92,5 +145,22 @@ namespace Ganss.XSS /// The formatter used to render the DOM. Using the default formatter if null. /// The sanitized HTML. string Sanitize(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null); + + /// + /// Sanitizes the specified HTML body fragment. If a document is given, only the body part will be returned. + /// + /// The HTML body fragment to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The sanitized HTML Document. + IHtmlDocument SanitizeDom(string html, string baseUrl = ""); + + /// + /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. + /// + /// The HTML document to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the if null. + /// The sanitized HTML document. 
+ string SanitizeDocument(string html, string baseUrl = "", IMarkupFormatter outputFormatter = null); } } \ No newline at end of file diff --git a/src/HtmlSanitizer/RemoveReason.cs b/src/HtmlSanitizer/RemoveReason.cs index 78ee2e1..d562291 100644 --- a/src/HtmlSanitizer/RemoveReason.cs +++ b/src/HtmlSanitizer/RemoveReason.cs @@ -25,5 +25,13 @@ /// Value is not allowed or harmful /// NotAllowedValue, + /// + /// CSS Class is not allowed + /// + NotAllowedCssClass, + /// + /// The class attribute is empty + /// + ClassAttributeEmpty } } diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs index 007fdd6..2efb75a 100644 --- a/test/HtmlSanitizer.Tests/Tests.cs +++ b/test/HtmlSanitizer.Tests/Tests.cs @@ -2166,7 +2166,7 @@ rl(javascript:alert(""foo""))'>"; } [Fact] - public void PostProcessTest() + public void PostProcessNodeTest() { var sanitizer = new HtmlSanitizer(); sanitizer.PostProcessNode += (s, e) => @@ -2184,6 +2184,22 @@ rl(javascript:alert(""foo""))'>"; Assert.Equal(@"
HalloTest
", sanitized, ignoreCase: true); } + [Fact] + public void PostProcessDomTest() + { + var sanitizer = new HtmlSanitizer(); + sanitizer.PostProcessDom += (s, e) => + { + var p = e.Document.CreateElement("p"); + p.TextContent = "World"; + e.Document.Body.AppendChild(p); + }; + + var html = @"
Hallo
"; + var sanitized = sanitizer.Sanitize(html); + Assert.Equal(@"
Hallo

World

", sanitized, ignoreCase: true); + } + [Fact] public void AutoLinkTest() { @@ -2499,6 +2515,44 @@ rl(javascript:alert(""foo""))'>"; Assert.Equal(RemoveReason.NotAllowedTag, actual); } + [Fact] + public void RemoveEventForNotAllowedCssClass() + { + RemoveReason? reason = null; + string removedClass = null; + + var s = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "good" }); + s.RemovingCssClass += (sender, args) => + { + reason = args.Reason; + removedClass = args.CssClass; + }; + + s.Sanitize(@"
Test
"); + + Assert.Equal("bad", removedClass); + Assert.Equal(RemoveReason.NotAllowedCssClass, reason); + } + + [Fact] + public void RemoveEventForEmptyClassAttributeAfterClassRemoval() + { + RemoveReason? reason = null; + string attributeName = null; + + var s = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "other" }); + s.RemovingAttribute += (sender, args) => + { + attributeName = args.Attribute.Name; + reason = args.Reason; + }; + + s.Sanitize(@"
Test
"); + + Assert.Equal("class", attributeName); + Assert.Equal(RemoveReason.ClassAttributeEmpty, reason); + } + [Fact] public void DocumentTest() { @@ -2844,9 +2898,31 @@ zqy1QY1kkPOuMvKWvvmFIwClI2393jVVcp91eda4+J+fIYDbfJa7RY5YcNrZhTuV//9k=""> Assert.Equal(0, failures); } } + + [Fact] + public void AllowAllClassesByDefaultTest() + { + var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }); + + var html = @"
Test
"; + var actual = sanitizer.Sanitize(html); + + Assert.Equal(@"
Test
", actual); + } [Fact] public void AllowClassesTest() + { + var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "good" }); + + var html = @"
Test
"; + var actual = sanitizer.Sanitize(html); + + Assert.Equal(@"
Test
", actual); + } + + [Fact] + public void AllowClassesUsingEventTest() { var sanitizer = new HtmlSanitizer(); sanitizer.RemovingAttribute += (s, e) => @@ -2864,6 +2940,29 @@ zqy1QY1kkPOuMvKWvvmFIwClI2393jVVcp91eda4+J+fIYDbfJa7RY5YcNrZhTuV//9k=""> Assert.Equal(@"
Test
", actual); } + [Fact] + public void RemoveClassAttributeIfNoAllowedClassesTest() + { + // Empty array for allowed classes = no classes allowed + var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new string[0]); + + var html = @"
Test
"; + var actual = sanitizer.Sanitize(html); + + Assert.Equal(@"
Test
", actual); + } + + [Fact] + public void RemoveClassAttributeIfEmptyTest() + { + var sanitizer = new HtmlSanitizer(allowedAttributes: new[] { "class" }, allowedCssClasses: new[] { "other" }); + + var html = @"
Test
"; + var actual = sanitizer.Sanitize(html); + + Assert.Equal(@"
Test
", actual); + } + [Fact] public void TextTest() {