diff --git a/HtmlSanitizer.Tests/Tests.cs b/HtmlSanitizer.Tests/Tests.cs index 6093372..3efbb2a 100644 --- a/HtmlSanitizer.Tests/Tests.cs +++ b/HtmlSanitizer.Tests/Tests.cs @@ -2520,6 +2520,91 @@ rl(javascript:alert(""foo""))'>"; Assert.That(actual, Is.EqualTo("
Test
")); } + + [Test] + public void StyleTagTest() + { + var s = new HtmlSanitizer(); + s.AllowedTags.Add("style"); + var html = "
Test
"; + + var actual = s.SanitizeDocument(html); + + Assert.That(actual, Is.EqualTo("
Test
")); + } + + [Test] + public void StyleAtTest() + { + var s = new HtmlSanitizer(); + s.AllowedTags.Add("style"); + s.AllowedAtRules.Add(AngleSharp.Dom.Css.CssRuleType.Media); + s.AllowedAtRules.Add(AngleSharp.Dom.Css.CssRuleType.Keyframes); + s.AllowedAtRules.Add(AngleSharp.Dom.Css.CssRuleType.Keyframe); + s.AllowedAtRules.Add(AngleSharp.Dom.Css.CssRuleType.Page); + var html = @""; + + var actual = s.SanitizeDocument(html); + + Assert.That(actual, Is.EqualTo(@"".Replace("\r\n", "\n"))); + } } } diff --git a/HtmlSanitizer/EventArgs.cs b/HtmlSanitizer/EventArgs.cs index d0aa0db..5a721aa 100644 --- a/HtmlSanitizer/EventArgs.cs +++ b/HtmlSanitizer/EventArgs.cs @@ -130,4 +130,25 @@ namespace Ganss.XSS public RemoveReason Reason { get; set; } } + /// + /// Provides data for the event. + /// + public class RemovingAtRuleEventArgs : CancelEventArgs + { + /// + /// The tag containing the at-rule to be removed. + /// + /// + /// The tag. + /// + public IElement Tag { get; set; } + + /// + /// Gets or sets the rule to be removed. + /// + /// + /// The rule. + /// + public ICssRule Rule { get; set; } + } } diff --git a/HtmlSanitizer/HtmlSanitizer.cs b/HtmlSanitizer/HtmlSanitizer.cs index 5be683e..9f64702 100644 --- a/HtmlSanitizer/HtmlSanitizer.cs +++ b/HtmlSanitizer/HtmlSanitizer.cs @@ -67,8 +67,22 @@ namespace Ganss.XSS AllowedAttributes = new HashSet(allowedAttributes ?? DefaultAllowedAttributes, StringComparer.OrdinalIgnoreCase); UriAttributes = new HashSet(uriAttributes ?? DefaultUriAttributes, StringComparer.OrdinalIgnoreCase); AllowedCssProperties = new HashSet(allowedCssProperties ?? DefaultAllowedCssProperties, StringComparer.OrdinalIgnoreCase); + AllowedAtRules = new HashSet(DefaultAllowedAtRules); } + /// + /// Gets or sets the allowed CSS at-rules such as "@media" and "@font-face". + /// + /// + /// The allowed CSS at-rules. + /// + public ISet AllowedAtRules { get; private set; } + + /// + /// The default allowed CSS at-rules. 
+ /// + public static readonly ISet DefaultAllowedAtRules = new HashSet() { CssRuleType.Style, CssRuleType.Namespace }; + /// /// Gets or sets the allowed HTTP schemes such as "http" and "https". /// @@ -256,6 +270,10 @@ namespace Ganss.XSS /// Occurs before a style is removed. /// public event EventHandler RemovingStyle; + /// + /// Occurs before an at-rule is removed. + /// + public event EventHandler RemovingAtRule; /// /// Raises the event. @@ -293,6 +311,15 @@ namespace Ganss.XSS if (RemovingStyle != null) RemovingStyle(this, e); } + /// + /// Raises the event. + /// + /// The instance containing the event data. + protected virtual void OnRemovingAtRule(RemovingAtRuleEventArgs e) + { + if (RemovingAtRule != null) RemovingAtRule(this, e); + } + /// /// The default regex for disallowed CSS property values. /// @@ -388,6 +415,8 @@ namespace Ganss.XSS RemoveTag(tag, RemoveReason.NotAllowedTag); } + SanitizeStyleSheets(dom, baseUrl); + // cleanup attributes foreach (var tag in context.QuerySelectorAll("*").OfType().ToList()) { @@ -433,6 +462,72 @@ namespace Ganss.XSS DoPostProcess(dom, nodes); } + private void SanitizeStyleSheets(IHtmlDocument dom, string baseUrl) + { + foreach (var styleSheet in dom.StyleSheets.OfType()) + { + var styleTag = styleSheet.OwnerNode; + + for (int i = 0; i < styleSheet.Rules.Length;) + { + var rule = styleSheet.Rules[i]; + if (!SanitizeStyleRule(rule, styleTag, baseUrl) && RemoveAtRule(styleTag, rule)) + styleSheet.RemoveAt(i); + else i++; + } + + styleTag.InnerHtml = styleSheet.ToCss(); + } + } + + private bool SanitizeStyleRule(ICssRule rule, IElement styleTag, string baseUrl) + { + if (!AllowedAtRules.Contains(rule.Type)) return false; + + var styleRule = rule as ICssStyleRule; + + if (styleRule != null) + { + SanitizeStyleDeclaration(styleTag, styleRule.Style, baseUrl); + } + else + { + var groupingRule = rule as ICssGroupingRule; + + if (groupingRule != null) + { + for (int i = 0; i < groupingRule.Rules.Length;) + { + var 
childRule = groupingRule.Rules[i]; + if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) + groupingRule.RemoveAt(i); + else i++; + } + } + else if (rule is ICssPageRule) + { + var pageRule = (ICssPageRule)rule; + SanitizeStyleDeclaration(styleTag, pageRule.Style, baseUrl); + } + else if (rule is ICssKeyframesRule) + { + var keyFramesRule = (ICssKeyframesRule)rule; + foreach (var childRule in keyFramesRule.Rules.OfType().ToList()) + { + if (!SanitizeStyleRule(childRule, styleTag, baseUrl) && RemoveAtRule(styleTag, childRule)) + keyFramesRule.Remove(childRule.KeyText); + } + } + else if (rule is ICssKeyframeRule) + { + var keyFrameRule = (ICssKeyframeRule)rule; + SanitizeStyleDeclaration(styleTag, keyFrameRule.Style, baseUrl); + } + } + + return true; + } + /// /// Performs post processing on all nodes in the document. /// @@ -499,13 +594,18 @@ namespace Ganss.XSS protected void SanitizeStyle(IHtmlElement element, string baseUrl) { // filter out invalid CSS declarations - // see https://github.com/FlorianRappl/AngleSharp/issues/101 + // see https://github.com/AngleSharp/AngleSharp/issues/101 if (element.GetAttribute("style") == null) return; element.SetAttribute("style", element.Style.ToCss()); var styles = element.Style; if (styles == null || styles.Length == 0) return; + SanitizeStyleDeclaration(element, styles, baseUrl); + } + + private void SanitizeStyleDeclaration(IElement element, ICssStyleDeclaration styles, string baseUrl) + { var removeStyles = new List>(); var setStyles = new Dictionary(); @@ -520,7 +620,7 @@ namespace Ganss.XSS continue; } - if(CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val)) + if (CssExpression.IsMatch(val) || DisallowCssPropertyValue.IsMatch(val)) { removeStyles.Add(new Tuple(style, RemoveReason.NotAllowedValue)); continue; @@ -547,15 +647,15 @@ namespace Ganss.XSS } } - foreach (var style in removeStyles) - { - RemoveStyle(element, styles, style.Item1, style.Item2); - } - 
foreach (var style in setStyles) { styles.SetProperty(style.Key, style.Value); } + + foreach (var style in removeStyles) + { + RemoveStyle(element, styles, style.Item1, style.Item2); + } } /// @@ -635,10 +735,10 @@ namespace Ganss.XSS } /// - /// Remove a tag from the document. + /// Removes a tag from the document. /// - /// to be removed - /// reason why to be removed + /// Tag to be removed + /// Reason for removal private void RemoveTag(IElement tag, RemoveReason reason) { var e = new RemovingTagEventArgs { Tag = tag, Reason = reason }; @@ -647,11 +747,11 @@ namespace Ganss.XSS } /// - /// Remove an attribute from the document. + /// Removes an attribute from the document. /// - /// tag where the attribute to belongs - /// to be removed - /// reason why to be removed + /// Tag the attribute belongs to + /// Attribute to be removed + /// Reason for removal private void RemoveAttribute(IElement tag, IAttr attribute, RemoveReason reason) { var e = new RemovingAttributeEventArgs { Tag = tag, Attribute = attribute, Reason = reason }; @@ -660,17 +760,30 @@ namespace Ganss.XSS } /// - /// Remove a style from the document. + /// Removes a style from the document. /// - /// tag where the style belongs - /// collection where the style to belongs - /// to be removed - /// reason why to be removed + /// Tag the style belongs to + /// Style declaration that contains the style to be removed + /// Style to be removed + /// Reason for removal private void RemoveStyle(IElement tag, ICssStyleDeclaration styles, ICssProperty style, RemoveReason reason) { var e = new RemovingStyleEventArgs { Tag = tag, Style = style, Reason = reason }; OnRemovingStyle(e); if (!e.Cancel) styles.RemoveProperty(style.Name); } + + /// + /// Raises the RemovingAtRule event and reports whether an at-rule may be removed; the caller performs the actual removal. + /// + /// Tag the at-rule belongs to + /// Rule to be removed + /// true if the rule may be removed; false if an event handler cancelled the removal. 
+ private bool RemoveAtRule(IElement tag, ICssRule rule) + { + var e = new RemovingAtRuleEventArgs { Tag = tag, Rule = rule }; + OnRemovingAtRule(e); + return !e.Cancel; + } } } diff --git a/HtmlSanitizer/HtmlSanitizer.nuspec b/HtmlSanitizer/HtmlSanitizer.nuspec index 68484c1..cb31a6c 100644 --- a/HtmlSanitizer/HtmlSanitizer.nuspec +++ b/HtmlSanitizer/HtmlSanitizer.nuspec @@ -2,7 +2,7 @@ $id$ - $version$-beta + $version$ $title$ $author$ $author$ diff --git a/HtmlSanitizer/RemoveReason.cs b/HtmlSanitizer/RemoveReason.cs index 9a59561..6d67895 100644 --- a/HtmlSanitizer/RemoveReason.cs +++ b/HtmlSanitizer/RemoveReason.cs @@ -24,7 +24,7 @@ namespace Ganss.XSS /// NotAllowedStyle, /// - /// Value is a not allowed or harmful url + /// Value is a non-allowed or harmful url /// NotAllowedUrlValue, /// diff --git a/README.md b/README.md index 6e9a5a8..8856808 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ In order to facilitate different use cases, HtmlSanitizer can be customized at s - Configure allowed HTML tags through the property `AllowedTags`. All other tags will be stripped. - Configure allowed HTML attributes through the property `AllowedAttributes`. All other attributes will be stripped. - Configure allowed CSS property names through the property `AllowedCssProperties`. All other styles will be stripped. +- Configure allowed CSS [at-rules](https://developer.mozilla.org/en-US/docs/Web/CSS/At-rule) through the property `AllowedAtRules`. All other at-rules will be stripped. - Configure allowed URI schemes through the property `AllowedSchemes`. All other URIs will be stripped. - Configure HTML attributes that contain URIs (such as "src", "href" etc.) through the property `UriAttributes`. - Provide a base URI that will be used to resolve relative URIs against. 
@@ -38,6 +39,14 @@ var sanitized = sanitizer.Sanitize(html); ### CSS properties allowed by default `background, background-attachment, background-color, background-image, background-position, background-repeat, border, border-bottom, border-bottom-color, border-bottom-style, border-bottom-width, border-collapse, border-color, border-left, border-left-color, border-left-style, border-left-width, border-right, border-right-color, border-right-style, border-right-width, border-spacing, border-style, border-top, border-top-color, border-top-style, border-top-width, border-width, bottom, caption-side, clear, clip, color, content, counter-increment, counter-reset, cursor, direction, display, empty-cells, float, font, font-family, font-size, font-style, font-variant, font-weight, height, left, letter-spacing, line-height, list-style, list-style-image, list-style-position, list-style-type, margin, margin-bottom, margin-left, margin-right, margin-top, max-height, max-width, min-height, min-width, opacity, orphans, outline, outline-color, outline-style, outline-width, overflow, padding, padding-bottom, padding-left, padding-right, padding-top, page-break-after, page-break-before, page-break-inside, quotes, right, table-layout, text-align, text-decoration, text-indent, text-transform, top, unicode-bidi, vertical-align, visibility, white-space, widows, width, word-spacing, z-index` +### CSS at-rules allowed by default +`namespace, style` + +`style` refers to ordinary style rules (a selector with its declarations), both at the top level of a style sheet and nested inside other at-rules such as `@media`. Disallowing `@namespace` while allowing other types of at-rules can lead to errors. +Property declarations in `@font-face` and `@viewport` are not sanitized. + +_Note:_ the `style` tag is disallowed by default. + ### URI schemes allowed by default ``http, https``