From a0a88d2ef0aee06b3818b4fadde93ea251fb738a Mon Sep 17 00:00:00 2001 From: Michael Ganss Date: Mon, 24 Jun 2013 19:38:49 +0200 Subject: [PATCH] - added xml comments --- HtmlSanitizer.cs | 44 ++++++++++++++++++++++++++++++++++++++++---- Tests.cs | 14 ++++++++++++-- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/HtmlSanitizer.cs b/HtmlSanitizer.cs index afbc072..bcf5f01 100644 --- a/HtmlSanitizer.cs +++ b/HtmlSanitizer.cs @@ -7,6 +7,42 @@ using System.Text.RegularExpressions; namespace Html { + /// + /// Cleans HTML fragments from constructs that can lead to XSS attacks. + /// + /// + /// XSS attacks can occur at several levels within an HTML fragment: + /// + /// HTML Tags (e.g. the <script> tag) + /// HTML attributes (e.g. the "onload" attribute) + /// CSS styles (url property values) + /// malformed HTML or HTML that exploits parser bugs in specific browsers + /// + /// + /// The HtmlSanitizer class addresses all of these possible attack vectors by using an HTML parser that is based on the one used + /// in the Gecko browser engine (see CsQuery). + /// + /// + /// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above: + /// + /// You can specify the allowed HTML tags through the property . All other tags will be stripped. + /// You can specify the allowed HTML attributes through the property . All other attributes will be stripped. + /// You can specify the allowed CSS property names through the property . All other styles will be stripped. + /// You can specify the allowed URI schemes through the property . All other URIs will be stripped. + /// You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property . + /// + /// + /// + /// + /// + /// alert('xss')
Test
"; + /// var sanitized = sanitizer.Sanitize(html, "http://www.example.com"); + /// // -> "
Test
" + /// ]]> + ///
+ ///
public class HtmlSanitizer { private IEnumerable _allowedSchemes; @@ -235,11 +271,11 @@ namespace Html } // frolm http://genshi.edgewall.org/ - protected static readonly Regex CssUnicodeEscapes = new Regex(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled); - protected static readonly Regex CssComments = new Regex(@"/\*.*?\*/", RegexOptions.Compiled); + private static readonly Regex CssUnicodeEscapes = new Regex(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled); + private static readonly Regex CssComments = new Regex(@"/\*.*?\*/", RegexOptions.Compiled); // IE6 - protected static readonly Regex CssExpression = new Regex(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled); - protected static readonly Regex CssUrl = new Regex(@"[Uu][Rr\u0280][Ll\u029F]\s*\(\s*['""]?\s*([^'"")]+)", RegexOptions.Compiled); + private static readonly Regex CssExpression = new Regex(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled); + private static readonly Regex CssUrl = new Regex(@"[Uu][Rr\u0280][Ll\u029F]\s*\(\s*['""]?\s*([^'"")]+)", RegexOptions.Compiled); /// /// Sanitizes the style. diff --git a/Tests.cs b/Tests.cs index f56f969..6819c61 100644 --- a/Tests.cs +++ b/Tests.cs @@ -10,6 +10,9 @@ using System.Text; // https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.232_-_Attribute_Escape_Before_Inserting_Untrusted_Data_into_HTML_Common_Attributes // and http://ha.ckers.org/xss.html +// disable XML comments warnings +#pragma warning disable 1591 + namespace Html { /// @@ -1661,6 +1664,11 @@ S actual = sanitizer.Sanitize(html); expected = ""; Assert.That(actual, Is.EqualTo(expected).IgnoreCase); + + html = "
Test
"; + actual = sanitizer.Sanitize(html, "http://www.example.com"); + expected = @"
Test
"; + Assert.That(actual, Is.EqualTo(expected).IgnoreCase); } /// @@ -1730,7 +1738,7 @@ S } /// - /// Tests sanitiuation of URLs that are contained in CSS property values. + /// Tests sanitization of URLs that are contained in CSS property values. /// [Test] public void UrlStyleTest() @@ -2017,4 +2025,6 @@ rl(javascript:alert(""foo""))'>"; Assert.That(sanitizer.Sanitize(html), Is.EqualTo(@"
XSS
").IgnoreCase); } } -} \ No newline at end of file +} + +#pragma warning restore 1591