- added xml comments

This commit is contained in:
Michael Ganss
2013-06-24 19:38:49 +02:00
parent 56bf838f35
commit a0a88d2ef0
2 changed files with 52 additions and 6 deletions

View File

@@ -7,6 +7,42 @@ using System.Text.RegularExpressions;
namespace Html
{
/// <summary>
/// Removes constructs from HTML fragments that can lead to <a href="https://en.wikipedia.org/wiki/Cross-site_scripting">XSS attacks</a>.
/// </summary>
/// <remarks>
/// XSS attacks can occur at several levels within an HTML fragment:
/// <list type="bullet">
/// <item>HTML Tags (e.g. the &lt;script&gt; tag)</item>
/// <item>HTML attributes (e.g. the "onload" attribute)</item>
/// <item>CSS styles (url property values)</item>
/// <item>malformed HTML or HTML that exploits parser bugs in specific browsers</item>
/// </list>
/// <para>
/// The HtmlSanitizer class addresses all of these possible attack vectors by using an HTML parser that is based on the one used
/// in the Gecko browser engine (see <a href="https://github.com/jamietre/CsQuery">CsQuery</a>).
/// </para>
/// <para>
/// In order to facilitate different use cases, HtmlSanitizer can be customized at the levels mentioned above:
/// <list type="bullet">
/// <item>You can specify the allowed HTML tags through the property <see cref="AllowedTags"/>. All other tags will be stripped.</item>
/// <item>You can specify the allowed HTML attributes through the property <see cref="AllowedAttributes"/>. All other attributes will be stripped.</item>
/// <item>You can specify the allowed CSS property names through the property <see cref="AllowedCssProperties"/>. All other styles will be stripped.</item>
/// <item>You can specify the allowed URI schemes through the property <see cref="AllowedSchemes"/>. All other URIs will be stripped.</item>
/// <item>You can specify the HTML attributes that contain URIs (such as "src", "href" etc.) through the property <see cref="UriAttributes"/>.</item>
/// </list>
/// </para>
/// </remarks>
/// <example>
/// <code>
/// <![CDATA[
/// var sanitizer = new HtmlSanitizer();
/// var html = @"<script>alert('xss')</script><div onload=""alert('xss')"" style=""background-color: test"">Test<img src=""test.gif"" style=""background-image: url(javascript:alert('xss')); margin: 10px""></div>";
/// var sanitized = sanitizer.Sanitize(html, "http://www.example.com");
/// // -> "<div style="background-color: test">Test<img style="margin: 10px" src="http://www.example.com/test.gif"></div>"
/// ]]>
/// </code>
/// </example>
public class HtmlSanitizer
{
private IEnumerable<string> _allowedSchemes;
@@ -235,11 +271,11 @@ namespace Html
}
// from http://genshi.edgewall.org/
protected static readonly Regex CssUnicodeEscapes = new Regex(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled);
protected static readonly Regex CssComments = new Regex(@"/\*.*?\*/", RegexOptions.Compiled);
private static readonly Regex CssUnicodeEscapes = new Regex(@"\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'""{};:()#*])", RegexOptions.Compiled);
private static readonly Regex CssComments = new Regex(@"/\*.*?\*/", RegexOptions.Compiled);
// IE6 <http://heideri.ch/jso/#80>
protected static readonly Regex CssExpression = new Regex(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled);
protected static readonly Regex CssUrl = new Regex(@"[Uu][Rr\u0280][Ll\u029F]\s*\(\s*['""]?\s*([^'"")]+)", RegexOptions.Compiled);
private static readonly Regex CssExpression = new Regex(@"[eE\uFF25\uFF45][xX\uFF38\uFF58][pP\uFF30\uFF50][rR\u0280\uFF32\uFF52][eE\uFF25\uFF45][sS\uFF33\uFF53]{2}[iI\u026A\uFF29\uFF49][oO\uFF2F\uFF4F][nN\u0274\uFF2E\uFF4E]", RegexOptions.Compiled);
private static readonly Regex CssUrl = new Regex(@"[Uu][Rr\u0280][Ll\u029F]\s*\(\s*['""]?\s*([^'"")]+)", RegexOptions.Compiled);
/// <summary>
/// Sanitizes the style.

View File

@@ -10,6 +10,9 @@ using System.Text;
// https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.232_-_Attribute_Escape_Before_Inserting_Untrusted_Data_into_HTML_Common_Attributes
// and http://ha.ckers.org/xss.html
// disable XML comments warnings
#pragma warning disable 1591
namespace Html
{
/// <summary>
@@ -1661,6 +1664,11 @@ S
actual = sanitizer.Sanitize(html);
expected = "<img>";
Assert.That(actual, Is.EqualTo(expected).IgnoreCase);
html = "<script>alert('xss')</script><div onload=\"alert('xss')\" style=\"background-color: test\">Test<img src=\"test.gif\" style=\"background-image: url(javascript:alert('xss')); margin: 10px\"></div>";
actual = sanitizer.Sanitize(html, "http://www.example.com");
expected = @"<div style=""background-color: test"">Test<img style=""margin: 10px"" src=""http://www.example.com/test.gif""></div>";
Assert.That(actual, Is.EqualTo(expected).IgnoreCase);
}
/// <summary>
@@ -1730,7 +1738,7 @@ S
}
/// <summary>
/// Tests sanitiuation of URLs that are contained in CSS property values.
/// Tests sanitization of URLs that are contained in CSS property values.
/// </summary>
[Test]
public void UrlStyleTest()
@@ -2017,4 +2025,6 @@ rl(javascript:alert(""foo""))'>";
Assert.That(sanitizer.Sanitize(html), Is.EqualTo(@"<div>XSS</div>").IgnoreCase);
}
}
}
}
#pragma warning restore 1591