From 18a7897bf505bc9df125cf843add3eadd67a110a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20Pat=C4=B1r?= Date: Mon, 7 May 2018 10:32:06 +0300 Subject: [PATCH] Much more tests are implemented. --- AntiSamy.NET.v3.ncrunchsolution | 6 + src/AntiSamy/AntiSamy.cs | 8 +- src/AntiSamy/AntiSamy.csproj | 2 + src/AntiSamy/AntiSamyDomScanner.cs | 9 +- src/AntiSamy/CssScanner.cs | 6 +- test/AntiSamy.Tests/AntiSamyTests.cs | 368 +++++++++++++++++++-------- test/AntiSamy.Tests/LiteralTests.cs | 30 +++ test/AntiSamy.Tests/TestBase.cs | 21 ++ 8 files changed, 325 insertions(+), 125 deletions(-) create mode 100644 AntiSamy.NET.v3.ncrunchsolution create mode 100644 test/AntiSamy.Tests/LiteralTests.cs create mode 100644 test/AntiSamy.Tests/TestBase.cs diff --git a/AntiSamy.NET.v3.ncrunchsolution b/AntiSamy.NET.v3.ncrunchsolution new file mode 100644 index 0000000..10420ac --- /dev/null +++ b/AntiSamy.NET.v3.ncrunchsolution @@ -0,0 +1,6 @@ + + + True + True + + \ No newline at end of file diff --git a/src/AntiSamy/AntiSamy.cs b/src/AntiSamy/AntiSamy.cs index bea5fe4..a111cdb 100644 --- a/src/AntiSamy/AntiSamy.cs +++ b/src/AntiSamy/AntiSamy.cs @@ -2,24 +2,20 @@ { public class AntiSamy { - public string InputEncoding { get; } = AntiSamyDomScanner.DefaultEncodingAlgorithm; - - public string OutputEncoding { get; } = AntiSamyDomScanner.DefaultEncodingAlgorithm; - public virtual AntiySamyResult Scan(string taintedHtml, string filename) { Policy policy = Policy.FromFile(filename); var antiSamy = new AntiSamyDomScanner(policy); - return antiSamy.Scan(taintedHtml, InputEncoding, OutputEncoding); + return antiSamy.Scan(taintedHtml); } public virtual AntiySamyResult Scan(string taintedHtml, Policy policy) { var antiSamy = new AntiSamyDomScanner(policy); - return antiSamy.Scan(taintedHtml, InputEncoding, OutputEncoding); + return antiSamy.Scan(taintedHtml); } } diff --git a/src/AntiSamy/AntiSamy.csproj b/src/AntiSamy/AntiSamy.csproj index 979a3b1..efb84ca 100644 --- a/src/AntiSamy/AntiSamy.csproj +++ b/src/AntiSamy/AntiSamy.csproj @@ -4,6 +4,8 @@ netstandard2.0 true Caner Patır + 1.0.1 + 1.0.1.0 diff --git a/src/AntiSamy/AntiSamyDomScanner.cs b/src/AntiSamy/AntiSamyDomScanner.cs index 8e63df2..9f8b3be 100644 --- a/src/AntiSamy/AntiSamyDomScanner.cs +++ b/src/AntiSamy/AntiSamyDomScanner.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -11,8 +12,6 @@ namespace AntiSamy { public sealed class AntiSamyDomScanner { - public const string DefaultEncodingAlgorithm = "UTF-8"; - private readonly List _errorMessages = new List(); private readonly Policy _policy; @@ -21,7 +20,7 @@ namespace AntiSamy public AntiSamyDomScanner(Policy policy) => _policy = policy; - public AntiySamyResult Scan(string html, string inputEncoding, string outputEncoding) + public AntiySamyResult Scan(string html) { if (html == null) { @@ -174,7 +173,7 @@ namespace AntiSamy else { - if ("style".Equals(name.ToLower()) && allowwdAttr != null) + if ("style".Equals(name.ToLower())) { ScanCss(node, parentNode, maxinputsize, true); } @@ -336,7 +335,7 @@ namespace AntiSamy cssResult = styleScanner.ScanStyleSheet(node.FirstChild.InnerHtml, maxinputsize, fromStyleAttribute); node.FirstChild.InnerHtml = cssResult.CleanHtml; } - if (cssResult != null) + if (cssResult != null && cssResult.ErrorMessages.Any()) _errorMessages.AddRange(cssResult.ErrorMessages); } catch (ParseException e) diff --git a/src/AntiSamy/CssScanner.cs b/src/AntiSamy/CssScanner.cs index 999b481..dce7f83 100644 --- a/src/AntiSamy/CssScanner.cs +++ b/src/AntiSamy/CssScanner.cs @@ -58,7 +58,7 @@ namespace AntiSamy throw new ScanException("An error occured while scanning css", exception); } - return new AntiySamyResult(start, cleanStyleSheet, _errors); + return new AntiySamyResult(start, !string.IsNullOrEmpty(cleanStyleSheet) ? cleanStyleSheet.Trim() : cleanStyleSheet, _errors); } private string CleanDummyWrapper(string result) @@ -176,13 +176,13 @@ namespace AntiSamy private void ValidateValue(CssProperty allowedCssProperty, ICssProperty cssProperty, string value, List> removeStyles) { - if (!allowedCssProperty.AllowedLiterals.Any(lit => lit.Equals(value, StringComparison.OrdinalIgnoreCase))) + if (allowedCssProperty.AllowedLiterals.Any() && !allowedCssProperty.AllowedLiterals.Any(lit => lit.Equals(value, StringComparison.OrdinalIgnoreCase))) { removeStyles.Add(new Tuple(cssProperty, $"\"{value}\" is not allowed literal")); return; } - if (!allowedCssProperty.AllowedRegExps.Any(regex => new Regex(regex).IsMatch(value))) + if (allowedCssProperty.AllowedRegExps.Any() && !allowedCssProperty.AllowedRegExps.Any(regex => new Regex(regex).IsMatch(value))) { removeStyles.Add(new Tuple(cssProperty, $"\"{value}\" is not allowed literal by regex")); return; diff --git a/test/AntiSamy.Tests/AntiSamyTests.cs b/test/AntiSamy.Tests/AntiSamyTests.cs index 86b9349..9a3720d 100644 --- a/test/AntiSamy.Tests/AntiSamyTests.cs +++ b/test/AntiSamy.Tests/AntiSamyTests.cs @@ -1,18 +1,15 @@ +using FluentAssertions; using System; -using System.Collections.Generic; -using System.IO; using System.Linq; using System.Text; using System.Text.RegularExpressions; -using FluentAssertions; - using Xunit; namespace AntiSamy.Tests { - public class AntiSamyTests + public class AntiSamyTests : TestBase { - private static readonly String[] BASE64_BAD_XML_STRINGS = new String[]{ + private static readonly string[] BASE64_BAD_XML_STRINGS = new string[]{ // first string is // "click here" "PGEgLSBocmVmPSJodHRwOi8vd3d3Lm93YXNwLm9yZyI+Y2xpY2sgaGVyZTwvYT4=", @@ -33,44 +30,38 @@ namespace AntiSamy.Tests }; private AntiSamy _sut = new AntiSamy(); - Policy policy = GetPolicy("antisamy.xml"); - private static Policy GetPolicy(string fileName) - { - string currentDir = Directory.GetCurrentDirectory(); - return Policy.FromFile(Path.Combine(currentDir, $@"resources\{fileName}")); - } [Fact] public void scriptAttacks() { - _sut.Scan("test", policy).CleanHtml.Contains("script").Should().BeFalse(); + _sut.Scan("test", TestPolicy).CleanHtml.Contains("script").Should().BeFalse(); - _sut.Scan("<<<><", policy).CleanHtml.Contains("", TestPolicy).CleanHtml.Contains("", policy).CleanHtml.Contains("onload").Should().BeFalse(); + _sut.Scan("", TestPolicy).CleanHtml.Contains("onload").Should().BeFalse(); - _sut.Scan("", policy).CleanHtml.Contains("alert").Should().BeFalse(); + _sut.Scan("", TestPolicy).CleanHtml.Contains("alert").Should().BeFalse(); - _sut.Scan("", policy).CleanHtml.Contains("iframe").Should().BeFalse(); + _sut.Scan("", TestPolicy).CleanHtml.Contains("iframe").Should().BeFalse(); } [Fact] public void IllegalXML() { - - foreach (String BASE64_BAD_XML_STRING in BASE64_BAD_XML_STRINGS) + foreach (string BASE64_BAD_XML_STRING in BASE64_BAD_XML_STRINGS) { - try { - String testStr = Encoding.UTF8.GetString(Convert.FromBase64String(BASE64_BAD_XML_STRING)); - _sut.Scan(testStr, policy); + string testStr = Encoding.UTF8.GetString(Convert.FromBase64String(BASE64_BAD_XML_STRING)); + _sut.Scan(testStr, TestPolicy); } - catch (ScanException ex) + catch (ScanException) { // still success! } } - _sut.Scan(""; - AntiySamyResult result = _sut.Scan(s, policy); + var s = ""; + AntiySamyResult result = _sut.Scan(s, TestPolicy); s.Should().BeEquivalentTo(result.CleanHtml); } @@ -248,19 +237,18 @@ namespace AntiSamy.Tests public void issue30() { - String s = ""; + var s = ""; - _sut.Scan(s, policy); + _sut.Scan(s, TestPolicy); /* followup - does the patch fix multiline CSS? */ - String s2 = ""; - var cr = _sut.Scan(s2, policy); + var s2 = ""; + AntiySamyResult cr = _sut.Scan(s2, TestPolicy); "".Should().BeEquivalentTo(cr.CleanHtml); /* next followup - does non-CDATA parsing still work? */ - String s3 = "\n".Should().BeEquivalentTo(cr.CleanHtml); @@ -270,11 +258,11 @@ namespace AntiSamy.Tests public void isssue31() { - String test = "foo"; + var test = "foo"; //Policy revised = policy.cloneWithDirective("onUnknownTag", "encode"); - var cr = _sut.Scan(test, policy); - String s = cr.CleanHtml; + AntiySamyResult cr = _sut.Scan(test, TestPolicy); + string s = cr.CleanHtml; s.Contains("<g>").Should().BeTrue(); } @@ -290,7 +278,7 @@ namespace AntiSamy.Tests + "Names For Snow. We'll catch up with you next week....wonder which" + "hat Bill will wear?
Jane"; Policy mySpacePolicy = GetPolicy("antisamy-myspace.xml"); - var cr = _sut.Scan(dirty, mySpacePolicy); + AntiySamyResult cr = _sut.Scan(dirty, mySpacePolicy); cr.CleanHtml.Should().NotBeNull(); Policy ebayPolicy = GetPolicy("antisamy-ebay.xml"); @@ -307,9 +295,9 @@ namespace AntiSamy.Tests { /* issue #38 - color problem/color combinations */ - String s = "Test"; - String expected = "Test"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + var s = "Test"; + var expected = "Test"; + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); //Not supported //s = "
Test 3 letter code
"; @@ -318,43 +306,31 @@ namespace AntiSamy.Tests s = "Test"; expected = "Test"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); s = "Test"; expected = "Test"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); s = "Test"; expected = "Test"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); s = "
Test
"; expected = "
Test
"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); s = "Test"; expected = "Test"; - assertEquals(_sut.Scan(s, policy).CleanHtml, expected); + assertEquals(_sut.Scan(s, TestPolicy).CleanHtml, expected); //Not supported //s = "
Test
"; //expected = "
Test
"; //assertEquals(_sut.Scan(s, policy).CleanHtml, expected); - /* - * This test case was failing because of the following code from the - * batik CSS library, which throws an exception if any character - * other than a '!' follows a beginning token of '<'. The - * ParseException is now caught in the node a CssScanner.java and - * the outside AntiSamyDOMScanner.java. - * - * 0398 nextChar(); 0399 if (current != '!') { 0400 throw new - * ParseException("character", 0401 reader.getLine(), 0402 - * reader.getColumn()); - */ s = "foo@import 'x';bar"; - _sut.Scan(s, policy); - + _sut.Scan(s, TestPolicy); } [Fact] @@ -362,10 +338,10 @@ namespace AntiSamy.Tests { /* issue #40 - handling "; + var s = ""; //Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); - var result = _sut.Scan(s, policy); + AntiySamyResult result = _sut.Scan(s, TestPolicy); result.CleanHtml.Contains("print, projection, screen").Should().BeTrue(); } @@ -374,5 +350,175 @@ namespace AntiSamy.Tests { actual.Should().BeEquivalentTo(expected); } + + [Fact] + public void issue41() + { + /* issue #41 - comment handling */ + // comments will be removed by default + _sut.Scan("text ", TestPolicy).CleanHtml.Should().BeEquivalentTo("text "); + + //Policy revised2 = policy.cloneWithDirective(Policy.PRESERVE_COMMENTS, "true").cloneWithDirective(Policy.PRESERVE_SPACE, "true").cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); + + ///* + //* These make sure the regular comments are kept alive and that + //* conditional comments are ripped out. + //*/ + //assertEquals("
text
", as.scan("
text
", revised2, AntiSamy.DOM).getCleanHTML()); + //assertEquals("
text
", as.scan("
text
", revised2, AntiSamy.DOM).getCleanHTML()); + + ///* + //* Check to see how nested conditional comments are handled. This is + //* not very clean but the main goal is to avoid any tags. Not sure + //* on encodings allowed in comments. + //*/ + string input = "
text <[endif]-->
"; + string expected = "
text <[endif]-->
"; + _sut.Scan(input, TestPolicy).CleanHtml.Should().BeEquivalentTo(expected); + + /* + * Regular comment nested inside conditional comment. Test makes + * sure + */ + _sut.Scan("
text comment <[endif]-->
", TestPolicy).CleanHtml + .Should().BeEquivalentTo("
text comment <[endif]-->
"); + + ///* + //* These play with whitespace and have invalid comment syntax. + //*/ + //assertEquals("
text
", as.scan("
text
", revised2, AntiSamy.DOM).getCleanHTML()); + //assertEquals("
text comment
", as.scan("
text comment
", revised2, AntiSamy.DOM).getCleanHTML()); + //assertEquals("
text comment
", as.scan("
text comment
", revised2, AntiSamy.DOM).getCleanHTML()); + + var attack = "[if lte 8]" + ""; + _sut.Scan(s, TestPolicy); + _sut.Scan(s, TestPolicy).ErrorMessages.Count().Should().Be(3); + + } + + [Fact] + public void issue51() + { + /* issue #51 - offsite urls with () are found to be invalid */ + var s = "test"; + AntiySamyResult result = _sut.Scan(s, TestPolicy); + + result.ErrorMessages.Count().Should().Be(0); + } + + [Fact] + public void isssue56() + { + /* issue #56 - unnecessary spaces */ + + var s = "Hello World!"; + var expected = "Hello World!"; + + AntiySamyResult result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Should().BeEquivalentTo(expected); + } + + [Fact] + public void issue58() + { + /* issue #58 - input not in list of allowed-to-be-empty tags */ + var s = "tgdan g h"; + AntiySamyResult result = _sut.Scan(s, TestPolicy); + result.ErrorMessages.Count().Should().Be(0); + } + + [Fact] + public void issue61() + { + /* issue #61 - input has newline appended if ends with an accepted tag */ + var dirtyInput = "blah blah."; + //Format output not supported + //Policy revised = policy.cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); + AntiySamyResult result = _sut.Scan(dirtyInput, TestPolicy); + result.CleanHtml.Should().BeEquivalentTo(dirtyInput); + } + + [Fact] + public void issue69() + { + /* issue #69 - char attribute should allow single char or entity ref */ + + string s = "
test
"; + AntiySamyResult result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Contains("char").Should().BeTrue(); + + s = "
test
"; + result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Contains("char").Should().BeFalse(); + + s = "
test
"; + result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Contains("char").Should().BeTrue(); + + s = "
test
"; + result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Contains("char").Should().BeFalse(); + + s = "
test
"; + result = _sut.Scan(s, TestPolicy); + result.CleanHtml.Contains("char").Should().BeFalse(); + } + + [Fact(Skip = "CData section is not supported and will be removed by default")] + public void CDATAByPass() + { + String malInput = "]]>"; + AntiySamyResult result = _sut.Scan(malInput, TestPolicy); + result.ErrorMessages.Should().NotBeEmpty(); + result.CleanHtml.Should().Contain("<script"); + result.CleanHtml.Should().NotContain("html"; + + AntiySamyResult result = new AntiSamy().Scan(html, TestPolicy); + + result.ErrorMessages.Count().Should().Be(0); + } + + [Fact] + public void TestDomBadResult() + { + var badHtml = "
badhtml
"; + + AntiySamyResult result = new AntiSamy().Scan(badHtml, TestPolicy); + + result.ErrorMessages.Count().Should().BeGreaterThan(0); + } + } +} diff --git a/test/AntiSamy.Tests/TestBase.cs b/test/AntiSamy.Tests/TestBase.cs new file mode 100644 index 0000000..d857540 --- /dev/null +++ b/test/AntiSamy.Tests/TestBase.cs @@ -0,0 +1,21 @@ +using System.IO; + +namespace AntiSamy.Tests +{ + public abstract class TestBase + { + private const string DefaultAntiSamyFile = "antisamy.xml"; + protected readonly Policy TestPolicy; + + protected TestBase() + { + TestPolicy = GetPolicy(DefaultAntiSamyFile); + } + + protected Policy GetPolicy(string fileName) + { + string currentDir = Directory.GetCurrentDirectory(); + return Policy.FromFile(Path.Combine(currentDir, $@"resources\{fileName}")); + } + } +}