cumulative improvements and additional tests

This commit is contained in:
Caner Patır
2018-05-07 14:01:23 +03:00
parent 18a7897bf5
commit a84dffaa31
8 changed files with 2568 additions and 87 deletions

View File

@@ -1,6 +1,6 @@
<Project> <Project>
<PropertyGroup> <PropertyGroup>
<VersionPrefix>1.0.1</VersionPrefix> <VersionPrefix>1.0.2</VersionPrefix>
<NoWarn>$(NoWarn);CS1591</NoWarn> <NoWarn>$(NoWarn);CS1591</NoWarn>
<PackageIconUrl>https://raw.githubusercontent.com/canerpatir/AntiSamy.NET/master/icon.png</PackageIconUrl> <PackageIconUrl>https://raw.githubusercontent.com/canerpatir/AntiSamy.NET/master/icon.png</PackageIconUrl>
<PackageProjectUrl>https://github.com/canerpatir/AntiSamy.NET</PackageProjectUrl> <PackageProjectUrl>https://github.com/canerpatir/AntiSamy.NET</PackageProjectUrl>

View File

@@ -4,7 +4,7 @@
<TargetFramework>netstandard2.0</TargetFramework> <TargetFramework>netstandard2.0</TargetFramework>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild> <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Authors>Caner Patır</Authors> <Authors>Caner Patır</Authors>
<Version>1.0.1</Version> <Version>1.0.2</Version>
<FileVersion>1.0.1.0</FileVersion> <FileVersion>1.0.1.0</FileVersion>
</PropertyGroup> </PropertyGroup>

View File

@@ -16,8 +16,6 @@ namespace AntiSamy
private readonly Policy _policy; private readonly Policy _policy;
private int _num;
public AntiSamyDomScanner(Policy policy) => _policy = policy; public AntiSamyDomScanner(Policy policy) => _policy = policy;
public AntiySamyResult Scan(string html) public AntiySamyResult Scan(string html)
@@ -27,18 +25,10 @@ namespace AntiSamy
throw new ArgumentNullException(nameof(html)); throw new ArgumentNullException(nameof(html));
} }
//had problems with the &nbsp; getting double encoded, so this converts it to a literal space.
//this may need to be changed.
html = html.Replace("&nbsp;", char.Parse("\u00a0").ToString()); html = html.Replace("&nbsp;", char.Parse("\u00a0").ToString());
//We have to replace any invalid XML characters
html = StripNonValidXmlCharacters(html); html = StripNonValidXmlCharacters(html);
//holds the maximum input size for the incoming fragment
int maxInputSize = Policy.DefaultMaxInputSize; int maxInputSize = Policy.DefaultMaxInputSize;
//grab the size specified in the config file
try try
{ {
maxInputSize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue); maxInputSize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);
@@ -48,64 +38,53 @@ namespace AntiSamy
Console.WriteLine("Format Exception: " + fe); Console.WriteLine("Format Exception: " + fe);
} }
//ensure our input is less than the max
if (maxInputSize < html.Length) if (maxInputSize < html.Length)
{ {
throw new ScanException("File size [" + html.Length + "] is larger than maximum [" + maxInputSize + "]"); throw new ScanException("File size [" + html.Length + "] is larger than maximum [" + maxInputSize + "]");
} }
//grab start time (to be put in the result set along with end time)
DateTime start = DateTime.Now; DateTime start = DateTime.Now;
//fixes some weirdness in HTML agility
if (!HtmlNode.ElementsFlags.ContainsKey("iframe")) if (!HtmlNode.ElementsFlags.ContainsKey("iframe"))
{ {
HtmlNode.ElementsFlags.Add("iframe", HtmlElementFlag.Empty); HtmlNode.ElementsFlags.Add("iframe", HtmlElementFlag.Empty);
} }
HtmlNode.ElementsFlags.Remove("form"); HtmlNode.ElementsFlags.Remove("form");
//Let's parse the incoming HTML
var doc = new HtmlDocument(); var doc = new HtmlDocument();
doc.LoadHtml(html); doc.LoadHtml(html.Replace(Environment.NewLine, string.Empty).Replace("\t", string.Empty));
//add closing tags
doc.OptionAutoCloseOnEnd = true; doc.OptionAutoCloseOnEnd = true;
//enforces XML rules, encodes big 5
doc.OptionOutputAsXml = true; doc.OptionOutputAsXml = true;
//loop through every node now, and enforce the rules held in the policy object EvaluateNodeCollection(doc.DocumentNode.ChildNodes);
for (var i = 0; i < doc.DocumentNode.ChildNodes.Count; i++)
{
//grab current node
HtmlNode tmp = doc.DocumentNode.ChildNodes[i];
//this node can hold other nodes, so recursively validate
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
string finalCleanHtml = doc.DocumentNode.InnerHtml; string finalCleanHtml = doc.DocumentNode.InnerHtml;
return new AntiySamyResult(start, finalCleanHtml, _errorMessages); return new AntiySamyResult(start, finalCleanHtml, _errorMessages);
} }
private void RecursiveValidateTag(HtmlNode node) private void EvaluateNodeCollection(HtmlNodeCollection nodes)
{
for (var i = 0; i < nodes.Count; i++)
{
HtmlNode node = nodes[i];
EvaluateNode(node);
if (node.ParentNode == null)
{
i--;
}
}
}
private void EvaluateNode(HtmlNode node)
{ {
int maxinputsize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue); int maxinputsize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);
_num++;
HtmlNode parentNode = node.ParentNode; HtmlNode parentNode = node.ParentNode;
HtmlNode tmp = null;
string tagName = node.Name; string tagName = node.Name;
//check this out
//might not be robust enough
if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment")) if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment"))
{ {
return; return;
@@ -122,7 +101,7 @@ namespace AntiSamy
} }
else else
{ {
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "</b> "); errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "\" ");
} }
errBuff.Append("tag has been filtered for security reasons. The contents of the tag will "); errBuff.Append("tag has been filtered for security reasons. The contents of the tag will ");
@@ -130,16 +109,8 @@ namespace AntiSamy
_errorMessages.Add(errBuff.ToString()); _errorMessages.Add(errBuff.ToString());
for (var i = 0; i < node.ChildNodes.Count; i++) EvaluateNodeCollection(node.ChildNodes);
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
PromoteChildren(node); PromoteChildren(node);
} }
else if (Consts.TagActions.VALIDATE.Equals(tag.Action)) else if (Consts.TagActions.VALIDATE.Equals(tag.Action))
@@ -162,8 +133,8 @@ namespace AntiSamy
{ {
var errBuff = new StringBuilder(); var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name)); errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. "); errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted."); errBuff.Append("This removal should not affect the display of the HTML submitted.");
_errorMessages.Add(errBuff.ToString()); _errorMessages.Add(errBuff.ToString());
@@ -215,8 +186,8 @@ namespace AntiSamy
string onInvalidAction = allowwdAttr.OnInvalid; string onInvalidAction = allowwdAttr.OnInvalid;
var errBuff = new StringBuilder(); var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag contained an attribute that we couldn't process. "); errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag contained an attribute that we couldn't process. ");
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. "); errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. ");
errBuff.Append("This value could not be accepted for security reasons. We have chosen to "); errBuff.Append("This value could not be accepted for security reasons. We have chosen to ");
//Console.WriteLine(policy); //Console.WriteLine(policy);
@@ -224,29 +195,22 @@ namespace AntiSamy
if (Consts.OnInvalidActions.REMOVE_TAG.Equals(onInvalidAction)) if (Consts.OnInvalidActions.REMOVE_TAG.Equals(onInvalidAction))
{ {
parentNode.RemoveChild(node); parentNode.RemoveChild(node);
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and its contents in order to process this input. "); errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and its contents in order to process this input. ");
} }
else if (Consts.OnInvalidActions.FILTER_TAG.Equals(onInvalidAction)) else if (Consts.OnInvalidActions.FILTER_TAG.Equals(onInvalidAction))
{ {
for (var i = 0; i < node.ChildNodes.Count; i++)
{ EvaluateNodeCollection(node.ChildNodes);
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
PromoteChildren(node); PromoteChildren(node);
errBuff.Append("filter the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and leave its contents in place so that we could process this input."); errBuff.Append("filter the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and leave its contents in place so that we could process this input.");
} }
else else
{ {
node.Attributes.Remove(allowwdAttr.Name); node.Attributes.Remove(allowwdAttr.Name);
currentAttributeIndex--; currentAttributeIndex--;
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute from the tag and leave everything else in place so that we could process this input."); errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute from the tag and leave everything else in place so that we could process this input.");
} }
_errorMessages.Add(errBuff.ToString()); _errorMessages.Add(errBuff.ToString());
@@ -260,15 +224,7 @@ namespace AntiSamy
} }
} }
for (var i = 0; i < node.ChildNodes.Count; i++) EvaluateNodeCollection(node.ChildNodes);
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
} }
else if ("truncate".Equals(tag.Action))// || Consts.TagActions.REMOVE.Equals(tag.Action)) else if ("truncate".Equals(tag.Action))// || Consts.TagActions.REMOVE.Equals(tag.Action))
{ {
@@ -279,8 +235,8 @@ namespace AntiSamy
{ {
var errBuff = new StringBuilder(); var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name)); errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. "); errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted."); errBuff.Append("This removal should not affect the display of the HTML submitted.");
node.Attributes.Remove(nnmap[0].Name); node.Attributes.Remove(nnmap[0].Name);
_errorMessages.Add(errBuff.ToString()); _errorMessages.Add(errBuff.ToString());
@@ -308,7 +264,7 @@ namespace AntiSamy
} }
else else
{ {
_errorMessages.Add("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons."); _errorMessages.Add("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons.");
parentNode.RemoveChild(node); parentNode.RemoveChild(node);
} }
} }

View File

@@ -89,6 +89,8 @@ namespace AntiSamy
public DocumentAttribute GetGlobalAttribute(string name) => GlobalTagAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null; public DocumentAttribute GetGlobalAttribute(string name) => GlobalTagAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null;
public DocumentAttribute GetCommonAttribute(string name) => CommonAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null;
public DocumentTag GetTag(string tagName) => TagRules.TryGetValue(tagName, out DocumentTag value) ? value : null; public DocumentTag GetTag(string tagName) => TagRules.TryGetValue(tagName, out DocumentTag value) ? value : null;
public CssProperty GetCssProperty(string propertyName) => CssRules.TryGetValue(propertyName, out CssProperty value) ? value : null; public CssProperty GetCssProperty(string propertyName) => CssRules.TryGetValue(propertyName, out CssProperty value) ? value : null;
@@ -131,7 +133,7 @@ namespace AntiSamy
{ {
string name = node.Attributes[0].Value; string name = node.Attributes[0].Value;
DocumentAttribute toAdd = CommonAttributes[name]; DocumentAttribute toAdd = GetCommonAttribute(name);
if (toAdd != null) if (toAdd != null)
{ {
globalAttributes.Add(name, toAdd); globalAttributes.Add(name, toAdd);
@@ -249,24 +251,24 @@ namespace AntiSamy
XmlNodeList attributeList = tagNode.SelectNodes("attribute"); XmlNodeList attributeList = tagNode.SelectNodes("attribute");
foreach (XmlNode attributeNode in attributeList) foreach (XmlNode attributeNode in attributeList)
{ {
if (!attributeNode.HasChildNodes) if (IsCommonAttributeRule(attributeNode))
{ {
CommonAttributes.TryGetValue(attributeNode.Attributes["name"].Value, out DocumentAttribute attribute); DocumentAttribute commonAttribute = GetCommonAttribute(attributeNode.Attributes["name"].Value);
if (attribute != null) if (commonAttribute != null)
{ {
string onInvalid = attributeNode.Attributes["onInvalid"]?.Value; string onInvalid = attributeNode.Attributes["onInvalid"]?.Value;
string description = attributeNode.Attributes["description"]?.Value; string description = attributeNode.Attributes["description"]?.Value;
if (!string.IsNullOrEmpty(onInvalid)) if (!string.IsNullOrEmpty(onInvalid))
{ {
attribute.OnInvalid = onInvalid; commonAttribute.OnInvalid = onInvalid;
} }
if (!string.IsNullOrEmpty(description)) if (!string.IsNullOrEmpty(description))
{ {
attribute.Description = description; commonAttribute.Description = description;
} }
tag.AddAllowedAttribute((DocumentAttribute)attribute.Clone()); tag.AddAllowedAttribute((DocumentAttribute)commonAttribute.Clone());
} }
} }
else else
@@ -337,6 +339,11 @@ namespace AntiSamy
return tags; return tags;
} }
private static bool IsCommonAttributeRule(XmlNode attributeNode)
{
return !attributeNode.HasChildNodes;
}
private Dictionary<string, CssProperty> ParseCssRules(XmlNode cssNodeList) private Dictionary<string, CssProperty> ParseCssRules(XmlNode cssNodeList)
{ {
var properties = new Dictionary<string, CssProperty>(StringComparer.InvariantCultureIgnoreCase); var properties = new Dictionary<string, CssProperty>(StringComparer.InvariantCultureIgnoreCase);

View File

@@ -44,6 +44,9 @@
<None Update="resources\antisamy.xsd"> <None Update="resources\antisamy.xsd">
<CopyToOutputDirectory>Always</CopyToOutputDirectory> <CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None> </None>
<None Update="resources\antisamy1.xml">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@@ -4,9 +4,10 @@ namespace AntiSamy.Tests
{ {
public abstract class TestBase public abstract class TestBase
{ {
private const string DefaultAntiSamyFile = "antisamy.xml";
protected readonly Policy TestPolicy; protected readonly Policy TestPolicy;
protected virtual string DefaultAntiSamyFile => "antisamy.xml";
protected TestBase() protected TestBase()
{ {
TestPolicy = GetPolicy(DefaultAntiSamyFile); TestPolicy = GetPolicy(DefaultAntiSamyFile);

View File

@@ -0,0 +1,132 @@
using FluentAssertions;
using Xunit;
namespace AntiSamy.Tests
{
public class UseCaseTests : TestBase
{
protected override string DefaultAntiSamyFile => "antisamy1.xml";
[Fact]
public void invalid_img_urls_should_be_filtered()
{
var scanner = new AntiSamy();
/*
* remove non-allowed image srcs
*/
var input = @"<div>
<img src='mysite.com/image.jpg' /> <!-- to be allowed --!>
Some description
<img src='hackers.com/xss.js' />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
// safe - allowed url pattern in the antisamy1.xml
result.CleanHtml.Should().Contain("Some description");
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("mysite.com/image.jpg");
// non safe
result.CleanHtml.Should().NotContain("hackers.com/xss.js");
}
[Fact]
public void invalid_tags_should_be_removed()
{
var scanner = new AntiSamy();
/*
* remove iframe, object, embed, frame, frameset
*/
var input = @"<div>
Some description
<iframe src='hackers.com/xss' />
<object data='hackers.com/xss' />
<embed />
<frame />
<frameset />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
//safe
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
// non safe
result.CleanHtml.Should().NotContain("<iframe");
result.CleanHtml.Should().NotContain("<object");
result.CleanHtml.Should().NotContain("<embed");
result.CleanHtml.Should().NotContain("<frame");
result.CleanHtml.Should().NotContain("<frameset");
}
[Fact]
public void invalid_a_hrefs_should_be_filtered()
{
var scanner = new AntiSamy();
/*
* remove non-allowed hrefs
*/
var input = @"<div>
<a href='mysite.com/image.jpg' /> <!-- to be allowed --!>
<a href='mysite.com/some_relative_path' /> <!-- to be allowed --!>
<a href='mysite.com/some_relative_path/level2' /> <!-- to be allowed --!>
Some description
<a href='hackers.com/xss.js' />
<a href='abc.com' />
another description
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
// safe - allowed url pattern in the antisamy1.xml
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
result.CleanHtml.Should().Contain("another description");
result.CleanHtml.Should().Contain("mysite.com/image.jpg");
result.CleanHtml.Should().Contain("mysite.com/some_relative_path");
result.CleanHtml.Should().Contain("mysite.com/some_relative_path/level2");
// non safe
result.CleanHtml.Should().NotContain("hackers.com/xss.js");
result.CleanHtml.Should().NotContain("abc.com");
}
[Fact]
public void script_references_should_be_removed_by_default()
{
var scanner = new AntiSamy();
/*
* remove non-allowed hrefs
*/
var input = @"<script type='text/javascript' src='hackers.com/xss.js' />
<script>alert('XSS !!!');</script>
<div>
Some description
<script type='text/javascript' src='hackers.com/xss.js' />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
//safe
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
// non safe
result.CleanHtml.Should().NotContain("<script");
}
}
}

File diff suppressed because it is too large Load Diff