cumulative improvements and additional tests

This commit is contained in:
Caner Patır
2018-05-07 14:01:23 +03:00
parent 18a7897bf5
commit a84dffaa31
8 changed files with 2568 additions and 87 deletions

View File

@@ -1,6 +1,6 @@
<Project>
<PropertyGroup>
<VersionPrefix>1.0.1</VersionPrefix>
<VersionPrefix>1.0.2</VersionPrefix>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<PackageIconUrl>https://raw.githubusercontent.com/canerpatir/AntiSamy.NET/master/icon.png</PackageIconUrl>
<PackageProjectUrl>https://github.com/canerpatir/AntiSamy.NET</PackageProjectUrl>

View File

@@ -4,7 +4,7 @@
<TargetFramework>netstandard2.0</TargetFramework>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Authors>Caner Patır</Authors>
<Version>1.0.1</Version>
<Version>1.0.2</Version>
<FileVersion>1.0.1.0</FileVersion>
</PropertyGroup>

View File

@@ -16,8 +16,6 @@ namespace AntiSamy
private readonly Policy _policy;
private int _num;
public AntiSamyDomScanner(Policy policy) => _policy = policy;
public AntiySamyResult Scan(string html)
@@ -27,18 +25,10 @@ namespace AntiSamy
throw new ArgumentNullException(nameof(html));
}
//had problems with the &nbsp; getting double encoded, so this converts it to a literal space.
//this may need to be changed.
html = html.Replace("&nbsp;", char.Parse("\u00a0").ToString());
//We have to replace any invalid XML characters
html = StripNonValidXmlCharacters(html);
//holds the maximum input size for the incoming fragment
int maxInputSize = Policy.DefaultMaxInputSize;
//grab the size specified in the config file
try
{
maxInputSize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);
@@ -48,64 +38,53 @@ namespace AntiSamy
Console.WriteLine("Format Exception: " + fe);
}
//ensure our input is less than the max
if (maxInputSize < html.Length)
{
throw new ScanException("File size [" + html.Length + "] is larger than maximum [" + maxInputSize + "]");
}
//grab start time (to be put in the result set along with end time)
DateTime start = DateTime.Now;
//fixes some weirdness in HTML agility
if (!HtmlNode.ElementsFlags.ContainsKey("iframe"))
{
HtmlNode.ElementsFlags.Add("iframe", HtmlElementFlag.Empty);
}
HtmlNode.ElementsFlags.Remove("form");
//Let's parse the incoming HTML
var doc = new HtmlDocument();
doc.LoadHtml(html);
doc.LoadHtml(html.Replace(Environment.NewLine, string.Empty).Replace("\t", string.Empty));
//add closing tags
doc.OptionAutoCloseOnEnd = true;
//enforces XML rules, encodes big 5
doc.OptionOutputAsXml = true;
//loop through every node now, and enforce the rules held in the policy object
for (var i = 0; i < doc.DocumentNode.ChildNodes.Count; i++)
{
//grab current node
HtmlNode tmp = doc.DocumentNode.ChildNodes[i];
//this node can hold other nodes, so recursively validate
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(doc.DocumentNode.ChildNodes);
string finalCleanHtml = doc.DocumentNode.InnerHtml;
return new AntiySamyResult(start, finalCleanHtml, _errorMessages);
}
private void RecursiveValidateTag(HtmlNode node)
private void EvaluateNodeCollection(HtmlNodeCollection nodes)
{
for (var i = 0; i < nodes.Count; i++)
{
HtmlNode node = nodes[i];
EvaluateNode(node);
if (node.ParentNode == null)
{
i--;
}
}
}
private void EvaluateNode(HtmlNode node)
{
int maxinputsize = _policy.GetDirectiveAsInt("maxInputSize", int.MaxValue);
_num++;
HtmlNode parentNode = node.ParentNode;
HtmlNode tmp = null;
string tagName = node.Name;
//check this out
//might not be robust enough
if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment"))
{
return;
@@ -122,7 +101,7 @@ namespace AntiSamy
}
else
{
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "</b> ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName.ToLower()) + "\" ");
}
errBuff.Append("tag has been filtered for security reasons. The contents of the tag will ");
@@ -130,16 +109,8 @@ namespace AntiSamy
_errorMessages.Add(errBuff.ToString());
for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(node.ChildNodes);
PromoteChildren(node);
}
else if (Consts.TagActions.VALIDATE.Equals(tag.Action))
@@ -162,8 +133,8 @@ namespace AntiSamy
{
var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name));
errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted.");
_errorMessages.Add(errBuff.ToString());
@@ -215,8 +186,8 @@ namespace AntiSamy
string onInvalidAction = allowwdAttr.OnInvalid;
var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag contained an attribute that we couldn't process. ");
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag contained an attribute that we couldn't process. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute had a value of <u>" + HtmlEntityEncoder.HtmlEntityEncode(value) + "</u>. ");
errBuff.Append("This value could not be accepted for security reasons. We have chosen to ");
//Console.WriteLine(policy);
@@ -224,29 +195,22 @@ namespace AntiSamy
if (Consts.OnInvalidActions.REMOVE_TAG.Equals(onInvalidAction))
{
parentNode.RemoveChild(node);
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and its contents in order to process this input. ");
errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and its contents in order to process this input. ");
}
else if (Consts.OnInvalidActions.FILTER_TAG.Equals(onInvalidAction))
{
for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(node.ChildNodes);
PromoteChildren(node);
errBuff.Append("filter the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag and leave its contents in place so that we could process this input.");
errBuff.Append("filter the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag and leave its contents in place so that we could process this input.");
}
else
{
node.Attributes.Remove(allowwdAttr.Name);
currentAttributeIndex--;
errBuff.Append("remove the <b>" + HtmlEntityEncoder.HtmlEntityEncode(name) + "</b> attribute from the tag and leave everything else in place so that we could process this input.");
errBuff.Append("remove the \"" + HtmlEntityEncoder.HtmlEntityEncode(name) + "\" attribute from the tag and leave everything else in place so that we could process this input.");
}
_errorMessages.Add(errBuff.ToString());
@@ -260,15 +224,7 @@ namespace AntiSamy
}
}
for (var i = 0; i < node.ChildNodes.Count; i++)
{
tmp = node.ChildNodes[i];
RecursiveValidateTag(tmp);
if (tmp.ParentNode == null)
{
i--;
}
}
EvaluateNodeCollection(node.ChildNodes);
}
else if ("truncate".Equals(tag.Action))// || Consts.TagActions.REMOVE.Equals(tag.Action))
{
@@ -279,8 +235,8 @@ namespace AntiSamy
{
var errBuff = new StringBuilder();
errBuff.Append("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name));
errBuff.Append("</b> attribute of the <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
errBuff.Append("The \"" + HtmlEntityEncoder.HtmlEntityEncode(nnmap[0].Name));
errBuff.Append("\" attribute of the \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons. ");
errBuff.Append("This removal should not affect the display of the HTML submitted.");
node.Attributes.Remove(nnmap[0].Name);
_errorMessages.Add(errBuff.ToString());
@@ -308,7 +264,7 @@ namespace AntiSamy
}
else
{
_errorMessages.Add("The <b>" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "</b> tag has been removed for security reasons.");
_errorMessages.Add("The \"" + HtmlEntityEncoder.HtmlEntityEncode(tagName) + "\" tag has been removed for security reasons.");
parentNode.RemoveChild(node);
}
}

View File

@@ -89,6 +89,8 @@ namespace AntiSamy
public DocumentAttribute GetGlobalAttribute(string name) => GlobalTagAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null;
public DocumentAttribute GetCommonAttribute(string name) => CommonAttributes.TryGetValue(name, out DocumentAttribute val) ? val : null;
public DocumentTag GetTag(string tagName) => TagRules.TryGetValue(tagName, out DocumentTag value) ? value : null;
public CssProperty GetCssProperty(string propertyName) => CssRules.TryGetValue(propertyName, out CssProperty value) ? value : null;
@@ -131,7 +133,7 @@ namespace AntiSamy
{
string name = node.Attributes[0].Value;
DocumentAttribute toAdd = CommonAttributes[name];
DocumentAttribute toAdd = GetCommonAttribute(name);
if (toAdd != null)
{
globalAttributes.Add(name, toAdd);
@@ -249,24 +251,24 @@ namespace AntiSamy
XmlNodeList attributeList = tagNode.SelectNodes("attribute");
foreach (XmlNode attributeNode in attributeList)
{
if (!attributeNode.HasChildNodes)
if (IsCommonAttributeRule(attributeNode))
{
CommonAttributes.TryGetValue(attributeNode.Attributes["name"].Value, out DocumentAttribute attribute);
DocumentAttribute commonAttribute = GetCommonAttribute(attributeNode.Attributes["name"].Value);
if (attribute != null)
if (commonAttribute != null)
{
string onInvalid = attributeNode.Attributes["onInvalid"]?.Value;
string description = attributeNode.Attributes["description"]?.Value;
if (!string.IsNullOrEmpty(onInvalid))
{
attribute.OnInvalid = onInvalid;
commonAttribute.OnInvalid = onInvalid;
}
if (!string.IsNullOrEmpty(description))
{
attribute.Description = description;
commonAttribute.Description = description;
}
tag.AddAllowedAttribute((DocumentAttribute)attribute.Clone());
tag.AddAllowedAttribute((DocumentAttribute)commonAttribute.Clone());
}
}
else
@@ -337,6 +339,11 @@ namespace AntiSamy
return tags;
}
private static bool IsCommonAttributeRule(XmlNode attributeNode)
{
return !attributeNode.HasChildNodes;
}
private Dictionary<string, CssProperty> ParseCssRules(XmlNode cssNodeList)
{
var properties = new Dictionary<string, CssProperty>(StringComparer.InvariantCultureIgnoreCase);

View File

@@ -44,6 +44,9 @@
<None Update="resources\antisamy.xsd">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="resources\antisamy1.xml">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>

View File

@@ -4,9 +4,10 @@ namespace AntiSamy.Tests
{
public abstract class TestBase
{
private const string DefaultAntiSamyFile = "antisamy.xml";
protected readonly Policy TestPolicy;
protected virtual string DefaultAntiSamyFile => "antisamy.xml";
protected TestBase()
{
TestPolicy = GetPolicy(DefaultAntiSamyFile);

View File

@@ -0,0 +1,132 @@
using FluentAssertions;
using Xunit;
namespace AntiSamy.Tests
{
public class UseCaseTests : TestBase
{
protected override string DefaultAntiSamyFile => "antisamy1.xml";
[Fact]
public void invalid_img_urls_should_be_filtered()
{
var scanner = new AntiSamy();
/*
* remove non-allowed image srcs
*/
var input = @"<div>
<img src='mysite.com/image.jpg' /> <!-- to be allowed --!>
Some description
<img src='hackers.com/xss.js' />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
// safe - allowed url pattern in the antisamy1.xml
result.CleanHtml.Should().Contain("Some description");
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("mysite.com/image.jpg");
// non safe
result.CleanHtml.Should().NotContain("hackers.com/xss.js");
}
[Fact]
public void invalid_tags_should_be_removed()
{
var scanner = new AntiSamy();
/*
* remove iframe, object, embed, frame, frameset
*/
var input = @"<div>
Some description
<iframe src='hackers.com/xss' />
<object data='hackers.com/xss' />
<embed />
<frame />
<frameset />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
//safe
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
// non safe
result.CleanHtml.Should().NotContain("<iframe");
result.CleanHtml.Should().NotContain("<object");
result.CleanHtml.Should().NotContain("<embed");
result.CleanHtml.Should().NotContain("<frame");
result.CleanHtml.Should().NotContain("<frameset");
}
[Fact]
public void invalid_a_hrefs_should_be_filtered()
{
var scanner = new AntiSamy();
/*
* remove non-allowed hrefs
*/
var input = @"<div>
<a href='mysite.com/image.jpg' /> <!-- to be allowed --!>
<a href='mysite.com/some_relative_path' /> <!-- to be allowed --!>
<a href='mysite.com/some_relative_path/level2' /> <!-- to be allowed --!>
Some description
<a href='hackers.com/xss.js' />
<a href='abc.com' />
another description
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
// safe - allowed url pattern in the antisamy1.xml
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
result.CleanHtml.Should().Contain("another description");
result.CleanHtml.Should().Contain("mysite.com/image.jpg");
result.CleanHtml.Should().Contain("mysite.com/some_relative_path");
result.CleanHtml.Should().Contain("mysite.com/some_relative_path/level2");
// non safe
result.CleanHtml.Should().NotContain("hackers.com/xss.js");
result.CleanHtml.Should().NotContain("abc.com");
}
[Fact]
public void script_references_should_be_removed_by_default()
{
var scanner = new AntiSamy();
/*
* remove non-allowed hrefs
*/
var input = @"<script type='text/javascript' src='hackers.com/xss.js' />
<script>alert('XSS !!!');</script>
<div>
Some description
<script type='text/javascript' src='hackers.com/xss.js' />
</div>";
AntiySamyResult result = scanner.Scan(input, TestPolicy);
//safe
result.CleanHtml.Should().Contain("<div");
result.CleanHtml.Should().Contain("Some description");
// non safe
result.CleanHtml.Should().NotContain("<script");
}
}
}

File diff suppressed because it is too large Load Diff