From 11b271631e781d14a62e703738e37361ee9185dc Mon Sep 17 00:00:00 2001 From: Michael Ganss Date: Sun, 27 Jan 2019 17:02:38 +0100 Subject: [PATCH] Add SanitizeDocument overload that takes a Stream Fixes #158 --- src/HtmlSanitizer/HtmlSanitizer.cs | 22 +++++++++++++ .../HtmlSanitizer.Tests.csproj | 32 +++++++++++-------- test/HtmlSanitizer.Tests/Tests.cs | 24 ++++++++++++++ 3 files changed, 64 insertions(+), 14 deletions(-) diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs index cfdfd3f..be9755e 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.cs +++ b/src/HtmlSanitizer/HtmlSanitizer.cs @@ -9,6 +9,7 @@ using AngleSharp.Parser.Html; using System; using System.Collections.Generic; using System.Globalization; +using System.IO; using System.Linq; using System.Text.RegularExpressions; @@ -490,6 +491,27 @@ namespace Ganss.XSS } } + /// + /// Sanitizes the specified HTML document. Even if only a fragment is given, a whole document will be returned. + /// + /// The HTML document to sanitize. + /// The base URL relative URLs are resolved against. No resolution if empty. + /// The formatter used to render the DOM. Using the <see cref="OutputFormatter"/> if null. + /// The sanitized HTML document. + public string SanitizeDocument(Stream html, string baseUrl = "", IMarkupFormatter outputFormatter = null) + { + var parser = HtmlParserFactory(); + + using (var dom = parser.Parse(html)) + { + DoSanitize(dom, dom.DocumentElement, baseUrl); + + var output = dom.ToHtml(outputFormatter ?? OutputFormatter); + + return output; + } + } + /// /// Creeates an instance of . 
/// diff --git a/test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj b/test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj index 343a599..f0a2565 100644 --- a/test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj +++ b/test/HtmlSanitizer.Tests/HtmlSanitizer.Tests.csproj @@ -1,7 +1,7 @@  - netcoreapp2.1;netcoreapp2.0;net452 + netcoreapp2.1;netcoreapp2.0;net46 HtmlSanitizer.Tests HtmlSanitizer.Tests true @@ -23,19 +23,23 @@ - - - - - - - - - - - - - + + all + runtime; build; native; contentfiles; analyzers + + + + all + runtime; build; native; contentfiles; analyzers + + + all + runtime; build; native; contentfiles; analyzers + + + + 4.5.1 + diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs index a49a734..b5ceddf 100644 --- a/test/HtmlSanitizer.Tests/Tests.cs +++ b/test/HtmlSanitizer.Tests/Tests.cs @@ -10,6 +10,8 @@ using AngleSharp; using AngleSharp.Dom.Css; using System.Threading; using System.Reflection; +using System.IO; +using System.Text; // Tests based on tests from http://roadkill.codeplex.com/ @@ -36,6 +38,7 @@ namespace Ganss.XSS.Tests public HtmlSanitizerTests(HtmlSanitizerFixture fixture) { + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); Sanitizer = fixture.Sanitizer; } @@ -3097,6 +3100,27 @@ zqy1QY1kkPOuMvKWvvmFIwClI2393jVVcp91eda4+J+fIYDbfJa7RY5YcNrZhTuV//9k=""> Assert.Equal(@"", actual); } + + + [Fact] + public void EncodingTest() + { + // https://github.com/mganss/HtmlSanitizer/issues/158 + + var sanitizer = new HtmlSanitizer(); + sanitizer.AllowedTags.Add("meta"); + sanitizer.AllowedAttributes.Add("http-equiv"); + sanitizer.AllowedAttributes.Add("content"); + + var html = @"kopieën"; + + using (var stream = new MemoryStream(Encoding.GetEncoding("iso-8859-1").GetBytes(html))) + { + var actual = sanitizer.SanitizeDocument(stream); + + Assert.Equal(html, actual); + } + } } }