From be6a42b2a3e2600c774d91f9fb830c44bfc217fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 17 Jun 2021 19:35:13 -0300 Subject: [PATCH 1/2] Override URL encoding when serializing results to HTML --- .../owasp/validator/html/scan/ASHTMLSerializer.java | 13 +++++++++++++ .../org/owasp/validator/html/test/AntiSamyTest.java | 10 ++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java index 57ae10ef..3407dc97 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java @@ -4,6 +4,8 @@ import org.apache.xml.serialize.HTMLdtd; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.InternalPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.Writer; @@ -11,6 +13,7 @@ @SuppressWarnings("deprecation") public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer { + private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class); private boolean encodeAllPossibleEntities; public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { @@ -67,4 +70,14 @@ public void endElementIO(String namespaceURI, String localName, _printer.flush(); } + @Override + protected String escapeURI(String uri) { + String originalURI = uri; + try { + printEscaped(uri); + } catch (IOException e) { + logger.error("URI escaping failed for value: " + originalURI); + } + return ""; + } } diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index e1c1acaa..a4632590 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1490,5 +1490,15 @@ public void testGithubIssue81() throws ScanException, PolicyException { assertThat(as.scan("

Some Text

", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("!important"))); assertThat(as.scan("

Some Text

", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("!important"))); } + + @Test + public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException { + // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" + // so the validations based on regexp passed and a browser would load "&:" together + assertThat(as.scan("

xss

", policy, AntiSamy.DOM).getCleanHTML(), + containsString("javascript&#00058")); + assertThat(as.scan("

xss

", policy, AntiSamy.SAX).getCleanHTML(), + containsString("javascript&#00058")); + } } From 2dc029ffc7cc15d1b628071a2b7a7837a9a0aade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 17 Jun 2021 20:13:10 -0300 Subject: [PATCH 2/2] Add useXHTML as false on HTML encoding test --- .../java/org/owasp/validator/html/test/AntiSamyTest.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index a4632590..8c5dba13 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1494,10 +1494,12 @@ public void testGithubIssue81() throws ScanException, PolicyException { @Test public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException { // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" - // so the validations based on regexp passed and a browser would load "&:" together - assertThat(as.scan("

xss

", policy, AntiSamy.DOM).getCleanHTML(), + // so the validations based on regexp passed and a browser would load "&:" together. + // All this when not using the XHTML serializer. + Policy revised = policy.cloneWithDirective("useXHTML","false"); + assertThat(as.scan("

xss

", revised, AntiSamy.DOM).getCleanHTML(), containsString("javascript&#00058")); - assertThat(as.scan("

xss

", policy, AntiSamy.SAX).getCleanHTML(), + assertThat(as.scan("

xss

", revised, AntiSamy.SAX).getCleanHTML(), containsString("javascript&#00058")); } }