From e0754272c378bf4fd9bc5e0d6161dc87fff8f9cf Mon Sep 17 00:00:00 2001 From: Ben Kelly Date: Fri, 5 Mar 2021 15:16:34 -0800 Subject: [PATCH] URLPattern: Canonicalize pattern encoding. This CL adds an encoding callback to liburlpattern::Parse(). The parser will invoke the given callback for plaintext parts of the pattern to validate and encode the characters. This callback mechanism is then used to apply the chromium url canonicalization code for each component pattern. There are a couple of behaviors in the canonicalizer that do not play well with this approach that the CL works around: 1. The port canonicalizer will replace an exact default port with the empty string. Since the liburlpattern::Parse() callback is invoked for partial values this CL instead implements this canonicalization separately before pattern compilation. 2. The URL canonicalizer will prepend a leading `/` character if there isn't one. Again, this behavior does not make sense when operating on partial values. Therefore this CL exposes the internal partial path canonicalization routine so that we can use it in URLPattern. In addition, this CL removes a DCHECK from url's DoPartialPath() that asserted there was always a character preceding a dot. The DCHECK has had a runtime check checking the same behavior since 2013 so it seems safe to remove the DCHECK. And in this case we want to be able to canonicalize partial paths that do start with dots. The CL adds a number of additional WPT test cases validating the new canonicalization behavior. 
The behavior in this test has been discussed in this spec issue: https://github.com/WICG/urlpattern/issues/33 Bug: 1141510 Change-Id: I388be5d0cc57b125d44465b283050df5ed0b5321 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2720702 Reviewed-by: Jeremy Roman Reviewed-by: Charlie Harrison Commit-Queue: Ben Kelly Cr-Commit-Position: refs/heads/master@{#860399} --- urlpattern/resources/urlpatterntestdata.json | 96 ++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/urlpattern/resources/urlpatterntestdata.json b/urlpattern/resources/urlpatterntestdata.json index 9e7fe20005bed1..ae9405ea4b1292 100644 --- a/urlpattern/resources/urlpatterntestdata.json +++ b/urlpattern/resources/urlpatterntestdata.json @@ -985,6 +985,14 @@ "username": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "username": "café" }, + "input": { "username" : "café" }, + "expected": { + "input": { "username" : "café" }, + "username": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "username": "caf%c3%a9" }, "input": { "username" : "café" }, @@ -998,6 +1006,14 @@ "password": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "password": "café" }, + "input": { "password" : "café" }, + "expected": { + "input": { "password" : "café" }, + "password": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "password": "caf%c3%a9" }, "input": { "password" : "café" }, @@ -1011,6 +1027,14 @@ "hostname": { "input": "xn--caf-dma.com", "groups": {}} } }, + { + "pattern": { "hostname": "café.com" }, + "input": { "hostname" : "café.com" }, + "expected": { + "input": { "hostname" : "café.com" }, + "hostname": { "input": "xn--caf-dma.com", "groups": {}} + } + }, { "pattern": { "port": "" }, "input": { "protocol": "http", "port": "80" }, @@ -1020,6 +1044,35 @@ "port": { "input": "", "groups": {}} } }, + { + "pattern": { "protocol": "http", "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": { + "input": { 
"protocol": "http", "port": "80" }, + "protocol": { "input": "http", "groups": {}}, + "port": { "input": "", "groups": {}} + } + }, + { + "pattern": { "protocol": "http", "port": "80{20}?" }, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, + { + "pattern": { "protocol": "http", "port": "80 " }, + "input": { "protocol": "http", "port": "80" }, + "error": true + }, + { + "pattern": { "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, + { + "pattern": { "protocol": "http{s}?", "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, { "pattern": { "port": "80" }, "input": { "port": "80" }, @@ -1057,6 +1110,14 @@ "pathname": { "input": "/caf%C3%A9", "groups": {}} } }, + { + "pattern": { "pathname": "/café" }, + "input": { "pathname": "/café" }, + "expected": { + "input": { "pathname": "/café" }, + "pathname": { "input": "/caf%C3%A9", "groups": {}} + } + }, { "pattern": { "pathname": "/caf%c3%a9" }, "input": { "pathname": "/café" }, @@ -1077,6 +1138,25 @@ "pathname": { "input": "/foo/bar", "groups": {}} } }, + { + "pattern": { "pathname": "/foo/../bar" }, + "input": { "pathname": "/bar" }, + "expected": { + "input": { "pathname": "/bar" }, + "pathname": { "input": "/bar", "groups": {}} + } + }, + { + "pattern": { "pathname": "./foo/bar", "baseURL": "https://example.com" }, + "input": { "pathname": "foo/bar", "baseURL": "https://example.com" }, + "expected": { + "input": { "pathname": "foo/bar", "baseURL": "https://example.com" }, + "exactly_empty_components": [ "username", "password", "port" ], + "protocol": { "input": "https", "groups": {}}, + "hostname": { "input": "example.com", "groups": {}}, + "pathname": { "input": "/foo/bar", "groups": {}} + } + }, { "pattern": { "pathname": "foo/bar" }, "input": "https://example.com/foo/bar", @@ -1112,6 +1192,14 @@ "search": { "input": "q=caf%C3%A9", "groups": {}} } }, + { + "pattern": { "search": "q=café" }, + "input": { "search": 
"q=café" }, + "expected": { + "input": { "search": "q=café" }, + "search": { "input": "q=caf%C3%A9", "groups": {}} + } + }, { "pattern": { "search": "q=caf%c3%a9" }, "input": { "search": "q=café" }, @@ -1125,6 +1213,14 @@ "hash": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "hash": "café" }, + "input": { "hash": "café" }, + "expected": { + "input": { "hash": "café" }, + "hash": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "hash": "caf%c3%a9" }, "input": { "hash": "café" },