From 4eb9af42632550bfc930dffc5ee0264139c968ff Mon Sep 17 00:00:00 2001 From: Ben Kelly Date: Fri, 5 Mar 2021 12:22:00 -0800 Subject: [PATCH] URLPattern: Canonicalize pattern encoding. This CL adds an encoding callback to liburlpattern::Parse(). The parser will invoke the given callback for plaintext parts of the pattern to validate and encode the characters. This callback mechanism is then used to apply the Chromium URL canonicalization code for each component pattern. There are a couple of behaviors in the canonicalizer that do not play well with this approach that the CL works around: 1. The port canonicalizer will replace an exact default port with the empty string. Since the liburlpattern::Parse() callback is invoked for partial values, this CL instead implements this canonicalization separately before pattern compilation. 2. The URL canonicalizer will prepend a leading `/` character if there isn't one. Again, this behavior does not make sense when operating on partial values. Therefore this CL exposes the internal partial path canonicalization routine so that we can use it in URLPattern. In addition, this CL removes a DCHECK from url's DoPartialPath() that asserted there was always a character preceding a dot. A runtime check has guarded the same condition since 2013, so it seems safe to remove the DCHECK. In this case we also want to be able to canonicalize partial paths that start with dots. The CL adds a number of additional WPT test cases validating the new canonicalization behavior. 
The behavior in this test has been discussed in this spec issue: https://github.com/WICG/urlpattern/issues/33 Bug: 1141510 Change-Id: I388be5d0cc57b125d44465b283050df5ed0b5321 --- urlpattern/resources/urlpatterntestdata.json | 96 ++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/urlpattern/resources/urlpatterntestdata.json b/urlpattern/resources/urlpatterntestdata.json index 9e7fe20005bed1a..ae9405ea4b1292a 100644 --- a/urlpattern/resources/urlpatterntestdata.json +++ b/urlpattern/resources/urlpatterntestdata.json @@ -985,6 +985,14 @@ "username": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "username": "café" }, + "input": { "username" : "café" }, + "expected": { + "input": { "username" : "café" }, + "username": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "username": "caf%c3%a9" }, "input": { "username" : "café" }, @@ -998,6 +1006,14 @@ "password": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "password": "café" }, + "input": { "password" : "café" }, + "expected": { + "input": { "password" : "café" }, + "password": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "password": "caf%c3%a9" }, "input": { "password" : "café" }, @@ -1011,6 +1027,14 @@ "hostname": { "input": "xn--caf-dma.com", "groups": {}} } }, + { + "pattern": { "hostname": "café.com" }, + "input": { "hostname" : "café.com" }, + "expected": { + "input": { "hostname" : "café.com" }, + "hostname": { "input": "xn--caf-dma.com", "groups": {}} + } + }, { "pattern": { "port": "" }, "input": { "protocol": "http", "port": "80" }, @@ -1020,6 +1044,35 @@ "port": { "input": "", "groups": {}} } }, + { + "pattern": { "protocol": "http", "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": { + "input": { "protocol": "http", "port": "80" }, + "protocol": { "input": "http", "groups": {}}, + "port": { "input": "", "groups": {}} + } + }, + { + "pattern": { "protocol": "http", "port": "80{20}?" 
}, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, + { + "pattern": { "protocol": "http", "port": "80 " }, + "input": { "protocol": "http", "port": "80" }, + "error": true + }, + { + "pattern": { "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, + { + "pattern": { "protocol": "http{s}?", "port": "80" }, + "input": { "protocol": "http", "port": "80" }, + "expected": null + }, { "pattern": { "port": "80" }, "input": { "port": "80" }, @@ -1057,6 +1110,14 @@ "pathname": { "input": "/caf%C3%A9", "groups": {}} } }, + { + "pattern": { "pathname": "/café" }, + "input": { "pathname": "/café" }, + "expected": { + "input": { "pathname": "/café" }, + "pathname": { "input": "/caf%C3%A9", "groups": {}} + } + }, { "pattern": { "pathname": "/caf%c3%a9" }, "input": { "pathname": "/café" }, @@ -1077,6 +1138,25 @@ "pathname": { "input": "/foo/bar", "groups": {}} } }, + { + "pattern": { "pathname": "/foo/../bar" }, + "input": { "pathname": "/bar" }, + "expected": { + "input": { "pathname": "/bar" }, + "pathname": { "input": "/bar", "groups": {}} + } + }, + { + "pattern": { "pathname": "./foo/bar", "baseURL": "https://example.com" }, + "input": { "pathname": "foo/bar", "baseURL": "https://example.com" }, + "expected": { + "input": { "pathname": "foo/bar", "baseURL": "https://example.com" }, + "exactly_empty_components": [ "username", "password", "port" ], + "protocol": { "input": "https", "groups": {}}, + "hostname": { "input": "example.com", "groups": {}}, + "pathname": { "input": "/foo/bar", "groups": {}} + } + }, { "pattern": { "pathname": "foo/bar" }, "input": "https://example.com/foo/bar", @@ -1112,6 +1192,14 @@ "search": { "input": "q=caf%C3%A9", "groups": {}} } }, + { + "pattern": { "search": "q=café" }, + "input": { "search": "q=café" }, + "expected": { + "input": { "search": "q=café" }, + "search": { "input": "q=caf%C3%A9", "groups": {}} + } + }, { "pattern": { "search": "q=caf%c3%a9" }, "input": { "search": 
"q=café" }, @@ -1125,6 +1213,14 @@ "hash": { "input": "caf%C3%A9", "groups": {}} } }, + { + "pattern": { "hash": "café" }, + "input": { "hash": "café" }, + "expected": { + "input": { "hash": "café" }, + "hash": { "input": "caf%C3%A9", "groups": {}} + } + }, { "pattern": { "hash": "caf%c3%a9" }, "input": { "hash": "café" },