From 19392b72c91eaad08a51a2def8d61eab1aca1aeb Mon Sep 17 00:00:00 2001 From: Ben Kelly Date: Fri, 19 Mar 2021 14:16:18 -0700 Subject: [PATCH] URLPattern: Adjust pathname canonicalization based on protocol. This CL refines the pathname canonicalization routines to account for differences between "standard" URLs and what Chrome calls "path" URLs. "Path" URLs are referred to as "cannot-be-a-base" URLs in the spec. To choose the canonicalization routine, we look at the protocol pattern or string. If the protocol matches a "standard" scheme, then we choose "standard" canonicalization. We also ensure that empty-string protocol values default to "standard" canonicalization. Otherwise, we use the "path" URL canonicalization, which is more lenient; e.g., it makes it easier to write javascript: URL paths. This CL also exposes two new functions from the url component: 1. GetStandardSchemeList() returns the list of all known standard schemes. We need this in order to match them against our protocol pattern. For example, `http{s}?` should match as a standard protocol pattern since it matches both `http` and `https`. We can't use the IsStandard() function for this, so we need to expose the whole list to iterate and check against. 2. CanonicalizePathURLPath() exposes the per-component canonicalization routine for "path" URLs. Previously, only a full URL parsing function was exposed. 
Discussed in: https://github.com/WICG/urlpattern/issues/33 Bug: 1141510 Change-Id: I3176a36d1e0eb2f8a0ccdf65fde346a4a623f9dd --- urlpattern/resources/urlpatterntestdata.json | 54 ++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/urlpattern/resources/urlpatterntestdata.json b/urlpattern/resources/urlpatterntestdata.json index 61a228e13a01f09..a9e74c4800f4736 100644 --- a/urlpattern/resources/urlpatterntestdata.json +++ b/urlpattern/resources/urlpatterntestdata.json @@ -1399,5 +1399,59 @@ "input": { "pathname": "/foo(" }, "pathname": { "input": "/foo(", "groups": {}} } + }, + { + "pattern": { "protocol": "javascript", "pathname": "var x = 1;" }, + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "expected_match": { + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "protocol": { "input": "javascript", "groups": {}}, + "pathname": { "input": "var x = 1;", "groups": {}} + } + }, + { + "pattern": { "pathname": "var x = 1;" }, + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "expected_obj": { + "pathname": "var%20x%20=%201;" + }, + "expected_match": null + }, + { + "pattern": { "protocol": "javascript", "pathname": "var x = 1;" }, + "input": { "baseURL": "javascript:var x = 1;" }, + "expected_match": { + "input": { "baseURL": "javascript:var x = 1;" }, + "protocol": { "input": "javascript", "groups": {}}, + "pathname": { "input": "var x = 1;", "groups": {}} + } + }, + { + "pattern": { "protocol": "(data|javascript)", "pathname": "var x = 1;" }, + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "expected_match": { + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "protocol": { "input": "javascript", "groups": {"0": "javascript"}}, + "pathname": { "input": "var x = 1;", "groups": {}} + } + }, + { + "pattern": { "protocol": "(https|javascript)", "pathname": "var x = 1;" }, + "input": { "protocol": "javascript", "pathname": "var x = 1;" }, + "expected_obj": { + 
"pathname": "var%20x%20=%201;" + }, + "expected_match": null + }, + { + "pattern": { "pathname": "var x = 1;" }, + "input": { "pathname": "var x = 1;" }, + "expected_obj": { + "pathname": "var%20x%20=%201;" + }, + "expected_match": { + "input": { "pathname": "var x = 1;" }, + "pathname": { "input": "var%20x%20=%201;", "groups": {}} + } } ]