Skip to content

Commit

Permalink
URLPattern: Support pattern syntax in IPv6 hostnames.
Browse files Browse the repository at this point in the history
This adds support for patterns that have matching syntax inside of
IPv6 address hostnames like:

  new URLPattern({ hostname: '[:address]' });

This issue is discussed here:

  whatwg/urlpattern#115

This CL also does a drive-by fix of a stale header reference in the
blink presubmit warnings.

Fixed: 1245998
Change-Id: I772258dc69c2b658ee4d7306b4f1975324624338
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3140140
Reviewed-by: Jeremy Roman <jbroman@chromium.org>
Reviewed-by: Stephen McGruer <smcgruer@chromium.org>
Commit-Queue: Ben Kelly <wanderview@chromium.org>
Cr-Commit-Position: refs/heads/main@{#919392}
NOKEYCHECK=True
GitOrigin-RevId: d78439a4b3e959d8f0fca925eaba89758164ccc9
  • Loading branch information
wanderview authored and copybara-github committed Sep 8, 2021
1 parent 820255f commit be4a8d8
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 10 deletions.
25 changes: 25 additions & 0 deletions blink/renderer/modules/url_pattern/url_pattern_canon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "third_party/blink/renderer/modules/url_pattern/url_pattern_component.h"
#include "third_party/blink/renderer/platform/bindings/exception_state.h"
#include "third_party/blink/renderer/platform/weborigin/security_origin.h"
#include "third_party/blink/renderer/platform/wtf/text/ascii_ctype.h"
#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
#include "url/url_canon.h"
#include "url/url_util.h"
Expand Down Expand Up @@ -120,6 +121,30 @@ absl::StatusOr<std::string> HostnameEncodeCallback(absl::string_view input) {
return StdStringFromCanonOutput(canon_output, component);
}

absl::StatusOr<std::string> IPv6HostnameEncodeCallback(
    absl::string_view input) {
  // Lightweight validation and canonicalization for IPv6 hostname pattern
  // text. The URL parser's hostname canonicalizer is deliberately not used:
  // it insists on parsing a complete IPv6 address, whereas this callback may
  // be handed only fragments of the pattern (for `[:address]` the `[` and `]`
  // arrive as separate calls), so the strict parser would always fail.
  // Instead, every character must be a hex digit, `[`, `]` or `:`, and the
  // accepted characters are emitted in ASCII lowercase.
  //
  // Returns the lowercased copy of `input` on success, or an
  // InvalidArgumentError naming the first disallowed character.
  std::string lowered;
  lowered.reserve(input.size());
  for (const char ch : input) {
    const bool is_allowed =
        IsASCIIHexDigit(ch) || ch == '[' || ch == ']' || ch == ':';
    if (!is_allowed) {
      return absl::InvalidArgumentError(
          std::string("Invalid IPv6 hostname character '") + ch + "' in '" +
          std::string(input) + "'.");
    }
    lowered += ToASCIILower(ch);
  }
  return lowered;
}

absl::StatusOr<std::string> PortEncodeCallback(absl::string_view input) {
if (input.empty())
return std::string();
Expand Down
1 change: 1 addition & 0 deletions blink/renderer/modules/url_pattern/url_pattern_canon.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ absl::StatusOr<std::string> ProtocolEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> UsernameEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> PasswordEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> HostnameEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> IPv6HostnameEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> PortEncodeCallback(absl::string_view input);
absl::StatusOr<std::string> StandardURLPathnameEncodeCallback(
absl::string_view input);
Expand Down
33 changes: 30 additions & 3 deletions blink/renderer/modules/url_pattern/url_pattern_component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,31 @@ StringView TypeToString(Component::Type type) {
NOTREACHED();
}

// Reports whether a hostname pattern should be handled as an IPv6 hostname.
// This is a cheap heuristic rather than a parse: it only inspects the first
// one or two bytes of the pattern looking for an opening `[`, which is meant
// to cover the most common cases with minimal overhead.
bool TreatAsIPv6Hostname(base::StringPiece pattern_utf8) {
  // A bare `[` can never be a valid IPv6 hostname; the shortest candidate,
  // such as `[*`, is two characters long.
  if (pattern_utf8.size() < 2)
    return false;

  const char first = pattern_utf8[0];
  if (first == '[')
    return true;

  // Also recognize a bracket that directly follows an escape (`\[`) or the
  // start of a grouping (`{[`).
  const bool has_wrapping_prefix = first == '\\' || first == '{';
  return has_wrapping_prefix && pattern_utf8[1] == '[';
}

// Utility method to get the correct encoding callback for a given type.
liburlpattern::EncodeCallback GetEncodeCallback(Component::Type type,
liburlpattern::EncodeCallback GetEncodeCallback(base::StringPiece pattern_utf8,
Component::Type type,
Component* protocol_component) {
switch (type) {
case Component::Type::kProtocol:
Expand All @@ -51,7 +74,10 @@ liburlpattern::EncodeCallback GetEncodeCallback(Component::Type type,
case Component::Type::kPassword:
return PasswordEncodeCallback;
case Component::Type::kHostname:
return HostnameEncodeCallback;
if (TreatAsIPv6Hostname(pattern_utf8))
return IPv6HostnameEncodeCallback;
else
return HostnameEncodeCallback;
case Component::Type::kPort:
return PortEncodeCallback;
case Component::Type::kPathname:
Expand Down Expand Up @@ -205,7 +231,8 @@ Component* Component::Compile(StringView pattern,
StringUTF8Adaptor utf8(final_pattern);
auto parse_result = liburlpattern::Parse(
absl::string_view(utf8.data(), utf8.size()),
GetEncodeCallback(type, protocol_component), options);
GetEncodeCallback(utf8.AsStringPiece(), type, protocol_component),
options);
if (!parse_result.ok()) {
exception_state.ThrowTypeError(
"Invalid " + TypeToString(type) + " pattern '" + final_pattern + "'. " +
Expand Down
3 changes: 3 additions & 0 deletions blink/tools/blinkpy/presubmit/audit_non_blink_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,6 +1307,9 @@
# liburlpattern API.
"base::IsStringASCII",

# Needed to use part of the StringUTF8Adaptor API.
"base::StringPiece",

# //third_party/liburlpattern
'liburlpattern::.+',

Expand Down
8 changes: 4 additions & 4 deletions blink/tools/blinkpy/style/checkers/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1472,7 +1472,7 @@ def get_previous_non_blank_line(clean_lines, line_number):

def check_ctype_functions(clean_lines, line_number, file_state, error):
"""Looks for use of the standard functions in ctype.h and suggest they be replaced
by use of equivalent ones in <wtf/ASCIICType.h>?.
by use of equivalent ones in "wtf/text/ascii_ctype.h"?.
Args:
clean_lines: A CleansedLines instance containing the file.
Expand All @@ -1493,9 +1493,9 @@ def check_ctype_functions(clean_lines, line_number, file_state, error):

ctype_function = ctype_function_search.group('ctype_function')
error(
line_number, 'runtime/ctype_function', 4,
'Use equivalent function in <wtf/ASCIICType.h> instead of the %s() function.'
% (ctype_function))
line_number, 'runtime/ctype_function', 4, 'Use equivalent function in '
'"third_party/blink/renderer/platform/wtf/text/ascii_ctype.h" instead '
'of the %s() function.' % (ctype_function))


def replaceable_check(operator, macro, line):
Expand Down
7 changes: 4 additions & 3 deletions blink/tools/blinkpy/style/checkers/cpp_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2486,9 +2486,10 @@ def test_using_std_swap_ignored(self):

def test_ctype_fucntion(self):
self.assert_lint(
'int i = isascii(8);',
'Use equivalent function in <wtf/ASCIICType.h> instead of the '
'isascii() function. [runtime/ctype_function] [4]', 'foo.cpp')
'int i = isascii(8);', 'Use equivalent function in '
'"third_party/blink/renderer/platform/wtf/text/ascii_ctype.h" '
'instead of the isascii() function. [runtime/ctype_function] [4]',
'foo.cpp')

def test_redundant_virtual(self):
self.assert_lint(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,85 @@
"pathname": { "input": "/", "groups": {} }
}
},
{
"pattern": [ "http://[:address]/" ],
"inputs": [ "http://[::1]/" ],
"exactly_empty_components": [ "username", "password", "port", "search",
"hash" ],
"expected_obj": {
"protocol": "http",
"hostname": "[:address]",
"pathname": "/"
},
"expected_match": {
"protocol": { "input": "http", "groups": {} },
"hostname": { "input": "[::1]", "groups": { "address": "::1" }},
"pathname": { "input": "/", "groups": {} }
}
},
{
"pattern": [ "http://[\\:\\:AB\\::num]/" ],
"inputs": [ "http://[::ab:1]/" ],
"exactly_empty_components": [ "username", "password", "port", "search",
"hash" ],
"expected_obj": {
"protocol": "http",
"hostname": "[\\:\\:ab\\::num]",
"pathname": "/"
},
"expected_match": {
"protocol": { "input": "http", "groups": {} },
"hostname": { "input": "[::ab:1]", "groups": { "num": "1" }},
"pathname": { "input": "/", "groups": {} }
}
},
{
"pattern": [{ "hostname": "[\\:\\:AB\\::num]" }],
"inputs": [{ "hostname": "[::ab:1]" }],
"expected_obj": {
"hostname": "[\\:\\:ab\\::num]"
},
"expected_match": {
"hostname": { "input": "[::ab:1]", "groups": { "num": "1" }}
}
},
{
"pattern": [{ "hostname": "[\\:\\:xY\\::num]" }],
"expected_obj": "error"
},
{
"pattern": [{ "hostname": "{[\\:\\:ab\\::num]}" }],
"inputs": [{ "hostname": "[::ab:1]" }],
"expected_match": {
"hostname": { "input": "[::ab:1]", "groups": { "num": "1" }}
}
},
{
"pattern": [{ "hostname": "{[\\:\\:fé\\::num]}" }],
"expected_obj": "error"
},
{
"pattern": [{ "hostname": "{[\\:\\::num\\:1]}" }],
"inputs": [{ "hostname": "[::ab:1]" }],
"expected_match": {
"hostname": { "input": "[::ab:1]", "groups": { "num": "ab" }}
}
},
{
"pattern": [{ "hostname": "{[\\:\\::num\\:fé]}" }],
"expected_obj": "error"
},
{
"pattern": [{ "hostname": "[*\\:1]" }],
"inputs": [{ "hostname": "[::ab:1]" }],
"expected_match": {
"hostname": { "input": "[::ab:1]", "groups": { "0": "::ab" }}
}
},
{
"pattern": [{ "hostname": "*\\:1]" }],
"expected_obj": "error"
},
{
"pattern": [ "https://foo{{@}}example.com" ],
"inputs": [ "https://foo@example.com" ],
Expand Down

0 comments on commit be4a8d8

Please sign in to comment.