Skip to content

Commit

Permalink
URLPattern: Implement compareComponent() method.
Browse files Browse the repository at this point in the history
This CL adds a prototype URLPattern.compareComponent() to provide a
natural ordering to URLPattern pattern strings.  This was based on
feedback from routing framework authors and there is some discussion
in:

whatwg/urlpattern#61

The general algorithm is to compare the component patterns Part by Part.
The PartType, Modifier, and text contents are compared for each Part,
but group names are not considered.  The end result is a mostly
lexicographical ordering based on fixed text.  Matching groups and
modifiers are ordered such that more restrictive patterns are greater.

Bug: 1232795
Change-Id: I8474cd7d7689e657c9c74c552ad630cdcdd86c95
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3052630
Commit-Queue: Ben Kelly <wanderview@chromium.org>
Reviewed-by: Jeremy Roman <jbroman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#906025}
  • Loading branch information
wanderview authored and Chromium LUCI CQ committed Jul 28, 2021
1 parent eb113ed commit 22c632e
Show file tree
Hide file tree
Showing 18 changed files with 342 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,7 @@ enum WebFeature {
kBlobStoreAccessAcrossAgentClustersInResolveAsURLLoaderFactory = 3963,
kBlobStoreAccessAcrossAgentClustersInResolveForNavigation = 3964,
kTapDelayEnabled = 3965,
kV8URLPattern_CompareComponent_Method = 3966,

// Add new features immediately above this line. Don't change assigned
// numbers of any item, and don't reuse removed slots.
Expand Down
2 changes: 2 additions & 0 deletions third_party/blink/renderer/bindings/generated_in_modules.gni
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,8 @@ generated_dictionary_sources_in_modules = [
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_decoder_options.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_encoder_encode_into_result.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_text_encoder_encode_into_result.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component_result.cc",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component_result.h",
"$root_gen_dir/third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_init.cc",
Expand Down
30 changes: 30 additions & 0 deletions third_party/blink/renderer/modules/url_pattern/url_pattern.cc
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,36 @@ String URLPattern::hash() const {
return hash_->GeneratePatternString();
}

// static
int URLPattern::compareComponent(const V8URLPatternComponent& component,
const URLPattern* left,
const URLPattern* right) {
switch (component.AsEnum()) {
case V8URLPatternComponent::Enum::kProtocol:
return url_pattern::Component::Compare(*left->protocol_,
*right->protocol_);
case V8URLPatternComponent::Enum::kUsername:
return url_pattern::Component::Compare(*left->username_,
*right->username_);
case V8URLPatternComponent::Enum::kPassword:
return url_pattern::Component::Compare(*left->password_,
*right->password_);
case V8URLPatternComponent::Enum::kHostname:
return url_pattern::Component::Compare(*left->hostname_,
*right->hostname_);
case V8URLPatternComponent::Enum::kPort:
return url_pattern::Component::Compare(*left->port_, *right->port_);
case V8URLPatternComponent::Enum::kPathname:
return url_pattern::Component::Compare(*left->pathname_,
*right->pathname_);
case V8URLPatternComponent::Enum::kSearch:
return url_pattern::Component::Compare(*left->search_, *right->search_);
case V8URLPatternComponent::Enum::kHash:
return url_pattern::Component::Compare(*left->hash_, *right->hash_);
}
NOTREACHED();
}

void URLPattern::Trace(Visitor* visitor) const {
visitor->Trace(protocol_);
visitor->Trace(username_);
Expand Down
6 changes: 5 additions & 1 deletion third_party/blink/renderer/modules/url_pattern/url_pattern.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

Expand All @@ -7,6 +6,7 @@

#include "base/types/pass_key.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_typedefs.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_url_pattern_component.h"
#include "third_party/blink/renderer/modules/modules_export.h"
#include "third_party/blink/renderer/platform/bindings/script_wrappable.h"
#include "third_party/liburlpattern/parse.h"
Expand Down Expand Up @@ -69,6 +69,10 @@ class MODULES_EXPORT URLPattern : public ScriptWrappable {
String search() const;
String hash() const;

// Compares the pattern selected by `component` in `left` against the same
// pattern in `right` and returns the result of
// url_pattern::Component::Compare() for that pair.
static int compareComponent(const V8URLPatternComponent& component,
const URLPattern* left,
const URLPattern* right);

void Trace(Visitor* visitor) const override;

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

typedef (USVString or URLPatternInit) URLPatternInput;

// Names the URL component whose pattern URLPattern.compareComponent()
// operates on.
enum URLPatternComponent { "protocol", "username", "password", "hostname",
"port", "pathname", "search", "hash" };

// https://wicg.github.io/urlpattern/
[
Exposed=(Window,Worker),
Expand All @@ -26,4 +29,8 @@ typedef (USVString or URLPatternInit) URLPatternInput;
readonly attribute USVString pathname;
readonly attribute USVString search;
readonly attribute USVString hash;

// Compares the `component` pattern of `left` with the `component` pattern of
// `right` and returns the ordering as a short.
[Measure]
static short compareComponent(URLPatternComponent component,
URLPattern left, URLPattern right);
};
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,12 @@ liburlpattern::EncodeCallback GetEncodeCallback(Component::Type type,
// Utility method to get the correct liburlpattern parse options for a given
// type.
const liburlpattern::Options& GetOptions(Component::Type type) {
using liburlpattern::Options;

// The liburlpattern::Options to use for most component patterns. We
// default to strict mode and case sensitivity. In addition, most
// components have no concept of a delimiter or prefix character.
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, default_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, default_options,
({.delimiter_list = "",
.prefix_list = "",
.sensitive = true,
Expand All @@ -106,7 +108,7 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
// by default. Note, hostnames are case insensitive but we require case
// sensitivity here. This assumes that the hostname values have already
// been normalized to lower case as in URL().
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, hostname_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, hostname_options,
({.delimiter_list = ".",
.prefix_list = "",
.sensitive = true,
Expand All @@ -116,7 +118,7 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
// "/" delimiter controlling how far a named group like ":bar" will match
// by default. It also configures "/" to be treated as an automatic
// prefix before groups.
DEFINE_THREAD_SAFE_STATIC_LOCAL(liburlpattern::Options, pathname_options,
DEFINE_THREAD_SAFE_STATIC_LOCAL(Options, pathname_options,
({.delimiter_list = "/",
.prefix_list = "/",
.sensitive = true,
Expand All @@ -138,6 +140,88 @@ const liburlpattern::Options& GetOptions(Component::Type type) {
NOTREACHED();
}

// Utility function to return a statically allocated Part list.
const std::vector<liburlpattern::Part>& GetWildcardOnlyPartList() {
using liburlpattern::Modifier;
using liburlpattern::Part;
using liburlpattern::PartType;
DEFINE_THREAD_SAFE_STATIC_LOCAL(
std::vector<Part>, instance,
({Part(PartType::kFullWildcard,
/*name=*/"",
/*prefix=*/"", /*value=*/"", /*suffix=*/"", Modifier::kNone)}));
return instance;
}

// Orders two Parts, returning -1, 0, or 1 when `lh` is respectively less than,
// equal to, or greater than `rh`.  Group names do not take part in the
// comparison.
int ComparePart(const liburlpattern::Part& lh, const liburlpattern::Part& rh) {
  // The comparison key is built from the following fields, most significant
  // first:
  //
  //  1. PartType, so that fixed text is favored:
  //
  //       kFixed > kRegex > kSegmentWildcard > kFullWildcard.
  //
  //     kRegex ranks above the wildcards because a regex is likely being used
  //     to impose some constraint rather than to duplicate wildcard behavior.
  //
  //  2. Modifier:
  //
  //       kNone > kOneOrMore > kOptional > kZeroOrMore.
  //
  //     Requiring the match group to exist is more restrictive than making it
  //     optional, and requiring an exact count is more restrictive than
  //     allowing repetition.
  //
  //  3. The text fields `prefix`, `value`, and `suffix`, compared
  //     lexicographically in that order.  It's ok to rely on simple byte-wise
  //     string comparison here because the values have all been URL encoded,
  //     which guarantees the strings contain only ASCII.
  //
  // Steps 1 and 2 depend on the PartType and Modifier enums in liburlpattern
  // having the correct corresponding numeric values.
  const auto lh_key =
      std::tie(lh.type, lh.modifier, lh.prefix, lh.value, lh.suffix);
  const auto rh_key =
      std::tie(rh.type, rh.modifier, rh.prefix, rh.value, rh.suffix);

  if (lh_key == rh_key)
    return 0;
  return lh_key < rh_key ? -1 : 1;
}

// Orders two Part lists.  Parts at the same index are compared pairwise with
// ComparePart(), and the first non-zero result decides the ordering.
int ComparePartList(const std::vector<liburlpattern::Part>& lh,
                    const std::vector<liburlpattern::Part>& rh) {
  using liburlpattern::Modifier;
  using liburlpattern::Part;
  using liburlpattern::PartType;

  const size_t lh_count = lh.size();
  const size_t rh_count = rh.size();
  const size_t shared_count = lh_count < rh_count ? lh_count : rh_count;

  // Walk the range of indices present in both lists and stop at the first
  // pair of Parts that differ.
  for (size_t index = 0; index < shared_count; ++index) {
    const int result = ComparePart(lh[index], rh[index]);
    if (result != 0)
      return result;
  }

  // Same length and no differing Part, so the lists are equal.
  if (lh_count == rh_count)
    return 0;

  // One list is a strict prefix of the other.  The first extra Part of the
  // longer list is compared against a synthetic empty kFixed Part.  This is
  // what makes "/foo/" more restrictive, and therefore greater, than "/foo/*".
  const Part empty_fixed(PartType::kFixed, "", Modifier::kNone);
  if (lh_count < rh_count)
    return ComparePart(empty_fixed, rh[shared_count]);
  return ComparePart(lh[shared_count], empty_fixed);
}

} // anonymous namespace

// static
Expand Down Expand Up @@ -218,6 +302,31 @@ Component* Component::Compile(const String& pattern,
std::move(wtf_name_list), base::PassKey<Component>());
}

// static
// Orders two components by their pattern Part lists.  A component without a
// pattern is treated as though its Part list were a single full wildcard.
int Component::Compare(const Component& lh, const Component& rh) {
  const bool lh_has_pattern = lh.pattern_.has_value();
  const bool rh_has_pattern = rh.pattern_.has_value();

  // Two empty components are both effectively wildcards and therefore equal.
  if (!lh_has_pattern && !rh_has_pattern)
    return 0;

  // For each side use the real Part list when a pattern exists, otherwise
  // stand in the wildcard-only list.
  const auto& lh_parts =
      lh_has_pattern ? lh.pattern_->PartList() : GetWildcardOnlyPartList();
  const auto& rh_parts =
      rh_has_pattern ? rh.pattern_->PartList() : GetWildcardOnlyPartList();

  return ComparePartList(lh_parts, rh_parts);
}

Component::Component(Type type,
liburlpattern::Pattern pattern,
ScriptRegexp* regexp,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ class Component final : public GarbageCollected<Component> {
Component* protocol_component,
ExceptionState& exception_state);

// Compare the pattern strings in the two given components. This provides a
// mostly lexicographical ordering based on fixed text in the patterns.
// Matching groups and modifiers are treated such that more restrictive
// patterns are greater in value. Group names are not considered in the
// comparison.
static int Compare(const Component& lh, const Component& rh);

// Constructs a Component with a real `pattern` that compiled to the given
// `regexp`.
Component(Type type,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
[
{
"component": "pathname",
"left": { "pathname": "/foo/a" },
"right": { "pathname": "/foo/b" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "/foo/b" },
"right": { "pathname": "/foo/bar" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "/foo/bar" },
"right": { "pathname": "/foo/:bar" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/" },
"right": { "pathname": "/foo/:bar" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/:bar" },
"right": { "pathname": "/foo/*" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}" },
"right": { "pathname": "/foo/(bar)" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}" },
"right": { "pathname": "/foo/{bar}+" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}+" },
"right": { "pathname": "/foo/{bar}?" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}?" },
"right": { "pathname": "/foo/{bar}*" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/(123)" },
"right": { "pathname": "/foo/(12)" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/:b" },
"right": { "pathname": "/foo/:a" },
"expected": 0
},
{
"component": "pathname",
"left": { "pathname": "*/foo" },
"right": { "pathname": "*" },
"expected": 1
},
{
"component": "port",
"left": { "port": "9" },
"right": { "port": "100" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo/{:bar}?/baz" },
"expected": -1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo{/:bar}?/baz" },
"expected": 0
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "fo{o/:bar}?/baz" },
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "foo/:bar?/baz" },
"right": { "pathname": "foo{/:bar/}?baz" },
"expected": -1
},
{
"component": "pathname",
"left": "https://a.example.com/b?a",
"right": "https://b.example.com/a?b",
"expected": 1
},
{
"component": "pathname",
"left": { "pathname": "/foo/{bar}/baz" },
"right": { "pathname": "/foo/bar/baz" },
"expected": 0
}
]
Loading

0 comments on commit 22c632e

Please sign in to comment.