From 4eb0749b6c16cfa6cdfedfab4cef256dc27e55b1 Mon Sep 17 00:00:00 2001 From: Ali Hassan <24819103+thisalihassan@users.noreply.github.com> Date: Sat, 13 Apr 2024 17:57:12 +0500 Subject: [PATCH] url: implement parse method for safer URL parsing Implement the static parse method as per the WHATWG URL specification. Unlike the URL constructor, URL.parse does not throw on invalid input, instead returning null. This behavior allows safer parsing of URLs without the need for try-catch blocks around constructor calls. The implementation follows the steps outlined in the WHATWG URL standard, ensuring compatibility and consistency with web platform URL parsing APIs. Fixes: https://github.com/nodejs/node/issues/52208 Refs: https://github.com/whatwg/url/pull/825 PR-URL: https://github.com/nodejs/node/pull/52280 Reviewed-By: Yagiz Nizipli Reviewed-By: Matteo Collina Reviewed-By: Daniel Lemire Reviewed-By: Benjamin Gruenbaum --- lib/internal/url.js | 24 ++- src/node_url.cc | 11 +- test/fixtures/wpt/README.md | 2 +- .../wpt/url/resources/urltestdata.json | 185 ++++++++++++++++++ .../fixtures/wpt/url/url-statics-parse.any.js | 50 +++++ 5 files changed, 267 insertions(+), 5 deletions(-) create mode 100644 test/fixtures/wpt/url/url-statics-parse.any.js diff --git a/lib/internal/url.js b/lib/internal/url.js index 4103775560644e..e82279bbc152ee 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -764,6 +764,14 @@ function isURL(self) { return Boolean(self?.href && self.protocol && self.auth === undefined && self.path === undefined); } +/** + * A unique symbol used as a private identifier to safely invoke the URL constructor + * with a special parsing behavior. When passed as the third argument to the URL + * constructor, it signals that the constructor should not throw an exception + * for invalid URL inputs. + */ +const kParseURLSymbol = Symbol('kParseURL'); + class URL { #context = new URLContext(); #searchParams; @@ -782,7 +790,7 @@ class URL { }; } - constructor(input, base = undefined) { + constructor(input, base = undefined, parseSymbol = undefined) { if (arguments.length === 0) { throw new ERR_MISSING_ARGS('url'); } @@ -794,7 +802,19 @@ class URL { base = `${base}`; } - this.#updateContext(bindingUrl.parse(input, base)); + const raiseException = parseSymbol !== kParseURLSymbol; + const href = bindingUrl.parse(input, base, raiseException); + if (href) { + this.#updateContext(href); + } + } + + static parse(input, base = undefined) { + if (arguments.length === 0) { + throw new ERR_MISSING_ARGS('url'); + } + const parsedURLObject = new URL(input, base, kParseURLSymbol); + return parsedURLObject.href ? parsedURLObject : null; } [inspect.custom](depth, opts) { diff --git a/src/node_url.cc b/src/node_url.cc index 08753e4a6539be..5eefb4a2512df1 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -262,6 +262,9 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); // input // args[1] // base url + // args[2] // raise Exception + + const bool raise_exception = args.Length() > 2 && args[2]->IsTrue(); Realm* realm = Realm::GetCurrent(args); BindingData* binding_data = realm->GetBindingData(); @@ -274,16 +277,20 @@ void BindingData::Parse(const FunctionCallbackInfo& args) { if (args[1]->IsString()) { base_ = Utf8Value(isolate, args[1]).ToString(); base = ada::parse(*base_); - if (!base) { + if (!base && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!base) { + return; } base_pointer = &base.value(); } auto out = ada::parse(input.ToStringView(), base_pointer); - if (!out) { + if (!out && raise_exception) { return ThrowInvalidURL(realm->env(), input.ToStringView(), base_); + } else if (!out) { + return; } binding_data->UpdateComponents(out->get_components(), out->type); diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md index 3f65907043f984..02e02a9d1f3c73 100644 --- a/test/fixtures/wpt/README.md +++ b/test/fixtures/wpt/README.md @@ -36,4 +36,4 @@ Last update: - webmessaging/broadcastchannel: https://github.com/web-platform-tests/wpt/tree/e97fac4791/webmessaging/broadcastchannel [Web Platform Tests]: https://github.com/web-platform-tests/wpt -[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt +[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/main/docs/git-node.md#git-node-wpt \ No newline at end of file diff --git a/test/fixtures/wpt/url/resources/urltestdata.json b/test/fixtures/wpt/url/resources/urltestdata.json index 7c2a0f68c77a26..53f6d575e165bf 100644 --- a/test/fixtures/wpt/url/resources/urltestdata.json +++ b/test/fixtures/wpt/url/resources/urltestdata.json @@ -734,6 +734,36 @@ "search": "", "hash": "" }, + { + "input": "http://a:b@c\\", + "base": null, + "href": "http://a:b@c/", + "origin": "http://c", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c", + "hostname": "c", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://a@b\\c", + "base": null, + "href": "ws://a@b/c", + "origin": "ws://b", + "protocol": "ws:", + "username": "a", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, { "input": "foo:/", "base": "http://example.org/foo/bar", @@ -9627,5 +9657,160 @@ "pathname": "", "search": "", "hash": "" + }, + "Scheme relative path starting with multiple slashes", + { + "input": "///test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///\\//\\//test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "/\\/\\//example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///abcdef/../", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/\\//\\/a/../", + "base": "file:///", + "href": "file://////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////", + "search": "", + "hash": "" + }, + { + "input": "//a/../", + "base": "file:///", + "href": "file://a/", + "protocol": "file:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/", + "search": "", + "hash": "" } ] diff --git a/test/fixtures/wpt/url/url-statics-parse.any.js b/test/fixtures/wpt/url/url-statics-parse.any.js new file mode 100644 index 00000000000000..0822e9da07af6a --- /dev/null +++ b/test/fixtures/wpt/url/url-statics-parse.any.js @@ -0,0 +1,50 @@ +// This intentionally does not use resources/urltestdata.json to preserve resources. +[ + { + "url": undefined, + "base": undefined, + "expected": false + }, + { + "url": "aaa:b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:b", + "expected": false + }, + { + "url": "aaa:/b", + "base": undefined, + "expected": true + }, + { + "url": undefined, + "base": "aaa:/b", + "expected": true + }, + { + "url": "https://test:test", + "base": undefined, + "expected": false + }, + { + "url": "a", + "base": "https://b/", + "expected": true + } +].forEach(({ url, base, expected }) => { + test(() => { + if (expected == false) { + assert_equals(URL.parse(url, base), null); + } else { + assert_equals(URL.parse(url, base).href, new URL(url, base).href); + } + }, `URL.parse(${url}, ${base})`); +}); + +test(() => { + assert_not_equals(URL.parse("https://example/"), URL.parse("https://example/")); +}, `URL.parse() should return a unique object`);