From 711dad2f3acfdd9da9c600d33e432025bf267aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Sat, 17 Dec 2016 13:49:05 +0100 Subject: [PATCH] url: improve spec compliance of WHATWG URL This patch contains the following changes: url: make IPv4 parser more spec compliant * Return int64_t from ParseNumber to prevent overflow for valid big numbers * Don't throw when there are more than 4 parts (it cannot be an IP address) * Correctly interpret the address and don't always throw when there are numbers > 255 Ref: https://url.spec.whatwg.org/#concept-ipv4-parser Fixes: https://github.com/nodejs/node/issues/10306 url: percent encode fragment to follow spec change Ref: https://github.com/whatwg/url/issues/150 Ref: https://github.com/whatwg/url/commit/373dbedbbf0596f723ce8a195923da98b698aeb0 url: fix URL#search setter The check for empty string must be done before removing the leading '?'. Ref: https://url.spec.whatwg.org/#dom-url-search url: set port to null if an empty string is given This is to follow a spec change. Ref: https://github.com/whatwg/url/pull/113 url: fix parsing of paths with Windows drive letter test: update WHATWG URL test fixtures PR-URL: https://github.com/nodejs/node/pull/10317 Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum --- lib/internal/url.js | 5 +- src/node_url.cc | 64 +++-- test/fixtures/url-setter-tests.json | 34 +-- test/fixtures/url-tests.json | 377 +++++++++++++++++++++++++++- 4 files changed, 429 insertions(+), 51 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index d80f3ca79a9529..ef22d2d4943f67 100755 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, { return; port = String(port); if (port === '') { - // Currently, if port number is empty, left unchanged. - // TODO(jasnell): This might be changing in the spec + ctx.port = undefined; return; } binding.parse(port, binding.kPort, null, ctx, @@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, { set(search) { const ctx = this[context]; search = String(search); - if (search[0] === '?') search = search.slice(1); if (!search) { ctx.query = null; ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; this[searchParams][searchParams] = {}; return; } + if (search[0] === '?') search = search.slice(1); ctx.query = ''; binding.parse(search, binding.kQuery, null, ctx, onParseSearchComplete.bind(this)); diff --git a/src/node_url.cc b/src/node_url.cc index 9bf58944d6b927..11a03ea5211a89 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -261,7 +261,7 @@ namespace url { return type; } - static inline int ParseNumber(const char* start, const char* end) { + static inline int64_t ParseNumber(const char* start, const char* end) { unsigned R = 10; if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { start += 2; @@ -293,7 +293,7 @@ namespace url { } p++; } - return strtol(start, NULL, R); + return strtoll(start, NULL, R); } static url_host_type ParseIPv4Host(url_host* host, @@ -305,7 +305,8 @@ namespace url { const char* end = pointer + length; int parts = 0; uint32_t val = 0; - unsigned numbers[4]; + uint64_t numbers[4]; + int tooBigNumbers = 0; if (length == 0) goto end; @@ -313,20 +314,16 @@ namespace url { const char ch = pointer < end ? pointer[0] : kEOL; const int remaining = end - pointer - 1; if (ch == '.' || ch == kEOL) { - if (++parts > 4 || pointer - mark == 0) - break; - int n = ParseNumber(mark, pointer); - if (n < 0) { - type = HOST_TYPE_DOMAIN; + if (++parts > 4) goto end; - } - if (pointer - mark == 10) { - numbers[parts - 1] = n; + if (pointer - mark == 0) break; - } - if (n > 255) { - type = HOST_TYPE_FAILED; + int64_t n = ParseNumber(mark, pointer); + if (n < 0) goto end; + + if (n > 255) { + tooBigNumbers++; } numbers[parts - 1] = n; mark = pointer + 1; @@ -335,14 +332,23 @@ namespace url { } pointer++; } + CHECK_GT(parts, 0); + + // If any but the last item in numbers is greater than 255, return failure. + // If the last item in numbers is greater than or equal to + // 256^(5 - the number of items in numbers), return failure. + if (tooBigNumbers > 1 || + (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || + numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { + type = HOST_TYPE_FAILED; + goto end; + } type = HOST_TYPE_IPV4; - if (parts > 0) { - val = numbers[parts - 1]; - for (int n = 0; n < parts - 1; n++) { - double b = 3-n; - val += numbers[n] * pow(256, b); - } + val = numbers[parts - 1]; + for (int n = 0; n < parts - 1; n++) { + double b = 3 - n; + val += numbers[n] * pow(256, b); } host->value.ipv4 = val; @@ -618,6 +624,13 @@ namespace url { } } + static inline void ShortenUrlPath(struct url_data* url) { + if (url->path.empty()) return; + if (url->path.size() == 1 && url->scheme == "file:" && + NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return; + url->path.pop_back(); + } + static void Parse(Environment* env, Local recv, const char* input, @@ -895,8 +908,7 @@ namespace url { if (DOES_HAVE_PATH(base)) { SET_HAVE_PATH() url.path = base.path; - if (!url.path.empty()) - url.path.pop_back(); + ShortenUrlPath(&url); } url.port = base.port; state = kPath; @@ -1112,8 +1124,7 @@ namespace url { SET_HAVE_PATH() url.path = base.path; } - if (!url.path.empty()) - url.path.pop_back(); + ShortenUrlPath(&url); } state = kPath; continue; @@ -1172,8 +1183,7 @@ namespace url { special_back_slash || (!state_override && (ch == '?' || ch == '#'))) { if (IsDoubleDotSegment(buffer)) { - if (!url.path.empty()) - url.path.pop_back(); + ShortenUrlPath(&url); if (ch != '/' && !special_back_slash) { SET_HAVE_PATH() url.path.push_back(""); @@ -1247,7 +1257,7 @@ namespace url { case 0: break; default: - buffer += ch; + AppendOrEscape(&buffer, ch, SimpleEncodeSet); } break; default: diff --git a/test/fixtures/url-setter-tests.json b/test/fixtures/url-setter-tests.json index 10d6895828b5f7..e3a163e788301e 100644 --- a/test/fixtures/url-setter-tests.json +++ b/test/fixtures/url-setter-tests.json @@ -347,7 +347,7 @@ } }, { - "comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", + "comment": "Port number is unchanged if not specified", "href": "http://example.net:8080", "new_value": "example.com:", "expected": { @@ -358,7 +358,6 @@ } }, { - "comment": "The empty host is not valid for special schemes", "href": "http://example.net", "new_value": "", @@ -763,14 +762,14 @@ } }, { - "comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113", + "comment": "Port number is removed if empty is the new value", "href": "http://example.net:8080", "new_value": "", "expected": { - "href": "http://example.net:8080/", - "host": "example.net:8080", + "href": "http://example.net/", + "host": "example.net", "hostname": "example.net", - "port": "8080" + "port": "" } }, { @@ -975,6 +974,15 @@ "href": "http://example.net/..%c3%89t%C3%A9", "pathname": "/..%c3%89t%C3%A9" } + }, + { + "comment": "? needs to be encoded", + "href": "http://example.net", + "new_value": "?", + "expected": { + "href": "http://example.net/%3F", + "pathname": "/%3F" + } } ], "search": [ @@ -1011,7 +1019,6 @@ } }, { -"skip": "we do not pass this, but we do match chromes behavior", "href": "https://example.net?lang=en-US#nav", "new_value": "?", "expected": { @@ -1096,7 +1103,6 @@ } }, { -"skip": "we do not pass this, but we do match chromes behavior", "href": "https://example.net?lang=en-US#nav", "new_value": "#", "expected": { @@ -1113,12 +1119,12 @@ } }, { - "comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed", + "comment": "Simple percent-encoding; nuls, tabs, and newlines are removed", "href": "a:/", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", - "hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé" + "href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1126,9 +1132,9 @@ "href": "http://example.net", "new_value": "%c3%89té", "expected": { - "href": "http://example.net/#%c3%89té", - "hash": "#%c3%89té" + "href": "http://example.net/#%c3%89t%C3%A9", + "hash": "#%c3%89t%C3%A9" } } ] -} +} \ No newline at end of file diff --git a/test/fixtures/url-tests.json b/test/fixtures/url-tests.json index 282b482f6527f0..44d665b99f5bb6 100644 --- a/test/fixtures/url-tests.json +++ b/test/fixtures/url-tests.json @@ -842,6 +842,36 @@ "search": "", "hash": "" }, + { + "input": "http://[::127.0.0.1]", + "base": "http://example.org/foo/bar", + "href": "http://[::7f00:1]/", + "origin": "http://[::7f00:1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::7f00:1]", + "hostname": "[::7f00:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:0:0:0:0:0:13.1.68.3]", + "base": "http://example.org/foo/bar", + "href": "http://[::d01:4403]/", + "origin": "http://[::d01:4403]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::d01:4403]", + "hostname": "[::d01:4403]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, { "input": "http://[2001::1]:80", "base": "http://example.org/foo/bar", @@ -1264,7 +1294,7 @@ { "input": "#β", "base": "http://example.org/foo/bar", - "href": "http://example.org/foo/bar#β", + "href": "http://example.org/foo/bar#%CE%B2", "origin": "http://example.org", "protocol": "http:", "username": "", @@ -1274,7 +1304,7 @@ "port": "", "pathname": "/foo/bar", "search": "", - "hash": "#β" + "hash": "#%CE%B2" }, { "input": "data:text/html,test#test", @@ -1291,6 +1321,21 @@ "search": "", "hash": "#test" }, + { + "input": "tel:1234567890", + "base": "http://example.org/foo/bar", + "href": "tel:1234567890", + "origin": "null", + "protocol": "tel:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "1234567890", + "search": "", + "hash": "" + }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", { "input": "file:c:\\foo\\bar.html", @@ -2118,7 +2163,7 @@ { "input": "http://www.google.com/foo?bar=baz# »", "base": "about:blank", - "href": "http://www.google.com/foo?bar=baz# »", + "href": "http://www.google.com/foo?bar=baz# %C2%BB", "origin": "http://www.google.com", "protocol": "http:", "username": "", @@ -2128,12 +2173,12 @@ "port": "", "pathname": "/foo", "search": "?bar=baz", - "hash": "# »" + "hash": "# %C2%BB" }, { "input": "data:test# »", "base": "about:blank", - "href": "data:test# »", + "href": "data:test# %C2%BB", "origin": "null", "protocol": "data:", "username": "", @@ -2143,7 +2188,7 @@ "port": "", "pathname": "test", "search": "", - "hash": "# »" + "hash": "# %C2%BB" }, { "input": "http://[www.google.com]/", @@ -4165,6 +4210,22 @@ "search": "", "hash": "" }, + "# unknown scheme with path looking like a password", + { + "input": "sc::a@example.net", + "base": "about:blank", + "href": "sc::a@example.net", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ":a@example.net", + "search": "", + "hash": "" + }, "# tests from jsdom/whatwg-url designed for code coverage", { "input": "http://127.0.0.1:10100/relative_import.html", @@ -4226,5 +4287,307 @@ "pathname": "/path", "search": "?query", "hash": "#frag" + }, + "# Stringification of URL.searchParams", + { + "input": "?a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "?a=b&c=d", + "searchParams": "a=b&c=d", + "hash": "" + }, + { + "input": "??a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar??a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "??a=b&c=d", + "searchParams": "%3Fa=b&c=d", + "hash": "" + }, + "# Scheme only", + { + "input": "http:", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "searchParams": "", + "hash": "" + }, + { + "input": "http:", + "base": "https://example.org/foo/bar", + "failure": true + }, + { + "input": "sc:", + "base": "https://example.org/foo/bar", + "href": "sc:", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "searchParams": "", + "hash": "" + }, + "# Percent encoding of fragments", + { + "input": "http://foo.bar/baz?qux#foo\bbar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%08bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "", + "hash": "#foo%08bar" + }, + "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", + { + "input": "http://192.168.257", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.com", + "base": "http://other.com/", + "href": "http://192.168.257.com/", + "origin": "http://192.168.257.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.257.com", + "hostname": "192.168.257.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256", + "base": "http://other.com/", + "href": "http://0.0.1.0/", + "origin": "http://0.0.1.0", + "protocol": "http:", + "username": "", + "password": "", + "host": "0.0.1.0", + "hostname": "0.0.1.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256.com", + "base": "http://other.com/", + "href": "http://256.com/", + "origin": "http://256.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.com", + "hostname": "256.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.com", + "base": "http://other.com/", + "href": "http://999999999.com/", + "origin": "http://999999999.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "999999999.com", + "hostname": "999999999.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://10000000000", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://10000000000.com", + "base": "http://other.com/", + "href": "http://10000000000.com/", + "origin": "http://10000000000.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "10000000000.com", + "hostname": "10000000000.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967295", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967296", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0xffffffff", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0xffffffff1", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "href": "http://256.256.256.256.256/", + "origin": "http://256.256.256.256.256", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.256.256.256.256", + "hostname": "256.256.256.256.256", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)", + { + "input": "pix/submit.gif", + "base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html", + "href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///C:/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" } -] \ No newline at end of file +]