From ff9c478e8fc74bd98c61ced7021779c6eb8a52b2 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Mon, 16 Jan 2023 18:18:39 -0300 Subject: [PATCH 01/14] src: refactor EndsInANumber in node_url.cc and adds IsIPv4NumberValid Removes the use of vector in EndsInANumber and uses IsIPv4NumberValid instead of parsing the number to check if it is valid. Fixes: https://github.com/nodejs/performance/issues/36 Refs: https://github.com/ada-url/ada/pull/36 --- src/node_url.cc | 575 +++++++++++++++++++++++------------------------- 1 file changed, 271 insertions(+), 304 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 8b56490379acf7..e6499b72382df6 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -16,13 +16,13 @@ namespace node { using errors::TryCatchScope; -using url::table_data::hex; using url::table_data::C0_CONTROL_ENCODE_SET; using url::table_data::FRAGMENT_ENCODE_SET; +using url::table_data::hex; using url::table_data::PATH_ENCODE_SET; -using url::table_data::USERINFO_ENCODE_SET; using url::table_data::QUERY_ENCODE_SET_NONSPECIAL; using url::table_data::QUERY_ENCODE_SET_SPECIAL; +using url::table_data::USERINFO_ENCODE_SET; using v8::Array; using v8::Context; @@ -42,10 +42,9 @@ using v8::Undefined; using v8::Value; Local Utf8String(Isolate* isolate, const std::string& str) { - return String::NewFromUtf8(isolate, - str.data(), - NewStringType::kNormal, - str.length()).ToLocalChecked(); + return String::NewFromUtf8( + isolate, str.data(), NewStringType::kNormal, str.length()) + .ToLocalChecked(); } namespace url { @@ -117,13 +116,13 @@ class URLHost { void SetOpaque(std::string&& string) { Reset(); type_ = HostType::H_OPAQUE; - new(&value_.domain_or_opaque) std::string(std::move(string)); + new (&value_.domain_or_opaque) std::string(std::move(string)); } void SetDomain(std::string&& string) { Reset(); type_ = HostType::H_DOMAIN; - new(&value_.domain_or_opaque) std::string(std::move(string)); + new (&value_.domain_or_opaque) std::string(std::move(string)); } }; @@ -131,16 +130,16 @@ URLHost::~URLHost() { Reset(); } -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) \ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) \ +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) \ XX(ARG_COUNT) // This one has to be last. enum url_cb_args { @@ -149,18 +148,18 @@ enum url_cb_args { #undef XX }; -#define TWO_CHAR_STRING_TEST(bits, name, expr) \ - template \ - bool name(const T ch1, const T ch2) { \ - static_assert(sizeof(ch1) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return (expr); \ - } \ - template \ - bool name(const std::basic_string& str) { \ - static_assert(sizeof(str[0]) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return str.length() >= 2 && name(str[0], str[1]); \ +#define TWO_CHAR_STRING_TEST(bits, name, expr) \ + template \ + bool name(const T ch1, const T ch2) { \ + static_assert(sizeof(ch1) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ + } \ + template \ + bool name(const std::basic_string& str) { \ + static_assert(sizeof(str[0]) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return str.length() >= 2 && name(str[0], str[1]); \ } // https://infra.spec.whatwg.org/#ascii-tab-or-newline @@ -176,13 +175,15 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) // https://infra.spec.whatwg.org/#ascii-hex-digit -CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || - (ch >= 'A' && ch <= 'F') || - (ch >= 'a' && ch <= 'f'))) +CHAR_TEST(8, + IsASCIIHexDigit, + (IsASCIIDigit(ch) || (ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f'))) // https://infra.spec.whatwg.org/#ascii-alpha -CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || - (ch >= 'a' && ch <= 'z'))) +CHAR_TEST(8, + IsASCIIAlpha, + ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))) // https://infra.spec.whatwg.org/#ascii-alphanumeric CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) @@ -208,11 +209,13 @@ CHAR_TEST(8, ch == '\x7f') // https://url.spec.whatwg.org/#windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, +TWO_CHAR_STRING_TEST(8, + IsWindowsDriveLetter, (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) // https://url.spec.whatwg.org/#normalized-windows-drive-letter -TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, +TWO_CHAR_STRING_TEST(8, + IsNormalizedWindowsDriveLetter, (IsASCIIAlpha(ch1) && ch2 == ':')) #undef TWO_CHAR_STRING_TEST @@ -233,19 +236,15 @@ void AppendOrEscape(std::string* str, } unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); + if (ch >= '0' && ch <= '9') return ch - '0'; + if (ch >= 'A' && ch <= 'F') return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a'); UNREACHABLE(); } std::string PercentDecode(const char* input, size_t len) { std::string dest; - if (len == 0) - return dest; + if (len == 0) return dest; dest.reserve(len); const char* pointer = input; const char* end = input + len; @@ -255,8 +254,7 @@ std::string PercentDecode(const char* input, size_t len) { size_t remaining = end - pointer - 1; if (ch != '%' || remaining < 2 || (ch == '%' && - (!IsASCIIHexDigit(pointer[1]) || - !IsASCIIHexDigit(pointer[2])))) { + (!IsASCIIHexDigit(pointer[1]) || !IsASCIIHexDigit(pointer[2])))) { dest += ch; pointer++; continue; @@ -271,31 +269,33 @@ std::string PercentDecode(const char* input, size_t len) { return dest; } -#define SPECIALS(XX) \ - XX(ftp, 21, "ftp:") \ - XX(file, -1, "file:") \ - XX(http, 80, "http:") \ - XX(https, 443, "https:") \ - XX(ws, 80, "ws:") \ +#define SPECIALS(XX) \ + XX(ftp, 21, "ftp:") \ + XX(file, -1, "file:") \ + XX(http, 80, "http:") \ + XX(https, 443, "https:") \ + XX(ws, 80, "ws:") \ XX(wss, 443, "wss:") bool IsSpecial(const std::string& scheme) { -#define V(_, __, name) if (scheme == name) return true; +#define V(_, __, name) \ + if (scheme == name) return true; SPECIALS(V); #undef V return false; } Local GetSpecial(Environment* env, const std::string& scheme) { -#define V(key, _, name) if (scheme == name) \ - return env->url_special_##key##_string(); +#define V(key, _, name) \ + if (scheme == name) return env->url_special_##key##_string(); SPECIALS(V) #undef V UNREACHABLE(); } int NormalizePort(const std::string& scheme, int p) { -#define V(_, port, name) if (scheme == name && p == port) return -1; +#define V(_, port, name) \ + if (scheme == name && p == port) return -1; SPECIALS(V); #undef V return p; @@ -304,34 +304,27 @@ int NormalizePort(const std::string& scheme, int p) { // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter bool StartsWithWindowsDriveLetter(const char* p, const char* end) { size_t length = end - p; - return length >= 2 && - IsWindowsDriveLetter(p[0], p[1]) && - (length == 2 || - p[2] == '/' || - p[2] == '\\' || - p[2] == '?' || - p[2] == '#'); + return length >= 2 && IsWindowsDriveLetter(p[0], p[1]) && + (length == 2 || p[2] == '/' || p[2] == '\\' || p[2] == '?' || + p[2] == '#'); } #if defined(NODE_HAVE_I18N_SUPPORT) bool ToUnicode(const std::string& input, std::string* output) { MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) - return false; + if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) return false; output->assign(*buf, buf.length()); return true; } bool ToASCII(const std::string& input, std::string* output) { MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) - return false; - if (buf.length() == 0) - return false; + if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) return false; + if (buf.length() == 0) return false; output->assign(*buf, buf.length()); return true; } -#else // !defined(NODE_HAVE_I18N_SUPPORT) +#else // !defined(NODE_HAVE_I18N_SUPPORT) // Intentional non-ops if ICU is not present. bool ToUnicode(const std::string& input, std::string* output) { *output = input; @@ -361,7 +354,8 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { return; } - // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 + // Ref: + // https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 for (int i = 0; i < NS_IN6ADDRSZ; i += 2) { value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1]; } @@ -390,16 +384,13 @@ int64_t ParseIPv4Number(const char* start, const char* end) { const char ch = p[0]; switch (R) { case 8: - if (ch < '0' || ch > '7') - return -1; + if (ch < '0' || ch > '7') return -1; break; case 10: - if (!IsASCIIDigit(ch)) - return -1; + if (!IsASCIIDigit(ch)) return -1; break; case 16: - if (!IsASCIIHexDigit(ch)) - return -1; + if (!IsASCIIHexDigit(ch)) return -1; break; } p++; @@ -407,29 +398,86 @@ int64_t ParseIPv4Number(const char* start, const char* end) { return strtoll(start, nullptr, R); } +// https://url.spec.whatwg.org/#ipv4-number-parser +bool IsIPv4NumberValid(std::string_view input) { + // If input is the empty string, then return failure. + if (input.empty()) { + return false; + } + + // If input contains at least two code points.. + if (input.size() >= 2) { + // and the first two code points are either "0X" or "0x", then: + if (input[0] == '0' && (input[1] == 'X' || input[1] == 'x')) { + if (input.size() == 2) { + return true; + } + + // Remove the first two code points from input, + // radix-R is 16 + // If input contains a code point that is not a radix-R digit, then return + // failure. + return input.find_first_not_of("0123456789abcdefABCDEF", 2) == + std::string_view::npos; + + // and the first code point is U+0030 (0), then: + } else if (input[0] == '0') { + if (input.size() == 1) { + return true; + } + + // Remove the first code point from input. + // radix-R is 8 + // If input contains a code point that is not a radix-R digit, then return + // failure. + return input.find_first_not_of("01234567", 1) == std::string_view::npos; + } + } + + // If input contains a code point that is not a radix-R digit, then return + // failure. radix-R is 10 + return std::all_of(input.begin(), input.end(), ::isdigit); +} + // https://url.spec.whatwg.org/#ends-in-a-number-checker -bool EndsInANumber(const std::string& input) { - std::vector parts = SplitString(input, '.', false); +bool EndsInANumber(const std::string_view input) { + if (input.empty()) { + return false; + } + + const std::string delimiter = "."; + auto pointer_start = input.begin(); + auto pointer_end = input.end(); - if (parts.empty()) return false; + uint8_t parts_size = std::count(pointer_start, pointer_end, delimiter[0]); + ++parts_size; - if (parts.back().empty()) { - if (parts.size() == 1) return false; - parts.pop_back(); + // If the last item in parts is the empty string, then: + if (input.back() == delimiter[0]) { + // Remove the last item from parts. + --pointer_end; + --parts_size; } - const std::string& last = parts.back(); + // Let last be the last item in parts + if (parts_size > 1) { + pointer_start = std::find_end( + pointer_start, pointer_end, delimiter.begin(), delimiter.end()); + ++pointer_start; + } - // If last is non-empty and contains only ASCII digits, then return true - if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) { - return true; + if (std::distance(pointer_start, pointer_end) == 0) { + return false; } - const char* last_str = last.c_str(); - int64_t num = ParseIPv4Number(last_str, last_str + last.size()); - if (num >= 0) return true; + // If last is non-empty and contains only ASCII digits, then return true. + if (std::all_of(pointer_start, pointer_end, ::isdigit)) { + return true; + } - return false; + // If parsing last as an IPv4 number does not return failure, then return + // true. + return IsIPv4NumberValid(std::string(pointer_start, pointer_end)); } void URLHost::ParseIPv4Host(const char* input, size_t length) { @@ -441,27 +489,23 @@ void URLHost::ParseIPv4Host(const char* input, size_t length) { uint32_t val = 0; uint64_t numbers[4]; int tooBigNumbers = 0; - if (length == 0) - return; + if (length == 0) return; while (pointer <= end) { const char ch = pointer < end ? pointer[0] : kEOL; int64_t remaining = end - pointer - 1; if (ch == '.' || ch == kEOL) { if (++parts > static_cast(arraysize(numbers))) return; - if (pointer == mark) - return; + if (pointer == mark) return; int64_t n = ParseIPv4Number(mark, pointer); - if (n < 0) - return; + if (n < 0) return; if (n > 255) { tooBigNumbers++; } numbers[parts - 1] = n; mark = pointer + 1; - if (ch == '.' && remaining == 0) - break; + if (ch == '.' && remaining == 0) break; } pointer++; } @@ -470,8 +514,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length) { // If any but the last item in numbers is greater than 255, return failure. // If the last item in numbers is greater than or equal to // 256^(5 - the number of items in numbers), return failure. - if (tooBigNumbers > 1 || - (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || + if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { return; } @@ -510,24 +553,20 @@ void URLHost::ParseHost(const char* input, CHECK_EQ(type_, HostType::H_FAILED); const char* pointer = input; - if (length == 0) - return; + if (length == 0) return; if (pointer[0] == '[') { - if (pointer[length - 1] != ']') - return; + if (pointer[length - 1] != ']') return; return ParseIPv6Host(++pointer, length - 2); } - if (!is_special) - return ParseOpaqueHost(input, length); + if (!is_special) return ParseOpaqueHost(input, length); // First, we have to percent decode std::string decoded = PercentDecode(input, length); // Then we have to punycode toASCII - if (!ToASCII(decoded, &decoded)) - return; + if (!ToASCII(decoded, &decoded)) return; // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { @@ -543,8 +582,7 @@ void URLHost::ParseHost(const char* input, } // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(decoded, &decoded)) - return; + if (unicode && !ToUnicode(decoded, &decoded)) return; // It's not an IPv4 or IPv6 address, it must be a domain SetDomain(std::move(decoded)); @@ -563,8 +601,7 @@ T* FindLongestZeroSequence(T* values, size_t len) { while (start < end) { if (*start == 0) { - if (current == nullptr) - current = start; + if (current == nullptr) current = start; counter++; } else { if (counter > longest) { @@ -576,8 +613,7 @@ T* FindLongestZeroSequence(T* values, size_t len) { } start++; } - if (counter > longest) - result = current; + if (counter > longest) result = current; return result; } @@ -607,8 +643,7 @@ std::string URLHost::ToString() const { uint32_t value = value_.ipv4; for (int n = 0; n < 4; n++) { dest.insert(0, std::to_string(value % 256)); - if (n < 3) - dest.insert(0, 1, '.'); + if (n < 3) dest.insert(0, 1, '.'); value /= 256; } break; @@ -617,8 +652,7 @@ std::string URLHost::ToString() const { dest.reserve(41); dest += '['; const uint16_t* start = &value_.ipv6[0]; - const uint16_t* compress_pointer = - FindLongestZeroSequence(start, 8); + const uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); bool ignore0 = false; for (int n = 0; n <= 7; n++) { const uint16_t* piece = &value_.ipv6[n]; @@ -634,8 +668,7 @@ std::string URLHost::ToString() const { char buf[5]; snprintf(buf, sizeof(buf), "%x", *piece); dest += buf; - if (n < 7) - dest += ':'; + if (n < 7) dest += ':'; } dest += ']'; break; @@ -656,8 +689,7 @@ bool ParseHost(const std::string& input, } URLHost host; host.ParseHost(input.c_str(), input.length(), is_special, unicode); - if (host.ParsingFailed()) - return false; + if (host.ParsingFailed()) return false; *output = host.ToStringMove(); return true; } @@ -665,8 +697,7 @@ bool ParseHost(const std::string& input, std::vector FromJSStringArray(Environment* env, Local array) { std::vector vec; - if (array->Length() > 0) - vec.reserve(array->Length()); + if (array->Length() > 0) vec.reserve(array->Length()); for (size_t n = 0; n < array->Length(); n++) { Local val = array->Get(env->context(), n).ToLocalChecked(); if (val->IsString()) { @@ -683,13 +714,11 @@ url_data HarvestBase(Environment* env, Local base_obj) { Local flags = base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) - base.flags = flags->Int32Value(context).FromJust(); + if (flags->IsInt32()) base.flags = flags->Int32Value(context).FromJust(); Local port = base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - base.port = port->Int32Value(context).FromJust(); + if (port->IsInt32()) base.port = port->Int32Value(context).FromJust(); Local scheme = base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); @@ -723,8 +752,8 @@ url_data HarvestBase(Environment* env, Local base_obj) { env->fragment_string(), true); - Local - path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); + Local path = + base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); if (path->IsArray()) { base.flags |= URL_FLAGS_HAS_PATH; base.path = FromJSStringArray(env, path.As()); @@ -738,11 +767,8 @@ url_data HarvestContext(Environment* env, Local context_obj) { context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) { static constexpr int32_t kCopyFlagsMask = - URL_FLAGS_SPECIAL | - URL_FLAGS_CANNOT_BE_BASE | - URL_FLAGS_HAS_USERNAME | - URL_FLAGS_HAS_PASSWORD | - URL_FLAGS_HAS_HOST; + URL_FLAGS_SPECIAL | URL_FLAGS_CANNOT_BE_BASE | URL_FLAGS_HAS_USERNAME | + URL_FLAGS_HAS_PASSWORD | URL_FLAGS_HAS_HOST; context.flags |= flags.As()->Value() & kCopyFlagsMask; } Local scheme = @@ -753,27 +779,25 @@ url_data HarvestContext(Environment* env, Local context_obj) { } Local port = context_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - context.port = port.As()->Value(); + if (port->IsInt32()) context.port = port.As()->Value(); if (context.flags & URL_FLAGS_HAS_USERNAME) { Local username = - context_obj->Get(env->context(), - env->username_string()).ToLocalChecked(); + context_obj->Get(env->context(), env->username_string()) + .ToLocalChecked(); CHECK(username->IsString()); Utf8Value value(env->isolate(), username); context.username.assign(*value, value.length()); } if (context.flags & URL_FLAGS_HAS_PASSWORD) { Local password = - context_obj->Get(env->context(), - env->password_string()).ToLocalChecked(); + context_obj->Get(env->context(), env->password_string()) + .ToLocalChecked(); CHECK(password->IsString()); Utf8Value value(env->isolate(), password); context.password.assign(*value, value.length()); } Local host = - context_obj->Get(env->context(), - env->host_string()).ToLocalChecked(); + context_obj->Get(env->context(), env->host_string()).ToLocalChecked(); if (host->IsString()) { Utf8Value value(env->isolate(), host); context.host.assign(*value, value.length()); @@ -787,9 +811,7 @@ bool IsSingleDotSegment(const std::string& str) { case 1: return str == "."; case 3: - return str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e'; + return str[0] == '%' && str[1] == '2' && ASCIILowercase(str[2]) == 'e'; default: return false; } @@ -803,23 +825,14 @@ bool IsDoubleDotSegment(const std::string& str) { case 2: return str == ".."; case 4: - if (str[0] != '.' && str[0] != '%') - return false; - return ((str[0] == '.' && - str[1] == '%' && - str[2] == '2' && + if (str[0] != '.' && str[0] != '%') return false; + return ((str[0] == '.' && str[1] == '%' && str[2] == '2' && ASCIILowercase(str[3]) == 'e') || - (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '.')); + (str[0] == '%' && str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && str[3] == '.')); case 6: - return (str[0] == '%' && - str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && - str[3] == '%' && - str[4] == '2' && - ASCIILowercase(str[5]) == 'e'); + return (str[0] == '%' && str[1] == '2' && ASCIILowercase(str[2]) == 'e' && + str[3] == '%' && str[4] == '2' && ASCIILowercase(str[5]) == 'e'); default: return false; } @@ -828,7 +841,8 @@ bool IsDoubleDotSegment(const std::string& str) { void ShortenUrlPath(struct url_data* url) { if (url->path.empty()) return; if (url->path.size() == 1 && url->scheme == "file:" && - IsNormalizedWindowsDriveLetter(url->path[0])) return; + IsNormalizedWindowsDriveLetter(url->path[0])) + return; url->path.pop_back(); } @@ -866,16 +880,14 @@ void URL::Parse(const char* input, // contents, but in the general case we avoid the overhead. std::string whitespace_stripped; for (const char* ptr = p; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - continue; + if (!IsASCIITabOrNewline(*ptr)) continue; // Hit tab or newline. Allocate storage, copy what we have until now, // and then iterate and filter all similar characters out. whitespace_stripped.reserve(len - 1); whitespace_stripped.assign(p, ptr - p); // 'ptr + 1' skips the current char, which we know to be tab or newline. for (ptr = ptr + 1; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) - whitespace_stripped += *ptr; + if (!IsASCIITabOrNewline(*ptr)) whitespace_stripped += *ptr; } // Update variables like they should have looked like if the string @@ -887,16 +899,16 @@ void URL::Parse(const char* input, break; } - bool atflag = false; // Set when @ has been seen. - bool square_bracket_flag = false; // Set inside of [...] + bool atflag = false; // Set when @ has been seen. + bool square_bracket_flag = false; // Set inside of [...] bool password_token_seen_flag = false; // Set after a : after an username. std::string buffer; // Set the initial parse state. const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = has_state_override ? state_override : - kSchemeStart; + enum url_parse_state state = + has_state_override ? state_override : kSchemeStart; if (state < kSchemeStart || state > kFragment) { url->flags |= URL_FLAGS_INVALID_PARSE_STATE; @@ -940,7 +952,7 @@ void URL::Parse(const char* input, ((url->flags & URL_FLAGS_HAS_USERNAME) || (url->flags & URL_FLAGS_HAS_PASSWORD) || (url->port != -1))) || - (url->scheme == "file:" && url->host.empty())) { + (url->scheme == "file:" && url->host.empty())) { url->flags |= URL_FLAGS_TERMINATED; return; } @@ -960,13 +972,10 @@ void URL::Parse(const char* input, // equals to `false`. special_back_slash = false; buffer.clear(); - if (has_state_override) - return; + if (has_state_override) return; if (url->scheme == "file:") { state = kFile; - } else if (special && - has_base && - url->scheme == base->scheme) { + } else if (special && has_base && url->scheme == base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { state = kSpecialAuthoritySlashes; @@ -1018,8 +1027,7 @@ void URL::Parse(const char* input, } url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; - } else if (has_base && - base->scheme != "file:") { + } else if (has_base && base->scheme != "file:") { state = kRelative; continue; } else { @@ -1220,10 +1228,7 @@ void URL::Parse(const char* input, } } buffer.clear(); - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || + } else if (ch == kEOL || ch == '/' || ch == '?' || ch == '#' || special_back_slash) { if (atflag && buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; @@ -1256,18 +1261,14 @@ void URL::Parse(const char* input, } buffer.clear(); state = kPort; - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || + } else if (ch == kEOL || ch == '/' || ch == '?' || ch == '#' || special_back_slash) { p--; if (special && buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; return; } - if (has_state_override && - buffer.size() == 0 && + if (has_state_override && buffer.size() == 0 && ((url->username.size() > 0 || url->password.size() > 0) || url->port != -1)) { url->flags |= URL_FLAGS_TERMINATED; @@ -1284,22 +1285,16 @@ void URL::Parse(const char* input, return; } } else { - if (ch == '[') - square_bracket_flag = true; - if (ch == ']') - square_bracket_flag = false; + if (ch == '[') square_bracket_flag = true; + if (ch == ']') square_bracket_flag = false; buffer += ch; } break; case kPort: if (IsASCIIDigit(ch)) { buffer += ch; - } else if (has_state_override || - ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { + } else if (has_state_override || ch == kEOL || ch == '/' || ch == '?' || + ch == '#' || special_back_slash) { if (buffer.size() > 0) { unsigned port = 0; // the condition port <= 0xffff prevents integer overflow @@ -1318,8 +1313,7 @@ void URL::Parse(const char* input, } // the port is valid url->port = NormalizePort(url->scheme, static_cast(port)); - if (url->port == -1) - url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; + if (url->port == -1) url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; buffer.clear(); } else if (has_state_override) { // TODO(TimothyGu): Similar case as above. @@ -1425,20 +1419,14 @@ void URL::Parse(const char* input, } break; case kFileHost: - if (ch == kEOL || - ch == '/' || - ch == '\\' || - ch == '?' || - ch == '#') { - if (!has_state_override && - buffer.size() == 2 && + if (ch == kEOL || ch == '/' || ch == '\\' || ch == '?' || ch == '#') { + if (!has_state_override && buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { state = kPath; } else if (buffer.size() == 0) { url->flags |= URL_FLAGS_HAS_HOST; url->host.clear(); - if (has_state_override) - return; + if (has_state_override) return; state = kPathStart; } else { std::string host; @@ -1446,12 +1434,10 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_FAILED; return; } - if (host == "localhost") - host.clear(); + if (host == "localhost") host.clear(); url->flags |= URL_FLAGS_HAS_HOST; url->host = host; - if (has_state_override) - return; + if (has_state_override) return; buffer.clear(); state = kPathStart; } @@ -1485,9 +1471,7 @@ void URL::Parse(const char* input, } break; case kPath: - if (ch == kEOL || - ch == '/' || - special_back_slash || + if (ch == kEOL || ch == '/' || special_back_slash || (!has_state_override && (ch == '?' || ch == '#'))) { if (IsDoubleDotSegment(buffer)) { ShortenUrlPath(url); @@ -1495,15 +1479,13 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_PATH; url->path.emplace_back(""); } - } else if (IsSingleDotSegment(buffer) && - ch != '/' && !special_back_slash) { + } else if (IsSingleDotSegment(buffer) && ch != '/' && + !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; url->path.emplace_back(""); } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && - url->path.empty() && - buffer.size() == 2 && - IsWindowsDriveLetter(buffer)) { + if (url->scheme == "file:" && url->path.empty() && + buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { buffer[1] = ':'; } url->flags |= URL_FLAGS_HAS_PATH; @@ -1543,11 +1525,12 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_QUERY; url->query = std::move(buffer); buffer.clear(); - if (ch == '#') - state = kFragment; + if (ch == '#') state = kFragment; } else { - AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL : - QUERY_ENCODE_SET_NONSPECIAL); + AppendOrEscape( + &buffer, + ch, + special ? QUERY_ENCODE_SET_SPECIAL : QUERY_ENCODE_SET_NONSPECIAL); } break; case kFragment: @@ -1570,23 +1553,17 @@ void URL::Parse(const char* input, } // NOLINT(readability/fn_size) // https://url.spec.whatwg.org/#url-serializing -std::string URL::SerializeURL(const url_data& url, - bool exclude = false) { +std::string URL::SerializeURL(const url_data& url, bool exclude = false) { std::string output; output.reserve( - 10 + // We generally insert < 10 separator characters between URL parts - url.scheme.size() + - url.username.size() + - url.password.size() + - url.host.size() + - url.query.size() + - url.fragment.size() + - url.href.size() + - std::accumulate( - url.path.begin(), - url.path.end(), - 0, - [](size_t sum, const auto& str) { return sum + str.size(); })); + 10 + // We generally insert < 10 separator characters between URL parts + url.scheme.size() + url.username.size() + url.password.size() + + url.host.size() + url.query.size() + url.fragment.size() + + url.href.size() + + std::accumulate( + url.path.begin(), url.path.end(), 0, [](size_t sum, const auto& str) { + return sum + str.size(); + })); output += url.scheme; if (url.flags & URL_FLAGS_HAS_HOST) { @@ -1609,9 +1586,8 @@ std::string URL::SerializeURL(const url_data& url, if (url.flags & URL_FLAGS_CANNOT_BE_BASE) { output += url.path[0]; } else { - if (!(url.flags & URL_FLAGS_HAS_HOST) && - url.path.size() > 1 && - url.path[0].empty()) { + if (!(url.flags & URL_FLAGS_HAS_HOST) && url.path.size() > 1 && + url.path[0].empty()) { output += "/."; } for (size_t i = 1; i < url.path.size(); i++) { @@ -1634,10 +1610,9 @@ void SetArgs(Environment* env, const struct url_data& url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ARG_PROTOCOL] = - url.flags & URL_FLAGS_SPECIAL ? - GetSpecial(env, url.scheme) : - OneByteString(isolate, url.scheme.c_str()); + argv[ARG_PROTOCOL] = url.flags & URL_FLAGS_SPECIAL + ? GetSpecial(env, url.scheme) + : OneByteString(isolate, url.scheme.c_str()); if (url.flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = Utf8String(isolate, url.username); if (url.flags & URL_FLAGS_HAS_PASSWORD) @@ -1648,8 +1623,7 @@ void SetArgs(Environment* env, argv[ARG_QUERY] = Utf8String(isolate, url.query); if (url.flags & URL_FLAGS_HAS_FRAGMENT) argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment); - if (url.port > -1) - argv[ARG_PORT] = Integer::New(isolate, url.port); + if (url.port > -1) argv[ARG_PORT] = Integer::New(isolate, url.port); if (url.flags & URL_FLAGS_HAS_PATH) argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked(); } @@ -1673,15 +1647,12 @@ void Parse(Environment* env, url_data base; url_data url; - if (has_context) - url = HarvestContext(env, context_obj.As()); - if (has_base) - base = HarvestBase(env, base_obj.As()); + if (has_context) url = HarvestContext(env, context_obj.As()); + if (has_base) base = HarvestBase(env, base_obj.As()); URL::Parse(input, len, state_override, &url, has_context, &base, has_base); if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && - (url.flags & URL_FLAGS_TERMINATED))) + ((state_override != kUnknownState) && (url.flags & URL_FLAGS_TERMINATED))) return; // Define the return value placeholders @@ -1689,15 +1660,15 @@ void Parse(Environment* env, const Local null = Null(isolate); if (!(url.flags & URL_FLAGS_FAILED)) { Local argv[] = { - undef, - undef, - undef, - undef, - null, // host defaults to null - null, // port defaults to null - undef, - null, // query defaults to null - null, // fragment defaults to null + undef, + undef, + undef, + undef, + null, // host defaults to null + null, // port defaults to null + undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, url); USE(cb->Call(context, recv, arraysize(argv), argv)); @@ -1710,14 +1681,12 @@ void Parse(Environment* env, void Parse(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input + CHECK(args[0]->IsString()); // input CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || - args[2]->IsObject()); + args[2]->IsNull() || args[2]->IsObject()); CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || - args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback + args[3]->IsNull() || args[3]->IsObject()); + CHECK(args[4]->IsFunction()); // complete callback CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback Utf8Value input(env->isolate(), args[0]); @@ -1727,8 +1696,10 @@ void Parse(const FunctionCallbackInfo& args) { args[1]->Uint32Value(env->context()).FromJust()); } - Parse(env, args.This(), - *input, input.length(), + Parse(env, + args.This(), + *input, + input.length(), state_override, args[2], args[3], @@ -1830,16 +1801,11 @@ std::string URL::ToFilePath() const { #ifdef _WIN32 const char* slash = "\\"; - auto is_slash = [] (char ch) { - return ch == '/' || ch == '\\'; - }; + auto is_slash = [](char ch) { return ch == '/' || ch == '\\'; }; #else const char* slash = "/"; - auto is_slash = [] (char ch) { - return ch == '/'; - }; - if ((context_.flags & URL_FLAGS_HAS_HOST) && - context_.host.length() > 0) { + auto is_slash = [](char ch) { return ch == '/'; }; + if ((context_.flags & URL_FLAGS_HAS_HOST) && context_.host.length() > 0) { return ""; } #endif @@ -1862,8 +1828,7 @@ std::string URL::ToFilePath() const { // need to worry about percent encoding because the URL parser will have // already taken care of that for us. Note that this only causes IDNs with an // appropriate `xn--` prefix to be decoded. - if ((context_.flags & URL_FLAGS_HAS_HOST) && - context_.host.length() > 0) { + if ((context_.flags & URL_FLAGS_HAS_HOST) && context_.host.length() > 0) { std::string unicode_host; if (!ToUnicode(context_.host, &unicode_host)) { return ""; @@ -1874,8 +1839,7 @@ std::string URL::ToFilePath() const { if (decoded_path.length() < 3) { return ""; } - if (decoded_path[2] != ':' || - !IsASCIIAlpha(decoded_path[1])) { + if (decoded_path[2] != ':' || !IsASCIIAlpha(decoded_path[1])) { return ""; } // Strip out the leading '\'. @@ -1890,11 +1854,15 @@ URL URL::FromFilePath(const std::string& file_path) { std::string escaped_file_path; for (size_t i = 0; i < file_path.length(); ++i) { escaped_file_path += file_path[i]; - if (file_path[i] == '%') - escaped_file_path += "25"; + if (file_path[i] == '%') escaped_file_path += "25"; } - URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart, - &url.context_, true, nullptr, false); + URL::Parse(escaped_file_path.c_str(), + escaped_file_path.length(), + kPathStart, + &url.context_, + true, + nullptr, + false); return url; } @@ -1909,19 +1877,18 @@ MaybeLocal URL::ToObject(Environment* env) const { const Local undef = Undefined(isolate); const Local null = Null(isolate); - if (context_.flags & URL_FLAGS_FAILED) - return Local(); + if (context_.flags & URL_FLAGS_FAILED) return Local(); Local argv[] = { - undef, - undef, - undef, - undef, - null, // host defaults to null - null, // port defaults to null - undef, - null, // query defaults to null - null, // fragment defaults to null + undef, + undef, + undef, + undef, + null, // host defaults to null + null, // port defaults to null + undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, context_); @@ -1933,8 +1900,8 @@ MaybeLocal URL::ToObject(Environment* env) const { // set the constructor function used below. SetURLConstructor is // called automatically when the internal/url.js module is loaded // during the internal/bootstrap/node.js processing. - ret = env->url_constructor_function() - ->Call(env->context(), undef, arraysize(argv), argv); + ret = env->url_constructor_function()->Call( + env->context(), undef, arraysize(argv), argv); } return ret; From f07693501d5d1303709085c1da44c63a39b5faa5 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Mon, 16 Jan 2023 18:54:10 -0300 Subject: [PATCH 02/14] src: fix node_url.cc code formatting --- src/node_url.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node_url.cc b/src/node_url.cc index e6499b72382df6..3c1f581bb95971 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -324,7 +324,7 @@ bool ToASCII(const std::string& input, std::string* output) { output->assign(*buf, buf.length()); return true; } -#else // !defined(NODE_HAVE_I18N_SUPPORT) +#else // !defined(NODE_HAVE_I18N_SUPPORT) // Intentional non-ops if ICU is not present. bool ToUnicode(const std::string& input, std::string* output) { *output = input; From b1f4c83a57a455647fba52e6c9d4099e9a6d4155 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Mon, 16 Jan 2023 19:29:22 -0300 Subject: [PATCH 03/14] src: fix node_url.cc code formatting (back to what it was) --- src/node_url.cc | 490 ++++++++++++++++++++++++++++-------------------- 1 file changed, 290 insertions(+), 200 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 3c1f581bb95971..cec0f66463f94b 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -16,13 +16,13 @@ namespace node { using errors::TryCatchScope; +using url::table_data::hex; using url::table_data::C0_CONTROL_ENCODE_SET; using url::table_data::FRAGMENT_ENCODE_SET; -using url::table_data::hex; using url::table_data::PATH_ENCODE_SET; +using url::table_data::USERINFO_ENCODE_SET; using url::table_data::QUERY_ENCODE_SET_NONSPECIAL; using url::table_data::QUERY_ENCODE_SET_SPECIAL; -using url::table_data::USERINFO_ENCODE_SET; using v8::Array; using v8::Context; @@ -42,9 +42,10 @@ using v8::Undefined; using v8::Value; Local Utf8String(Isolate* isolate, const std::string& str) { - return String::NewFromUtf8( - isolate, str.data(), NewStringType::kNormal, str.length()) - .ToLocalChecked(); + return String::NewFromUtf8(isolate, + str.data(), + NewStringType::kNormal, + str.length()).ToLocalChecked(); } namespace url { @@ -116,13 +117,13 @@ class URLHost { void SetOpaque(std::string&& string) { Reset(); type_ = HostType::H_OPAQUE; - new (&value_.domain_or_opaque) std::string(std::move(string)); + new(&value_.domain_or_opaque) std::string(std::move(string)); } void SetDomain(std::string&& string) { Reset(); type_ = HostType::H_DOMAIN; - new (&value_.domain_or_opaque) std::string(std::move(string)); + new(&value_.domain_or_opaque) std::string(std::move(string)); } }; @@ -130,16 +131,16 @@ URLHost::~URLHost() { Reset(); } -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) \ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) \ +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) \ XX(ARG_COUNT) // This one has to be last. enum url_cb_args { @@ -148,18 +149,18 @@ enum url_cb_args { #undef XX }; -#define TWO_CHAR_STRING_TEST(bits, name, expr) \ - template \ - bool name(const T ch1, const T ch2) { \ - static_assert(sizeof(ch1) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return (expr); \ - } \ - template \ - bool name(const std::basic_string& str) { \ - static_assert(sizeof(str[0]) >= (bits) / 8, \ - "Character must be wider than " #bits " bits"); \ - return str.length() >= 2 && name(str[0], str[1]); \ +#define TWO_CHAR_STRING_TEST(bits, name, expr) \ + template \ + bool name(const T ch1, const T ch2) { \ + static_assert(sizeof(ch1) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ + } \ + template \ + bool name(const std::basic_string& str) { \ + static_assert(sizeof(str[0]) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return str.length() >= 2 && name(str[0], str[1]); \ } // https://infra.spec.whatwg.org/#ascii-tab-or-newline @@ -175,15 +176,13 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) // https://infra.spec.whatwg.org/#ascii-hex-digit -CHAR_TEST(8, - IsASCIIHexDigit, - (IsASCIIDigit(ch) || (ch >= 'A' && ch <= 'F') || - (ch >= 'a' && ch <= 'f'))) +CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || + (ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f'))) // https://infra.spec.whatwg.org/#ascii-alpha -CHAR_TEST(8, - IsASCIIAlpha, - ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))) +CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || + (ch >= 'a' && ch <= 'z'))) // https://infra.spec.whatwg.org/#ascii-alphanumeric CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) @@ -209,13 +208,11 @@ CHAR_TEST(8, ch == '\x7f') // https://url.spec.whatwg.org/#windows-drive-letter -TWO_CHAR_STRING_TEST(8, - IsWindowsDriveLetter, +TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) // https://url.spec.whatwg.org/#normalized-windows-drive-letter -TWO_CHAR_STRING_TEST(8, - IsNormalizedWindowsDriveLetter, +TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, (IsASCIIAlpha(ch1) && ch2 == ':')) #undef TWO_CHAR_STRING_TEST @@ -236,15 +233,19 @@ void AppendOrEscape(std::string* str, } unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') return ch - '0'; - if (ch >= 'A' && ch <= 'F') return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a'); + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') + return 10 + (ch - 'a'); UNREACHABLE(); } std::string PercentDecode(const char* input, size_t len) { std::string dest; - if (len == 0) return dest; + if (len == 0) + return dest; dest.reserve(len); const char* pointer = input; const char* end = input + len; @@ -254,7 +255,8 @@ std::string PercentDecode(const char* input, size_t len) { size_t remaining = end - pointer - 1; if (ch != '%' || remaining < 2 || (ch == '%' && - (!IsASCIIHexDigit(pointer[1]) || !IsASCIIHexDigit(pointer[2])))) { + (!IsASCIIHexDigit(pointer[1]) || + !IsASCIIHexDigit(pointer[2])))) { dest += ch; pointer++; continue; @@ -269,33 +271,31 @@ std::string PercentDecode(const char* input, size_t len) { return dest; } -#define SPECIALS(XX) \ - XX(ftp, 21, "ftp:") \ - XX(file, -1, "file:") \ - XX(http, 80, "http:") \ - XX(https, 443, "https:") \ - XX(ws, 80, "ws:") \ +#define SPECIALS(XX) \ + XX(ftp, 21, "ftp:") \ + XX(file, -1, "file:") \ + XX(http, 80, "http:") \ + XX(https, 443, "https:") \ + XX(ws, 80, "ws:") \ XX(wss, 443, "wss:") bool IsSpecial(const std::string& scheme) { -#define V(_, __, name) \ - if (scheme == name) return true; +#define V(_, __, name) if (scheme == name) return true; SPECIALS(V); #undef V return false; } Local GetSpecial(Environment* env, const std::string& scheme) { -#define V(key, _, name) \ - if (scheme == name) return env->url_special_##key##_string(); +#define V(key, _, name) if (scheme == name) \ + return env->url_special_##key##_string(); SPECIALS(V) #undef V UNREACHABLE(); } int NormalizePort(const std::string& scheme, int p) { -#define V(_, port, name) \ - if (scheme == name && p == port) return -1; +#define V(_, port, name) if (scheme == name && p == port) return -1; SPECIALS(V); #undef V return p; @@ -304,23 +304,30 @@ int NormalizePort(const std::string& scheme, int p) { // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter bool StartsWithWindowsDriveLetter(const char* p, const char* end) { size_t length = end - p; - return length >= 2 && IsWindowsDriveLetter(p[0], p[1]) && - (length == 2 || p[2] == '/' || p[2] == '\\' || p[2] == '?' || - p[2] == '#'); + return length >= 2 && + IsWindowsDriveLetter(p[0], p[1]) && + (length == 2 || + p[2] == '/' || + p[2] == '\\' || + p[2] == '?' || + p[2] == '#'); } #if defined(NODE_HAVE_I18N_SUPPORT) bool ToUnicode(const std::string& input, std::string* output) { MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) return false; + if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) + return false; output->assign(*buf, buf.length()); return true; } bool ToASCII(const std::string& input, std::string* output) { MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) return false; - if (buf.length() == 0) return false; + if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) + return false; + if (buf.length() == 0) + return false; output->assign(*buf, buf.length()); return true; } @@ -354,8 +361,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { return; } - // Ref: - // https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 + // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 for (int i = 0; i < NS_IN6ADDRSZ; i += 2) { value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1]; } @@ -384,13 +390,16 @@ int64_t ParseIPv4Number(const char* start, const char* end) { const char ch = p[0]; switch (R) { case 8: - if (ch < '0' || ch > '7') return -1; + if (ch < '0' || ch > '7') + return -1; break; case 10: - if (!IsASCIIDigit(ch)) return -1; + if (!IsASCIIDigit(ch)) + return -1; break; case 16: - if (!IsASCIIHexDigit(ch)) return -1; + if (!IsASCIIHexDigit(ch)) + return -1; break; } p++; @@ -489,23 +498,27 @@ void URLHost::ParseIPv4Host(const char* input, size_t length) { uint32_t val = 0; uint64_t numbers[4]; int tooBigNumbers = 0; - if (length == 0) return; + if (length == 0) + return; while (pointer <= end) { const char ch = pointer < end ? pointer[0] : kEOL; int64_t remaining = end - pointer - 1; if (ch == '.' || ch == kEOL) { if (++parts > static_cast(arraysize(numbers))) return; - if (pointer == mark) return; + if (pointer == mark) + return; int64_t n = ParseIPv4Number(mark, pointer); - if (n < 0) return; + if (n < 0) + return; if (n > 255) { tooBigNumbers++; } numbers[parts - 1] = n; mark = pointer + 1; - if (ch == '.' && remaining == 0) break; + if (ch == '.' && remaining == 0) + break; } pointer++; } @@ -514,7 +527,8 @@ void URLHost::ParseIPv4Host(const char* input, size_t length) { // If any but the last item in numbers is greater than 255, return failure. // If the last item in numbers is greater than or equal to // 256^(5 - the number of items in numbers), return failure. - if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || + if (tooBigNumbers > 1 || + (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { return; } @@ -553,20 +567,24 @@ void URLHost::ParseHost(const char* input, CHECK_EQ(type_, HostType::H_FAILED); const char* pointer = input; - if (length == 0) return; + if (length == 0) + return; if (pointer[0] == '[') { - if (pointer[length - 1] != ']') return; + if (pointer[length - 1] != ']') + return; return ParseIPv6Host(++pointer, length - 2); } - if (!is_special) return ParseOpaqueHost(input, length); + if (!is_special) + return ParseOpaqueHost(input, length); // First, we have to percent decode std::string decoded = PercentDecode(input, length); // Then we have to punycode toASCII - if (!ToASCII(decoded, &decoded)) return; + if (!ToASCII(decoded, &decoded)) + return; // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { @@ -582,7 +600,8 @@ void URLHost::ParseHost(const char* input, } // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(decoded, &decoded)) return; + if (unicode && !ToUnicode(decoded, &decoded)) + return; // It's not an IPv4 or IPv6 address, it must be a domain SetDomain(std::move(decoded)); @@ -601,7 +620,8 @@ T* FindLongestZeroSequence(T* values, size_t len) { while (start < end) { if (*start == 0) { - if (current == nullptr) current = start; + if (current == nullptr) + current = start; counter++; } else { if (counter > longest) { @@ -613,7 +633,8 @@ T* FindLongestZeroSequence(T* values, size_t len) { } start++; } - if (counter > longest) result = current; + if (counter > longest) + result = current; return result; } @@ -643,7 +664,8 @@ std::string URLHost::ToString() const { uint32_t value = value_.ipv4; for (int n = 0; n < 4; n++) { dest.insert(0, std::to_string(value % 256)); - if (n < 3) dest.insert(0, 1, '.'); + if (n < 3) + dest.insert(0, 1, '.'); value /= 256; } break; @@ -652,7 +674,8 @@ std::string URLHost::ToString() const { dest.reserve(41); dest += '['; const uint16_t* start = &value_.ipv6[0]; - const uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); + const uint16_t* compress_pointer = + FindLongestZeroSequence(start, 8); bool ignore0 = false; for (int n = 0; n <= 7; n++) { const uint16_t* piece = &value_.ipv6[n]; @@ -668,7 +691,8 @@ std::string URLHost::ToString() const { char buf[5]; snprintf(buf, sizeof(buf), "%x", *piece); dest += buf; - if (n < 7) dest += ':'; + if (n < 7) + dest += ':'; } dest += ']'; break; @@ -689,7 +713,8 @@ bool ParseHost(const std::string& input, } URLHost host; host.ParseHost(input.c_str(), input.length(), is_special, unicode); - if (host.ParsingFailed()) return false; + if (host.ParsingFailed()) + return false; *output = host.ToStringMove(); return true; } @@ -697,7 +722,8 @@ bool ParseHost(const std::string& input, std::vector FromJSStringArray(Environment* env, Local array) { std::vector vec; - if (array->Length() > 0) vec.reserve(array->Length()); + if (array->Length() > 0) + vec.reserve(array->Length()); for (size_t n = 0; n < array->Length(); n++) { Local val = array->Get(env->context(), n).ToLocalChecked(); if (val->IsString()) { @@ -714,11 +740,13 @@ url_data HarvestBase(Environment* env, Local base_obj) { Local flags = base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); - if (flags->IsInt32()) base.flags = flags->Int32Value(context).FromJust(); + if (flags->IsInt32()) + base.flags = flags->Int32Value(context).FromJust(); Local port = base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) base.port = port->Int32Value(context).FromJust(); + if (port->IsInt32()) + base.port = port->Int32Value(context).FromJust(); Local scheme = base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); @@ -752,8 +780,8 @@ url_data HarvestBase(Environment* env, Local base_obj) { env->fragment_string(), true); - Local path = - base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); + Local + path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); if (path->IsArray()) { base.flags |= URL_FLAGS_HAS_PATH; base.path = FromJSStringArray(env, path.As()); @@ -767,8 +795,11 @@ url_data HarvestContext(Environment* env, Local context_obj) { context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) { static constexpr int32_t kCopyFlagsMask = - URL_FLAGS_SPECIAL | URL_FLAGS_CANNOT_BE_BASE | URL_FLAGS_HAS_USERNAME | - URL_FLAGS_HAS_PASSWORD | URL_FLAGS_HAS_HOST; + URL_FLAGS_SPECIAL | + URL_FLAGS_CANNOT_BE_BASE | + URL_FLAGS_HAS_USERNAME | + URL_FLAGS_HAS_PASSWORD | + URL_FLAGS_HAS_HOST; context.flags |= flags.As()->Value() & kCopyFlagsMask; } Local scheme = @@ -779,25 +810,27 @@ url_data HarvestContext(Environment* env, Local context_obj) { } Local port = context_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) context.port = port.As()->Value(); + if (port->IsInt32()) + context.port = port.As()->Value(); if (context.flags & URL_FLAGS_HAS_USERNAME) { Local username = - context_obj->Get(env->context(), env->username_string()) - .ToLocalChecked(); + context_obj->Get(env->context(), + env->username_string()).ToLocalChecked(); CHECK(username->IsString()); Utf8Value value(env->isolate(), username); context.username.assign(*value, value.length()); } if (context.flags & URL_FLAGS_HAS_PASSWORD) { Local password = - context_obj->Get(env->context(), env->password_string()) - .ToLocalChecked(); + context_obj->Get(env->context(), + env->password_string()).ToLocalChecked(); CHECK(password->IsString()); Utf8Value value(env->isolate(), password); context.password.assign(*value, value.length()); } Local host = - context_obj->Get(env->context(), env->host_string()).ToLocalChecked(); + context_obj->Get(env->context(), + env->host_string()).ToLocalChecked(); if (host->IsString()) { Utf8Value value(env->isolate(), host); context.host.assign(*value, value.length()); @@ -811,7 +844,9 @@ bool IsSingleDotSegment(const std::string& str) { case 1: return str == "."; case 3: - return str[0] == '%' && str[1] == '2' && ASCIILowercase(str[2]) == 'e'; + return str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e'; default: return false; } @@ -825,14 +860,23 @@ bool IsDoubleDotSegment(const std::string& str) { case 2: return str == ".."; case 4: - if (str[0] != '.' && str[0] != '%') return false; - return ((str[0] == '.' && str[1] == '%' && str[2] == '2' && + if (str[0] != '.' && str[0] != '%') + return false; + return ((str[0] == '.' && + str[1] == '%' && + str[2] == '2' && ASCIILowercase(str[3]) == 'e') || - (str[0] == '%' && str[1] == '2' && - ASCIILowercase(str[2]) == 'e' && str[3] == '.')); + (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '.')); case 6: - return (str[0] == '%' && str[1] == '2' && ASCIILowercase(str[2]) == 'e' && - str[3] == '%' && str[4] == '2' && ASCIILowercase(str[5]) == 'e'); + return (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '%' && + str[4] == '2' && + ASCIILowercase(str[5]) == 'e'); default: return false; } @@ -841,8 +885,7 @@ bool IsDoubleDotSegment(const std::string& str) { void ShortenUrlPath(struct url_data* url) { if (url->path.empty()) return; if (url->path.size() == 1 && url->scheme == "file:" && - IsNormalizedWindowsDriveLetter(url->path[0])) - return; + IsNormalizedWindowsDriveLetter(url->path[0])) return; url->path.pop_back(); } @@ -880,14 +923,16 @@ void URL::Parse(const char* input, // contents, but in the general case we avoid the overhead. std::string whitespace_stripped; for (const char* ptr = p; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) continue; + if (!IsASCIITabOrNewline(*ptr)) + continue; // Hit tab or newline. Allocate storage, copy what we have until now, // and then iterate and filter all similar characters out. whitespace_stripped.reserve(len - 1); whitespace_stripped.assign(p, ptr - p); // 'ptr + 1' skips the current char, which we know to be tab or newline. for (ptr = ptr + 1; ptr < end; ptr++) { - if (!IsASCIITabOrNewline(*ptr)) whitespace_stripped += *ptr; + if (!IsASCIITabOrNewline(*ptr)) + whitespace_stripped += *ptr; } // Update variables like they should have looked like if the string @@ -899,16 +944,16 @@ void URL::Parse(const char* input, break; } - bool atflag = false; // Set when @ has been seen. - bool square_bracket_flag = false; // Set inside of [...] + bool atflag = false; // Set when @ has been seen. + bool square_bracket_flag = false; // Set inside of [...] bool password_token_seen_flag = false; // Set after a : after an username. std::string buffer; // Set the initial parse state. const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = - has_state_override ? state_override : kSchemeStart; + enum url_parse_state state = has_state_override ? state_override : + kSchemeStart; if (state < kSchemeStart || state > kFragment) { url->flags |= URL_FLAGS_INVALID_PARSE_STATE; @@ -952,7 +997,7 @@ void URL::Parse(const char* input, ((url->flags & URL_FLAGS_HAS_USERNAME) || (url->flags & URL_FLAGS_HAS_PASSWORD) || (url->port != -1))) || - (url->scheme == "file:" && url->host.empty())) { + (url->scheme == "file:" && url->host.empty())) { url->flags |= URL_FLAGS_TERMINATED; return; } @@ -972,10 +1017,13 @@ void URL::Parse(const char* input, // equals to `false`. special_back_slash = false; buffer.clear(); - if (has_state_override) return; + if (has_state_override) + return; if (url->scheme == "file:") { state = kFile; - } else if (special && has_base && url->scheme == base->scheme) { + } else if (special && + has_base && + url->scheme == base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { state = kSpecialAuthoritySlashes; @@ -1027,7 +1075,8 @@ void URL::Parse(const char* input, } url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; - } else if (has_base && base->scheme != "file:") { + } else if (has_base && + base->scheme != "file:") { state = kRelative; continue; } else { @@ -1228,7 +1277,10 @@ void URL::Parse(const char* input, } } buffer.clear(); - } else if (ch == kEOL || ch == '/' || ch == '?' || ch == '#' || + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || special_back_slash) { if (atflag && buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; @@ -1261,14 +1313,18 @@ void URL::Parse(const char* input, } buffer.clear(); state = kPort; - } else if (ch == kEOL || ch == '/' || ch == '?' || ch == '#' || + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || special_back_slash) { p--; if (special && buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; return; } - if (has_state_override && buffer.size() == 0 && + if (has_state_override && + buffer.size() == 0 && ((url->username.size() > 0 || url->password.size() > 0) || url->port != -1)) { url->flags |= URL_FLAGS_TERMINATED; @@ -1285,16 +1341,22 @@ void URL::Parse(const char* input, return; } } else { - if (ch == '[') square_bracket_flag = true; - if (ch == ']') square_bracket_flag = false; + if (ch == '[') + square_bracket_flag = true; + if (ch == ']') + square_bracket_flag = false; buffer += ch; } break; case kPort: if (IsASCIIDigit(ch)) { buffer += ch; - } else if (has_state_override || ch == kEOL || ch == '/' || ch == '?' || - ch == '#' || special_back_slash) { + } else if (has_state_override || + ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { if (buffer.size() > 0) { unsigned port = 0; // the condition port <= 0xffff prevents integer overflow @@ -1313,7 +1375,8 @@ void URL::Parse(const char* input, } // the port is valid url->port = NormalizePort(url->scheme, static_cast(port)); - if (url->port == -1) url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; + if (url->port == -1) + url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT; buffer.clear(); } else if (has_state_override) { // TODO(TimothyGu): Similar case as above. @@ -1419,14 +1482,20 @@ void URL::Parse(const char* input, } break; case kFileHost: - if (ch == kEOL || ch == '/' || ch == '\\' || ch == '?' || ch == '#') { - if (!has_state_override && buffer.size() == 2 && + if (ch == kEOL || + ch == '/' || + ch == '\\' || + ch == '?' || + ch == '#') { + if (!has_state_override && + buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { state = kPath; } else if (buffer.size() == 0) { url->flags |= URL_FLAGS_HAS_HOST; url->host.clear(); - if (has_state_override) return; + if (has_state_override) + return; state = kPathStart; } else { std::string host; @@ -1434,10 +1503,12 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_FAILED; return; } - if (host == "localhost") host.clear(); + if (host == "localhost") + host.clear(); url->flags |= URL_FLAGS_HAS_HOST; url->host = host; - if (has_state_override) return; + if (has_state_override) + return; buffer.clear(); state = kPathStart; } @@ -1471,7 +1542,9 @@ void URL::Parse(const char* input, } break; case kPath: - if (ch == kEOL || ch == '/' || special_back_slash || + if (ch == kEOL || + ch == '/' || + special_back_slash || (!has_state_override && (ch == '?' || ch == '#'))) { if (IsDoubleDotSegment(buffer)) { ShortenUrlPath(url); @@ -1479,13 +1552,15 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_PATH; url->path.emplace_back(""); } - } else if (IsSingleDotSegment(buffer) && ch != '/' && - !special_back_slash) { + } else if (IsSingleDotSegment(buffer) && + ch != '/' && !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; url->path.emplace_back(""); } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && url->path.empty() && - buffer.size() == 2 && IsWindowsDriveLetter(buffer)) { + if (url->scheme == "file:" && + url->path.empty() && + buffer.size() == 2 && + IsWindowsDriveLetter(buffer)) { buffer[1] = ':'; } url->flags |= URL_FLAGS_HAS_PATH; @@ -1525,12 +1600,11 @@ void URL::Parse(const char* input, url->flags |= URL_FLAGS_HAS_QUERY; url->query = std::move(buffer); buffer.clear(); - if (ch == '#') state = kFragment; + if (ch == '#') + state = kFragment; } else { - AppendOrEscape( - &buffer, - ch, - special ? QUERY_ENCODE_SET_SPECIAL : QUERY_ENCODE_SET_NONSPECIAL); + AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL : + QUERY_ENCODE_SET_NONSPECIAL); } break; case kFragment: @@ -1553,17 +1627,23 @@ void URL::Parse(const char* input, } // NOLINT(readability/fn_size) // https://url.spec.whatwg.org/#url-serializing -std::string URL::SerializeURL(const url_data& url, bool exclude = false) { +std::string URL::SerializeURL(const url_data& url, + bool exclude = false) { std::string output; output.reserve( - 10 + // We generally insert < 10 separator characters between URL parts - url.scheme.size() + url.username.size() + url.password.size() + - url.host.size() + url.query.size() + url.fragment.size() + - url.href.size() + - std::accumulate( - url.path.begin(), url.path.end(), 0, [](size_t sum, const auto& str) { - return sum + str.size(); - })); + 10 + // We generally insert < 10 separator characters between URL parts + url.scheme.size() + + url.username.size() + + url.password.size() + + url.host.size() + + url.query.size() + + url.fragment.size() + + url.href.size() + + std::accumulate( + url.path.begin(), + url.path.end(), + 0, + [](size_t sum, const auto& str) { return sum + str.size(); })); output += url.scheme; if (url.flags & URL_FLAGS_HAS_HOST) { @@ -1586,8 +1666,9 @@ std::string URL::SerializeURL(const url_data& url, bool exclude = false) { if (url.flags & URL_FLAGS_CANNOT_BE_BASE) { output += url.path[0]; } else { - if (!(url.flags & URL_FLAGS_HAS_HOST) && url.path.size() > 1 && - url.path[0].empty()) { + if (!(url.flags & URL_FLAGS_HAS_HOST) && + url.path.size() > 1 && + url.path[0].empty()) { output += "/."; } for (size_t i = 1; i < url.path.size(); i++) { @@ -1610,9 +1691,10 @@ void SetArgs(Environment* env, const struct url_data& url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ARG_PROTOCOL] = url.flags & URL_FLAGS_SPECIAL - ? GetSpecial(env, url.scheme) - : OneByteString(isolate, url.scheme.c_str()); + argv[ARG_PROTOCOL] = + url.flags & URL_FLAGS_SPECIAL ? + GetSpecial(env, url.scheme) : + OneByteString(isolate, url.scheme.c_str()); if (url.flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = Utf8String(isolate, url.username); if (url.flags & URL_FLAGS_HAS_PASSWORD) @@ -1623,7 +1705,8 @@ void SetArgs(Environment* env, argv[ARG_QUERY] = Utf8String(isolate, url.query); if (url.flags & URL_FLAGS_HAS_FRAGMENT) argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment); - if (url.port > -1) argv[ARG_PORT] = Integer::New(isolate, url.port); + if (url.port > -1) + argv[ARG_PORT] = Integer::New(isolate, url.port); if (url.flags & URL_FLAGS_HAS_PATH) argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked(); } @@ -1647,12 +1730,15 @@ void Parse(Environment* env, url_data base; url_data url; - if (has_context) url = HarvestContext(env, context_obj.As()); - if (has_base) base = HarvestBase(env, base_obj.As()); + if (has_context) + url = HarvestContext(env, context_obj.As()); + if (has_base) + base = HarvestBase(env, base_obj.As()); URL::Parse(input, len, state_override, &url, has_context, &base, has_base); if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && (url.flags & URL_FLAGS_TERMINATED))) + ((state_override != kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) return; // Define the return value placeholders @@ -1660,15 +1746,15 @@ void Parse(Environment* env, const Local null = Null(isolate); if (!(url.flags & URL_FLAGS_FAILED)) { Local argv[] = { - undef, - undef, - undef, - undef, - null, // host defaults to null - null, // port defaults to null - undef, - null, // query defaults to null - null, // fragment defaults to null + undef, + undef, + undef, + undef, + null, // host defaults to null + null, // port defaults to null + undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, url); USE(cb->Call(context, recv, arraysize(argv), argv)); @@ -1681,12 +1767,14 @@ void Parse(Environment* env, void Parse(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input + CHECK(args[0]->IsString()); // input CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || args[2]->IsObject()); + args[2]->IsNull() || + args[2]->IsObject()); CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback + args[3]->IsNull() || + args[3]->IsObject()); + CHECK(args[4]->IsFunction()); // complete callback CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback Utf8Value input(env->isolate(), args[0]); @@ -1696,10 +1784,8 @@ void Parse(const FunctionCallbackInfo& args) { args[1]->Uint32Value(env->context()).FromJust()); } - Parse(env, - args.This(), - *input, - input.length(), + Parse(env, args.This(), + *input, input.length(), state_override, args[2], args[3], @@ -1801,11 +1887,16 @@ std::string URL::ToFilePath() const { #ifdef _WIN32 const char* slash = "\\"; - auto is_slash = [](char ch) { return ch == '/' || ch == '\\'; }; + auto is_slash = [] (char ch) { + return ch == '/' || ch == '\\'; + }; #else const char* slash = "/"; - auto is_slash = [](char ch) { return ch == '/'; }; - if ((context_.flags & URL_FLAGS_HAS_HOST) && context_.host.length() > 0) { + auto is_slash = [] (char ch) { + return ch == '/'; + }; + if ((context_.flags & URL_FLAGS_HAS_HOST) && + context_.host.length() > 0) { return ""; } #endif @@ -1828,7 +1919,8 @@ std::string URL::ToFilePath() const { // need to worry about percent encoding because the URL parser will have // already taken care of that for us. Note that this only causes IDNs with an // appropriate `xn--` prefix to be decoded. - if ((context_.flags & URL_FLAGS_HAS_HOST) && context_.host.length() > 0) { + if ((context_.flags & URL_FLAGS_HAS_HOST) && + context_.host.length() > 0) { std::string unicode_host; if (!ToUnicode(context_.host, &unicode_host)) { return ""; @@ -1839,7 +1931,8 @@ std::string URL::ToFilePath() const { if (decoded_path.length() < 3) { return ""; } - if (decoded_path[2] != ':' || !IsASCIIAlpha(decoded_path[1])) { + if (decoded_path[2] != ':' || + !IsASCIIAlpha(decoded_path[1])) { return ""; } // Strip out the leading '\'. @@ -1854,15 +1947,11 @@ URL URL::FromFilePath(const std::string& file_path) { std::string escaped_file_path; for (size_t i = 0; i < file_path.length(); ++i) { escaped_file_path += file_path[i]; - if (file_path[i] == '%') escaped_file_path += "25"; + if (file_path[i] == '%') + escaped_file_path += "25"; } - URL::Parse(escaped_file_path.c_str(), - escaped_file_path.length(), - kPathStart, - &url.context_, - true, - nullptr, - false); + URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart, + &url.context_, true, nullptr, false); return url; } @@ -1877,18 +1966,19 @@ MaybeLocal URL::ToObject(Environment* env) const { const Local undef = Undefined(isolate); const Local null = Null(isolate); - if (context_.flags & URL_FLAGS_FAILED) return Local(); + if (context_.flags & URL_FLAGS_FAILED) + return Local(); Local argv[] = { - undef, - undef, - undef, - undef, - null, // host defaults to null - null, // port defaults to null - undef, - null, // query defaults to null - null, // fragment defaults to null + undef, + undef, + undef, + undef, + null, // host defaults to null + null, // port defaults to null + undef, + null, // query defaults to null + null, // fragment defaults to null }; SetArgs(env, argv, context_); @@ -1900,8 +1990,8 @@ MaybeLocal URL::ToObject(Environment* env) const { // set the constructor function used below. SetURLConstructor is // called automatically when the internal/url.js module is loaded // during the internal/bootstrap/node.js processing. - ret = env->url_constructor_function()->Call( - env->context(), undef, arraysize(argv), argv); + ret = env->url_constructor_function() + ->Call(env->context(), undef, arraysize(argv), argv); } return ret; @@ -1911,4 +2001,4 @@ MaybeLocal URL::ToObject(Environment* env) const { } // namespace node NODE_BINDING_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize) -NODE_BINDING_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences) +NODE_BINDING_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences) \ No newline at end of file From 95d3081e305e59fd8975fb25c045afd3119a40ac Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Mon, 16 Jan 2023 21:27:45 -0300 Subject: [PATCH 04/14] src: refactor EndsInANumber in node_url.cc to use string_view::rfind --- src/node_url.cc | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index cec0f66463f94b..af73a4653b6be4 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -5,11 +5,16 @@ #include "node_i18n.h" #include "util-inl.h" +#include <__string> #include +#include #include +#include #include +#include #include #include +#include #include namespace node { @@ -409,42 +414,28 @@ int64_t ParseIPv4Number(const char* start, const char* end) { // https://url.spec.whatwg.org/#ipv4-number-parser bool IsIPv4NumberValid(std::string_view input) { - // If input is the empty string, then return failure. if (input.empty()) { return false; } - // If input contains at least two code points.. - if (input.size() >= 2) { - // and the first two code points are either "0X" or "0x", then: - if (input[0] == '0' && (input[1] == 'X' || input[1] == 'x')) { + if (input.size() >= 2 && input[0] == '0') { + if (input[1] == 'X' || input[1] == 'x') { if (input.size() == 2) { return true; } - // Remove the first two code points from input, - // radix-R is 16 - // If input contains a code point that is not a radix-R digit, then return - // failure. return input.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos; - // and the first code point is U+0030 (0), then: - } else if (input[0] == '0') { + } else { if (input.size() == 1) { return true; } - // Remove the first code point from input. - // radix-R is 8 - // If input contains a code point that is not a radix-R digit, then return - // failure. return input.find_first_not_of("01234567", 1) == std::string_view::npos; } } - // If input contains a code point that is not a radix-R digit, then return - // failure. radix-R is 10 return std::all_of(input.begin(), input.end(), ::isdigit); } @@ -454,38 +445,32 @@ bool EndsInANumber(const std::string_view input) { return false; } - const std::string delimiter = "."; + char delimiter = '.'; auto pointer_start = input.begin(); auto pointer_end = input.end(); - uint8_t parts_size = std::count(pointer_start, pointer_end, delimiter[0]); + uint8_t parts_size = std::count(pointer_start, pointer_end, delimiter); ++parts_size; - // If the last item in parts is the empty string, then: - if (input.back() == delimiter[0]) { - // Remove the last item from parts. + if (input.back() == delimiter) { --pointer_end; --parts_size; } - // Let last be the last item in parts if (parts_size > 1) { - pointer_start = std::find_end( - pointer_start, pointer_end, delimiter.begin(), delimiter.end()); - ++pointer_start; + pointer_start += + input.rfind(delimiter, std::distance(pointer_start, pointer_end) - 1) + + 1; } if (std::distance(pointer_start, pointer_end) == 0) { return false; } - // If last is non-empty and contains only ASCII digits, then return true. if (std::all_of(pointer_start, pointer_end, ::isdigit)) { return true; } - // If parsing last as an IPv4 number does not return failure, then return - // true. return IsIPv4NumberValid(std::string(pointer_start, pointer_end)); } @@ -2001,4 +1986,4 @@ MaybeLocal URL::ToObject(Environment* env) const { } // namespace node NODE_BINDING_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize) -NODE_BINDING_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences) \ No newline at end of file +NODE_BINDING_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences) From e9282ad65a63d8d2dcc637495cd7164545b1ca98 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Mon, 16 Jan 2023 21:35:39 -0300 Subject: [PATCH 05/14] src: fix imports typo in node_url.cc --- src/node_url.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index af73a4653b6be4..d0a1b50e4a66ad 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -5,16 +5,11 @@ #include "node_i18n.h" #include "util-inl.h" -#include <__string> #include -#include #include -#include #include -#include #include #include -#include #include namespace node { From a683d340f33c4094cb84b5087b46e1921d610218 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Tue, 17 Jan 2023 10:06:02 -0300 Subject: [PATCH 06/14] src: removes unnecessary if statement from EndsInANumber in node_url.cc --- src/node_url.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index d0a1b50e4a66ad..e7d64e45b602d6 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -421,12 +421,7 @@ bool IsIPv4NumberValid(std::string_view input) { return input.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos; - } else { - if (input.size() == 1) { - return true; - } - return input.find_first_not_of("01234567", 1) == std::string_view::npos; } } From d165e39a9ae4620c2f6025543595ba01e909838c Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Tue, 17 Jan 2023 10:36:55 -0300 Subject: [PATCH 07/14] src: use string_view instead of string in EndsInANumber in node_url.cc --- src/node_url.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/node_url.cc b/src/node_url.cc index e7d64e45b602d6..b89044ee505e4a 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -461,7 +461,8 @@ bool EndsInANumber(const std::string_view input) { return true; } - return IsIPv4NumberValid(std::string(pointer_start, pointer_end)); + return IsIPv4NumberValid(std::string_view( + pointer_start, std::distance(pointer_start, pointer_end))); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From 573dd4af7dda4da573f6f165f5f5898852a46f1d Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Tue, 17 Jan 2023 16:02:58 -0300 Subject: [PATCH 08/14] src: fix string_view constructor call for IsIPv4NumberValid in node_url.cc --- src/node_url.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index b89044ee505e4a..72ecfec300946a 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -461,8 +461,8 @@ bool EndsInANumber(const std::string_view input) { return true; } - return IsIPv4NumberValid(std::string_view( - pointer_start, std::distance(pointer_start, pointer_end))); + return IsIPv4NumberValid( + std::string_view(&*pointer_start, pointer_end - pointer_start)); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From b52bbd4443eb4431725bb1efbc8296e829c694d3 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Wed, 18 Jan 2023 10:51:16 -0300 Subject: [PATCH 09/14] src: adds minor tweaks to EndsInANumber at node_url.cc --- src/node_url.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 72ecfec300946a..8e20875678920b 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -408,11 +408,12 @@ int64_t ParseIPv4Number(const char* start, const char* end) { } // https://url.spec.whatwg.org/#ipv4-number-parser -bool IsIPv4NumberValid(std::string_view input) { +bool IsIPv4NumberValid(const std::string_view input) { if (input.empty()) { return false; } + // ref: #ipv4-number-parser session in url specs, subtopics 4-5 if (input.size() >= 2 && input[0] == '0') { if (input[1] == 'X' || input[1] == 'x') { if (input.size() == 2) { @@ -421,23 +422,23 @@ bool IsIPv4NumberValid(std::string_view input) { return input.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos; - } else { - return input.find_first_not_of("01234567", 1) == std::string_view::npos; } + + return input.find_first_not_of("01234567", 1) == std::string_view::npos; } return std::all_of(input.begin(), input.end(), ::isdigit); } // https://url.spec.whatwg.org/#ends-in-a-number-checker -bool EndsInANumber(const std::string_view input) { +inline bool EndsInANumber(const std::string_view input) { if (input.empty()) { return false; } char delimiter = '.'; - auto pointer_start = input.begin(); - auto pointer_end = input.end(); + std::string_view::iterator pointer_start = input.begin(); + std::string_view::iterator pointer_end = input.end(); uint8_t parts_size = std::count(pointer_start, pointer_end, delimiter); ++parts_size; @@ -449,11 +450,10 @@ bool EndsInANumber(const std::string_view input) { if (parts_size > 1) { pointer_start += - input.rfind(delimiter, std::distance(pointer_start, pointer_end) - 1) + - 1; + input.rfind(delimiter, pointer_end - pointer_start - 1) + 1; } - if (std::distance(pointer_start, pointer_end) == 0) { + if ((pointer_end - pointer_start) == 0) { return false; } @@ -462,7 +462,7 @@ bool EndsInANumber(const std::string_view input) { } return IsIPv4NumberValid( - std::string_view(&*pointer_start, pointer_end - pointer_start)); + std::string_view(pointer_start, pointer_end - pointer_start)); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From 3074383d0bb153c1156415f0ad69cff0889bd893 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Wed, 18 Jan 2023 11:02:55 -0300 Subject: [PATCH 10/14] src: adds comment to IsIPv4NumberValid conditions in node_url.cc --- src/node_url.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/node_url.cc b/src/node_url.cc index 8e20875678920b..b7cd48cb056973 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -413,7 +413,9 @@ bool IsIPv4NumberValid(const std::string_view input) { return false; } - // ref: #ipv4-number-parser session in url specs, subtopics 4-5 + // If a number starts with '0' it might be a number with base 8 or base + // 16. If not, checking if all characters are digits proves that it is a + // base 10 number. if (input.size() >= 2 && input[0] == '0') { if (input[1] == 'X' || input[1] == 'x') { if (input.size() == 2) { From de4f0651d24ef86f0ff308c6046ed0736e642fe5 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Wed, 18 Jan 2023 12:37:27 -0300 Subject: [PATCH 11/14] src: cast interators to size_t in IsIPv4NumberValid call in node_url.cc --- src/node_url.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/node_url.cc b/src/node_url.cc index b7cd48cb056973..96869cc7eed463 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -464,7 +464,7 @@ inline bool EndsInANumber(const std::string_view input) { } return IsIPv4NumberValid( - std::string_view(pointer_start, pointer_end - pointer_start)); + std::string_view(pointer_start, size_t(pointer_end - pointer_start))); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From 9058eb57f9909b7bd1089cee05b2ce4aedf4a7e2 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Wed, 18 Jan 2023 14:40:54 -0300 Subject: [PATCH 12/14] src: cast interator to const char* in IsIPv4NumberValid call in node_url.cc --- src/node_url.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 96869cc7eed463..5980b0e4d82401 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -463,8 +463,8 @@ inline bool EndsInANumber(const std::string_view input) { return true; } - return IsIPv4NumberValid( - std::string_view(pointer_start, size_t(pointer_end - pointer_start))); + return IsIPv4NumberValid(std::string_view(const_cast(pointer_start), + pointer_end - pointer_start)); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From b65769f401fb0fefaadf5208c7686d8d480ea7a6 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Thu, 19 Jan 2023 10:34:00 -0300 Subject: [PATCH 13/14] src: use &(*it) to make windows build pass + ::all_of instead of ::find_first_not_of --- src/node_url.cc | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 5980b0e4d82401..4d286a1ffdae49 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -175,6 +175,8 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) // https://infra.spec.whatwg.org/#ascii-digit CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) +CHAR_TEST(8, IsASCIIOcDigit, (ch >= '0' && ch <= '7')) + // https://infra.spec.whatwg.org/#ascii-hex-digit CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || (ch >= 'A' && ch <= 'F') || @@ -422,14 +424,19 @@ bool IsIPv4NumberValid(const std::string_view input) { return true; } - return input.find_first_not_of("0123456789abcdefABCDEF", 2) == - std::string_view::npos; + return std::all_of(input.begin() + 2, input.end(), [](const char& c) { + return IsASCIIHexDigit(c); + }); } - return input.find_first_not_of("01234567", 1) == std::string_view::npos; + return std::all_of(input.begin() + 1, input.end(), [](const char& c) { + return IsASCIIOcDigit(c); + }); } - return std::all_of(input.begin(), input.end(), ::isdigit); + return std::all_of(input.begin(), input.end(), [](const char& c) { + return IsASCIIDigit(c); + }); } // https://url.spec.whatwg.org/#ends-in-a-number-checker @@ -439,32 +446,31 @@ inline bool EndsInANumber(const std::string_view input) { } char delimiter = '.'; - std::string_view::iterator pointer_start = input.begin(); - std::string_view::iterator pointer_end = input.end(); + auto it_start = input.begin(); + auto it_end = input.end(); - uint8_t parts_size = std::count(pointer_start, pointer_end, delimiter); + auto parts_size = std::count(it_start, it_end, delimiter); ++parts_size; if (input.back() == delimiter) { - --pointer_end; + --it_end; --parts_size; } if (parts_size > 1) { - pointer_start += - input.rfind(delimiter, pointer_end - pointer_start - 1) + 1; + it_start += input.rfind(delimiter, it_end - it_start - 1) + 1; } - if ((pointer_end - pointer_start) == 0) { + if ((it_end - it_start) == 0) { return false; } - if (std::all_of(pointer_start, pointer_end, ::isdigit)) { + if (std::all_of( + it_start, it_end, [](const char& c) { return IsASCIIDigit(c); })) { return true; } - return IsIPv4NumberValid(std::string_view(const_cast(pointer_start), - pointer_end - pointer_start)); + return IsIPv4NumberValid(std::string_view(&(*it_start), it_end - it_start)); } void URLHost::ParseIPv4Host(const char* input, size_t length) { From 9534455fbf68c055ca2453d1722a0a640289b474 Mon Sep 17 00:00:00 2001 From: Miguel Teixeira Date: Fri, 20 Jan 2023 10:51:52 -0300 Subject: [PATCH 14/14] src: refactor EndsInANumber to be more readable in node_url.cc --- src/node_url.cc | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 4d286a1ffdae49..d9e8a9b59ea391 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -446,31 +446,30 @@ inline bool EndsInANumber(const std::string_view input) { } char delimiter = '.'; - auto it_start = input.begin(); - auto it_end = input.end(); - - auto parts_size = std::count(it_start, it_end, delimiter); - ++parts_size; - + auto last_index = input.size() - 1; if (input.back() == delimiter) { - --it_end; - --parts_size; + --last_index; } - if (parts_size > 1) { - it_start += input.rfind(delimiter, it_end - it_start - 1) + 1; + std::string_view last{}; + auto pos = input.find_last_of(delimiter, last_index); + if (pos == std::string_view::npos) { + last = input.substr(0, last_index); + } else { + last = input.substr(pos + 1, last_index - pos); } - if ((it_end - it_start) == 0) { + if (last.empty()) { return false; } - if (std::all_of( - it_start, it_end, [](const char& c) { return IsASCIIDigit(c); })) { + if (std::all_of(last.begin(), last.end(), [](const char& c) { + return IsASCIIDigit(c); + })) { return true; } - return IsIPv4NumberValid(std::string_view(&(*it_start), it_end - it_start)); + return IsIPv4NumberValid(last); } void URLHost::ParseIPv4Host(const char* input, size_t length) {