From b06298c98e49ea4d288451d8611222a321e27221 Mon Sep 17 00:00:00 2001
From: Miguel Teixeira
Date: Mon, 23 Jan 2023 12:05:19 -0300
Subject: [PATCH] src: refactor EndsInANumber in node_url.cc and adds IsIPv4NumberValid

Removes the use of vector in EndsInANumber and uses IsIPv4NumberValid
instead of parsing the number to check if it is valid.

Fixes: https://github.com/nodejs/performance/issues/36
Refs: https://github.com/ada-url/ada/pull/36
PR-URL: https://github.com/nodejs/node/pull/46227
Reviewed-By: Yagiz Nizipli
Reviewed-By: Anna Henningsen
Reviewed-By: James M Snell
---
 src/node_url.cc | 76 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 14 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index 1cc6ceb4594e66..3b0fd85895916e 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -175,6 +175,8 @@ CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
 // https://infra.spec.whatwg.org/#ascii-digit
 CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
 
+CHAR_TEST(8, IsASCIIOcDigit, (ch >= '0' && ch <= '7'))
+
 // https://infra.spec.whatwg.org/#ascii-hex-digit
 CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                                (ch >= 'A' && ch <= 'F') ||
@@ -407,29 +409,75 @@ int64_t ParseIPv4Number(const char* start, const char* end) {
   return strtoll(start, nullptr, R);
 }
 
+// https://url.spec.whatwg.org/#ipv4-number-parser
+// Syntax-only check: accepts decimal digits, "0"-prefixed octal digits, or
+// "0x"/"0X"-prefixed hex digits. The numeric value itself is never computed.
+bool IsIPv4NumberValid(const std::string_view input) {
+  if (input.empty()) {
+    return false;
+  }
+
+  // If a number starts with '0' it might be a number with base 8 or base
+  // 16. If not, checking if all characters are digits proves that it is a
+  // base 10 number.
+  if (input.size() >= 2 && input[0] == '0') {
+    if (input[1] == 'X' || input[1] == 'x') {
+      // A bare "0x"/"0X" prefix with no digits is accepted.
+      if (input.size() == 2) {
+        return true;
+      }
+
+      return std::all_of(input.begin() + 2, input.end(), [](const char& c) {
+        return IsASCIIHexDigit(c);
+      });
+    }
+
+    return std::all_of(input.begin() + 1, input.end(), [](const char& c) {
+      return IsASCIIOcDigit(c);
+    });
+  }
+
+  return std::all_of(input.begin(), input.end(), [](const char& c) {
+    return IsASCIIDigit(c);
+  });
+}
+
 // https://url.spec.whatwg.org/#ends-in-a-number-checker
-bool EndsInANumber(const std::string& input) {
-  std::vector<std::string> parts = SplitString(input, '.', false);
+// Returns true when the last dot-separated part of |input| (ignoring a single
+// trailing dot) is all ASCII digits or passes IsIPv4NumberValid.
+inline bool EndsInANumber(const std::string_view input) {
+  if (input.empty()) {
+    return false;
+  }

-  if (parts.empty()) return false;
+  char delimiter = '.';
+  auto last_index = input.size() - 1;
+  // Step back over a single trailing dot.
+  if (input.back() == delimiter) {
+    --last_index;
+  }

-  if (parts.back().empty()) {
-    if (parts.size() == 1) return false;
-    parts.pop_back();
+  std::string_view last{};
+  auto pos = input.find_last_of(delimiter, last_index);
+  if (pos == std::string_view::npos) {
+    // No dot at or before last_index: the last part is input[0..last_index],
+    // i.e. last_index + 1 characters (substr takes a count, not an end index).
+    last = input.substr(0, last_index + 1);
+  } else {
+    last = input.substr(pos + 1, last_index - pos);
   }

-  const std::string& last = parts.back();
+  if (last.empty()) {
+    return false;
+  }

-  // If last is non-empty and contains only ASCII digits, then return true
-  if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) {
+  if (std::all_of(last.begin(), last.end(), [](const char& c) {
+        return IsASCIIDigit(c);
+      })) {
     return true;
   }

-  const char* last_str = last.c_str();
-  int64_t num = ParseIPv4Number(last_str, last_str + last.size());
-  if (num >= 0) return true;
-
-  return false;
+  return IsIPv4NumberValid(last);
 }

 void URLHost::ParseIPv4Host(const char* input, size_t length) {