From de21f612c2b91a63e49964f13cfba7b52f736f4c Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 8 Sep 2024 12:08:14 -0400 Subject: [PATCH 01/12] tools: refactor js2c.cc to use c++20 --- tools/js2c.cc | 78 ++++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index a536b5dcd85727..a3b7fcf68cf1eb 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -1,13 +1,10 @@ #include #include #include -#include #include #include #include -#include #include -#include #include #include #include @@ -72,26 +69,8 @@ size_t GetFileSize(const std::string& filename, int* error) { return result; } -bool EndsWith(const std::string& str, std::string_view suffix) { - size_t suffix_len = suffix.length(); - size_t str_len = str.length(); - if (str_len < suffix_len) { - return false; - } - return str.compare(str_len - suffix_len, suffix_len, suffix) == 0; -} - -bool StartsWith(const std::string& str, std::string_view prefix) { - size_t prefix_len = prefix.length(); - size_t str_len = str.length(); - if (str_len < prefix_len) { - return false; - } - return str.compare(0, prefix_len, prefix) == 0; -} - -bool FilenameIsConfigGypi(const std::string& path) { - return path == "config.gypi" || EndsWith(path, "/config.gypi"); +constexpr bool FilenameIsConfigGypi(const std::string_view path) { + return path == "config.gypi" || path.ends_with("/config.gypi"); } typedef std::vector FileList; @@ -99,7 +78,7 @@ typedef std::map FileMap; bool SearchFiles(const std::string& dir, FileMap* file_map, - const std::string& extension) { + std::string_view extension) { uv_fs_t scan_req; int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr); bool errored = false; @@ -107,7 +86,7 @@ bool SearchFiles(const std::string& dir, PrintUvError("scandir", dir.c_str(), result); errored = true; } else { - auto it = file_map->insert({extension, FileList()}).first; + auto it = file_map->insert({std::string(extension), FileList()}).first; FileList& files = it->second; files.reserve(files.size() + result); uv_dirent_t dent; @@ -124,7 +103,7 @@ bool SearchFiles(const std::string& dir, } std::string path = dir + '/' + dent.name; - if (EndsWith(path, extension)) { + if (path.ends_with(extension)) { files.emplace_back(path); continue; } @@ -153,12 +132,11 @@ constexpr std::string_view kJsSuffix = ".js"; constexpr std::string_view kGypiSuffix = ".gypi"; constexpr std::string_view depsPrefix = "deps/"; constexpr std::string_view libPrefix = "lib/"; -std::set kAllowedExtensions{ - kGypiSuffix, kJsSuffix, kMjsSuffix}; -std::string_view HasAllowedExtensions(const std::string& filename) { - for (const auto& ext : kAllowedExtensions) { - if (EndsWith(filename, ext)) { +constexpr std::string_view HasAllowedExtensions( + const std::string_view filename) { + for (const auto& ext : {kGypiSuffix, kJsSuffix, kMjsSuffix}) { + if (filename.ends_with(ext)) { return ext; } } @@ -350,17 +328,17 @@ std::string GetFileId(const std::string& filename) { size_t start = 0; std::string prefix; // Strip .mjs and .js suffix - if (EndsWith(filename, kMjsSuffix)) { + if (filename.ends_with(kMjsSuffix)) { end -= kMjsSuffix.size(); - } else if (EndsWith(filename, kJsSuffix)) { + } else if (filename.ends_with(kJsSuffix)) { end -= kJsSuffix.size(); } // deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn - if (StartsWith(filename, depsPrefix)) { + if (filename.starts_with(depsPrefix)) { start = depsPrefix.size(); prefix = "internal/deps/"; - } else if (StartsWith(filename, libPrefix)) { + } else if (filename.starts_with(libPrefix)) { // lib/internal/url.js -> internal/url start = libPrefix.size(); prefix = ""; @@ -381,17 +359,16 @@ std::string GetVariableName(const std::string& id) { return result; } -std::vector GetCodeTable() { - size_t size = 1 << 16; - std::vector code_table(size); - for (size_t i = 0; i < size; ++i) { - code_table[i] = std::to_string(i) + ','; +static const std::array GetCodeTable() { + std::array table{}; + for (size_t i = 0; i < 65536; ++i) { + table[i] = std::to_string(i) + ','; } - return code_table; + return table; } -const std::string& GetCode(uint16_t index) { - static std::vector table = GetCodeTable(); +const std::string_view GetCode(uint16_t index) { + static std::array table = GetCodeTable(); return table[index]; } @@ -532,8 +509,7 @@ Fragment GetDefinitionImpl(const std::vector& code, // Avoid using snprintf on large chunks of data because it's much slower. // It's fine to use it on small amount of data though. if constexpr (is_two_byte) { - std::vector utf16_codepoints; - utf16_codepoints.resize(count); + std::vector utf16_codepoints(count); size_t utf16_count = simdutf::convert_utf8_to_utf16( code.data(), code.size(), @@ -542,8 +518,8 @@ Fragment GetDefinitionImpl(const std::vector& code, utf16_codepoints.resize(utf16_count); Debug("static size %zu\n", utf16_count); for (size_t i = 0; i < utf16_count; ++i) { - const std::string& str = GetCode(utf16_codepoints[i]); - memcpy(result.data() + cur, str.c_str(), str.size()); + std::string_view str = GetCode(utf16_codepoints[i]); + memcpy(result.data() + cur, str.data(), str.size()); cur += str.size(); } } else { @@ -556,8 +532,8 @@ Fragment GetDefinitionImpl(const std::vector& code, i, ch); } - const std::string& str = GetCode(ch); - memcpy(result.data() + cur, str.c_str(), str.size()); + std::string_view str = GetCode(ch); + memcpy(result.data() + cur, str.data(), str.size()); cur += str.size(); } } @@ -895,8 +871,8 @@ int Main(int argc, char* argv[]) { int error = 0; const std::string& file = args[i]; if (IsDirectory(file, &error)) { - if (!SearchFiles(file, &file_map, std::string(kJsSuffix)) || - !SearchFiles(file, &file_map, std::string(kMjsSuffix))) { + if (!SearchFiles(file, &file_map, kJsSuffix) || + !SearchFiles(file, &file_map, kMjsSuffix)) { return 1; } } else if (error != 0) { From af4aeac64dbf0625333885ae7afae8968885a62c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 8 Sep 2024 15:06:22 -0400 Subject: [PATCH 02/12] reducing memory usage and making GetCode's buffer potentially evaluated at compile time --- tools/js2c.cc | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index a3b7fcf68cf1eb..01f81be90d856e 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -359,17 +359,45 @@ std::string GetVariableName(const std::string& id) { return result; } -static const std::array GetCodeTable() { - std::array table{}; - for (size_t i = 0; i < 65536; ++i) { - table[i] = std::to_string(i) + ','; + + +constexpr std::pair,std::array> precompute_string() { + std::array str; + std::array off; + off[0] = 0; + char *p = &str[0]; + // We roll our own int to string conversion to get constexpr + constexpr auto const_int_to_str = [](uint16_t value, char *s) -> size_t { + int index = 0; + do { + s[index++] = '0' + (value % 10); + value /= 10; + } while (value != 0); + + for (int i = 0; i < index / 2; ++i) { + char temp = s[i]; + s[i] = s[index - i - 1]; + s[index - i - 1] = temp; + } + s[index] = ','; + return index; +}; + for (int i = 0; i < 65536; ++i) { + size_t offset = const_int_to_str(i, p); + p += offset; + off[i + 1] = off[i] + offset; } - return table; + return {str, off}; } const std::string_view GetCode(uint16_t index) { - static std::array table = GetCodeTable(); - return table[index]; + // uses about 644254 bytes of memory. An array of 65536 strings might use + // 2097152 bytes so we save 3x the memory + // Furthermore, compilers such as GCC will evaluate precompute_string() at compile time, thus + // potentially speeding up the program's startup time. + static auto [backing_string, offsets] = precompute_string(); + return std::string_view(&backing_string[offsets[index]], + offsets[index + 1] - offsets[index]); } #ifdef NODE_JS2C_USE_STRING_LITERALS From 143b3b4f90f1a16d46fef963bdc3eeca3e3dbe35 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 8 Sep 2024 15:10:43 -0400 Subject: [PATCH 03/12] Update js2c.cc Co-authored-by: Daniel Lemire --- tools/js2c.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/js2c.cc b/tools/js2c.cc index 01f81be90d856e..46fff26791f004 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -1,4 +1,5 @@ #include +#include #include #include #include From c302c786c457c5da628eba2a5ca8a158b0653a67 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 8 Sep 2024 15:19:51 -0400 Subject: [PATCH 04/12] Update js2c.cc Co-authored-by: Daniel Lemire --- tools/js2c.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/js2c.cc b/tools/js2c.cc index 46fff26791f004..bd6147e45603c3 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -362,6 +362,8 @@ std::string GetVariableName(const std::string& id) { +// 382106 is the length of the string "0,1,2,3,...,65535,". +// 65537 is 2**16 + 1 constexpr std::pair,std::array> precompute_string() { std::array str; std::array off; From 48ca54d48e456c07ed4ed81451ea3fa977f771ba Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 8 Sep 2024 17:50:12 -0400 Subject: [PATCH 05/12] lint --- tools/js2c.cc | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index bd6147e45603c3..46b95be9faec83 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -360,31 +360,30 @@ std::string GetVariableName(const std::string& id) { return result; } - - // 382106 is the length of the string "0,1,2,3,...,65535,". // 65537 is 2**16 + 1 -constexpr std::pair,std::array> precompute_string() { +constexpr std::pair, std::array> +precompute_string() { std::array str; std::array off; off[0] = 0; - char *p = &str[0]; + char* p = &str[0]; // We roll our own int to string conversion to get constexpr - constexpr auto const_int_to_str = [](uint16_t value, char *s) -> size_t { + constexpr auto const_int_to_str = [](uint16_t value, char* s) -> size_t { int index = 0; do { - s[index++] = '0' + (value % 10); - value /= 10; + s[index++] = '0' + (value % 10); + value /= 10; } while (value != 0); for (int i = 0; i < index / 2; ++i) { - char temp = s[i]; - s[i] = s[index - i - 1]; - s[index - i - 1] = temp; + char temp = s[i]; + s[i] = s[index - i - 1]; + s[index - i - 1] = temp; } s[index] = ','; return index; -}; + }; for (int i = 0; i < 65536; ++i) { size_t offset = const_int_to_str(i, p); p += offset; @@ -396,8 +395,8 @@ constexpr std::pair,std::array> precom const std::string_view GetCode(uint16_t index) { // uses about 644254 bytes of memory. An array of 65536 strings might use // 2097152 bytes so we save 3x the memory - // Furthermore, compilers such as GCC will evaluate precompute_string() at compile time, thus - // potentially speeding up the program's startup time. + // Furthermore, compilers such as GCC will evaluate precompute_string() at + // compile time, thus potentially speeding up the program's startup time. static auto [backing_string, offsets] = precompute_string(); return std::string_view(&backing_string[offsets[index]], offsets[index + 1] - offsets[index]); From 8d7a19f27d9ea52492a01d3ca371184ccf37bf84 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Sep 2024 10:19:31 -0400 Subject: [PATCH 06/12] minor fix --- tools/js2c.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index 46b95be9faec83..74ce59f6aed968 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -369,20 +369,20 @@ precompute_string() { off[0] = 0; char* p = &str[0]; // We roll our own int to string conversion to get constexpr - constexpr auto const_int_to_str = [](uint16_t value, char* s) -> size_t { - int index = 0; + constexpr auto const_int_to_str = [](uint16_t value, char* s) -> uint32_t { + uint32_t index = 0; do { s[index++] = '0' + (value % 10); value /= 10; } while (value != 0); - for (int i = 0; i < index / 2; ++i) { + for (uint32_t i = 0; i < index / 2; ++i) { char temp = s[i]; s[i] = s[index - i - 1]; s[index - i - 1] = temp; } s[index] = ','; - return index; + return index + 1; }; for (int i = 0; i < 65536; ++i) { size_t offset = const_int_to_str(i, p); @@ -397,7 +397,10 @@ const std::string_view GetCode(uint16_t index) { // 2097152 bytes so we save 3x the memory // Furthermore, compilers such as GCC will evaluate precompute_string() at // compile time, thus potentially speeding up the program's startup time. + // Theoretically, we could use consteval, but the function is expensive and + // some compilers will refuse to compile it. static auto [backing_string, offsets] = precompute_string(); + //return std::to_string(index) + ","; return std::string_view(&backing_string[offsets[index]], offsets[index + 1] - offsets[index]); } From 15a0abca445f25a75c8c2b526dc987613e437413 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 9 Sep 2024 10:26:46 -0400 Subject: [PATCH 07/12] Update tools/js2c.cc --- tools/js2c.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index 74ce59f6aed968..24b6d9fee91840 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -400,7 +400,6 @@ const std::string_view GetCode(uint16_t index) { // Theoretically, we could use consteval, but the function is expensive and // some compilers will refuse to compile it. static auto [backing_string, offsets] = precompute_string(); - //return std::to_string(index) + ","; return std::string_view(&backing_string[offsets[index]], offsets[index + 1] - offsets[index]); } From caaceade9590423ed13baac4ebce759abb4e2e78 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Sep 2024 11:19:40 -0400 Subject: [PATCH 08/12] removing constexpr --- tools/js2c.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index 24b6d9fee91840..876bd4bb3215bf 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -362,13 +362,13 @@ std::string GetVariableName(const std::string& id) { // 382106 is the length of the string "0,1,2,3,...,65535,". // 65537 is 2**16 + 1 -constexpr std::pair, std::array> +// This function could be constexpr, but it might become too expensive to compile. +std::pair, std::array> precompute_string() { std::array str; std::array off; off[0] = 0; char* p = &str[0]; - // We roll our own int to string conversion to get constexpr constexpr auto const_int_to_str = [](uint16_t value, char* s) -> uint32_t { uint32_t index = 0; do { @@ -393,12 +393,8 @@ precompute_string() { } const std::string_view GetCode(uint16_t index) { - // uses about 644254 bytes of memory. An array of 65536 strings might use - // 2097152 bytes so we save 3x the memory - // Furthermore, compilers such as GCC will evaluate precompute_string() at - // compile time, thus potentially speeding up the program's startup time. - // Theoretically, we could use consteval, but the function is expensive and - // some compilers will refuse to compile it. + // We use about 644254 bytes of memory. An array of 65536 strings might use + // 2097152 bytes so we save 3x the memory. static auto [backing_string, offsets] = precompute_string(); return std::string_view(&backing_string[offsets[index]], offsets[index + 1] - offsets[index]); From 93aed49396aff54acedf0c2f00686483b36e8ee5 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 9 Sep 2024 11:23:41 -0400 Subject: [PATCH 09/12] lint --- tools/js2c.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/js2c.cc b/tools/js2c.cc index 876bd4bb3215bf..ff2b35a38d0d73 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -362,7 +362,8 @@ std::string GetVariableName(const std::string& id) { // 382106 is the length of the string "0,1,2,3,...,65535,". // 65537 is 2**16 + 1 -// This function could be constexpr, but it might become too expensive to compile. +// This function could be constexpr, but it might become too expensive to +// compile. std::pair, std::array> precompute_string() { std::array str; From f39b3dc3947b6b1aabbe18ff1cc733b68fc71091 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Wed, 11 Sep 2024 13:17:34 -0400 Subject: [PATCH 10/12] Update tools/js2c.cc Co-authored-by: Daniel Lemire --- tools/js2c.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/js2c.cc b/tools/js2c.cc index ff2b35a38d0d73..144f8299f42595 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -360,6 +360,11 @@ std::string GetVariableName(const std::string& id) { return result; } +// The function returns a string buffer and an array of +// offsets. The string is just "0,1,2,3,...,65535,". +// The second array contain the offsets indicating the +// start of each substring ("0,", "1,", etc.) and the final +// offset points just beyond the end of the string. // 382106 is the length of the string "0,1,2,3,...,65535,". // 65537 is 2**16 + 1 // This function could be constexpr, but it might become too expensive to From fc7b2efe1c94ff6de94a19bc2ccf8aaebf7444e3 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Wed, 11 Sep 2024 13:17:41 -0400 Subject: [PATCH 11/12] Update tools/js2c.cc Co-authored-by: Daniel Lemire --- tools/js2c.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/js2c.cc b/tools/js2c.cc index 144f8299f42595..b33b958b2140a1 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -371,6 +371,7 @@ std::string GetVariableName(const std::string& id) { // compile. std::pair, std::array> precompute_string() { + // the string "0,1,2,3,...,65535,". std::array str; std::array off; off[0] = 0; From ce40b6059bb84167528c3c5f74a35c93f965b660 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Wed, 11 Sep 2024 13:17:47 -0400 Subject: [PATCH 12/12] Update tools/js2c.cc Co-authored-by: Daniel Lemire --- tools/js2c.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/js2c.cc b/tools/js2c.cc index b33b958b2140a1..21992cbe894a88 100644 --- a/tools/js2c.cc +++ b/tools/js2c.cc @@ -373,6 +373,7 @@ std::pair, std::array> precompute_string() { // the string "0,1,2,3,...,65535,". std::array str; + // the offsets in the string pointing at the beginning of each substring std::array off; off[0] = 0; char* p = &str[0];