Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tools: refactor js2c.cc to use c++20 #54849

Merged
merged 12 commits into from
Sep 19, 2024
110 changes: 58 additions & 52 deletions tools/js2c.cc
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cinttypes>
#include <cstdarg>
#include <cstdio>
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <string_view>
#include <vector>
Expand Down Expand Up @@ -72,42 +70,24 @@ size_t GetFileSize(const std::string& filename, int* error) {
return result;
}

// Reports whether `str` terminates with `suffix`.
// An empty suffix always matches; a suffix longer than `str` never does.
bool EndsWith(const std::string& str, std::string_view suffix) {
  const std::string_view text(str);
  // Guard first so the substr() offset below can never underflow.
  return text.size() >= suffix.size() &&
         text.substr(text.size() - suffix.size()) == suffix;
}

// Reports whether `str` begins with `prefix`.
// An empty prefix always matches; a prefix longer than `str` never does.
bool StartsWith(const std::string& str, std::string_view prefix) {
  const std::string_view text(str);
  return text.size() >= prefix.size() &&
         text.substr(0, prefix.size()) == prefix;
}

bool FilenameIsConfigGypi(const std::string& path) {
return path == "config.gypi" || EndsWith(path, "/config.gypi");
constexpr bool FilenameIsConfigGypi(const std::string_view path) {
return path == "config.gypi" || path.ends_with("/config.gypi");
}

typedef std::vector<std::string> FileList;
typedef std::map<std::string, FileList> FileMap;

bool SearchFiles(const std::string& dir,
FileMap* file_map,
const std::string& extension) {
std::string_view extension) {
uv_fs_t scan_req;
int result = uv_fs_scandir(nullptr, &scan_req, dir.c_str(), 0, nullptr);
bool errored = false;
if (result < 0) {
PrintUvError("scandir", dir.c_str(), result);
errored = true;
} else {
auto it = file_map->insert({extension, FileList()}).first;
auto it = file_map->insert({std::string(extension), FileList()}).first;
FileList& files = it->second;
files.reserve(files.size() + result);
uv_dirent_t dent;
Expand All @@ -124,7 +104,7 @@ bool SearchFiles(const std::string& dir,
}

std::string path = dir + '/' + dent.name;
if (EndsWith(path, extension)) {
if (path.ends_with(extension)) {
files.emplace_back(path);
continue;
}
Expand Down Expand Up @@ -153,12 +133,11 @@ constexpr std::string_view kJsSuffix = ".js";
constexpr std::string_view kGypiSuffix = ".gypi";
constexpr std::string_view depsPrefix = "deps/";
constexpr std::string_view libPrefix = "lib/";
std::set<std::string_view> kAllowedExtensions{
kGypiSuffix, kJsSuffix, kMjsSuffix};

std::string_view HasAllowedExtensions(const std::string& filename) {
for (const auto& ext : kAllowedExtensions) {
if (EndsWith(filename, ext)) {
constexpr std::string_view HasAllowedExtensions(
const std::string_view filename) {
for (const auto& ext : {kGypiSuffix, kJsSuffix, kMjsSuffix}) {
if (filename.ends_with(ext)) {
return ext;
}
}
Expand Down Expand Up @@ -350,17 +329,17 @@ std::string GetFileId(const std::string& filename) {
size_t start = 0;
std::string prefix;
// Strip .mjs and .js suffix
if (EndsWith(filename, kMjsSuffix)) {
if (filename.ends_with(kMjsSuffix)) {
end -= kMjsSuffix.size();
} else if (EndsWith(filename, kJsSuffix)) {
} else if (filename.ends_with(kJsSuffix)) {
end -= kJsSuffix.size();
}

// deps/acorn/acorn/dist/acorn.js -> internal/deps/acorn/acorn/dist/acorn
if (StartsWith(filename, depsPrefix)) {
if (filename.starts_with(depsPrefix)) {
start = depsPrefix.size();
prefix = "internal/deps/";
} else if (StartsWith(filename, libPrefix)) {
} else if (filename.starts_with(libPrefix)) {
// lib/internal/url.js -> internal/url
start = libPrefix.size();
prefix = "";
Expand All @@ -381,18 +360,46 @@ std::string GetVariableName(const std::string& id) {
return result;
}

std::vector<std::string> GetCodeTable() {
size_t size = 1 << 16;
std::vector<std::string> code_table(size);
for (size_t i = 0; i < size; ++i) {
code_table[i] = std::to_string(i) + ',';
// 382106 is the length of the string "0,1,2,3,...,65535,".
// 65537 is 2**16 + 1
constexpr std::pair<std::array<char, 382106>, std::array<uint32_t, 65537>>
Copy link
Member

@joyeecheung joyeecheung Sep 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if this fares well in MSVC - have you checked how long it takes to compile and run on Windows? JS2C is a tool that only gets used at build time. So if we make its compile time, say, 100ms slower so that it runs 10ms faster, the net effect is that the build gets 90ms slower.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PS: This also uses 3x less memory.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Runtime memory, or compile time memory? The runtime memory should be negligible already, while MSVC might have a hard time compiling this (sometimes MSVC gets killed during compilation due to OOM).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joyeecheung I removed the constexpr so that it always builds the array at runtime. See caacead

This was not really a concern with VS since it won't do automatic consteval in this case.

So now we just reduce by 3x the runtime memory usage which should improve the stability of the builds. (I expect that this reduction in the memory usage is 'free'. The runtime performance should be just as good or better.)

Copy link
Member

@joyeecheung joyeecheung Sep 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am skeptical whether reducing ~66MB memory usage to ~22MB does anything to the build stability - the memory used to compile/link this file could be bigger than 66MB already. Previously the Python version took ~120MB and a second or two to run but that hadn't been a bottleneck either. Anyway as long as it doesn't do too much at compile time to risk the compiler running OOM, it doesn't seem to hurt either.

precompute_string() {
std::array<char, 382106> str;
std::array<uint32_t, 65537> off;
off[0] = 0;
char* p = &str[0];
// We roll our own int to string conversion to get constexpr
// Writes the decimal representation of `value` into `s`, followed by a ','
// separator, and returns the total number of characters written (digits plus
// the separator). `s` must have room for at least 6 characters ("65535,").
// The separator has to be counted in the return value: the caller advances
// its write pointer and its offset table by this amount, so returning only
// the digit count would make the next entry overwrite the ','.
constexpr auto const_int_to_str = [](uint16_t value, char* s) -> size_t {
  int index = 0;
  // Emit the digits least-significant first; `do` ensures 0 yields "0".
  do {
    s[index++] = '0' + (value % 10);
    value /= 10;
  } while (value != 0);

  // Reverse the digits in place so they read most-significant first.
  for (int i = 0; i < index / 2; ++i) {
    char temp = s[i];
    s[i] = s[index - i - 1];
    s[index - i - 1] = temp;
  }
  s[index] = ',';
  return index + 1;
};
for (int i = 0; i < 65536; ++i) {
size_t offset = const_int_to_str(i, p);
p += offset;
off[i + 1] = off[i] + offset;
}
return code_table;
return {str, off};
}

const std::string& GetCode(uint16_t index) {
static std::vector<std::string> table = GetCodeTable();
return table[index];
const std::string_view GetCode(uint16_t index) {
// uses about 644254 bytes of memory. An array of 65536 strings might use
// 2097152 bytes so we save 3x the memory
// Furthermore, compilers such as GCC will evaluate precompute_string() at
// compile time, thus potentially speeding up the program's startup time.
static auto [backing_string, offsets] = precompute_string();
return std::string_view(&backing_string[offsets[index]],
offsets[index + 1] - offsets[index]);
}

#ifdef NODE_JS2C_USE_STRING_LITERALS
Expand Down Expand Up @@ -532,8 +539,7 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
// Avoid using snprintf on large chunks of data because it's much slower.
// It's fine to use it on small amount of data though.
if constexpr (is_two_byte) {
std::vector<uint16_t> utf16_codepoints;
utf16_codepoints.resize(count);
std::vector<uint16_t> utf16_codepoints(count);
size_t utf16_count = simdutf::convert_utf8_to_utf16(
code.data(),
code.size(),
Expand All @@ -542,8 +548,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
utf16_codepoints.resize(utf16_count);
Debug("static size %zu\n", utf16_count);
for (size_t i = 0; i < utf16_count; ++i) {
const std::string& str = GetCode(utf16_codepoints[i]);
memcpy(result.data() + cur, str.c_str(), str.size());
std::string_view str = GetCode(utf16_codepoints[i]);
memcpy(result.data() + cur, str.data(), str.size());
cur += str.size();
}
} else {
Expand All @@ -556,8 +562,8 @@ Fragment GetDefinitionImpl(const std::vector<char>& code,
i,
ch);
}
const std::string& str = GetCode(ch);
memcpy(result.data() + cur, str.c_str(), str.size());
std::string_view str = GetCode(ch);
memcpy(result.data() + cur, str.data(), str.size());
cur += str.size();
}
}
Expand Down Expand Up @@ -895,8 +901,8 @@ int Main(int argc, char* argv[]) {
int error = 0;
const std::string& file = args[i];
if (IsDirectory(file, &error)) {
if (!SearchFiles(file, &file_map, std::string(kJsSuffix)) ||
!SearchFiles(file, &file_map, std::string(kMjsSuffix))) {
if (!SearchFiles(file, &file_map, kJsSuffix) ||
!SearchFiles(file, &file_map, kMjsSuffix)) {
return 1;
}
} else if (error != 0) {
Expand Down
Loading