Skip to content

Commit 7960a95

Browse files
committed
src: refactor vector writing in snapshot builder
- Build a static table of octal strings and use it instead of building octal strings repeatedly during printing.
- Print a newline and an offset for every 64 bytes in the case of printing array literals so it's easier to locate variation in snapshot blobs.
- Rework the printing routines so that the differences are only made in a WriteByteVectorLiteral routine. We can update this for compression support in the future.
- Rename the SnapshotBuilder::Generate() overload that writes the data as C++ source instead of a blob to SnapshotBuilder::GenerateAsSource() for clarity, and move the file stream operations into it to streamline error handling.
1 parent c301404 commit 7960a95

File tree

4 files changed

+133
-96
lines changed

4 files changed

+133
-96
lines changed

node.gyp

+3-3
Original file line number | Diff line number | Diff line change
@@ -885,9 +885,6 @@
885885
'node_target_type=="executable"', {
886886
'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
887887
}],
888-
['OS in "linux mac"', {
889-
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS' ],
890-
}],
891888
[ 'use_openssl_def==1', {
892889
# TODO(bnoordhuis) Make all platforms export the same list of symbols.
893890
# Teach mkssldef.py to generate linker maps that UNIX linkers understand.
@@ -1256,6 +1253,9 @@
12561253
],
12571254

12581255
'conditions': [
1256+
['OS in "linux mac"', {
1257+
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS=1' ],
1258+
}],
12591259
[ 'node_use_openssl=="true"', {
12601260
'defines': [
12611261
'HAVE_OPENSSL=1',

src/node_snapshot_builder.h

+6-4
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,12 @@ struct SnapshotData;
1818

1919
class NODE_EXTERN_PRIVATE SnapshotBuilder {
2020
public:
21-
static ExitCode Generate(std::ostream& out,
22-
const std::vector<std::string>& args,
23-
const std::vector<std::string>& exec_args,
24-
std::optional<std::string_view> main_script);
21+
static ExitCode GenerateAsSource(
22+
const char* out_path,
23+
const std::vector<std::string>& args,
24+
const std::vector<std::string>& exec_args,
25+
std::optional<std::string_view> main_script_path = std::nullopt,
26+
bool use_string_literals = true);
2527

2628
// Generate the snapshot into out.
2729
static ExitCode Generate(SnapshotData* out,

src/node_snapshotable.cc

+111-72
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11

22
#include "node_snapshotable.h"
3+
#include <fstream>
34
#include <iostream>
45
#include <sstream>
56
#include <vector>
@@ -711,13 +712,6 @@ SnapshotData::~SnapshotData() {
711712
}
712713
}
713714

714-
template <typename T>
715-
void WriteVector(std::ostream* ss, const T* vec, size_t size) {
716-
for (size_t i = 0; i < size; i++) {
717-
*ss << std::to_string(vec[i]) << (i == size - 1 ? '\n' : ',');
718-
}
719-
}
720-
721715
static std::string GetCodeCacheDefName(const std::string& id) {
722716
char buf[64] = {0};
723717
size_t size = id.size();
@@ -742,48 +736,71 @@ static std::string FormatSize(size_t size) {
742736
return buf;
743737
}
744738

745-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
746-
static void WriteDataAsCharString(std::ostream* ss,
747-
const uint8_t* data,
748-
size_t length) {
749-
for (size_t i = 0; i < length; i++) {
750-
const uint8_t ch = data[i];
751-
// We can print most printable characters directly. The exceptions are '\'
752-
// (escape characters), " (would end the string), and ? (trigraphs). The
753-
// latter may be overly conservative: we compile with C++17 which doesn't
754-
// support trigraphs.
755-
if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') {
756-
*ss << ch;
757-
} else {
758-
// All other characters are blindly output as octal.
759-
const char c0 = '0' + ((ch >> 6) & 7);
760-
const char c1 = '0' + ((ch >> 3) & 7);
761-
const char c2 = '0' + (ch & 7);
762-
*ss << "\\" << c0 << c1 << c2;
763-
}
764-
if (i % 64 == 63) {
765-
// Go to a newline every 64 bytes since many text editors have
766-
// problems with very long lines.
767-
*ss << "\"\n\"";
768-
}
739+
std::string ToOctalString(const uint8_t ch) {
740+
// We can print most printable characters directly. The exceptions are '\'
741+
// (escape characters), " (would end the string), and ? (trigraphs). The
742+
// latter may be overly conservative: we compile with C++17 which doesn't
743+
// support trigraphs.
744+
if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') {
745+
return std::string(1, static_cast<char>(ch));
769746
}
747+
// All other characters are blindly output as octal.
748+
const char c0 = '0' + ((ch >> 6) & 7);
749+
const char c1 = '0' + ((ch >> 3) & 7);
750+
const char c2 = '0' + (ch & 7);
751+
return std::string("\\") + c0 + c1 + c2;
770752
}
771753

772-
static void WriteStaticCodeCacheDataAsStringLiteral(
773-
std::ostream* ss, const builtins::CodeCacheInfo& info) {
774-
*ss << "static const uint8_t *" << GetCodeCacheDefName(info.id)
775-
<< "= reinterpret_cast<const uint8_t *>(\"";
776-
WriteDataAsCharString(ss, info.data.data, info.data.length);
777-
*ss << "\");\n";
754+
std::vector<std::string> GetOctalTable() {
755+
size_t size = 1 << 8;
756+
std::vector<std::string> code_table(size);
757+
for (size_t i = 0; i < size; ++i) {
758+
code_table[i] = ToOctalString(static_cast<uint8_t>(i));
759+
}
760+
return code_table;
778761
}
779-
#else
780-
static void WriteStaticCodeCacheDataAsArray(
781-
std::ostream* ss, const builtins::CodeCacheInfo& info) {
782-
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
783-
WriteVector(ss, info.data.data, info.data.length);
784-
*ss << "};\n";
762+
763+
const std::string& GetOctalCode(uint8_t index) {
764+
static std::vector<std::string> table = GetOctalTable();
765+
return table[index];
766+
}
767+
768+
template <typename T>
769+
void WriteByteVectorLiteral(std::ostream* ss,
770+
const T* vec,
771+
size_t size,
772+
const char* var_name,
773+
bool use_string_literals) {
774+
constexpr bool is_uint8_t = std::is_same_v<T, uint8_t>;
775+
static_assert(is_uint8_t || std::is_same_v<T, char>);
776+
constexpr const char* type_name = is_uint8_t ? "uint8_t" : "char";
777+
if (use_string_literals) {
778+
const uint8_t* data = reinterpret_cast<const uint8_t*>(vec);
779+
*ss << "static const " << type_name << " *" << var_name << " = ";
780+
*ss << (is_uint8_t ? R"(reinterpret_cast<const uint8_t *>(")" : "\"");
781+
for (size_t i = 0; i < size; i++) {
782+
const uint8_t ch = data[i];
783+
*ss << GetOctalCode(ch);
784+
if (i % 64 == 63) {
785+
// Go to a newline every 64 bytes since many text editors have
786+
// problems with very long lines.
787+
*ss << "\"\n\"";
788+
}
789+
}
790+
*ss << (is_uint8_t ? "\");\n" : "\";\n");
791+
} else {
792+
*ss << "static const " << type_name << " " << var_name << "[] = {";
793+
for (size_t i = 0; i < size; i++) {
794+
*ss << std::to_string(vec[i]) << (i == size - 1 ? '\n' : ',');
795+
if (i % 64 == 63) {
796+
// Print a newline every 64 units and an offset to improve
797+
// readability.
798+
*ss << " // " << (i / 64) << "\n";
799+
}
800+
}
801+
*ss << "};\n";
802+
}
785803
}
786-
#endif
787804

788805
static void WriteCodeCacheInitializer(std::ostream* ss,
789806
const std::string& id,
@@ -796,7 +813,9 @@ static void WriteCodeCacheInitializer(std::ostream* ss,
796813
*ss << " },\n";
797814
}
798815

799-
void FormatBlob(std::ostream& ss, const SnapshotData* data) {
816+
void FormatBlob(std::ostream& ss,
817+
const SnapshotData* data,
818+
bool use_string_literals) {
800819
ss << R"(#include <cstddef>
801820
#include "env.h"
802821
#include "node_snapshot_builder.h"
@@ -807,32 +826,24 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
807826
namespace node {
808827
)";
809828

810-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
811-
ss << R"(static const char *v8_snapshot_blob_data = ")";
812-
WriteDataAsCharString(
813-
&ss,
814-
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
815-
data->v8_snapshot_blob_data.raw_size);
816-
ss << R"(";)";
817-
#else
818-
ss << R"(static const char v8_snapshot_blob_data[] = {)";
819-
WriteVector(&ss,
820-
data->v8_snapshot_blob_data.data,
821-
data->v8_snapshot_blob_data.raw_size);
822-
ss << R"(};)";
823-
#endif
829+
WriteByteVectorLiteral(&ss,
830+
data->v8_snapshot_blob_data.data,
831+
data->v8_snapshot_blob_data.raw_size,
832+
"v8_snapshot_blob_data",
833+
use_string_literals);
824834

825835
ss << R"(static const int v8_snapshot_blob_size = )"
826-
<< data->v8_snapshot_blob_data.raw_size << ";";
836+
<< data->v8_snapshot_blob_data.raw_size << ";\n";
827837

838+
// Windows can't deal with too many large vector initializers.
839+
// Store the data into static arrays first.
828840
for (const auto& item : data->code_cache) {
829-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
830-
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
831-
#else
832-
// Windows can't deal with too many large vector initializers.
833-
// Store the data into static arrays first.
834-
WriteStaticCodeCacheDataAsArray(&ss, item);
835-
#endif
841+
std::string var_name = GetCodeCacheDefName(item.id);
842+
WriteByteVectorLiteral(&ss,
843+
item.data.data,
844+
item.data.length,
845+
var_name.c_str(),
846+
use_string_literals);
836847
}
837848

838849
ss << R"(const SnapshotData snapshot_data {
@@ -1069,17 +1080,45 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
10691080
return ExitCode::kNoFailure;
10701081
}
10711082

1072-
ExitCode SnapshotBuilder::Generate(
1073-
std::ostream& out,
1083+
ExitCode SnapshotBuilder::GenerateAsSource(
1084+
const char* out_path,
10741085
const std::vector<std::string>& args,
10751086
const std::vector<std::string>& exec_args,
1076-
std::optional<std::string_view> main_script) {
1087+
std::optional<std::string_view> main_script_path,
1088+
bool use_string_literals) {
1089+
std::string main_script_content;
1090+
std::optional<std::string_view> main_script_optional;
1091+
if (main_script_path.has_value()) {
1092+
int r = ReadFileSync(&main_script_content, main_script_path.value().data());
1093+
if (r != 0) {
1094+
FPrintF(stderr,
1095+
"Cannot read main script %s for building snapshot. %s: %s",
1096+
main_script_path.value(),
1097+
uv_err_name(r),
1098+
uv_strerror(r));
1099+
return ExitCode::kGenericUserError;
1100+
}
1101+
main_script_optional = main_script_content;
1102+
}
1103+
1104+
std::ofstream out(out_path, std::ios::out | std::ios::binary);
1105+
if (!out) {
1106+
FPrintF(stderr, "Cannot open %s for output.\n", out_path);
1107+
return ExitCode::kGenericUserError;
1108+
}
1109+
10771110
SnapshotData data;
1078-
ExitCode exit_code = Generate(&data, args, exec_args, main_script);
1111+
ExitCode exit_code = Generate(&data, args, exec_args, main_script_optional);
10791112
if (exit_code != ExitCode::kNoFailure) {
10801113
return exit_code;
10811114
}
1082-
FormatBlob(out, &data);
1115+
FormatBlob(out, &data, use_string_literals);
1116+
1117+
if (!out) {
1118+
std::cerr << "Failed to write to " << out_path << "\n";
1119+
exit_code = node::ExitCode::kGenericUserError;
1120+
}
1121+
10831122
return exit_code;
10841123
}
10851124

tools/snapshot/node_mksnapshot.cc

+13-17
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#include <cstdio>
2-
#include <fstream>
32
#include <iostream>
43
#include <sstream>
54
#include <string>
@@ -72,29 +71,26 @@ int BuildSnapshot(int argc, char* argv[]) {
7271
CHECK_EQ(result->exit_code(), 0);
7372

7473
std::string out_path;
74+
std::optional<std::string_view> main_script_path = std::nullopt;
7575
if (node::per_process::cli_options->per_isolate->build_snapshot) {
76+
main_script_path = result->args()[1];
7677
out_path = result->args()[2];
7778
} else {
7879
out_path = result->args()[1];
7980
}
8081

81-
std::ofstream out(out_path, std::ios::out | std::ios::binary);
82-
if (!out) {
83-
std::cerr << "Cannot open " << out_path << "\n";
84-
return 1;
85-
}
82+
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
83+
bool use_string_literals = true;
84+
#else
85+
bool use_string_literals = false;
86+
#endif
8687

87-
node::ExitCode exit_code = node::ExitCode::kNoFailure;
88-
{
89-
exit_code = node::SnapshotBuilder::Generate(
90-
out, result->args(), result->exec_args(), std::nullopt);
91-
if (exit_code == node::ExitCode::kNoFailure) {
92-
if (!out) {
93-
std::cerr << "Failed to write " << out_path << "\n";
94-
exit_code = node::ExitCode::kGenericUserError;
95-
}
96-
}
97-
}
88+
node::ExitCode exit_code =
89+
node::SnapshotBuilder::GenerateAsSource(out_path.c_str(),
90+
result->args(),
91+
result->exec_args(),
92+
main_script_path,
93+
use_string_literals);
9894

9995
node::TearDownOncePerProcess();
10096
return static_cast<int>(exit_code);

0 commit comments

Comments
 (0)