Skip to content

Commit 634eb50

Browse files
authored
src: refactor vector writing in snapshot builder
- Build a static table of octal strings and use it instead of building octal strings repeatedly during printing.
- Print a newline and an offset for every 64 bytes when printing array literals, so it's easier to locate variations in snapshot blobs.
- Rework the printing routines so that the differences are confined to a single WriteByteVectorLiteral routine. We can update this for compression support in the future.
- Rename the SnapshotBuilder::Generate() overload that writes the data as C++ source instead of a blob to SnapshotBuilder::GenerateAsSource() for clarity, and move the file stream operations into it to streamline error handling.

PR-URL: #48851
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
Reviewed-By: Darshan Sen <raisinten@gmail.com>
1 parent 52b3007 commit 634eb50

File tree

4 files changed

+133
-96
lines changed

4 files changed

+133
-96
lines changed

node.gyp

+3-3
Original file line numberDiff line numberDiff line change
@@ -885,9 +885,6 @@
885885
'node_target_type=="executable"', {
886886
'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
887887
}],
888-
['OS in "linux mac"', {
889-
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS' ],
890-
}],
891888
[ 'use_openssl_def==1', {
892889
# TODO(bnoordhuis) Make all platforms export the same list of symbols.
893890
# Teach mkssldef.py to generate linker maps that UNIX linkers understand.
@@ -1257,6 +1254,9 @@
12571254
],
12581255

12591256
'conditions': [
1257+
['OS in "linux mac"', {
1258+
'defines': [ 'NODE_MKSNAPSHOT_USE_STRING_LITERALS=1' ],
1259+
}],
12601260
[ 'node_use_openssl=="true"', {
12611261
'defines': [
12621262
'HAVE_OPENSSL=1',

src/node_snapshot_builder.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ struct SnapshotData;
1818

1919
class NODE_EXTERN_PRIVATE SnapshotBuilder {
2020
public:
21-
static ExitCode Generate(std::ostream& out,
22-
const std::vector<std::string>& args,
23-
const std::vector<std::string>& exec_args,
24-
std::optional<std::string_view> main_script);
21+
static ExitCode GenerateAsSource(
22+
const char* out_path,
23+
const std::vector<std::string>& args,
24+
const std::vector<std::string>& exec_args,
25+
std::optional<std::string_view> main_script_path = std::nullopt,
26+
bool use_string_literals = true);
2527

2628
// Generate the snapshot into out.
2729
static ExitCode Generate(SnapshotData* out,

src/node_snapshotable.cc

+111-72
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
#include "node_snapshotable.h"
3+
#include <fstream>
34
#include <iostream>
45
#include <sstream>
56
#include <vector>
@@ -715,13 +716,6 @@ SnapshotData::~SnapshotData() {
715716
}
716717
}
717718

718-
template <typename T>
719-
void WriteVector(std::ostream* ss, const T* vec, size_t size) {
720-
for (size_t i = 0; i < size; i++) {
721-
*ss << std::to_string(vec[i]) << (i == size - 1 ? '\n' : ',');
722-
}
723-
}
724-
725719
static std::string GetCodeCacheDefName(const std::string& id) {
726720
char buf[64] = {0};
727721
size_t size = id.size();
@@ -746,48 +740,71 @@ static std::string FormatSize(size_t size) {
746740
return buf;
747741
}
748742

749-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
750-
static void WriteDataAsCharString(std::ostream* ss,
751-
const uint8_t* data,
752-
size_t length) {
753-
for (size_t i = 0; i < length; i++) {
754-
const uint8_t ch = data[i];
755-
// We can print most printable characters directly. The exceptions are '\'
756-
// (escape characters), " (would end the string), and ? (trigraphs). The
757-
// latter may be overly conservative: we compile with C++17 which doesn't
758-
// support trigraphs.
759-
if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') {
760-
*ss << ch;
761-
} else {
762-
// All other characters are blindly output as octal.
763-
const char c0 = '0' + ((ch >> 6) & 7);
764-
const char c1 = '0' + ((ch >> 3) & 7);
765-
const char c2 = '0' + (ch & 7);
766-
*ss << "\\" << c0 << c1 << c2;
767-
}
768-
if (i % 64 == 63) {
769-
// Go to a newline every 64 bytes since many text editors have
770-
// problems with very long lines.
771-
*ss << "\"\n\"";
772-
}
743+
// Returns the text used to represent the byte `ch` inside a generated C++
// string literal.
//
// Despite the name, printable ASCII characters (' ' through '~') are returned
// unchanged as a one-character string. The exceptions are '\' (starts an
// escape sequence), '"' (would terminate the literal) and '?' (could form a
// trigraph; possibly overly conservative since we build as C++17). Those three
// and every non-printable byte are returned as a backslash followed by exactly
// three octal digits.
std::string ToOctalString(const uint8_t ch) {
  const bool is_printable = ch >= ' ' && ch <= '~';
  const bool is_special = ch == '\\' || ch == '"' || ch == '?';

  if (!is_printable || is_special) {
    // Always emit three digits so that a following digit character in the
    // data can never be absorbed into this escape sequence.
    std::string escaped(4, '\\');
    escaped[1] = static_cast<char>('0' + ((ch >> 6) & 7));
    escaped[2] = static_cast<char>('0' + ((ch >> 3) & 7));
    escaped[3] = static_cast<char>('0' + (ch & 7));
    return escaped;
  }

  return std::string(1, static_cast<char>(ch));
}
775757

776-
static void WriteStaticCodeCacheDataAsStringLiteral(
777-
std::ostream* ss, const builtins::CodeCacheInfo& info) {
778-
*ss << "static const uint8_t *" << GetCodeCacheDefName(info.id)
779-
<< "= reinterpret_cast<const uint8_t *>(\"";
780-
WriteDataAsCharString(ss, info.data.data, info.data.length);
781-
*ss << "\");\n";
758+
std::vector<std::string> GetOctalTable() {
759+
size_t size = 1 << 8;
760+
std::vector<std::string> code_table(size);
761+
for (size_t i = 0; i < size; ++i) {
762+
code_table[i] = ToOctalString(static_cast<uint8_t>(i));
763+
}
764+
return code_table;
782765
}
783-
#else
784-
static void WriteStaticCodeCacheDataAsArray(
785-
std::ostream* ss, const builtins::CodeCacheInfo& info) {
786-
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
787-
WriteVector(ss, info.data.data, info.data.length);
788-
*ss << "};\n";
766+
767+
const std::string& GetOctalCode(uint8_t index) {
768+
static std::vector<std::string> table = GetOctalTable();
769+
return table[index];
770+
}
771+
772+
template <typename T>
773+
void WriteByteVectorLiteral(std::ostream* ss,
774+
const T* vec,
775+
size_t size,
776+
const char* var_name,
777+
bool use_string_literals) {
778+
constexpr bool is_uint8_t = std::is_same_v<T, uint8_t>;
779+
static_assert(is_uint8_t || std::is_same_v<T, char>);
780+
constexpr const char* type_name = is_uint8_t ? "uint8_t" : "char";
781+
if (use_string_literals) {
782+
const uint8_t* data = reinterpret_cast<const uint8_t*>(vec);
783+
*ss << "static const " << type_name << " *" << var_name << " = ";
784+
*ss << (is_uint8_t ? R"(reinterpret_cast<const uint8_t *>(")" : "\"");
785+
for (size_t i = 0; i < size; i++) {
786+
const uint8_t ch = data[i];
787+
*ss << GetOctalCode(ch);
788+
if (i % 64 == 63) {
789+
// Go to a newline every 64 bytes since many text editors have
790+
// problems with very long lines.
791+
*ss << "\"\n\"";
792+
}
793+
}
794+
*ss << (is_uint8_t ? "\");\n" : "\";\n");
795+
} else {
796+
*ss << "static const " << type_name << " " << var_name << "[] = {";
797+
for (size_t i = 0; i < size; i++) {
798+
*ss << std::to_string(vec[i]) << (i == size - 1 ? '\n' : ',');
799+
if (i % 64 == 63) {
800+
// Print a newline every 64 units and a offset to improve
801+
// readability.
802+
*ss << " // " << (i / 64) << "\n";
803+
}
804+
}
805+
*ss << "};\n";
806+
}
789807
}
790-
#endif
791808

792809
static void WriteCodeCacheInitializer(std::ostream* ss,
793810
const std::string& id,
@@ -800,7 +817,9 @@ static void WriteCodeCacheInitializer(std::ostream* ss,
800817
*ss << " },\n";
801818
}
802819

803-
void FormatBlob(std::ostream& ss, const SnapshotData* data) {
820+
void FormatBlob(std::ostream& ss,
821+
const SnapshotData* data,
822+
bool use_string_literals) {
804823
ss << R"(#include <cstddef>
805824
#include "env.h"
806825
#include "node_snapshot_builder.h"
@@ -811,32 +830,24 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
811830
namespace node {
812831
)";
813832

814-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
815-
ss << R"(static const char *v8_snapshot_blob_data = ")";
816-
WriteDataAsCharString(
817-
&ss,
818-
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
819-
data->v8_snapshot_blob_data.raw_size);
820-
ss << R"(";)";
821-
#else
822-
ss << R"(static const char v8_snapshot_blob_data[] = {)";
823-
WriteVector(&ss,
824-
data->v8_snapshot_blob_data.data,
825-
data->v8_snapshot_blob_data.raw_size);
826-
ss << R"(};)";
827-
#endif
833+
WriteByteVectorLiteral(&ss,
834+
data->v8_snapshot_blob_data.data,
835+
data->v8_snapshot_blob_data.raw_size,
836+
"v8_snapshot_blob_data",
837+
use_string_literals);
828838

829839
ss << R"(static const int v8_snapshot_blob_size = )"
830-
<< data->v8_snapshot_blob_data.raw_size << ";";
840+
<< data->v8_snapshot_blob_data.raw_size << ";\n";
831841

842+
// Windows can't deal with too many large vector initializers.
843+
// Store the data into static arrays first.
832844
for (const auto& item : data->code_cache) {
833-
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
834-
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
835-
#else
836-
// Windows can't deal with too many large vector initializers.
837-
// Store the data into static arrays first.
838-
WriteStaticCodeCacheDataAsArray(&ss, item);
839-
#endif
845+
std::string var_name = GetCodeCacheDefName(item.id);
846+
WriteByteVectorLiteral(&ss,
847+
item.data.data,
848+
item.data.length,
849+
var_name.c_str(),
850+
use_string_literals);
840851
}
841852

842853
ss << R"(const SnapshotData snapshot_data {
@@ -1073,17 +1084,45 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
10731084
return ExitCode::kNoFailure;
10741085
}
10751086

1076-
ExitCode SnapshotBuilder::Generate(
1077-
std::ostream& out,
1087+
ExitCode SnapshotBuilder::GenerateAsSource(
1088+
const char* out_path,
10781089
const std::vector<std::string>& args,
10791090
const std::vector<std::string>& exec_args,
1080-
std::optional<std::string_view> main_script) {
1091+
std::optional<std::string_view> main_script_path,
1092+
bool use_string_literals) {
1093+
std::string main_script_content;
1094+
std::optional<std::string_view> main_script_optional;
1095+
if (main_script_path.has_value()) {
1096+
int r = ReadFileSync(&main_script_content, main_script_path.value().data());
1097+
if (r != 0) {
1098+
FPrintF(stderr,
1099+
"Cannot read main script %s for building snapshot. %s: %s",
1100+
main_script_path.value(),
1101+
uv_err_name(r),
1102+
uv_strerror(r));
1103+
return ExitCode::kGenericUserError;
1104+
}
1105+
main_script_optional = main_script_content;
1106+
}
1107+
1108+
std::ofstream out(out_path, std::ios::out | std::ios::binary);
1109+
if (!out) {
1110+
FPrintF(stderr, "Cannot open %s for output.\n", out_path);
1111+
return ExitCode::kGenericUserError;
1112+
}
1113+
10811114
SnapshotData data;
1082-
ExitCode exit_code = Generate(&data, args, exec_args, main_script);
1115+
ExitCode exit_code = Generate(&data, args, exec_args, main_script_optional);
10831116
if (exit_code != ExitCode::kNoFailure) {
10841117
return exit_code;
10851118
}
1086-
FormatBlob(out, &data);
1119+
FormatBlob(out, &data, use_string_literals);
1120+
1121+
if (!out) {
1122+
std::cerr << "Failed to write to " << out_path << "\n";
1123+
exit_code = node::ExitCode::kGenericUserError;
1124+
}
1125+
10871126
return exit_code;
10881127
}
10891128

tools/snapshot/node_mksnapshot.cc

+13-17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include <cstdio>
2-
#include <fstream>
32
#include <iostream>
43
#include <sstream>
54
#include <string>
@@ -72,29 +71,26 @@ int BuildSnapshot(int argc, char* argv[]) {
7271
CHECK_EQ(result->exit_code(), 0);
7372

7473
std::string out_path;
74+
std::optional<std::string_view> main_script_path = std::nullopt;
7575
if (node::per_process::cli_options->per_isolate->build_snapshot) {
76+
main_script_path = result->args()[1];
7677
out_path = result->args()[2];
7778
} else {
7879
out_path = result->args()[1];
7980
}
8081

81-
std::ofstream out(out_path, std::ios::out | std::ios::binary);
82-
if (!out) {
83-
std::cerr << "Cannot open " << out_path << "\n";
84-
return 1;
85-
}
82+
#ifdef NODE_MKSNAPSHOT_USE_STRING_LITERALS
83+
bool use_string_literals = true;
84+
#else
85+
bool use_string_literals = false;
86+
#endif
8687

87-
node::ExitCode exit_code = node::ExitCode::kNoFailure;
88-
{
89-
exit_code = node::SnapshotBuilder::Generate(
90-
out, result->args(), result->exec_args(), std::nullopt);
91-
if (exit_code == node::ExitCode::kNoFailure) {
92-
if (!out) {
93-
std::cerr << "Failed to write " << out_path << "\n";
94-
exit_code = node::ExitCode::kGenericUserError;
95-
}
96-
}
97-
}
88+
node::ExitCode exit_code =
89+
node::SnapshotBuilder::GenerateAsSource(out_path.c_str(),
90+
result->args(),
91+
result->exec_args(),
92+
main_script_path,
93+
use_string_literals);
9894

9995
node::TearDownOncePerProcess();
10096
return static_cast<int>(exit_code);

0 commit comments

Comments
 (0)