Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: use Blob{Des|S}erializer for SEA blobs #47962

Merged
merged 6 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 27 additions & 25 deletions src/blob_serializer_deserializer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

#include "debug_utils-inl.h"

// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.

namespace node {

Expand Down Expand Up @@ -130,22 +130,22 @@ std::vector<T> BlobDeserializer<Impl>::ReadVector() {

template <typename Impl>
std::string BlobDeserializer<Impl>::ReadString() {
size_t length = ReadArithmetic<size_t>();

if (is_debug) {
Debug("ReadString(), length=%d: ", length);
}
std::string_view view = ReadStringView(StringLogMode::kAddressAndContent);
return std::string(view);
}

CHECK_GT(length, 0); // There should be no empty strings.
MallocedBuffer<char> buf(length + 1);
memcpy(buf.data, sink.data() + read_total, length + 1);
std::string result(buf.data, length); // This creates a copy of buf.data.
template <typename Impl>
std::string_view BlobDeserializer<Impl>::ReadStringView(StringLogMode mode) {
size_t length = ReadArithmetic<size_t>();
Debug("ReadStringView(), length=%zu: ", length);

if (is_debug) {
Debug("\"%s\", read %zu bytes\n", result.c_str(), length + 1);
std::string_view result(sink.data() + read_total, length);
Debug("%p, read %zu bytes\n", result.data(), result.size());
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", result);
}

read_total += length + 1;
read_total += length;
return result;
}

Expand Down Expand Up @@ -262,26 +262,28 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
CHECK_GT(data.size(), 0); // No empty strings should be written.
size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
StringLogMode mode) {
Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
size_t written_total = WriteArithmetic<size_t>(data.size());
if (is_debug) {
std::string str = ToStr(data);
Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str());
}

// Write the null-terminated string.
size_t length = data.size() + 1;
sink.insert(sink.end(), data.c_str(), data.c_str() + length);
size_t length = data.size();
sink.insert(sink.end(), data.data(), data.data() + length);
written_total += length;

if (is_debug) {
Debug("WriteString() wrote %zu bytes\n", written_total);
Debug("WriteStringView() wrote %zu bytes\n", written_total);
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", data);
}

return written_total;
}

template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
size_t BlobSerializer<Impl>::WriteString(const std::string_view data) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WriteString is the method that writes strings, and it always logs the string content directly in debug mode. WriteStringView is the variant that does not have to log the content: it takes a StringLogMode, so callers can log only the address when the data may be binary.

return WriteStringView(data, StringLogMode::kAddressAndContent);
}

// Helper for writing an array of numeric types.
template <typename Impl>
template <typename T>
Expand Down
18 changes: 11 additions & 7 deletions src/blob_serializer_deserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.

namespace node {

Expand All @@ -27,6 +27,11 @@ class BlobSerializerDeserializer {
bool is_debug = false;
};

// Selects how much of a string the blob (de)serializer prints in debug logs.
enum class StringLogMode {
  // Log the address only. Can be used when the string contains binary content.
  kAddressOnly = 0,
  // Log the address and the string content.
  kAddressAndContent = 1,
};

// Child classes are expected to implement T Read<T>() where
// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string>
template <typename Impl>
Expand All @@ -52,7 +57,9 @@ class BlobDeserializer : public BlobSerializerDeserializer {
template <typename T>
std::vector<T> ReadVector();

// ReadString() creates a copy of the data. ReadStringView() doesn't.
std::string ReadString();
std::string_view ReadStringView(StringLogMode mode);

// Helper for reading an array of numeric types.
template <typename T>
Expand All @@ -77,11 +84,7 @@ template <typename Impl>
class BlobSerializer : public BlobSerializerDeserializer {
public:
explicit BlobSerializer(bool is_debug_v)
: BlobSerializerDeserializer(is_debug_v) {
// Currently the snapshot blob built with an empty script is around 4MB.
// So use that as the default sink size.
sink.reserve(4 * 1024 * 1024);
}
: BlobSerializerDeserializer(is_debug_v) {}
~BlobSerializer() {}

Impl* impl() { return static_cast<Impl*>(this); }
Expand All @@ -102,6 +105,7 @@ class BlobSerializer : public BlobSerializerDeserializer {
// The layout of a written string:
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
size_t WriteStringView(std::string_view data, StringLogMode mode);
size_t WriteString(const std::string& data);

// Helper for writing an array of numeric types.
Expand Down
1 change: 1 addition & 0 deletions src/debug_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ void NODE_EXTERN_PRIVATE FWrite(FILE* file, const std::string& str);
V(INSPECTOR_PROFILER) \
V(CODE_CACHE) \
V(NGTCP2_DEBUG) \
V(SEA) \
V(WASI) \
V(MKSNAPSHOT)

Expand Down
10 changes: 6 additions & 4 deletions src/node_main_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,16 @@ ExitCode NodeMainInstance::Run() {

void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) {
if (*exit_code == ExitCode::kNoFailure) {
bool is_sea = false;
bool runs_sea_code = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
is_sea = true;
LoadEnvironment(env, sea::FindSingleExecutableCode());
runs_sea_code = true;
sea::SeaResource sea = sea::FindSingleExecutableResource();
std::string_view code = sea.code;
LoadEnvironment(env, code);
}
#endif
if (!is_sea) {
if (!runs_sea_code) {
LoadEnvironment(env, StartExecutionCallback{});
}

Expand Down
147 changes: 93 additions & 54 deletions src/node_sea.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "node_sea.h"

#include "blob_serializer_deserializer-inl.h"
#include "debug_utils-inl.h"
#include "env-inl.h"
#include "json_parser.h"
Expand Down Expand Up @@ -34,16 +35,6 @@ namespace node {
namespace sea {

namespace {
// A special number that will appear at the beginning of the single executable
// preparation blobs ready to be injected into the binary. We use this to check
// that the data given to us are intended for building single executable
// applications.
const uint32_t kMagic = 0x143da20;

enum class SeaFlags : uint32_t {
kDefault = 0,
kDisableExperimentalSeaWarning = 1 << 0,
};

SeaFlags operator|(SeaFlags x, SeaFlags y) {
return static_cast<SeaFlags>(static_cast<uint32_t>(x) |
Expand All @@ -59,47 +50,100 @@ SeaFlags operator|=(/* NOLINT (runtime/references) */ SeaFlags& x, SeaFlags y) {
return x = x | y;
}

struct SeaResource {
SeaFlags flags = SeaFlags::kDefault;
std::string_view code;
static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags);
// Serializer for single executable application (SEA) blobs, layered on the
// shared BlobSerializer helpers.
class SeaSerializer : public BlobSerializer<SeaSerializer> {
 public:
  // Debug output is enabled when the SEA debug category is enabled.
  SeaSerializer()
      : BlobSerializer<SeaSerializer>(
            per_process::enabled_debug_list.enabled(DebugCategory::SEA)) {}

  // Writer for types that are neither std::string nor arithmetic.
  template <typename T,
            std::enable_if_t<!std::is_same_v<T, std::string>>* = nullptr,
            std::enable_if_t<!std::is_arithmetic_v<T>>* = nullptr>
  size_t Write(const T& data);
};

SeaResource FindSingleExecutableResource() {
// Serializes a SeaResource into the sink in this order: the magic number,
// the flags, then the code as a length-prefixed string view. Returns the
// accumulated count reported by the individual write helpers.
template <>
size_t SeaSerializer::Write(const SeaResource& sea) {
// Reserve room for the header and the code before writing.
sink.reserve(SeaResource::kHeaderSize + sea.code.size());

Debug("Write SEA magic %x\n", kMagic);
size_t written_total = WriteArithmetic<uint32_t>(kMagic);
joyeecheung marked this conversation as resolved.
Show resolved Hide resolved

// The flags are stored as their underlying 32-bit value.
uint32_t flags = static_cast<uint32_t>(sea.flags);
Debug("Write SEA flags %x\n", flags);
written_total += WriteArithmetic<uint32_t>(flags);
// After magic and flags, exactly the header should have been written.
DCHECK_EQ(written_total, SeaResource::kHeaderSize);

Debug("Write SEA resource code %p, size=%zu\n",
sea.code.data(),
sea.code.size());
// kAddressAndContent also logs the code itself in debug mode.
written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent);
return written_total;
}

// Deserializer for single executable application (SEA) blobs, layered on the
// shared BlobDeserializer helpers.
class SeaDeserializer : public BlobDeserializer<SeaDeserializer> {
 public:
  // `v` is the blob to read from. Debug output is enabled when the SEA debug
  // category is enabled.
  explicit SeaDeserializer(std::string_view v)
      : BlobDeserializer<SeaDeserializer>(
            per_process::enabled_debug_list.enabled(DebugCategory::SEA), v) {}

  // Reader for types that are neither std::string nor arithmetic.
  template <typename T,
            std::enable_if_t<!std::is_same_v<T, std::string>>* = nullptr,
            std::enable_if_t<!std::is_arithmetic_v<T>>* = nullptr>
  T Read();
};

// Deserializes a SeaResource: the magic number, the flags, then the code.
template <>
SeaResource SeaDeserializer::Read() {
  // The first word read must be the SEA magic number.
  const uint32_t magic_word = ReadArithmetic<uint32_t>();
  Debug("Read SEA magic %x\n", magic_word);
  CHECK_EQ(magic_word, kMagic);

  const uint32_t raw_flags = ReadArithmetic<uint32_t>();
  Debug("Read SEA flags %x\n", raw_flags);
  // After magic and flags, exactly the header should have been consumed.
  CHECK_EQ(read_total, SeaResource::kHeaderSize);

  // The code is read as a view; ReadStringView() does not copy the data.
  const std::string_view script =
      ReadStringView(StringLogMode::kAddressAndContent);
  Debug("Read SEA resource code %p, size=%zu\n", script.data(), script.size());

  return SeaResource{static_cast<SeaFlags>(raw_flags), script};
}

std::string_view FindSingleExecutableBlob() {
CHECK(IsSingleExecutable());
static const SeaResource sea_resource = []() -> SeaResource {
static const std::string_view result = []() -> std::string_view {
size_t size;
#ifdef __APPLE__
postject_options options;
postject_options_init(&options);
options.macho_segment_name = "NODE_SEA";
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, &options));
#else
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, nullptr));
#endif
uint32_t first_word = reinterpret_cast<const uint32_t*>(code)[0];
CHECK_EQ(first_word, kMagic);
SeaFlags flags{
reinterpret_cast<const SeaFlags*>(code + sizeof(first_word))[0]};
// TODO(joyeecheung): do more checks here e.g. matching the versions.
return {
flags,
{
code + SeaResource::kHeaderSize,
size - SeaResource::kHeaderSize,
},
};
return {blob, size};
}();
return sea_resource;
per_process::Debug(DebugCategory::SEA,
"Found SEA blob %p, size=%zu\n",
result.data(),
result.size());
return result;
}

} // namespace
} // anonymous namespace

std::string_view FindSingleExecutableCode() {
SeaResource sea_resource = FindSingleExecutableResource();
return sea_resource.code;
// Returns the SEA resource deserialized from the single executable blob.
// The blob is located and parsed when the function-local static is
// initialized; the stored result is returned by value.
SeaResource FindSingleExecutableResource() {
  static const SeaResource cached = [] {
    const std::string_view raw = FindSingleExecutableBlob();
    per_process::Debug(DebugCategory::SEA,
                       "Found SEA resource %p, size=%zu\n",
                       raw.data(),
                       raw.size());
    SeaDeserializer reader(raw);
    SeaResource parsed = reader.Read<SeaResource>();
    return parsed;
  }();
  return cached;
}

bool IsSingleExecutable() {
Expand Down Expand Up @@ -194,51 +238,46 @@ std::optional<SeaConfig> ParseSingleExecutableConfig(
return result;
}

bool GenerateSingleExecutableBlob(const SeaConfig& config) {
ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) {
std::string main_script;
// TODO(joyeecheung): unify the file utils.
int r = ReadFileSync(&main_script, config.main_path.c_str());
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot read main script %s:%s\n", config.main_path, err);
return false;
return ExitCode::kGenericUserError;
}

std::vector<char> sink;
// TODO(joyeecheung): reuse the SnapshotSerializerDeserializer for this.
sink.reserve(SeaResource::kHeaderSize + main_script.size());
const char* pos = reinterpret_cast<const char*>(&kMagic);
sink.insert(sink.end(), pos, pos + sizeof(kMagic));
pos = reinterpret_cast<const char*>(&(config.flags));
sink.insert(sink.end(), pos, pos + sizeof(SeaFlags));
sink.insert(
sink.end(), main_script.data(), main_script.data() + main_script.size());

uv_buf_t buf = uv_buf_init(sink.data(), sink.size());
SeaResource sea{config.flags, main_script};

SeaSerializer serializer;
serializer.Write(sea);

uv_buf_t buf = uv_buf_init(serializer.sink.data(), serializer.sink.size());
r = WriteFileSync(config.output_path.c_str(), buf);
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot write output to %s:%s\n", config.output_path, err);
return false;
return ExitCode::kGenericUserError;
}

FPrintF(stderr,
"Wrote single executable preparation blob to %s\n",
config.output_path);
return true;
return ExitCode::kNoFailure;
}

} // anonymous namespace

ExitCode BuildSingleExecutableBlob(const std::string& config_path) {
std::optional<SeaConfig> config_opt =
ParseSingleExecutableConfig(config_path);
if (!config_opt.has_value() ||
!GenerateSingleExecutableBlob(config_opt.value())) {
return ExitCode::kGenericUserError;
if (config_opt.has_value()) {
ExitCode code = GenerateSingleExecutableBlob(config_opt.value());
return code;
}

return ExitCode::kNoFailure;
return ExitCode::kGenericUserError;
}

void Initialize(Local<Object> target,
Expand Down
Loading