From cb7ee4745f304feb8def46240023ecd66fdc6e44 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 7 Apr 2023 02:17:47 +0200 Subject: [PATCH] src: split BlobSerializer/BlobDeserializer This patch splits BlobSerializer and BlobDeserializer out of SnapshotSerializer and SnapshotDeserializer. The child classes can implement serialization methods for custom types on top of BlobSerializer/BlobDeserializer for conversions between native types and binary blobs. This allows us to reuse the classes for other cases (e.g. SEA blobs). PR-URL: https://github.com/nodejs/node/pull/47458 Reviewed-By: Darshan Sen --- src/env.h | 1 + src/node_snapshotable.cc | 268 ++++++++++++++++++++++++--------------- 2 files changed, 169 insertions(+), 100 deletions(-) diff --git a/src/env.h b/src/env.h index a50e2a83e332cf..5359436be31e76 100644 --- a/src/env.h +++ b/src/env.h @@ -534,6 +534,7 @@ struct SnapshotData { bool Check() const; static bool FromFile(SnapshotData* out, FILE* in); static bool FromBlob(SnapshotData* out, const std::vector& in); + static bool FromBlob(SnapshotData* out, std::string_view in); static const SnapshotData* FromEmbedderWrapper( const EmbedderSnapshotData* data); EmbedderSnapshotData::Pointer AsEmbedderWrapper() const; diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 0c842a86315f37..6302fa04c4d0a1 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -140,16 +140,15 @@ std::ostream& operator<<(std::ostream& output, const EnvSerializeInfo& i) { return output; } -class SnapshotSerializerDeserializer { +class BlobSerializerDeserializer { public: - SnapshotSerializerDeserializer() - : is_debug(per_process::enabled_debug_list.enabled( - DebugCategory::MKSNAPSHOT)) {} + explicit BlobSerializerDeserializer(bool is_debug_v) : is_debug(is_debug_v) {} template void Debug(const char* format, Args&&... 
args) const { - per_process::Debug( - DebugCategory::MKSNAPSHOT, format, std::forward(args)...); + if (is_debug) { + FPrintF(stderr, format, std::forward(args)...); + } } template @@ -185,18 +184,28 @@ class SnapshotSerializerDeserializer { bool is_debug = false; }; -class SnapshotDeserializer : public SnapshotSerializerDeserializer { +// TODO(joyeecheung): move it to the separate header file. +// Child classes are expected to implement T Read<T>() where +// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string> +template +class BlobDeserializer : public BlobSerializerDeserializer { public: - explicit SnapshotDeserializer(const std::vector& s) - : SnapshotSerializerDeserializer(), sink(s) {} - ~SnapshotDeserializer() {} + explicit BlobDeserializer(bool is_debug_v, std::string_view s) + : BlobSerializerDeserializer(is_debug_v), sink(s) {} + ~BlobDeserializer() {} + + size_t read_total = 0; + std::string_view sink; + + Impl* impl() { return static_cast(this); } + const Impl* impl() const { return static_cast(this); } // Helper for reading numeric types. template - T Read() { + T ReadArithmetic() { static_assert(std::is_arithmetic_v, "Not an arithmetic type"); T result; - Read(&result, 1); + ReadArithmetic(&result, 1); return result; } @@ -209,14 +218,19 @@ class SnapshotDeserializer : public SnapshotSerializerDeserializer { std::string name = GetName(); Debug("\nReadVector<%s>()(%d-byte)\n", name.c_str(), sizeof(T)); } - size_t count = static_cast(Read()); + size_t count = static_cast(ReadArithmetic()); if (count == 0) { return std::vector(); } if (is_debug) { Debug("Reading %d vector elements...\n", count); } - std::vector result = ReadVector(count, std::is_arithmetic{}); + std::vector result; + if constexpr (std::is_arithmetic_v) { + result = ReadArithmeticVector(count); + } else { + result = ReadNonArithmeticVector(count); + } if (is_debug) { std::string str = std::is_arithmetic_v ? 
"" : ToStr(result); std::string name = GetName(); @@ -226,7 +240,7 @@ class SnapshotDeserializer : public SnapshotSerializerDeserializer { } std::string ReadString() { - size_t length = Read(); + size_t length = ReadArithmetic(); if (is_debug) { Debug("ReadString(), length=%d: ", length); @@ -245,13 +259,9 @@ class SnapshotDeserializer : public SnapshotSerializerDeserializer { return result; } - size_t read_total = 0; - const std::vector& sink; - - private: // Helper for reading an array of numeric types. template - void Read(T* out, size_t count) { + void ReadArithmetic(T* out, size_t count) { static_assert(std::is_arithmetic_v, "Not an arithmetic type"); DCHECK_GT(count, 0); // Should not read contents for vectors of size 0. if (is_debug) { @@ -272,17 +282,18 @@ class SnapshotDeserializer : public SnapshotSerializerDeserializer { // Helper for reading numeric vectors. template - std::vector ReadVector(size_t count, std::true_type) { + std::vector ReadArithmeticVector(size_t count) { static_assert(std::is_arithmetic_v, "Not an arithmetic type"); DCHECK_GT(count, 0); // Should not read contents for vectors of size 0. std::vector result(count); - Read(result.data(), count); + ReadArithmetic(result.data(), count); return result; } + private: // Helper for reading non-numeric vectors. template - std::vector ReadVector(size_t count, std::false_type) { + std::vector ReadNonArithmeticVector(size_t count) { static_assert(!std::is_arithmetic_v, "Arithmetic type"); DCHECK_GT(count, 0); // Should not read contents for vectors of size 0. 
std::vector result; @@ -293,29 +304,49 @@ class SnapshotDeserializer : public SnapshotSerializerDeserializer { if (is_debug) { Debug("\n[%d] ", i); } - result.push_back(Read()); + result.push_back(ReadElement()); } is_debug = original_is_debug; return result; } + + template + T ReadElement() { + if constexpr (std::is_arithmetic_v) { + return ReadArithmetic(); + } else if constexpr (std::is_same_v) { + return ReadString(); + } else { + return impl()->template Read(); + } + } }; -class SnapshotSerializer : public SnapshotSerializerDeserializer { +// TODO(joyeecheung): move it to the separate header file. +// Child classes are expected to implement size_t Write<T>(const T&) where +// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string> +template +class BlobSerializer : public BlobSerializerDeserializer { public: - SnapshotSerializer() : SnapshotSerializerDeserializer() { + explicit BlobSerializer(bool is_debug_v) + : BlobSerializerDeserializer(is_debug_v) { // Currently the snapshot blob built with an empty script is around 4MB. // So use that as the default sink size. sink.reserve(4 * 1024 * 1024); } - ~SnapshotSerializer() {} + ~BlobSerializer() {} + + Impl* impl() { return static_cast(this); } + const Impl* impl() const { return static_cast(this); } + std::vector sink; // Helper for writing numeric types. 
template - size_t Write(const T& data) { + size_t WriteArithmetic(const T& data) { static_assert(std::is_arithmetic_v, "Not an arithmetic type"); - return Write(&data, 1); + return WriteArithmetic(&data, 1); } // Layout of vectors: @@ -333,11 +364,16 @@ class SnapshotSerializer : public SnapshotSerializerDeserializer { str.c_str()); } - size_t written_total = Write(data.size()); + size_t written_total = WriteArithmetic(data.size()); if (data.size() == 0) { return written_total; } - written_total += WriteVector(data, std::is_arithmetic{}); + + if constexpr (std::is_arithmetic_v) { + written_total += WriteArithmeticVector(data); + } else { + written_total += WriteNonArithmeticVector(data); + } if (is_debug) { std::string name = GetName(); @@ -352,7 +388,7 @@ class SnapshotSerializer : public SnapshotSerializerDeserializer { // [ |length| bytes ] contents size_t WriteString(const std::string& data) { CHECK_GT(data.size(), 0); // No empty strings should be written. - size_t written_total = Write(data.size()); + size_t written_total = WriteArithmetic(data.size()); if (is_debug) { std::string str = ToStr(data); Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str()); @@ -370,10 +406,10 @@ class SnapshotSerializer : public SnapshotSerializerDeserializer { return written_total; } - private: // Helper for writing an array of numeric types. template - size_t Write(const T* data, size_t count) { + size_t WriteArithmetic(const T* data, size_t count) { + static_assert(std::is_arithmetic_v, "Arithmetic type"); DCHECK_GT(count, 0); // Should not write contents for vectors of size 0. if (is_debug) { std::string str = @@ -398,13 +434,16 @@ class SnapshotSerializer : public SnapshotSerializerDeserializer { // Helper for writing numeric vectors. 
template - size_t WriteVector(const std::vector& data, std::true_type) { - return Write(data.data(), data.size()); + size_t WriteArithmeticVector(const std::vector& data) { + static_assert(std::is_arithmetic_v, "Arithmetic type"); + return WriteArithmetic(data.data(), data.size()); } + private: // Helper for writing non-numeric vectors. template - size_t WriteVector(const std::vector& data, std::false_type) { + size_t WriteNonArithmeticVector(const std::vector& data) { + static_assert(!std::is_arithmetic_v, "Arithmetic type"); DCHECK_GT(data.size(), 0); // Should not write contents for vectors of size 0. size_t written_total = 0; @@ -414,25 +453,50 @@ class SnapshotSerializer : public SnapshotSerializerDeserializer { if (is_debug) { Debug("\n[%d] ", i); } - written_total += Write(data[i]); + written_total += WriteElement(data[i]); } is_debug = original_is_debug; return written_total; } + + template + size_t WriteElement(const T& data) { + if constexpr (std::is_arithmetic_v) { + return WriteArithmetic(data); + } else if constexpr (std::is_same_v) { + return WriteString(data); + } else { + return impl()->template Write(data); + } + } }; -// Layout of serialized std::string: -// [ 4/8 bytes ] length -// [ |length| bytes ] contents -template <> -std::string SnapshotDeserializer::Read() { - return ReadString(); -} -template <> -size_t SnapshotSerializer::Write(const std::string& data) { - return WriteString(data); -} +class SnapshotDeserializer : public BlobDeserializer { + public: + explicit SnapshotDeserializer(std::string_view v) + : BlobDeserializer( + per_process::enabled_debug_list.enabled(DebugCategory::MKSNAPSHOT), + v) {} + + template ::value>* = nullptr, + std::enable_if_t::value>* = nullptr> + T Read(); +}; + +class SnapshotSerializer : public BlobSerializer { + public: + SnapshotSerializer() + : BlobSerializer( + per_process::enabled_debug_list.enabled( + DebugCategory::MKSNAPSHOT)) {} + + template ::value>* = nullptr, + std::enable_if_t::value>* = nullptr> 
+ size_t Write(const T& data); +}; // Layout of v8::StartupData // [ 4/8 bytes ] raw_size @@ -441,13 +505,13 @@ template <> v8::StartupData SnapshotDeserializer::Read() { Debug("Read()\n"); - int raw_size = Read(); + int raw_size = ReadArithmetic(); Debug("size=%d\n", raw_size); CHECK_GT(raw_size, 0); // There should be no startup data of size 0. // The data pointer of v8::StartupData would be deleted so it must be new'ed. std::unique_ptr buf = std::unique_ptr(new char[raw_size]); - Read(buf.get(), raw_size); + ReadArithmetic(buf.get(), raw_size); return v8::StartupData{buf.release(), raw_size}; } @@ -457,8 +521,9 @@ size_t SnapshotSerializer::Write(const v8::StartupData& data) { Debug("\nWrite() size=%d\n", data.raw_size); CHECK_GT(data.raw_size, 0); // There should be no startup data of size 0. - size_t written_total = Write(data.raw_size); - written_total += Write(data.data, static_cast(data.raw_size)); + size_t written_total = WriteArithmetic(data.raw_size); + written_total += + WriteArithmetic(data.data, static_cast(data.raw_size)); Debug("Write() wrote %d bytes\n\n", written_total); return written_total; @@ -508,8 +573,8 @@ PropInfo SnapshotDeserializer::Read() { PropInfo result; result.name = ReadString(); - result.id = Read(); - result.index = Read(); + result.id = ReadArithmetic(); + result.index = ReadArithmetic(); if (is_debug) { std::string str = ToStr(result); @@ -527,8 +592,8 @@ size_t SnapshotSerializer::Write(const PropInfo& data) { } size_t written_total = WriteString(data.name); - written_total += Write(data.id); - written_total += Write(data.index); + written_total += WriteArithmetic(data.id); + written_total += WriteArithmetic(data.index); Debug("Write() wrote %d bytes\n", written_total); return written_total; @@ -547,10 +612,10 @@ AsyncHooks::SerializeInfo SnapshotDeserializer::Read() { Debug("Read()\n"); AsyncHooks::SerializeInfo result; - result.async_ids_stack = Read(); - result.fields = Read(); - result.async_id_fields = Read(); - 
result.js_execution_async_resources = Read(); + result.async_ids_stack = ReadArithmetic(); + result.fields = ReadArithmetic(); + result.async_id_fields = ReadArithmetic(); + result.js_execution_async_resources = ReadArithmetic(); result.native_execution_async_resources = ReadVector(); if (is_debug) { @@ -567,10 +632,12 @@ size_t SnapshotSerializer::Write(const AsyncHooks::SerializeInfo& data) { Debug("Write() %s\n", str.c_str()); } - size_t written_total = Write(data.async_ids_stack); - written_total += Write(data.fields); - written_total += Write(data.async_id_fields); - written_total += Write(data.js_execution_async_resources); + size_t written_total = + WriteArithmetic(data.async_ids_stack); + written_total += WriteArithmetic(data.fields); + written_total += WriteArithmetic(data.async_id_fields); + written_total += + WriteArithmetic(data.js_execution_async_resources); written_total += WriteVector(data.native_execution_async_resources); @@ -585,7 +652,7 @@ TickInfo::SerializeInfo SnapshotDeserializer::Read() { Debug("Read()\n"); TickInfo::SerializeInfo result; - result.fields = Read(); + result.fields = ReadArithmetic(); if (is_debug) { std::string str = ToStr(result); @@ -602,7 +669,7 @@ size_t SnapshotSerializer::Write(const TickInfo::SerializeInfo& data) { Debug("Write() %s\n", str.c_str()); } - size_t written_total = Write(data.fields); + size_t written_total = WriteArithmetic(data.fields); Debug("Write() wrote %d bytes\n", written_total); return written_total; @@ -612,11 +679,10 @@ size_t SnapshotSerializer::Write(const TickInfo::SerializeInfo& data) { // [ 4/8 bytes ] snapshot index of fields template <> ImmediateInfo::SerializeInfo SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, - "Read()\n"); + Debug("Read()\n"); ImmediateInfo::SerializeInfo result; - result.fields = Read(); + result.fields = ReadArithmetic(); if (is_debug) { std::string str = ToStr(result); Debug("Read() %s\n", str.c_str()); @@ -631,7 +697,7 @@ size_t 
SnapshotSerializer::Write(const ImmediateInfo::SerializeInfo& data) { Debug("Write() %s\n", str.c_str()); } - size_t written_total = Write(data.fields); + size_t written_total = WriteArithmetic(data.fields); Debug("Write() wrote %d bytes\n", written_total); @@ -644,13 +710,12 @@ size_t SnapshotSerializer::Write(const ImmediateInfo::SerializeInfo& data) { // [ 4/8 bytes ] snapshot index of observers template <> performance::PerformanceState::SerializeInfo SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, - "Read()\n"); + Debug("Read()\n"); performance::PerformanceState::SerializeInfo result; - result.root = Read(); - result.milestones = Read(); - result.observers = Read(); + result.root = ReadArithmetic(); + result.milestones = ReadArithmetic(); + result.observers = ReadArithmetic(); if (is_debug) { std::string str = ToStr(result); Debug("Read() %s\n", str.c_str()); @@ -666,9 +731,9 @@ size_t SnapshotSerializer::Write( Debug("Write() %s\n", str.c_str()); } - size_t written_total = Write(data.root); - written_total += Write(data.milestones); - written_total += Write(data.observers); + size_t written_total = WriteArithmetic(data.root); + written_total += WriteArithmetic(data.milestones); + written_total += WriteArithmetic(data.observers); Debug("Write() wrote %d bytes\n", written_total); @@ -682,8 +747,7 @@ size_t SnapshotSerializer::Write( // [ ... 
] |length| of PropInfo data template <> IsolateDataSerializeInfo SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, - "Read()\n"); + Debug("Read()\n"); IsolateDataSerializeInfo result; result.primitive_values = ReadVector(); @@ -711,12 +775,12 @@ size_t SnapshotSerializer::Write(const IsolateDataSerializeInfo& data) { template <> RealmSerializeInfo SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, "Read()\n"); + Debug("Read()\n"); RealmSerializeInfo result; result.builtins = ReadVector(); result.persistent_values = ReadVector(); result.native_objects = ReadVector(); - result.context = Read(); + result.context = ReadArithmetic(); return result; } @@ -731,7 +795,7 @@ size_t SnapshotSerializer::Write(const RealmSerializeInfo& data) { size_t written_total = WriteVector(data.builtins); written_total += WriteVector(data.persistent_values); written_total += WriteVector(data.native_objects); - written_total += Write(data.context); + written_total += WriteArithmetic(data.context); Debug("Write() wrote %d bytes\n", written_total); return written_total; @@ -739,17 +803,17 @@ size_t SnapshotSerializer::Write(const RealmSerializeInfo& data) { template <> EnvSerializeInfo SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, "Read()\n"); + Debug("Read()\n"); EnvSerializeInfo result; result.async_hooks = Read(); result.tick_info = Read(); result.immediate_info = Read(); - result.timeout_info = Read(); + result.timeout_info = ReadArithmetic(); result.performance_state = Read(); - result.exit_info = Read(); - result.stream_base_state = Read(); - result.should_abort_on_uncaught_toggle = Read(); + result.exit_info = ReadArithmetic(); + result.stream_base_state = ReadArithmetic(); + result.should_abort_on_uncaught_toggle = ReadArithmetic(); result.principal_realm = Read(); return result; } @@ -765,13 +829,13 @@ size_t SnapshotSerializer::Write(const EnvSerializeInfo& data) { size_t written_total = 
Write(data.async_hooks); written_total += Write(data.tick_info); written_total += Write(data.immediate_info); - written_total += Write(data.timeout_info); + written_total += WriteArithmetic(data.timeout_info); written_total += Write( data.performance_state); - written_total += Write(data.exit_info); - written_total += Write(data.stream_base_state); + written_total += WriteArithmetic(data.exit_info); + written_total += WriteArithmetic(data.stream_base_state); written_total += - Write(data.should_abort_on_uncaught_toggle); + WriteArithmetic(data.should_abort_on_uncaught_toggle); written_total += Write(data.principal_realm); Debug("Write() wrote %d bytes\n", written_total); @@ -789,14 +853,14 @@ size_t SnapshotSerializer::Write(const EnvSerializeInfo& data) { // [ 4 bytes ] v8 cache version tag template <> SnapshotMetadata SnapshotDeserializer::Read() { - per_process::Debug(DebugCategory::MKSNAPSHOT, "Read()\n"); + Debug("Read()\n"); SnapshotMetadata result; - result.type = static_cast(Read()); + result.type = static_cast(ReadArithmetic()); result.node_version = ReadString(); result.node_arch = ReadString(); result.node_platform = ReadString(); - result.v8_cache_version_tag = Read(); + result.v8_cache_version_tag = ReadArithmetic(); if (is_debug) { std::string str = ToStr(result); @@ -816,7 +880,7 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { // Node.js may perform synchronizations that are platform-specific and they // can be changed in semver-patches. 
Debug("Write snapshot type %" PRIu8 "\n", static_cast(data.type)); - written_total += Write(static_cast(data.type)); + written_total += WriteArithmetic(static_cast(data.type)); Debug("Write Node.js version %s\n", data.node_version.c_str()); written_total += WriteString(data.node_version); Debug("Write Node.js arch %s\n", data.node_arch); @@ -825,7 +889,7 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { written_total += WriteString(data.node_platform); Debug("Write V8 cached data version tag %" PRIx32 "\n", data.v8_cache_version_tag); - written_total += Write(data.v8_cache_version_tag); + written_total += WriteArithmetic(data.v8_cache_version_tag); return written_total; } @@ -848,7 +912,7 @@ std::vector SnapshotData::ToBlob() const { // Metadata w.Debug("Write magic %" PRIx32 "\n", kMagic); - written_total += w.Write(kMagic); + written_total += w.WriteArithmetic(kMagic); w.Debug("Write metadata\n"); written_total += w.Write(metadata); @@ -883,13 +947,17 @@ bool SnapshotData::FromFile(SnapshotData* out, FILE* in) { } bool SnapshotData::FromBlob(SnapshotData* out, const std::vector& in) { + return FromBlob(out, std::string_view(in.data(), in.size())); +} + +bool SnapshotData::FromBlob(SnapshotData* out, std::string_view in) { SnapshotDeserializer r(in); r.Debug("SnapshotData::FromBlob()\n"); DCHECK_EQ(out->data_ownership, SnapshotData::DataOwnership::kOwned); // Metadata - uint32_t magic = r.Read(); + uint32_t magic = r.ReadArithmetic(); r.Debug("Read magic %" PRIx32 "\n", magic); CHECK_EQ(magic, kMagic); out->metadata = r.Read();