From e638ea4f48a79d5f5a54e80ca4b2455784c6709b Mon Sep 17 00:00:00 2001 From: Joyee Cheung <joyeec9h3@gmail.com> Date: Wed, 10 Aug 2022 02:01:02 +0800 Subject: [PATCH] bootstrap: check more metadata when loading the snapshot This patch stores the metadata about the Node.js binary into the SnapshotData and adds fields denoting how the snapshot was generated, on what platform it was generated as well as the V8 cached data version flag. Instead of simply crashing when the metadata doesn't match, Node.js now prints an error message and exit with 1 for the customized snapshot, or ignore the snapshot and start from scratch if it's the default one. PR-URL: https://github.com/nodejs/node/pull/44132 Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Chengzhong Wu <legendecas@gmail.com> --- doc/api/cli.md | 9 ++ src/env.cc | 15 +++ src/env.h | 20 ++- src/node.cc | 14 +- src/node_internals.h | 1 + src/node_snapshotable.cc | 142 ++++++++++++++++++-- test/parallel/test-snapshot-incompatible.js | 76 +++++++++++ 7 files changed, 260 insertions(+), 17 deletions(-) create mode 100644 test/parallel/test-snapshot-incompatible.js diff --git a/doc/api/cli.md b/doc/api/cli.md index 16bf8bcfb5cd37..bf6e435492943d 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -1185,6 +1185,15 @@ in the current working directory. When used without `--build-snapshot`, `--snapshot-blob` specifies the path to the blob that will be used to restore the application state. +When loading a snapshot, Node.js checks that: + +1. The version, architecture and platform of the running Node.js binary + are exactly the same as that of the binary that generates the snapshot. +2. The V8 flags and CPU features are compatible with that of the binary + that generates the snapshot. + +If they don't match, Node.js would refuse to load the snapshot and exit with 1. + ### `--test` <!-- YAML diff --git a/src/env.cc b/src/env.cc index aabbf57341e3c8..ceb29b8c8eb3f5 100644 --- a/src/env.cc +++ b/src/env.cc @@ -265,6 +265,21 @@ std::ostream& operator<<(std::ostream& output, return output; } +std::ostream& operator<<(std::ostream& output, const SnapshotMetadata& i) { + output << "{\n" + << " " + << (i.type == SnapshotMetadata::Type::kDefault + ? "SnapshotMetadata::Type::kDefault" + : "SnapshotMetadata::Type::kFullyCustomized") + << ", // type\n" + << " \"" << i.node_version << "\", // node_version\n" + << " \"" << i.node_arch << "\", // node_arch\n" + << " \"" << i.node_platform << "\", // node_platform\n" + << " " << i.v8_cache_version_tag << ", // v8_cache_version_tag\n" + << "}"; + return output; +} + IsolateDataSerializeInfo IsolateData::Serialize(SnapshotCreator* creator) { Isolate* isolate = creator->GetIsolate(); IsolateDataSerializeInfo info; diff --git a/src/env.h b/src/env.h index 59dd7c8d9e1724..20e14f14e8bcf4 100644 --- a/src/env.h +++ b/src/env.h @@ -954,6 +954,19 @@ struct EnvSerializeInfo { friend std::ostream& operator<<(std::ostream& o, const EnvSerializeInfo& i); }; +struct SnapshotMetadata { + // For now kFullyCustomized is only built with the --build-snapshot CLI flag. + // We might want to add more types of snapshots in the future. + enum class Type : uint8_t { kDefault, kFullyCustomized }; + + Type type; + std::string node_version; + std::string node_arch; + std::string node_platform; + // Result of v8::ScriptCompiler::CachedDataVersionTag(). + uint32_t v8_cache_version_tag; +}; + struct SnapshotData { enum class DataOwnership { kOwned, kNotOwned }; @@ -964,6 +977,8 @@ struct SnapshotData { DataOwnership data_ownership = DataOwnership::kOwned; + SnapshotMetadata metadata; + // The result of v8::SnapshotCreator::CreateBlob() during the snapshot // building process. v8::StartupData v8_snapshot_blob_data{nullptr, 0}; @@ -980,7 +995,10 @@ struct SnapshotData { std::vector<builtins::CodeCacheInfo> code_cache; void ToBlob(FILE* out) const; - static void FromBlob(SnapshotData* out, FILE* in); + // If returns false, the metadata doesn't match the current Node.js binary, + // and the caller should not consume the snapshot data. + bool Check() const; + static bool FromBlob(SnapshotData* out, FILE* in); ~SnapshotData(); diff --git a/src/node.cc b/src/node.cc index 357ca1eb55652d..4e8bbb7ca58f66 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1279,13 +1279,23 @@ int LoadSnapshotDataAndRun(const SnapshotData** snapshot_data_ptr, return exit_code; } std::unique_ptr<SnapshotData> read_data = std::make_unique<SnapshotData>(); - SnapshotData::FromBlob(read_data.get(), fp); + if (!SnapshotData::FromBlob(read_data.get(), fp)) { + // If we fail to read the customized snapshot, simply exit with 1. + exit_code = 1; + return exit_code; + } *snapshot_data_ptr = read_data.release(); fclose(fp); } else if (per_process::cli_options->node_snapshot) { // If --snapshot-blob is not specified, we are reading the embedded // snapshot, but we will skip it if --no-node-snapshot is specified. - *snapshot_data_ptr = SnapshotBuilder::GetEmbeddedSnapshotData(); + const node::SnapshotData* read_data = + SnapshotBuilder::GetEmbeddedSnapshotData(); + if (read_data != nullptr && read_data->Check()) { + // If we fail to read the embedded snapshot, treat it as if Node.js + // was built without one. + *snapshot_data_ptr = read_data; + } } if ((*snapshot_data_ptr) != nullptr) { diff --git a/src/node_internals.h b/src/node_internals.h index f27e03aed66fed..de81fbee07c30f 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -416,6 +416,7 @@ std::ostream& operator<<(std::ostream& output, const TickInfo::SerializeInfo& d); std::ostream& operator<<(std::ostream& output, const AsyncHooks::SerializeInfo& d); +std::ostream& operator<<(std::ostream& output, const SnapshotMetadata& d); namespace performance { std::ostream& operator<<(std::ostream& output, diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index f3bb4f443fc809..f19b83852eeb3e 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -681,6 +681,57 @@ size_t FileWriter::Write(const EnvSerializeInfo& data) { return written_total; } +// Layout of SnapshotMetadata +// [ 1 byte ] type of the snapshot +// [ 4/8 bytes ] length of the node version string +// [ ... ] |length| bytes of node version +// [ 4/8 bytes ] length of the node arch string +// [ ... ] |length| bytes of node arch +// [ 4/8 bytes ] length of the node platform string +// [ ... ] |length| bytes of node platform +// [ 4 bytes ] v8 cache version tag +template <> +SnapshotMetadata FileReader::Read() { + per_process::Debug(DebugCategory::MKSNAPSHOT, "Read<SnapshotMetadata>()\n"); + + SnapshotMetadata result; + result.type = static_cast<SnapshotMetadata::Type>(Read<uint8_t>()); + result.node_version = ReadString(); + result.node_arch = ReadString(); + result.node_platform = ReadString(); + result.v8_cache_version_tag = Read<uint32_t>(); + + if (is_debug) { + std::string str = ToStr(result); + Debug("Read<SnapshotMetadata>() %s\n", str.c_str()); + } + return result; +} + +template <> +size_t FileWriter::Write(const SnapshotMetadata& data) { + if (is_debug) { + std::string str = ToStr(data); + Debug("\nWrite<SnapshotMetadata>() %s\n", str.c_str()); + } + size_t written_total = 0; + // We need the Node.js version, platform and arch to match because + // Node.js may perform synchronizations that are platform-specific and they + // can be changed in semver-patches. + Debug("Write snapshot type %" PRIu8 "\n", static_cast<uint8_t>(data.type)); + written_total += Write<uint8_t>(static_cast<uint8_t>(data.type)); + Debug("Write Node.js version %s\n", data.node_version.c_str()); + written_total += WriteString(data.node_version); + Debug("Write Node.js arch %s\n", data.node_arch); + written_total += WriteString(data.node_arch); + Debug("Write Node.js platform %s\n", data.node_platform); + written_total += WriteString(data.node_platform); + Debug("Write V8 cached data version tag %" PRIx32 "\n", + data.v8_cache_version_tag); + written_total += Write<uint32_t>(data.v8_cache_version_tag); + return written_total; +} + // Layout of the snapshot blob // [ 4 bytes ] kMagic // [ 4/8 bytes ] length of Node.js version string @@ -697,13 +748,12 @@ void SnapshotData::ToBlob(FILE* out) const { w.Debug("SnapshotData::ToBlob()\n"); size_t written_total = 0; + // Metadata w.Debug("Write magic %" PRIx32 "\n", kMagic); written_total += w.Write<uint32_t>(kMagic); - w.Debug("Write version %s\n", NODE_VERSION); - written_total += w.WriteString(NODE_VERSION); - w.Debug("Write arch %s\n", NODE_ARCH); - written_total += w.WriteString(NODE_ARCH); + w.Debug("Write metadata\n"); + written_total += w.Write<SnapshotMetadata>(metadata); written_total += w.Write<v8::StartupData>(v8_snapshot_blob_data); w.Debug("Write isolate_data_indices\n"); @@ -714,22 +764,22 @@ void SnapshotData::ToBlob(FILE* out) const { w.Debug("SnapshotData::ToBlob() Wrote %d bytes\n", written_total); } -void SnapshotData::FromBlob(SnapshotData* out, FILE* in) { +bool SnapshotData::FromBlob(SnapshotData* out, FILE* in) { FileReader r(in); r.Debug("SnapshotData::FromBlob()\n"); + DCHECK_EQ(out->data_ownership, SnapshotData::DataOwnership::kOwned); + // Metadata uint32_t magic = r.Read<uint32_t>(); - r.Debug("Read magic %" PRIx64 "\n", magic); + r.Debug("Read magic %" PRIx32 "\n", magic); CHECK_EQ(magic, kMagic); - std::string version = r.ReadString(); - r.Debug("Read version %s\n", version.c_str()); - CHECK_EQ(version, NODE_VERSION); - std::string arch = r.ReadString(); - r.Debug("Read arch %s\n", arch.c_str()); - CHECK_EQ(arch, NODE_ARCH); + out->metadata = r.Read<SnapshotMetadata>(); + r.Debug("Read metadata\n"); + if (!out->Check()) { + return false; + } - DCHECK_EQ(out->data_ownership, SnapshotData::DataOwnership::kOwned); out->v8_snapshot_blob_data = r.Read<v8::StartupData>(); r.Debug("Read isolate_data_info\n"); out->isolate_data_info = r.Read<IsolateDataSerializeInfo>(); @@ -738,6 +788,54 @@ void SnapshotData::FromBlob(SnapshotData* out, FILE* in) { out->code_cache = r.ReadVector<builtins::CodeCacheInfo>(); r.Debug("SnapshotData::FromBlob() read %d bytes\n", r.read_total); + return true; +} + +bool SnapshotData::Check() const { + if (metadata.node_version != per_process::metadata.versions.node) { + fprintf(stderr, + "Failed to load the startup snapshot because it was built with" + "Node.js version %s and the current Node.js version is %s.\n", + metadata.node_version.c_str(), + NODE_VERSION); + return false; + } + + if (metadata.node_arch != per_process::metadata.arch) { + fprintf(stderr, + "Failed to load the startup snapshot because it was built with" + "architecture %s and the architecture is %s.\n", + metadata.node_arch.c_str(), + NODE_ARCH); + return false; + } + + if (metadata.node_platform != per_process::metadata.platform) { + fprintf(stderr, + "Failed to load the startup snapshot because it was built with" + "platform %s and the current platform is %s.\n", + metadata.node_platform.c_str(), + NODE_PLATFORM); + return false; + } + + uint32_t current_cache_version = v8::ScriptCompiler::CachedDataVersionTag(); + if (metadata.v8_cache_version_tag != current_cache_version && + metadata.type == SnapshotMetadata::Type::kFullyCustomized) { + // For now we only do this check for the customized snapshots - we know + // that the flags we use in the default snapshot are limited and safe + // enough so we can relax the constraints for it. + fprintf(stderr, + "Failed to load the startup snapshot because it was built with " + "a different version of V8 or with different V8 configurations.\n" + "Expected tag %" PRIx32 ", read %" PRIx32 "\n", + current_cache_version, + metadata.v8_cache_version_tag); + return false; + } + + // TODO(joyeecheung): check incompatible Node.js flags. + return true; } SnapshotData::~SnapshotData() { @@ -824,6 +922,10 @@ static const int v8_snapshot_blob_size = )" // -- data_ownership begins -- SnapshotData::DataOwnership::kNotOwned, // -- data_ownership ends -- + // -- metadata begins -- +)" << data->metadata + << R"(, + // -- metadata ends -- // -- v8_snapshot_blob_data begins -- { v8_snapshot_blob_data, v8_snapshot_blob_size }, // -- v8_snapshot_blob_data ends -- @@ -920,6 +1022,12 @@ int SnapshotBuilder::Generate(SnapshotData* out, per_process::v8_platform.Platform()->UnregisterIsolate(isolate); }); + // It's only possible to be kDefault in node_mksnapshot. + SnapshotMetadata::Type snapshot_type = + per_process::cli_options->build_snapshot + ? SnapshotMetadata::Type::kFullyCustomized + : SnapshotMetadata::Type::kDefault; + { HandleScope scope(isolate); TryCatch bootstrapCatch(isolate); @@ -982,7 +1090,7 @@ int SnapshotBuilder::Generate(SnapshotData* out, // point (we currently only support this kind of entry point, but we // could also explore snapshotting other kinds of execution modes // in the future). - if (per_process::cli_options->build_snapshot) { + if (snapshot_type == SnapshotMetadata::Type::kFullyCustomized) { #if HAVE_INSPECTOR // TODO(joyeecheung): move this before RunBootstrapping(). env->InitializeInspector({}); @@ -1050,6 +1158,12 @@ int SnapshotBuilder::Generate(SnapshotData* out, return SNAPSHOT_ERROR; } + out->metadata = SnapshotMetadata{snapshot_type, + per_process::metadata.versions.node, + per_process::metadata.arch, + per_process::metadata.platform, + v8::ScriptCompiler::CachedDataVersionTag()}; + // We cannot resurrect the handles from the snapshot, so make sure that // no handles are left open in the environment after the blob is created // (which should trigger a GC and close all handles that can be closed). diff --git a/test/parallel/test-snapshot-incompatible.js b/test/parallel/test-snapshot-incompatible.js new file mode 100644 index 00000000000000..44447df13c6f7c --- /dev/null +++ b/test/parallel/test-snapshot-incompatible.js @@ -0,0 +1,76 @@ +'use strict'; + +// This tests that Node.js refuses to load snapshots built with incompatible +// V8 configurations. + +require('../common'); +const assert = require('assert'); +const { spawnSync } = require('child_process'); +const tmpdir = require('../common/tmpdir'); +const fixtures = require('../common/fixtures'); +const path = require('path'); +const fs = require('fs'); + +tmpdir.refresh(); +const blobPath = path.join(tmpdir.path, 'snapshot.blob'); +const entry = fixtures.path('empty.js'); + +// The flag used can be any flag that makes a difference in +// v8::ScriptCompiler::CachedDataVersionTag(). --harmony +// is chosen here because it's stable enough and makes a difference. +{ + // Build a snapshot with --harmony. + const child = spawnSync(process.execPath, [ + '--harmony', + '--snapshot-blob', + blobPath, + '--build-snapshot', + entry, + ], { + cwd: tmpdir.path + }); + if (child.status !== 0) { + console.log(child.stderr.toString()); + console.log(child.stdout.toString()); + assert.strictEqual(child.status, 0); + } + const stats = fs.statSync(path.join(tmpdir.path, 'snapshot.blob')); + assert(stats.isFile()); +} + +{ + // Now load the snapshot without --harmony, which should fail. + const child = spawnSync(process.execPath, [ + '--snapshot-blob', + blobPath, + ], { + cwd: tmpdir.path, + env: { + ...process.env, + } + }); + + const stderr = child.stderr.toString().trim(); + assert.match(stderr, /Failed to load the startup snapshot/); + assert.strictEqual(child.status, 1); +} + +{ + // Load it again with --harmony and it should work. + const child = spawnSync(process.execPath, [ + '--harmony', + '--snapshot-blob', + blobPath, + ], { + cwd: tmpdir.path, + env: { + ...process.env, + } + }); + + if (child.status !== 0) { + console.log(child.stderr.toString()); + console.log(child.stdout.toString()); + assert.strictEqual(child.status, 0); + } +}