From c82ac3ce3ed9c194e5ac98e91c9896bfc7b22743 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 4 Sep 2024 20:09:56 +0200 Subject: [PATCH 1/3] module: write compile cache to temporary file and then rename it This works better in terms of avoiding race conditions. --- src/compile_cache.cc | 115 ++++++++++++++++++++++++++++++++++++------- src/compile_cache.h | 2 + src/env.cc | 9 +++- src/env.h | 1 + 4 files changed, 107 insertions(+), 20 deletions(-) diff --git a/src/compile_cache.cc b/src/compile_cache.cc index f5588fa99d5394..a9d7598d96297e 100644 --- a/src/compile_cache.cc +++ b/src/compile_cache.cc @@ -215,6 +215,8 @@ CompileCacheEntry* CompileCacheHandler::GetOrInsert( return loaded->second.get(); } + // If the code hash mismatches, the code has changed, discard the stale entry + // and create a new one. auto emplaced = compiler_cache_store_.emplace(key, std::make_unique()); auto* result = emplaced.first->second.get(); @@ -283,23 +285,26 @@ void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry, MaybeSaveImpl(entry, func, rejected); } -// Layout of a cache file: -// [uint32_t] magic number -// [uint32_t] code size -// [uint32_t] code hash -// [uint32_t] cache size -// [uint32_t] cache hash -// .... compile cache content .... +/** + * Persist the compile cache accumulated in memory to disk. + * + * To avoid race conditions, the cache file includes hashes of the original + * source code and the cache content. It's first written to a temporary file + * before being renamed to the target name. + * + * Layout of a cache file: + * [uint32_t] magic number + * [uint32_t] code size + * [uint32_t] code hash + * [uint32_t] cache size + * [uint32_t] cache hash + * .... compile cache content .... + */ void CompileCacheHandler::Persist() { DCHECK(!compile_cache_dir_.empty()); - // NOTE(joyeecheung): in most circumstances the code caching reading - // writing logic is lenient enough that it's fine even if someone - // overwrites the cache (that leads to either size or hash mismatch - // in subsequent loads and the overwritten cache will be ignored). - // Also in most use cases users should not change the files on disk - // too rapidly. Therefore locking is not currently implemented to - // avoid the cost. + // TODO(joyeecheung): do this using a separate event loop to utilize the + // libuv thread pool and do the file system operations concurrently. for (auto& pair : compiler_cache_store_) { auto* entry = pair.second.get(); if (entry->cache == nullptr) { @@ -312,6 +317,11 @@ void CompileCacheHandler::Persist() { entry->source_filename); continue; } + if (entry->persisted == true) { + Debug("[compile cache] skip %s because cache was already persisted\n", + entry->source_filename); + continue; + } DCHECK_EQ(entry->cache->buffer_policy, v8::ScriptCompiler::CachedData::BufferOwned); @@ -328,27 +338,94 @@ void CompileCacheHandler::Persist() { headers[kCodeHashOffset] = entry->code_hash; headers[kCacheHashOffset] = cache_hash; - Debug("[compile cache] writing cache for %s in %s [%d %d %d %d %d]...", + // Generate the temporary filename. + // The temporary file should be placed in a location like: + // + // $NODE_COMPILE_CACHE_DIR/v23.0.0-pre-arm64-5fad6d45-501/e7f8ef7f.cache.tcqrsK + // + // 1. $NODE_COMPILE_CACHE_DIR either comes from the $NODE_COMPILE_CACHE + // environment + // variable or `module.enableCompileCache()`. + // 2. v23.0.0-pre-arm64-5fad6d45-501 is the sub cache directory and + // e7f8ef7f is the hash for the cache (see + // CompileCacheHandler::Enable()), + // 3. tcqrsK is generated by uv_fs_mkstemp() as a temporary indentifier. + uv_fs_t mkstemp_req; + auto cleanup_mkstemp = + OnScopeLeave([&mkstemp_req]() { uv_fs_req_cleanup(&mkstemp_req); }); + std::string cache_filename_tmp = entry->cache_filename + ".XXXXXX"; + Debug("[compile cache] Creating temporary file for cache of %s...", + entry->source_filename); + int err = uv_fs_mkstemp( + nullptr, &mkstemp_req, cache_filename_tmp.c_str(), nullptr); + if (err < 0) { + Debug("failed. %s\n", uv_strerror(err)); + continue; + } + Debug(" -> %s\n", mkstemp_req.path); + Debug("[compile cache] writing cache for %s to temporary file %s [%d %d %d " + "%d %d]...", entry->source_filename, - entry->cache_filename, + mkstemp_req.path, headers[kMagicNumberOffset], headers[kCodeSizeOffset], headers[kCacheSizeOffset], headers[kCodeHashOffset], headers[kCacheHashOffset]); + // Write to the temporary file. uv_buf_t headers_buf = uv_buf_init(reinterpret_cast(headers.data()), headers.size() * sizeof(uint32_t)); uv_buf_t data_buf = uv_buf_init(cache_ptr, entry->cache->length); uv_buf_t bufs[] = {headers_buf, data_buf}; - int err = WriteFileSync(entry->cache_filename.c_str(), bufs, 2); + uv_fs_t write_req; + auto cleanup_write = + OnScopeLeave([&write_req]() { uv_fs_req_cleanup(&write_req); }); + err = uv_fs_write( + nullptr, &write_req, mkstemp_req.result, bufs, 2, 0, nullptr); + if (err < 0) { + Debug("failed: %s\n", uv_strerror(err)); + continue; + } + + uv_fs_t close_req; + auto cleanup_close = + OnScopeLeave([&close_req]() { uv_fs_req_cleanup(&close_req); }); + err = uv_fs_close(nullptr, &close_req, mkstemp_req.result, nullptr); + + if (err < 0) { + Debug("failed: %s\n", uv_strerror(err)); + continue; + } + + Debug("success\n"); + + // Rename the temporary file to the actual cache file. + uv_fs_t rename_req; + auto cleanup_rename = + OnScopeLeave([&rename_req]() { uv_fs_req_cleanup(&rename_req); }); + std::string cache_filename_final = entry->cache_filename; + Debug("[compile cache] Renaming %s to %s...", + mkstemp_req.path, + cache_filename_final); + err = uv_fs_rename(nullptr, + &rename_req, + mkstemp_req.path, + cache_filename_final.c_str(), + nullptr); if (err < 0) { Debug("failed: %s\n", uv_strerror(err)); - } else { - Debug("success\n"); + continue; } + Debug("success\n"); + entry->persisted = true; } + + // Clear the map at the end in one go instead of during the iteration to + // avoid rehashing costs. + Debug("[compile cache] Clear deserialized cache.\n"); + compiler_cache_store_.clear(); } CompileCacheHandler::CompileCacheHandler(Environment* env) diff --git a/src/compile_cache.h b/src/compile_cache.h index 975a3f2080f7a7..fe8c0b93cb3ef3 100644 --- a/src/compile_cache.h +++ b/src/compile_cache.h @@ -30,6 +30,8 @@ struct CompileCacheEntry { std::string source_filename; CachedCodeType type; bool refreshed = false; + bool persisted = false; + // Copy the cache into a new store for V8 to consume. Caller takes // ownership. v8::ScriptCompiler::CachedData* CopyCache() const; diff --git a/src/env.cc b/src/env.cc index 4c8cb75945d825..3a15a63f878a0d 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1143,7 +1143,7 @@ CompileCacheEnableResult Environment::EnableCompileCache( compile_cache_handler_ = std::move(handler); AtExit( [](void* env) { - static_cast(env)->compile_cache_handler()->Persist(); + static_cast(env)->FlushCompileCache(); }, this); } @@ -1160,6 +1160,13 @@ CompileCacheEnableResult Environment::EnableCompileCache( return result; } +void Environment::FlushCompileCache() { + if (!compile_cache_handler_ || compile_cache_handler_->cache_dir().empty()) { + return; + } + compile_cache_handler_->Persist(); +} + void Environment::ExitEnv(StopFlags::Flags flags) { // Should not access non-thread-safe methods here. set_stopping(true); diff --git a/src/env.h b/src/env.h index 2d8af8335d7d61..c45249d49f0a58 100644 --- a/src/env.h +++ b/src/env.h @@ -1041,6 +1041,7 @@ class Environment final : public MemoryRetainer { // Enable built-in compile cache if it has not yet been enabled. // The cache will be persisted to disk on exit. CompileCacheEnableResult EnableCompileCache(const std::string& cache_dir); + void FlushCompileCache(); void RunAndClearNativeImmediates(bool only_refed = false); void RunAndClearInterrupts(); From bad40911838b10a195550d6207738adb942007dd Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Tue, 17 Sep 2024 11:48:56 +0200 Subject: [PATCH 2/3] module: throw when invalid argument is passed to enableCompileCache() --- src/node_modules.cc | 5 ++++- test/parallel/test-compile-cache-api-error.js | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/parallel/test-compile-cache-api-error.js diff --git a/src/node_modules.cc b/src/node_modules.cc index 3bedd2dfecb49c..721b5e1e4b457d 100644 --- a/src/node_modules.cc +++ b/src/node_modules.cc @@ -436,10 +436,13 @@ void BindingData::GetPackageScopeConfig( } void EnableCompileCache(const FunctionCallbackInfo& args) { - CHECK(args[0]->IsString()); Isolate* isolate = args.GetIsolate(); Local context = isolate->GetCurrentContext(); Environment* env = Environment::GetCurrent(context); + if (!args[0]->IsString()) { + THROW_ERR_INVALID_ARG_TYPE(env, "cacheDir should be a string"); + return; + } Utf8Value value(isolate, args[0]); CompileCacheEnableResult result = env->EnableCompileCache(*value); std::vector> values = { diff --git a/test/parallel/test-compile-cache-api-error.js b/test/parallel/test-compile-cache-api-error.js new file mode 100644 index 00000000000000..580c8f756a0f04 --- /dev/null +++ b/test/parallel/test-compile-cache-api-error.js @@ -0,0 +1,11 @@ +'use strict'; + +// This tests module.enableCompileCache() throws when an invalid argument is passed. + +require('../common'); +const { enableCompileCache } = require('module'); +const assert = require('assert'); + +for (const invalid of [0, null, false, () => {}, {}, []]) { + assert.throws(() => enableCompileCache(invalid), { code: 'ERR_INVALID_ARG_TYPE' }); +} From 5d5b549db5d9ae159b18407b7000e138c76b319a Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 4 Sep 2024 20:18:12 +0200 Subject: [PATCH 3/3] module: implement flushCompileCache() This implements an API for users to intentionally flush the accumulated compile cache instead of waiting until process shutdown. It may be useful for application that loads dependencies first and then either reload itself in other instances, or spawning other instances that load an overlapping set of its dependencies - in this case its useful to flush the cache early instead of waiting until the shutdown of itself. Currently flushing is triggered by either process shutdown or user requests. In the future we should simply start the writes right after module loading on a separate thread, and this method only blocks until all the pending writes (if any) on the other thread are finished. In that case, the off-thread writes should finish long before any attempt of flushing is made so the method would then only incur a negligible overhead from thread synchronization. --- doc/api/module.md | 23 +++++++++ lib/internal/modules/helpers.js | 2 + lib/module.js | 3 ++ src/compile_cache.cc | 7 +++ src/env.cc | 14 +++--- src/node_modules.cc | 21 +++++++++ test/fixtures/compile-cache-flush.js | 21 +++++++++ test/parallel/test-compile-cache-api-flush.js | 47 +++++++++++++++++++ 8 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 test/fixtures/compile-cache-flush.js create mode 100644 test/parallel/test-compile-cache-api-flush.js diff --git a/doc/api/module.md b/doc/api/module.md index 3bb25c3f313da1..665a64c0505b66 100644 --- a/doc/api/module.md +++ b/doc/api/module.md @@ -199,6 +199,13 @@ Compilation cache generated by one version of Node.js can not be reused by a dif version of Node.js. Cache generated by different versions of Node.js will be stored separately if the same base directory is used to persist the cache, so they can co-exist. +At the moment, when the compile cache is enabled and a module is loaded afresh, the +code cache is generated from the compiled code immediately, but will only be written +to disk when the Node.js instance is about to exit. This is subject to change. The +[`module.flushCompileCache()`][] method can be used to ensure the accumulated code cache +is flushed to disk in case the application wants to spawn other Node.js instances +and let them share the cache long before the parent exits. + ### `module.getCompileCacheDir()` + +> Stability: 1.1 - Active Development + +Flush the [module compile cache][] accumulated from modules already loaded +in the current Node.js instance to disk. This returns after all the flushing +file system operations come to an end, no matter they succeed or not. If there +are any errors, this will fail silently, since compile cache misses should not +interfer with the actual operation of the application. + ### Class: `module.SourceMap`