From 88027e84d812e5d4558fa0f27c4d8baf7a671ebf Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 22 Jul 2024 12:00:39 -0400 Subject: [PATCH] fs: optimize `fs.cpSync` js calls PR-URL: https://github.com/nodejs/node/pull/53614 Reviewed-By: Matteo Collina Reviewed-By: Benjamin Gruenbaum Reviewed-By: James M Snell --- lib/internal/fs/cp/cp-sync.js | 164 ++++++---------------------- src/node_errors.h | 6 + src/node_file.cc | 128 ++++++++++++++++++++++ test/fixtures/permission/fs-read.js | 8 +- typings/internalBinding/fs.d.ts | 3 + 5 files changed, 171 insertions(+), 138 deletions(-) diff --git a/lib/internal/fs/cp/cp-sync.js b/lib/internal/fs/cp/cp-sync.js index aebc575365dd2e..1e922b7805fc8c 100644 --- a/lib/internal/fs/cp/cp-sync.js +++ b/lib/internal/fs/cp/cp-sync.js @@ -2,33 +2,25 @@ // This file is a modified version of the fs-extra's copySync method. -const { areIdentical, isSrcSubdir } = require('internal/fs/cp/cp'); +const fsBinding = internalBinding('fs'); +const { isSrcSubdir } = require('internal/fs/cp/cp'); const { codes: { - ERR_FS_CP_DIR_TO_NON_DIR, ERR_FS_CP_EEXIST, ERR_FS_CP_EINVAL, - ERR_FS_CP_FIFO_PIPE, - ERR_FS_CP_NON_DIR_TO_DIR, - ERR_FS_CP_SOCKET, ERR_FS_CP_SYMLINK_TO_SUBDIRECTORY, - ERR_FS_CP_UNKNOWN, - ERR_FS_EISDIR, ERR_INVALID_RETURN_VALUE, } } = require('internal/errors'); const { os: { errno: { EEXIST, - EISDIR, EINVAL, - ENOTDIR, }, }, } = internalBinding('constants'); const { chmodSync, copyFileSync, - existsSync, lstatSync, mkdirSync, opendirSync, @@ -42,7 +34,6 @@ const { dirname, isAbsolute, join, - parse, resolve, } = require('path'); const { isPromise } = require('util/types'); @@ -54,145 +45,38 @@ function cpSyncFn(src, dest, opts) { 'node is not recommended'; process.emitWarning(warning, 'TimestampPrecisionWarning'); } - const { srcStat, destStat, skipped } = checkPathsSync(src, dest, opts); - if (skipped) return; - checkParentPathsSync(src, srcStat, dest); - return checkParentDir(destStat, src, dest, opts); -} - -function checkPathsSync(src, dest, opts) { if (opts.filter) { const shouldCopy = opts.filter(src, dest); if (isPromise(shouldCopy)) { throw new ERR_INVALID_RETURN_VALUE('boolean', 'filter', shouldCopy); } - if (!shouldCopy) return { __proto__: null, skipped: true }; + if (!shouldCopy) return; } - const { srcStat, destStat } = getStatsSync(src, dest, opts); - if (destStat) { - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: 'src and dest cannot be the same', - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); - } - if (srcStat.isDirectory() && !destStat.isDirectory()) { - throw new ERR_FS_CP_DIR_TO_NON_DIR({ - message: `cannot overwrite non-directory ${dest} ` + - `with directory ${src}`, - path: dest, - syscall: 'cp', - errno: EISDIR, - code: 'EISDIR', - }); - } - if (!srcStat.isDirectory() && destStat.isDirectory()) { - throw new ERR_FS_CP_NON_DIR_TO_DIR({ - message: `cannot overwrite directory ${dest} ` + - `with non-directory ${src}`, - path: dest, - syscall: 'cp', - errno: ENOTDIR, - code: 'ENOTDIR', - }); - } - } - - if (srcStat.isDirectory() && isSrcSubdir(src, dest)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); - } - return { __proto__: null, srcStat, destStat, skipped: false }; -} + fsBinding.cpSyncCheckPaths(src, dest, opts.dereference, opts.recursive); -function getStatsSync(src, dest, opts) { - const statFunc = opts.dereference ? statSync : lstatSync; - const srcStat = statFunc(src, { bigint: true, throwIfNoEntry: true }); - const destStat = statFunc(dest, { bigint: true, throwIfNoEntry: false }); - return { srcStat, destStat }; + return getStats(src, dest, opts); } -function checkParentPathsSync(src, srcStat, dest) { - const srcParent = resolve(dirname(src)); - const destParent = resolve(dirname(dest)); - if (destParent === srcParent || destParent === parse(destParent).root) return; - const destStat = statSync(destParent, { bigint: true, throwIfNoEntry: false }); - - if (destStat === undefined) { - return; - } - - if (areIdentical(srcStat, destStat)) { - throw new ERR_FS_CP_EINVAL({ - message: `cannot copy ${src} to a subdirectory of self ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); - } - return checkParentPathsSync(src, srcStat, destParent); -} - -function checkParentDir(destStat, src, dest, opts) { - const destParent = dirname(dest); - if (!existsSync(destParent)) mkdirSync(destParent, { recursive: true }); - return getStats(destStat, src, dest, opts); -} - -function getStats(destStat, src, dest, opts) { +function getStats(src, dest, opts) { + // TODO(@anonrig): Avoid making two stat calls. const statSyncFn = opts.dereference ? statSync : lstatSync; const srcStat = statSyncFn(src); + const destStat = statSyncFn(dest, { bigint: true, throwIfNoEntry: false }); if (srcStat.isDirectory() && opts.recursive) { return onDir(srcStat, destStat, src, dest, opts); - } else if (srcStat.isDirectory()) { - throw new ERR_FS_EISDIR({ - message: `${src} is a directory (not copied)`, - path: src, - syscall: 'cp', - errno: EINVAL, - code: 'EISDIR', - }); } else if (srcStat.isFile() || srcStat.isCharacterDevice() || srcStat.isBlockDevice()) { return onFile(srcStat, destStat, src, dest, opts); } else if (srcStat.isSymbolicLink()) { - return onLink(destStat, src, dest, opts); - } else if (srcStat.isSocket()) { - throw new ERR_FS_CP_SOCKET({ - message: `cannot copy a socket file: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); - } else if (srcStat.isFIFO()) { - throw new ERR_FS_CP_FIFO_PIPE({ - message: `cannot copy a FIFO pipe: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); + return onLink(destStat, src, dest, opts.verbatimSymlinks); } - throw new ERR_FS_CP_UNKNOWN({ - message: `cannot copy an unknown file type: ${dest}`, - path: dest, - syscall: 'cp', - errno: EINVAL, - code: 'EINVAL', - }); + + // It is not possible to get here because all possible cases are handled above. + const assert = require('internal/assert'); + assert.fail('Unreachable code'); } function onFile(srcStat, destStat, src, dest, opts) { @@ -200,6 +84,7 @@ function onFile(srcStat, destStat, src, dest, opts) { return mayCopyFile(srcStat, src, dest, opts); } +// TODO(@anonrig): Move this function to C++. function mayCopyFile(srcStat, src, dest, opts) { if (opts.force) { unlinkSync(dest); @@ -249,6 +134,7 @@ function setDestTimestamps(src, dest) { return utimesSync(dest, updatedSrcStat.atime, updatedSrcStat.mtime); } +// TODO(@anonrig): Move this function to C++. function onDir(srcStat, destStat, src, dest, opts) { if (!destStat) return mkDirAndCopy(srcStat.mode, src, dest, opts); return copyDir(src, dest, opts); @@ -260,6 +146,7 @@ function mkDirAndCopy(srcMode, src, dest, opts) { return setDestMode(dest, srcMode); } +// TODO(@anonrig): Move this function to C++. function copyDir(src, dest, opts) { const dir = opendirSync(src); @@ -270,17 +157,28 @@ function copyDir(src, dest, opts) { const { name } = dirent; const srcItem = join(src, name); const destItem = join(dest, name); - const { destStat, skipped } = checkPathsSync(srcItem, destItem, opts); - if (!skipped) getStats(destStat, srcItem, destItem, opts); + let shouldCopy = true; + + if (opts.filter) { + shouldCopy = opts.filter(srcItem, destItem); + if (isPromise(shouldCopy)) { + throw new ERR_INVALID_RETURN_VALUE('boolean', 'filter', shouldCopy); + } + } + + if (shouldCopy) { + getStats(srcItem, destItem, opts); + } } } finally { dir.closeSync(); } } -function onLink(destStat, src, dest, opts) { +// TODO(@anonrig): Move this function to C++. +function onLink(destStat, src, dest, verbatimSymlinks) { let resolvedSrc = readlinkSync(src); - if (!opts.verbatimSymlinks && !isAbsolute(resolvedSrc)) { + if (!verbatimSymlinks && !isAbsolute(resolvedSrc)) { resolvedSrc = resolve(dirname(src), resolvedSrc); } if (!destStat) { diff --git a/src/node_errors.h b/src/node_errors.h index b6e6b6ee8f902c..8d70680171b1a8 100644 --- a/src/node_errors.h +++ b/src/node_errors.h @@ -70,7 +70,13 @@ void OOMErrorHandler(const char* location, const v8::OOMDetails& details); V(ERR_DLOPEN_FAILED, Error) \ V(ERR_ENCODING_INVALID_ENCODED_DATA, TypeError) \ V(ERR_EXECUTION_ENVIRONMENT_NOT_AVAILABLE, Error) \ + V(ERR_FS_CP_EINVAL, Error) \ + V(ERR_FS_CP_DIR_TO_NON_DIR, Error) \ + V(ERR_FS_CP_NON_DIR_TO_DIR, Error) \ V(ERR_FS_EISDIR, Error) \ + V(ERR_FS_CP_SOCKET, Error) \ + V(ERR_FS_CP_FIFO_PIPE, Error) \ + V(ERR_FS_CP_UNKNOWN, Error) \ V(ERR_ILLEGAL_CONSTRUCTOR, Error) \ V(ERR_INVALID_ADDRESS, Error) \ V(ERR_INVALID_ARG_VALUE, TypeError) \ diff --git a/src/node_file.cc b/src/node_file.cc index a1e913286bd768..c6556725109a6b 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -3118,6 +3118,130 @@ static void GetFormatOfExtensionlessFile( return args.GetReturnValue().Set(EXTENSIONLESS_FORMAT_JAVASCRIPT); } +static void CpSyncCheckPaths(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + + CHECK_EQ(args.Length(), 4); // src, dest, dereference, recursive + + BufferValue src(isolate, args[0]); + CHECK_NOT_NULL(*src); + ToNamespacedPath(env, &src); + THROW_IF_INSUFFICIENT_PERMISSIONS( + env, permission::PermissionScope::kFileSystemRead, src.ToStringView()); + auto src_path = std::filesystem::path(src.ToStringView()); + + BufferValue dest(isolate, args[1]); + CHECK_NOT_NULL(*dest); + ToNamespacedPath(env, &dest); + THROW_IF_INSUFFICIENT_PERMISSIONS( + env, permission::PermissionScope::kFileSystemWrite, dest.ToStringView()); + auto dest_path = std::filesystem::path(dest.ToStringView()); + + bool dereference = args[2]->IsTrue(); + bool recursive = args[3]->IsTrue(); + + std::error_code error_code; + auto src_status = dereference + ? std::filesystem::symlink_status(src_path, error_code) + : std::filesystem::status(src_path, error_code); + if (error_code) { + return env->ThrowUVException(EEXIST, "lstat", nullptr, src.out()); + } + auto dest_status = + dereference ? std::filesystem::symlink_status(dest_path, error_code) + : std::filesystem::status(dest_path, error_code); + + bool dest_exists = !error_code && dest_status.type() != + std::filesystem::file_type::not_found; + bool src_is_dir = src_status.type() == std::filesystem::file_type::directory; + + if (!error_code) { + // Check if src and dest are identical. + if (std::filesystem::equivalent(src_path, dest_path)) { + std::string message = + "src and dest cannot be the same " + dest_path.string(); + return THROW_ERR_FS_CP_EINVAL(env, message.c_str()); + } + + const bool dest_is_dir = + dest_status.type() == std::filesystem::file_type::directory; + + if (src_is_dir && !dest_is_dir) { + std::string message = "Cannot overwrite non-directory " + + src_path.string() + " with directory " + + dest_path.string(); + return THROW_ERR_FS_CP_DIR_TO_NON_DIR(env, message.c_str()); + } + + if (!src_is_dir && dest_is_dir) { + std::string message = "Cannot overwrite directory " + dest_path.string() + + " with non-directory " + src_path.string(); + return THROW_ERR_FS_CP_NON_DIR_TO_DIR(env, message.c_str()); + } + } + + std::string dest_path_str = dest_path.string(); + // Check if dest_path is a subdirectory of src_path. + if (src_is_dir && dest_path_str.starts_with(src_path.string())) { + std::string message = "Cannot copy " + src_path.string() + + " to a subdirectory of self " + dest_path.string(); + return THROW_ERR_FS_CP_EINVAL(env, message.c_str()); + } + + auto dest_parent = dest_path.parent_path(); + // "/" parent is itself. Therefore, we need to check if the parent is the same + // as itself. + while (src_path.parent_path() != dest_parent && + dest_parent.has_parent_path() && + dest_parent.parent_path() != dest_parent) { + if (std::filesystem::equivalent( + src_path, dest_path.parent_path(), error_code)) { + std::string message = "Cannot copy " + src_path.string() + + " to a subdirectory of self " + dest_path.string(); + return THROW_ERR_FS_CP_EINVAL(env, message.c_str()); + } + + // If equivalent fails, it's highly likely that dest_parent does not exist + if (error_code) { + break; + } + + dest_parent = dest_parent.parent_path(); + } + + if (src_is_dir && !recursive) { + std::string message = + "Recursive option not enabled, cannot copy a directory: " + + src_path.string(); + return THROW_ERR_FS_EISDIR(env, message.c_str()); + } + + switch (src_status.type()) { + case std::filesystem::file_type::socket: { + std::string message = "Cannot copy a socket file: " + dest_path.string(); + return THROW_ERR_FS_CP_SOCKET(env, message.c_str()); + } + case std::filesystem::file_type::fifo: { + std::string message = "Cannot copy a FIFO pipe: " + dest_path.string(); + return THROW_ERR_FS_CP_FIFO_PIPE(env, message.c_str()); + } + case std::filesystem::file_type::unknown: { + std::string message = + "Cannot copy an unknown file type: " + dest_path.string(); + return THROW_ERR_FS_CP_UNKNOWN(env, message.c_str()); + } + default: + break; + } + + // Optimization opportunity: Check if this "exists" call is good for + // performance. + if (!dest_exists || !std::filesystem::exists(dest_path.parent_path())) { + std::filesystem::create_directories(dest_path.parent_path(), error_code); + } +} + BindingData::FilePathIsFileReturnType BindingData::FilePathIsFile( Environment* env, const std::string& file_path) { THROW_IF_INSUFFICIENT_PERMISSIONS( @@ -3467,6 +3591,8 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data, SetMethod(isolate, target, "mkdtemp", Mkdtemp); + SetMethod(isolate, target, "cpSyncCheckPaths", CpSyncCheckPaths); + StatWatcher::CreatePerIsolateProperties(isolate_data, target); BindingData::CreatePerIsolateProperties(isolate_data, target); @@ -3577,6 +3703,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(RealPath); registry->Register(CopyFile); + registry->Register(CpSyncCheckPaths); + registry->Register(Chmod); registry->Register(FChmod); diff --git a/test/fixtures/permission/fs-read.js b/test/fixtures/permission/fs-read.js index 92e53c0b046124..0ce7d65b21be1a 100644 --- a/test/fixtures/permission/fs-read.js +++ b/test/fixtures/permission/fs-read.js @@ -161,23 +161,21 @@ const regularFile = __filename; }, common.expectsError({ code: 'ERR_ACCESS_DENIED', permission: 'FileSystemRead', - // cpSync calls lstatSync before reading blockedFile - resource: blockedFile, + resource: path.toNamespacedPath(blockedFile), })); assert.throws(() => { fs.cpSync(blockedFileURL, path.join(blockedFolder, 'any-other-file')); }, common.expectsError({ code: 'ERR_ACCESS_DENIED', permission: 'FileSystemRead', - // cpSync calls lstatSync before reading blockedFile - resource: blockedFile, + resource: path.toNamespacedPath(blockedFile), })); assert.throws(() => { fs.cpSync(blockedFile, path.join(__dirname, 'any-other-file')); }, common.expectsError({ code: 'ERR_ACCESS_DENIED', permission: 'FileSystemRead', - resource: blockedFile, + resource: path.toNamespacedPath(blockedFile), })); } diff --git a/typings/internalBinding/fs.d.ts b/typings/internalBinding/fs.d.ts index 5c385157693e1a..2d86a412ba686d 100644 --- a/typings/internalBinding/fs.d.ts +++ b/typings/internalBinding/fs.d.ts @@ -76,6 +76,8 @@ declare namespace InternalFSBinding { function copyFile(src: StringOrBuffer, dest: StringOrBuffer, mode: number, req: undefined, ctx: FSSyncContext): void; function copyFile(src: StringOrBuffer, dest: StringOrBuffer, mode: number, usePromises: typeof kUsePromises): Promise; + function cpSyncCheckPaths(src: StringOrBuffer, dest: StringOrBuffer, dereference: boolean, recursive: boolean): void; + function fchmod(fd: number, mode: number, req: FSReqCallback): void; function fchmod(fd: number, mode: number): void; function fchmod(fd: number, mode: number, usePromises: typeof kUsePromises): Promise; @@ -257,6 +259,7 @@ export interface FsBinding { chown: typeof InternalFSBinding.chown; close: typeof InternalFSBinding.close; copyFile: typeof InternalFSBinding.copyFile; + cpSyncCheckPaths: typeof InternalFSBinding.cpSyncCheckPaths; fchmod: typeof InternalFSBinding.fchmod; fchown: typeof InternalFSBinding.fchown; fdatasync: typeof InternalFSBinding.fdatasync;