From 174bbb9b208410de5cf54ce61fc589688dc69f69 Mon Sep 17 00:00:00 2001 From: Greg Magolan Date: Fri, 7 Jun 2024 17:29:25 -0700 Subject: [PATCH] refactor: add option for optimized runfiles collection --- docs/js_binary.md | 7 ++-- docs/js_library.md | 4 ++- js/private/js_binary.bzl | 53 +++++++++++++++++++-------- js/private/js_helpers.bzl | 75 ++++++++++++++++++++++++++++++++++++--- js/private/js_library.bzl | 51 ++++++++++++++++++-------- 5 files changed, 153 insertions(+), 37 deletions(-) diff --git a/docs/js_binary.md b/docs/js_binary.md index eb9a75f2b..4f1098881 100644 --- a/docs/js_binary.md +++ b/docs/js_binary.md @@ -24,7 +24,8 @@ js_binary( js_binary(name, data, chdir, copy_data_to_bin, enable_runfiles, entry_point, env, expected_exit_code, fixed_args, include_npm, include_npm_sources, include_sources, include_transitive_sources, include_transitive_types, include_types, log_level, - no_copy_to_bin, node_options, node_toolchain, patch_node_fs, preserve_symlinks_main) + no_copy_to_bin, node_options, node_toolchain, optimized_runfiles_collection, patch_node_fs, + preserve_symlinks_main) Execute a program in the Node.js runtime. @@ -84,6 +85,7 @@ The following environment variables are made available to the Node.js runtime ba | no_copy_to_bin | List of files to not copy to the Bazel output tree when `copy_data_to_bin` is True.

This is useful for exceptional cases where a `copy_to_bin` is not possible or not suitable for an input file such as a file in an external repository. In most cases, this option is not needed. See `copy_data_to_bin` docstring for more info. | List of labels | optional | `[]` | | node_options | Options to pass to the node invocation on the command line.

https://nodejs.org/api/cli.html

These options are passed directly to the node invocation on the command line. Options passed here will take precendence over options passed via the NODE_OPTIONS environment variable. Options passed here are not added to the NODE_OPTIONS environment variable so will not be automatically picked up by child processes that inherit that enviroment variable. | List of strings | optional | `[]` | | node_toolchain | The Node.js toolchain to use for this target.

See https://bazelbuild.github.io/rules_nodejs/Toolchains.html

Typically this is left unset so that Bazel automatically selects the right Node.js toolchain for the target platform. See https://bazel.build/extending/toolchains#toolchain-resolution for more information. | Label | optional | `None` | +| optimized_runfiles_collection | Enable optimized runfiles collection where only default_runfiles of `deps` & `data` targets are gathered.

Under the hood this disables the defense-in-depth against `data` & `deps` targets not supplying all required runfiles, where runfiles collection also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of `data` & `deps` targets. | Boolean | optional | `True` | | patch_node_fs | Patch the to Node.js `fs` API (https://nodejs.org/api/fs.html) for this node program to prevent the program from following symlinks out of the execroot, runfiles and the sandbox.

When enabled, `js_binary` patches the Node.js sync and async `fs` API functions `lstat`, `readlink`, `realpath`, `readdir` and `opendir` so that the node program being run cannot resolve symlinks out of the execroot and the runfiles tree. When in the sandbox, these patches prevent the program being run from resolving symlinks out of the sandbox.

When disabled, node programs can leave the execroot, runfiles and sandbox by following symlinks which can lead to non-hermetic behavior. | Boolean | optional | `True` | | preserve_symlinks_main | When True, the --preserve-symlinks-main flag is passed to node.

This prevents node from following an ESM entry script out of runfiles and the sandbox. This can happen for `.mjs` ESM entry points where the fs node patches, which guard the runfiles and sandbox, are not applied. See https://github.com/aspect-build/rules_js/issues/362 for more information. Once #362 is resolved, the default for this attribute can be set to False.

This flag was added in Node.js v10.2.0 (released 2018-05-23). If your node toolchain is configured to use a Node.js version older than this you'll need to set this attribute to False.

See https://nodejs.org/api/cli.html#--preserve-symlinks-main for more information. | Boolean | optional | `True` | @@ -96,7 +98,7 @@ The following environment variables are made available to the Node.js runtime ba js_test(name, data, chdir, copy_data_to_bin, enable_runfiles, entry_point, env, expected_exit_code, fixed_args, include_npm, include_npm_sources, include_sources, include_transitive_sources, include_transitive_types, include_types, log_level, no_copy_to_bin, node_options, - node_toolchain, patch_node_fs, preserve_symlinks_main) + node_toolchain, optimized_runfiles_collection, patch_node_fs, preserve_symlinks_main) Identical to js_binary, but usable under `bazel test`. @@ -145,6 +147,7 @@ the contract between Bazel and a test runner. | no_copy_to_bin | List of files to not copy to the Bazel output tree when `copy_data_to_bin` is True.

This is useful for exceptional cases where a `copy_to_bin` is not possible or not suitable for an input file such as a file in an external repository. In most cases, this option is not needed. See `copy_data_to_bin` docstring for more info. | List of labels | optional | `[]` | | node_options | Options to pass to the node invocation on the command line.

https://nodejs.org/api/cli.html

These options are passed directly to the node invocation on the command line. Options passed here will take precendence over options passed via the NODE_OPTIONS environment variable. Options passed here are not added to the NODE_OPTIONS environment variable so will not be automatically picked up by child processes that inherit that enviroment variable. | List of strings | optional | `[]` | | node_toolchain | The Node.js toolchain to use for this target.

See https://bazelbuild.github.io/rules_nodejs/Toolchains.html

Typically this is left unset so that Bazel automatically selects the right Node.js toolchain for the target platform. See https://bazel.build/extending/toolchains#toolchain-resolution for more information. | Label | optional | `None` | +| optimized_runfiles_collection | Enable optimized runfiles collection where only default_runfiles of `deps` & `data` targets are gathered.

Under the hood this disables the defense-in-depth against `data` & `deps` targets not supplying all required runfiles, where runfiles collection also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of `data` & `deps` targets. | Boolean | optional | `True` | | patch_node_fs | Patch the to Node.js `fs` API (https://nodejs.org/api/fs.html) for this node program to prevent the program from following symlinks out of the execroot, runfiles and the sandbox.

When enabled, `js_binary` patches the Node.js sync and async `fs` API functions `lstat`, `readlink`, `realpath`, `readdir` and `opendir` so that the node program being run cannot resolve symlinks out of the execroot and the runfiles tree. When in the sandbox, these patches prevent the program being run from resolving symlinks out of the sandbox.

When disabled, node programs can leave the execroot, runfiles and sandbox by following symlinks which can lead to non-hermetic behavior. | Boolean | optional | `True` | | preserve_symlinks_main | When True, the --preserve-symlinks-main flag is passed to node.

This prevents node from following an ESM entry script out of runfiles and the sandbox. This can happen for `.mjs` ESM entry points where the fs node patches, which guard the runfiles and sandbox, are not applied. See https://github.com/aspect-build/rules_js/issues/362 for more information. Once #362 is resolved, the default for this attribute can be set to False.

This flag was added in Node.js v10.2.0 (released 2018-05-23). If your node toolchain is configured to use a Node.js version older than this you'll need to set this attribute to False.

See https://nodejs.org/api/cli.html#--preserve-symlinks-main for more information. | Boolean | optional | `True` | diff --git a/docs/js_library.md b/docs/js_library.md index 031de5297..36dd7528f 100644 --- a/docs/js_library.md +++ b/docs/js_library.md @@ -29,7 +29,8 @@ js_library( ## js_library
-js_library(name, deps, srcs, data, copy_data_to_bin, no_copy_to_bin, types)
+js_library(name, deps, srcs, data, copy_data_to_bin, no_copy_to_bin, optimized_runfiles_collection,
+           types)
 
A library of JavaScript sources. Provides JsInfo, the primary provider used in rules_js @@ -58,6 +59,7 @@ for more context on why we do this. | data | Runtime dependencies to include in binaries/tests that depend on this target.

The transitive npm dependencies, transitive sources, default outputs and runfiles of targets in the `data` attribute are added to the runfiles of this target. They should appear in the '*.runfiles' area of any executable which has a runtime dependency on this target.

If this list contains linked npm packages, npm package store targets or other targets that provide `JsInfo`, `NpmPackageStoreInfo` providers are gathered from `JsInfo`. This is done directly from the `npm_package_store_infos` field of these. For linked npm package targets, the underlying `npm_package_store` target(s) that back the links are used. Gathered `NpmPackageStoreInfo` providers are propagated to the direct dependencies of downstream linked targets.

NB: Linked npm package targets that are "dev" dependencies do not forward their underlying `npm_package_store` target(s) through `npm_package_store_infos` and will therefore not be propagated to the direct dependencies of downstream linked targets. npm packages that come in from `npm_translate_lock` are considered "dev" dependencies if they are have `dev: true` set in the pnpm lock file. This should be all packages that are only listed as "devDependencies" in all `package.json` files within the pnpm workspace. This behavior is intentional to mimic how `devDependencies` work in published npm packages. | List of labels | optional | `[]` | | copy_data_to_bin | When True, `data` files are copied to the Bazel output tree before being passed as inputs to runfiles. | Boolean | optional | `True` | | no_copy_to_bin | List of files to not copy to the Bazel output tree when `copy_data_to_bin` is True.

This is useful for exceptional cases where a `copy_to_bin` is not possible or not suitable for an input file such as a file in an external repository. In most cases, this option is not needed. See `copy_data_to_bin` docstring for more info. | List of labels | optional | `[]` | +| optimized_runfiles_collection | Enable optimized runfiles collection where only default_runfiles of `deps` & `data` targets are gathered.

Under the hood this disables the defense-in-depth against `data` & `deps` targets not supplying all required runfiles, where runfiles collection also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of `data` & `deps` targets. | Boolean | optional | `True` | | types | Same as `srcs` except all files are also provided as "types" available to downstream rules for type checking.

For example, a js_library with only `.js` files that are intended to be imported as `.js` files by downstream type checking rules such as `ts_project` would list those files in `types`:

js_library(
    name = "js_lib",
    types = ["index.js"],
)
| List of labels | optional | `[]` | diff --git a/js/private/js_binary.bzl b/js/private/js_binary.bzl index 4b4732cb5..c1fa7330d 100644 --- a/js/private/js_binary.bzl +++ b/js/private/js_binary.bzl @@ -21,7 +21,7 @@ load("@aspect_bazel_lib//lib:expand_make_vars.bzl", "expand_locations", "expand_ load("@aspect_bazel_lib//lib:windows_utils.bzl", "create_windows_native_launcher_script") load("@bazel_skylib//lib:dicts.bzl", "dicts") load(":bash.bzl", "BASH_INITIALIZE_RUNFILES") -load(":js_helpers.bzl", "LOG_LEVELS", "envs_for_log_level", "gather_runfiles") +load(":js_helpers.bzl", "LOG_LEVELS", "envs_for_log_level", "gather_runfiles", "gather_runfiles_optimized") _DOC = """Execute a program in the Node.js runtime. @@ -259,6 +259,16 @@ _ATTRS = { """, default = True, ), + "optimized_runfiles_collection": attr.bool( + doc = """Enable optimized runfiles collection where only default_runfiles + of `deps` & `data` targets are gathered. + + Under the hood this disables the defense-in-depth against `data` & `deps` targets not supplying all required runfiles, + where runfiles collection also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of + `data` & `deps` targets. + """, + default = True, + ), "include_npm": attr.bool( doc = """When True, npm is included in the runfiles of the target. @@ -523,20 +533,33 @@ def _create_launcher(ctx, log_prefix_rule_set, log_prefix_rule, fixed_args = [], fail("include_npm requires a minimum @rules_nodejs version of 5.7.0") launcher_files.extend(nodeinfo.npm_files) - runfiles = gather_runfiles( - ctx = ctx, - sources = [], - data = ctx.attr.data, - data_files = [entry_point] + ctx.files.data, - deps = [], - copy_data_files_to_bin = ctx.attr.copy_data_to_bin, - no_copy_to_bin = ctx.files.no_copy_to_bin, - include_sources = ctx.attr.include_sources, - include_types = ctx.attr.include_types, - include_transitive_sources = ctx.attr.include_transitive_sources, - include_transitive_types = ctx.attr.include_transitive_types, - include_npm_sources = ctx.attr.include_npm_sources, - ).merge(ctx.runfiles( + if ctx.attr.optimized_runfiles_collection: + runfiles = gather_runfiles_optimized( + ctx = ctx, + sources = depset(), + data = ctx.attr.data, + data_files = [entry_point] + ctx.files.data, + deps = [], + copy_data_files_to_bin = ctx.attr.copy_data_to_bin, + no_copy_to_bin = ctx.files.no_copy_to_bin, + ) + else: + runfiles = gather_runfiles( + ctx = ctx, + sources = [], + data = ctx.attr.data, + data_files = [entry_point] + ctx.files.data, + deps = [], + copy_data_files_to_bin = ctx.attr.copy_data_to_bin, + no_copy_to_bin = ctx.files.no_copy_to_bin, + include_sources = ctx.attr.include_sources, + include_types = ctx.attr.include_types, + include_transitive_sources = ctx.attr.include_transitive_sources, + include_transitive_types = ctx.attr.include_transitive_types, + include_npm_sources = ctx.attr.include_npm_sources, + ) + + runfiles = runfiles.merge(ctx.runfiles( files = launcher_files, transitive_files = transitive_launcher_files, )) diff --git a/js/private/js_helpers.bzl b/js/private/js_helpers.bzl index 513526053..478391896 100644 --- a/js/private/js_helpers.bzl +++ b/js/private/js_helpers.bzl @@ -114,6 +114,73 @@ this option is not needed. return copy_file_to_bin_action(ctx, file) +def gather_runfiles_optimized( + ctx, + sources, + data, + deps, + data_files = [], + copy_data_files_to_bin = False, + no_copy_to_bin = []): + """Creates a runfiles object containing files in `sources`, default outputs from `data` and transitive runfiles from `data` & `deps`. + + See https://bazel.build/extending/rules#runfiles for more info on providing runfiles in build rules. + + Args: + ctx: the rule context + + sources: list or depset of files which should be included in runfiles + + data: list of data targets; default outputs and transitive runfiles are gather from these targets + + See https://bazel.build/reference/be/common-definitions#typical.data and + https://bazel.build/concepts/dependencies#data-dependencies for more info and guidance + on common usage of the `data` attribute in build rules. + + deps: list of dependency targets; only transitive runfiles are gather from these targets + + data_files: a list of data files which should be included in runfiles + + Data files that are source files are copied to the Bazel output tree when + `copy_data_files_to_bin` is set to `True`. + + copy_data_files_to_bin: When True, `data_files` that are source files and are copied to the + Bazel output tree before being passed to returned runfiles. + + no_copy_to_bin: List of files to not copy to the Bazel output tree when `copy_data_to_bin` is True. + + This is useful for exceptional cases where a `copy_data_files_to_bin` is not possible or not suitable for an input + file such as a file in an external repository. In most cases, this option is not needed. + See `copy_data_files_to_bin` docstring for more info. + + Returns: + A [runfiles](https://bazel.build/rules/lib/runfiles) object created with [ctx.runfiles](https://bazel.build/rules/lib/ctx#runfiles). + """ + + # Includes sources + if type(sources) == "list": + sources = depset(sources) + transitive_files_depsets = [sources] + + files_runfiles = [] + for d in data_files: + if copy_data_files_to_bin and d.is_source and d not in no_copy_to_bin: + files_runfiles.append(copy_js_file_to_bin_action(ctx, d)) + else: + files_runfiles.append(d) + + if len(files_runfiles) > 0: + transitive_files_depsets.append(depset(files_runfiles)) + + # Merge in transitive runfiles of data & deps. + return ctx.runfiles( + files = files_runfiles, + transitive_files = depset(transitive = transitive_files_depsets), + ).merge_all([ + target[DefaultInfo].default_runfiles + for target in data + deps + ]) + def gather_runfiles( ctx, sources, @@ -129,7 +196,7 @@ def gather_runfiles( include_npm_sources = True): """Creates a runfiles object containing files in `sources`, default outputs from `data` and transitive runfiles from `data` & `deps`. - As a defense in depth against `data` & `deps` targets not supplying all required runfiles, also + As a defense-in-depth against `data` & `deps` targets not supplying all required runfiles, also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of `data` & `deps` targets. @@ -140,20 +207,20 @@ def gather_runfiles( sources: list or depset of files which should be included in runfiles - deps: list of dependency targets; only transitive runfiles are gather from these targets - data: list of data targets; default outputs and transitive runfiles are gather from these targets See https://bazel.build/reference/be/common-definitions#typical.data and https://bazel.build/concepts/dependencies#data-dependencies for more info and guidance on common usage of the `data` attribute in build rules. + deps: list of dependency targets; only transitive runfiles are gather from these targets + data_files: a list of data files which should be included in runfiles Data files that are source files are copied to the Bazel output tree when `copy_data_files_to_bin` is set to `True`. - copy_data_files_to_bin: When True, `data` files that are source files and are copied to the + copy_data_files_to_bin: When True, `data_files` that are source files and are copied to the Bazel output tree before being passed to returned runfiles. no_copy_to_bin: List of files to not copy to the Bazel output tree when `copy_data_to_bin` is True. diff --git a/js/private/js_library.bzl b/js/private/js_library.bzl index fa731fd59..2d968711a 100644 --- a/js/private/js_library.bzl +++ b/js/private/js_library.bzl @@ -24,7 +24,7 @@ js_library( """ load("@aspect_bazel_lib//lib:copy_to_bin.bzl", "COPY_FILE_TO_BIN_TOOLCHAINS") -load(":js_helpers.bzl", "copy_js_file_to_bin_action", "gather_npm_package_store_infos", "gather_npm_sources", "gather_runfiles", "gather_transitive_sources", "gather_transitive_types") +load(":js_helpers.bzl", "copy_js_file_to_bin_action", "gather_npm_package_store_infos", "gather_npm_sources", "gather_runfiles", "gather_runfiles_optimized", "gather_transitive_sources", "gather_transitive_types") load(":js_info.bzl", "JsInfo", "js_info") _DOC = """A library of JavaScript sources. Provides JsInfo, the primary provider used in rules_js @@ -128,6 +128,16 @@ a runtime dependency on this target. doc = """When True, `data` files are copied to the Bazel output tree before being passed as inputs to runfiles.""", default = True, ), + "optimized_runfiles_collection": attr.bool( + doc = """Enable optimized runfiles collection where only default_runfiles + of `deps` & `data` targets are gathered. + + Under the hood this disables the defense-in-depth against `data` & `deps` targets not supplying all required runfiles, + where runfiles collection also gathers the transitive sources & transitive npm sources from the `JsInfo` providers of + `data` & `deps` targets. + """, + default = True, + ), } def _gather_sources_and_types(ctx, targets, files): @@ -226,20 +236,31 @@ def _js_library_impl(ctx): targets = ctx.attr.srcs + ctx.attr.data + ctx.attr.deps, ) - runfiles = gather_runfiles( - ctx = ctx, - sources = transitive_sources, - data = ctx.attr.data, - deps = ctx.attr.srcs + ctx.attr.types + ctx.attr.deps, - data_files = ctx.files.data, - copy_data_files_to_bin = ctx.attr.copy_data_to_bin, - no_copy_to_bin = ctx.files.no_copy_to_bin, - include_sources = True, - include_types = False, - include_transitive_sources = True, - include_transitive_types = False, - include_npm_sources = True, - ) + if ctx.attr.optimized_runfiles_collection: + runfiles = gather_runfiles_optimized( + ctx = ctx, + sources = transitive_sources, + data = ctx.attr.data, + deps = ctx.attr.srcs + ctx.attr.types + ctx.attr.deps, + data_files = ctx.files.data, + copy_data_files_to_bin = ctx.attr.copy_data_to_bin, + no_copy_to_bin = ctx.files.no_copy_to_bin, + ) + else: + runfiles = gather_runfiles( + ctx = ctx, + sources = transitive_sources, + data = ctx.attr.data, + deps = ctx.attr.srcs + ctx.attr.types + ctx.attr.deps, + data_files = ctx.files.data, + copy_data_files_to_bin = ctx.attr.copy_data_to_bin, + no_copy_to_bin = ctx.files.no_copy_to_bin, + include_sources = True, + include_types = False, + include_transitive_sources = True, + include_transitive_types = False, + include_npm_sources = True, + ) return [ js_info(