From f2ee72f9465622b7b17e7155cae5528d5b940563 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Sun, 3 Mar 2024 21:06:57 +0100 Subject: [PATCH] Make system module map generation faster and fully hermetic --- MODULE.bazel | 1 + README.md | 5 - toolchain/deps.bzl | 9 ++ toolchain/internal/BUILD.bazel | 2 +- toolchain/internal/configure.bzl | 15 ++- .../internal/generate_system_module_map.sh | 35 ------ toolchain/internal/system_module_map.bzl | 107 ++++++++++++------ toolchain/internal/template.modulemap | 4 + toolchain/toolchains.bzl.tpl | 6 + 9 files changed, 105 insertions(+), 79 deletions(-) delete mode 100755 toolchain/internal/generate_system_module_map.sh create mode 100644 toolchain/internal/template.modulemap diff --git a/MODULE.bazel b/MODULE.bazel index 4030a220..5db65fdd 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -21,3 +21,4 @@ module( bazel_dep(name = "bazel_skylib", version = "1.4.2") bazel_dep(name = "rules_cc", version = "0.0.8") bazel_dep(name = "platforms", version = "0.0.7") +bazel_dep(name = "bazel_features", version = "1.8.0") diff --git a/README.md b/README.md index 77329483..e32b1de6 100644 --- a/README.md +++ b/README.md @@ -232,11 +232,6 @@ deps (also known as "depend on what you use") for `cc_*` rules. This feature can be enabled by enabling the `layering_check` feature on a per-target, per-package or global basis. -If one of toolchain or sysroot are specified via an absolute path rather than -managed by Bazel, the `layering_check` feature may require running -`bazel clean --expunge` after making changes to the set of header files -installed on the host. - ## Prior Art Other examples of toolchain configuration: diff --git a/toolchain/deps.bzl b/toolchain/deps.bzl index 9906088a..337c7411 100644 --- a/toolchain/deps.bzl +++ b/toolchain/deps.bzl @@ -36,3 +36,12 @@ def bazel_toolchain_dependencies(): ) # Skip bazel_skylib_workspace because we are not using lib/unittest.bzl + + # Load bazel_features if the user has not defined it. + if not native.existing_rule("bazel_features"): + http_archive( + name = "bazel_features", + sha256 = "70d355d5e34c3fe453f5556d6c0f02ffed0eb2c7ce4c8ee016d94d654bc6a014", + strip_prefix = "bazel_features-1.8.0", + urls = ["https://github.com/bazel-contrib/bazel_features/releases/download/v1.8.0/bazel_features-v1.8.0.tar.gz"], + ) diff --git a/toolchain/internal/BUILD.bazel b/toolchain/internal/BUILD.bazel index 4727011a..35d620f8 100644 --- a/toolchain/internal/BUILD.bazel +++ b/toolchain/internal/BUILD.bazel @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -exports_files(["generate_system_module_map.sh"]) +exports_files(["template.modulemap"]) diff --git a/toolchain/internal/configure.bzl b/toolchain/internal/configure.bzl index 3cd385a8..0f148f6b 100644 --- a/toolchain/internal/configure.bzl +++ b/toolchain/internal/configure.bzl @@ -531,7 +531,14 @@ cc_toolchain( unfiltered_compile_flags = _list_to_string(_dict_value(toolchain_info.unfiltered_compile_flags_dict, target_pair)), extra_files_str = extra_files_str, host_tools_info = host_tools_info, - cxx_builtin_include_directories = _list_to_string(cxx_builtin_include_directories), + cxx_builtin_include_directories = _list_to_string([ + # Filter out non-existing directories with absolute paths as they + # result in a -Wincomplete-umbrella warning when mentioned in the + # system module map. + dir + for dir in cxx_builtin_include_directories + if _is_hermetic_or_exists(rctx, dir, sysroot_prefix) + ]), ) def _convenience_targets_str(rctx, use_absolute_paths, llvm_dist_rel_path, llvm_dist_label_prefix, host_dl_ext): @@ -571,3 +578,9 @@ native_binary( llvm_dist_label_prefix = llvm_dist_label_prefix, host_dl_ext = host_dl_ext, ) + +def _is_hermetic_or_exists(rctx, path, sysroot_prefix): + path = path.replace("%sysroot%", sysroot_prefix) + if not path.startswith("/"): + return True + return rctx.path(path).exists diff --git a/toolchain/internal/generate_system_module_map.sh b/toolchain/internal/generate_system_module_map.sh deleted file mode 100755 index a65cf7f0..00000000 --- a/toolchain/internal/generate_system_module_map.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh -# Based on: -# https://github.com/bazelbuild/bazel/blob/44c5a1bbb26d3c61b37529b38406f1f5b0832baf/tools/cpp/generate_system_module_map.sh -# -# Copyright 2020 The Bazel Authors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eu - -echo 'module "crosstool" [system] {' - -for dir in "$@"; do - find -L "${dir}" -type f 2>/dev/null | LANG=C sort | uniq | while read -r header; do - case "${dir}" in - /*) ;; - *) header="${EXECROOT_PREFIX}${header}" ;; - esac - # The module map is expected to contain all possibly transitively included headers, including - # those provided by the sysroot or the host machine. - echo " textual header \"${header}\"" - done -done - -echo "}" diff --git a/toolchain/internal/system_module_map.bzl b/toolchain/internal/system_module_map.bzl index 938b2c16..1ba54dd4 100644 --- a/toolchain/internal/system_module_map.bzl +++ b/toolchain/internal/system_module_map.bzl @@ -12,64 +12,97 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@bazel_features//:features.bzl", "bazel_features") load("@bazel_skylib//lib:paths.bzl", "paths") +def _textual_header(file, *, include_prefixes, execroot_prefix): + path = file.path + for include_prefix in include_prefixes: + if path.startswith(include_prefix): + return " textual header \"{}{}\"".format(execroot_prefix, path) + + # The file is not under any of the include prefixes, + return None + +def _umbrella_submodule(path): + return """ + module "{path}" {{ + umbrella "{path}" + }}""".format(path = path) + def _system_module_map(ctx): module_map = ctx.actions.declare_file(ctx.attr.name + ".modulemap") - dirs = [] - non_hermetic = False - for dir in ctx.attr.cxx_builtin_include_directories: - if ctx.attr.sysroot_path and dir.startswith("%sysroot%"): - dir = ctx.attr.sysroot_path + dir[len("%sysroot%"):] - if dir.startswith("/"): - non_hermetic = True - dirs.append(paths.normalize(dir)) - - # If the action references a file outside of the execroot, it isn't safe to - # cache or run remotely. - execution_requirements = {} - if non_hermetic: - execution_requirements = { - "no-cache": "", - "no-remote": "", - } + absolute_path_dirs = [] + relative_include_prefixes = [] + for include_dir in ctx.attr.cxx_builtin_include_directories: + if ctx.attr.sysroot_path and include_dir.startswith("%sysroot%"): + include_dir = ctx.attr.sysroot_path + include_dir[len("%sysroot%"):] + include_dir = paths.normalize(include_dir) + if include_dir.startswith("/"): + absolute_path_dirs.append(include_dir) + else: + relative_include_prefixes.append(include_dir + "/") # The builtin include directories are relative to the execroot, but the # paths in the module map must be relative to the directory that contains # the module map. execroot_prefix = (module_map.dirname.count("/") + 1) * "../" + textual_header_closure = lambda file: _textual_header( + file, + include_prefixes = relative_include_prefixes, + execroot_prefix = execroot_prefix, + ) - ctx.actions.run_shell( - outputs = [module_map], - inputs = ctx.attr.cxx_builtin_include_files[DefaultInfo].files, - command = """ -{tool} "$@" > {module_map} -""".format( - tool = ctx.executable._generate_system_module_map.path, - module_map = module_map.path, - ), - arguments = dirs, - tools = [ctx.executable._generate_system_module_map], - env = {"EXECROOT_PREFIX": execroot_prefix}, - execution_requirements = execution_requirements, - mnemonic = "LLVMSystemModuleMap", - progress_message = "Generating system module map", + if bazel_features.rules.expand_template_has_computed_substitutions: + template_dict = ctx.actions.template_dict() + template_dict.add_joined( + "%textual_headers%", + ctx.attr.cxx_builtin_include_files[DefaultInfo].files, + join_with = "\n", + map_each = textual_header_closure, + allow_closure = True, + ) + template_dict.add_joined( + "%umbrella_submodules%", + depset(absolute_path_dirs), + join_with = "\n", + map_each = _umbrella_submodule, + ) + expand_template_kwargs = {"computed_substitutions": template_dict} + else: + # Memory-inefficient, but works on older Bazel versions that only + # offered computed_substitutions as an experimental feature. + textual_headers_or_none = [ + textual_header_closure(file) + for file in ctx.attr.cxx_builtin_include_files[DefaultInfo].files.to_list() + ] + expand_template_kwargs = {"substitutions": { + "%textual_headers%": "\n".join([h for h in textual_headers_or_none if h]), + "%umbrella_submodules%": "\n".join([_umbrella_submodule(d) for d in absolute_path_dirs]), + }} + + ctx.actions.expand_template( + template = ctx.file._module_map_template, + output = module_map, + **expand_template_kwargs ) return DefaultInfo(files = depset([module_map])) system_module_map = rule( - doc = """Generates a Clang module map for the toolchain and sysroot headers.""", + doc = """Generates a Clang module map for the toolchain and sysroot headers. + + Files under the configured built-in include directories that are managed by + Bazel are included as textual headers. All directories referenced by + absolute paths are included as umbrella submodules.""", implementation = _system_module_map, attrs = { "cxx_builtin_include_files": attr.label(mandatory = True), "cxx_builtin_include_directories": attr.string_list(mandatory = True), "sysroot_path": attr.string(), - "_generate_system_module_map": attr.label( - default = ":generate_system_module_map.sh", + "_module_map_template": attr.label( + default = "template.modulemap", allow_single_file = True, - cfg = "exec", - executable = True, ), }, ) diff --git a/toolchain/internal/template.modulemap b/toolchain/internal/template.modulemap new file mode 100644 index 00000000..65811e0b --- /dev/null +++ b/toolchain/internal/template.modulemap @@ -0,0 +1,4 @@ +module "crosstool" [system] { +%textual_headers% +%umbrella_submodules% +} diff --git a/toolchain/toolchains.bzl.tpl b/toolchain/toolchains.bzl.tpl index 2b1961f7..8637aa73 100644 --- a/toolchain/toolchains.bzl.tpl +++ b/toolchain/toolchains.bzl.tpl @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@bazel_features//:deps.bzl", "bazel_features_deps") + def llvm_register_toolchains(): + # This doesn't really belong here, but it's the only place to make this + # required call in a backwards-compatible way. + bazel_features_deps() + native.register_toolchains( %{toolchain_labels} )