From dd351642dcdaefa9a5b82b4bc89f4a224113cbf6 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 13 Mar 2024 09:06:59 +0100 Subject: [PATCH] New attribute extra_compiler_files (#245) This helps inject files into the compiler action sandbox. The files can then be used in the compiler command line, e.g. the sanitizer ignore list. --------- Co-authored-by: Siddhartha Bagaria --- toolchain/cc_wrapper.sh.tpl | 49 +++++++++++--- toolchain/internal/configure.bzl | 8 ++- toolchain/internal/repo.bzl | 6 ++ toolchain/osx_cc_wrapper.sh.tpl | 106 +++++++++++++++++++------------ 4 files changed, 118 insertions(+), 51 deletions(-) diff --git a/toolchain/cc_wrapper.sh.tpl b/toolchain/cc_wrapper.sh.tpl index e70c880e..e972d57e 100644 --- a/toolchain/cc_wrapper.sh.tpl +++ b/toolchain/cc_wrapper.sh.tpl @@ -27,24 +27,55 @@ # shellcheck disable=SC1083 -set -eu +set -euo pipefail # See note in toolchain/internal/configure.bzl where we define # `wrapper_bin_prefix` for why this wrapper is needed. -# Call the C++ compiler. if [[ -f %{toolchain_path_prefix}bin/clang ]]; then - exec %{toolchain_path_prefix}bin/clang "$@" + execroot_path="" elif [[ ${BASH_SOURCE[0]} == "/"* ]]; then # Some consumers of `CcToolchainConfigInfo` (e.g. `cmake` from rules_foreign_cc) # change CWD and call $CC (this script) with its absolute path. - # the execroot (i.e. `cmake` from `rules_foreign_cc`) and call CC . For cases like this, - # we'll try to find `clang` relative to this script. - # This script is at _execroot_/external/_repo_name_/bin/clang_wrapper.sh - execroot_path="${BASH_SOURCE[0]%/*/*/*/*}" - clang="${execroot_path}/%{toolchain_path_prefix}bin/clang" - exec "${clang}" "${@}" + # For cases like this, we'll try to find `clang` through an absolute path. + # This script is at _execroot_/external/_repo_name_/bin/cc_wrapper.sh + execroot_path="${BASH_SOURCE[0]%/*/*/*/*}/" else echo >&2 "ERROR: could not find clang; PWD=\"${PWD}\"; PATH=\"${PATH}\"." exit 5 fi + +function sanitize_option() { + local -r opt=$1 + if [[ ${opt} == */cc_wrapper.sh ]]; then + printf "%s" "${execroot_path}%{toolchain_path_prefix}bin/clang" + elif [[ ${opt} =~ ^-fsanitize-(ignore|black)list=[^/] ]]; then + # shellcheck disable=SC2206 + parts=(${opt/=/ }) # Split flag name and value into array. + printf "%s" "${parts[0]}=${execroot_path}${parts[1]}" + else + printf "%s" "${opt}" + fi +} + +cmd=() +for ((i = 0; i <= $#; i++)); do + if [[ ${!i} == @* ]]; then + while IFS= read -r opt; do + opt="$( + set -e + sanitize_option "${opt}" + )" + cmd+=("${opt}") + done <"${!i:1}" + else + opt="$( + set -e + sanitize_option "${!i}" + )" + cmd+=("${opt}") + fi +done + +# Call the C++ compiler. +exec "${cmd[@]}" diff --git a/toolchain/internal/configure.bzl b/toolchain/internal/configure.bzl index 991e2cdc..270def71 100644 --- a/toolchain/internal/configure.bzl +++ b/toolchain/internal/configure.bzl @@ -159,6 +159,7 @@ def llvm_config_impl(rctx): coverage_link_flags_dict = rctx.attr.coverage_link_flags, unfiltered_compile_flags_dict = rctx.attr.unfiltered_compile_flags, llvm_version = llvm_version, + extra_compiler_files = rctx.attr.extra_compiler_files, ) host_dl_ext = "dylib" if os == "darwin" else "so" host_tools_info = dict([ @@ -412,7 +413,10 @@ filegroup( template = template + """ filegroup( name = "compiler-components-{suffix}", - srcs = [":sysroot-components-{suffix}"], + srcs = [ + ":sysroot-components-{suffix}", + {extra_compiler_files} + ], ) filegroup( @@ -445,6 +449,7 @@ filegroup( "{llvm_dist_label_prefix}clang", "{llvm_dist_label_prefix}include", ":sysroot-components-{suffix}", + {extra_compiler_files} ], ) @@ -541,6 +546,7 @@ cc_toolchain( extra_files_str = extra_files_str, host_tools_info = host_tools_info, cxx_builtin_include_directories = _list_to_string(cxx_builtin_include_directories), + extra_compiler_files = ("\"%s\"," % str(toolchain_info.extra_compiler_files)) if toolchain_info.extra_compiler_files else "", ) def _convenience_targets_str(rctx, use_absolute_paths, llvm_dist_rel_path, llvm_dist_label_prefix, host_dl_ext): diff --git a/toolchain/internal/repo.bzl b/toolchain/internal/repo.bzl index 4bdd1f28..10a5354b 100644 --- a/toolchain/internal/repo.bzl +++ b/toolchain/internal/repo.bzl @@ -233,6 +233,12 @@ _compiler_configuration_attrs = { mandatory = False, doc = ("Override the toolchain's `target_settings` attribute."), ), + "extra_compiler_files": attr.label( + mandatory = False, + doc = ("Files to be made available in the sandbox for compile actions. " + + "Mostly useful for providing files containing lists of flags, e.g. " + + "sanitizer ignorelists."), + ), } llvm_config_attrs = dict(common_attrs) diff --git a/toolchain/osx_cc_wrapper.sh.tpl b/toolchain/osx_cc_wrapper.sh.tpl index 4347e8bf..60658805 100755 --- a/toolchain/osx_cc_wrapper.sh.tpl +++ b/toolchain/osx_cc_wrapper.sh.tpl @@ -25,9 +25,9 @@ # See https://blogs.oracle.com/dipol/entry/dynamic_libraries_rpath_and_mac # on how to set those paths for Mach-O binaries. -# shellcheck disable=all +# shellcheck disable=SC1083 -set -eu +set -euo pipefail INSTALL_NAME_TOOL="/usr/bin/install_name_tool" @@ -39,29 +39,65 @@ OUTPUT= function parse_option() { local -r opt="$1" if [[ ${OUTPUT} == "1" ]]; then - OUTPUT=$opt - elif [[ $opt =~ ^-l(.*)$ ]]; then - LIBS="${BASH_REMATCH[1]} $LIBS" - elif [[ $opt =~ ^-L(.*)$ ]]; then - LIB_DIRS="${BASH_REMATCH[1]} $LIB_DIRS" - elif [[ $opt =~ ^\@loader_path/(.*)$ ]]; then + OUTPUT=${opt} + elif [[ ${opt} =~ ^-l(.*)$ ]]; then + LIBS="${BASH_REMATCH[1]} ${LIBS}" + elif [[ ${opt} =~ ^-L(.*)$ ]]; then + LIB_DIRS="${BASH_REMATCH[1]} ${LIB_DIRS}" + elif [[ ${opt} =~ ^\@loader_path/(.*)$ ]]; then RPATHS="${BASH_REMATCH[1]} ${RPATHS}" - elif [[ $opt =~ ^-Wl,-rpath,\@loader_path/(.*)$ ]]; then + elif [[ ${opt} =~ ^-Wl,-rpath,\@loader_path/(.*)$ ]]; then RPATHS="${BASH_REMATCH[1]} ${RPATHS}" - elif [[ $opt == "-o" ]]; then + elif [[ ${opt} == "-o" ]]; then # output is coming OUTPUT=1 fi } -# let parse the option list -for i in "$@"; do - if [[ $i == @* ]]; then +if [[ -f %{toolchain_path_prefix}bin/clang ]]; then + execroot_path="" +elif [[ ${BASH_SOURCE[0]} == "/"* ]]; then + # Some consumers of `CcToolchainConfigInfo` (e.g. `cmake` from rules_foreign_cc) + # change CWD and call $CC (this script) with its absolute path. + # For cases like this, we'll try to find `clang` through an absolute path. + # This script is at _execroot_/external/_repo_name_/bin/cc_wrapper.sh + execroot_path="${BASH_SOURCE[0]%/*/*/*/*}/" +else + echo >&2 "ERROR: could not find clang; PWD=\"${PWD}\"; PATH=\"${PATH}\"." + exit 5 +fi + +function sanitize_option() { + local -r opt=$1 + if [[ ${opt} == */cc_wrapper.sh ]]; then + printf "%s" "${execroot_path}%{toolchain_path_prefix}bin/clang" + elif [[ ${opt} =~ ^-fsanitize-(ignore|black)list=[^/] ]]; then + # shellcheck disable=SC2206 + parts=(${opt/=/ }) # Split flag name and value into array. + printf "%s" "${parts[0]}=${execroot_path}${parts[1]}" + else + printf "%s" "${opt}" + fi +} + +cmd=() +for ((i = 0; i <= $#; i++)); do + if [[ ${!i} == @* ]]; then while IFS= read -r opt; do - parse_option "$opt" - done <"${i:1}" || exit 1 + opt="$( + set -e + sanitize_option "${opt}" + )" + parse_option "${opt}" + cmd+=("${opt}") + done <"${!i:1}" else - parse_option "$i" + opt="$( + set -e + sanitize_option "${!i}" + )" + parse_option "${opt}" + cmd+=("${opt}") fi done @@ -86,27 +122,13 @@ if [[ ":${PATH}:" != *":/usr/bin:"* ]]; then fi # Call the C++ compiler. -if [[ -f %{toolchain_path_prefix}bin/clang ]]; then - %{toolchain_path_prefix}bin/clang "$@" -elif [[ ${BASH_SOURCE[0]} == "/"* ]]; then - # Some consumers of `CcToolchainConfigInfo` (e.g. `cmake` from rules_foreign_cc) - # change CWD and call $CC (this script) with its absolute path. - # the execroot (i.e. `cmake` from `rules_foreign_cc`) and call CC . For cases like this, - # we'll try to find `clang` relative to this script. - # This script is at _execroot_/external/_repo_name_/bin/cc_wrapper.sh - execroot_path="${BASH_SOURCE[0]%/*/*/*/*}" - clang="${execroot_path}/%{toolchain_path_prefix}bin/clang" - "${clang}" "${@}" -else - echo >&2 "ERROR: could not find clang; PWD=\"$(pwd)\"; PATH=\"${PATH}\"." - exit 5 -fi +"${cmd[@]}" function get_library_path() { for libdir in ${LIB_DIRS}; do - if [ -f ${libdir}/lib$1.so ]; then + if [[ -f "${libdir}/lib$1".so ]]; then echo "${libdir}/lib$1.so" - elif [ -f ${libdir}/lib$1.dylib ]; then + elif [[ -f "${libdir}"/lib"$1".dylib ]]; then echo "${libdir}/lib$1.dylib" fi done @@ -116,8 +138,9 @@ function get_library_path() { # and multi-level symlinks. function get_realpath() { local previous="$1" - local next=$(readlink "${previous}") - while [ -n "${next}" ]; do + local next + next="$(readlink "${previous}")" + while [[ -n ${next} ]]; do previous="${next}" next=$(readlink "${previous}") done @@ -127,24 +150,25 @@ function get_realpath() { # Get the path of a lib inside a tool function get_otool_path() { # the lib path is the path of the original lib relative to the workspace - get_realpath $1 | sed 's|^.*/bazel-out/|bazel-out/|' + get_realpath "$1" | sed 's|^.*/bazel-out/|bazel-out/|' } # Do replacements in the output for rpath in ${RPATHS}; do for lib in ${LIBS}; do unset libname - if [ -f "$(dirname ${OUTPUT})/${rpath}/lib${lib}.so" ]; then + if [[ -f "$(dirname "${OUTPUT}")/${rpath}/lib${lib}.so" ]]; then libname="lib${lib}.so" - elif [ -f "$(dirname ${OUTPUT})/${rpath}/lib${lib}.dylib" ]; then + elif [[ -f "$(dirname "${OUTPUT}")/${rpath}/lib${lib}.dylib" ]]; then libname="lib${lib}.dylib" fi # ${libname-} --> return $libname if defined, or undefined otherwise. This is to make # this set -e friendly if [[ -n ${libname-} ]]; then - libpath=$(get_library_path ${lib}) - if [ -n "${libpath}" ]; then - ${INSTALL_NAME_TOOL} -change $(get_otool_path "${libpath}") \ + libpath="$(get_library_path "${lib}")" + if [[ -n ${libpath} ]]; then + otool_path="$(get_otool_path "${libpath}")" + "${INSTALL_NAME_TOOL}" -change "${otool_path}" \ "@loader_path/${rpath}/${libname}" "${OUTPUT}" fi fi