From 9c25457204214a510f596bb9d30190e653a1ef8d Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Thu, 16 Jan 2025 18:59:44 +0100 Subject: [PATCH] Also check option values --- .../lib/runtime/BlazeCommandDispatcher.java | 33 +++++++++++++++++++ .../google/devtools/build/lib/skyframe/BUILD | 1 + .../lib/skyframe/BzlCompileFunction.java | 6 ++-- .../build/lib/util/StringEncoding.java | 11 +++++++ src/test/shell/bazel/client_test.sh | 18 ++++++++++ 5 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java index e3c71287404889..15970b8d01620c 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java @@ -46,6 +46,7 @@ import com.google.devtools.build.lib.events.Reporter; import com.google.devtools.build.lib.events.StoredEventHandler; import com.google.devtools.build.lib.exec.ExecutionOptions; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.profiler.MemoryProfiler; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.ProfilerTask; @@ -64,6 +65,7 @@ import com.google.devtools.build.lib.util.InterruptedFailureDetails; import com.google.devtools.build.lib.util.LoggingUtil; import com.google.devtools.build.lib.util.Pair; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.io.CommandExtensionReporter; import com.google.devtools.build.lib.util.io.DelegatingOutErr; import com.google.devtools.build.lib.util.io.OutErr; @@ -348,6 +350,37 @@ private BlazeCommandResult execExclusively( /* invocationPolicyFlagListBuilder= */ ImmutableList.builder()); DetailedExitCode earlyExitCode = parseResults.detailedExitCode(); OptionsParsingResult options = optionHandler.getOptionsResult(); + BuildLanguageOptions.Utf8EnforcementMode utf8EnforcementMode = + options.getOptions(BuildLanguageOptions.class).incompatibleEnforceUtf8; + switch (utf8EnforcementMode) { + case OFF -> {} + case WARNING, ERROR -> { + var violatingOption = + optionsParser.asCompleteListOfParsedOptions().stream() + .filter( + optionValue -> + !StringEncoding.isValidInternalUtf8(optionValue.getUnconvertedValue())) + .findFirst(); + if (violatingOption.isPresent()) { + switch (utf8EnforcementMode) { + case WARNING -> { + storedEventHandler.handle( + Event.warn( + "Option '%s' is not valid UTF-8; this can lead to inconsistent behavior and will be disallowed in a future version of Bazel." + .formatted(violatingOption.get().getCommandLineForm()))); + } + case ERROR -> { + String message = + "ERROR: Option '%s' is not valid UTF-8." + .formatted(violatingOption.get().getCommandLineForm()); + outErr.printErrLn(message); + return createDetailedCommandResult( + message, FailureDetails.Command.Code.OPTIONS_PARSE_FAILURE); + } + } + } + } + } // The initCommand call also records the start time for the timestamp granularity monitor. List commandEnvWarnings = new ArrayList<>(); diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index cf50c3fe78a04d..65e4e6145065e0 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -806,6 +806,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/packages", "//src/main/java/com/google/devtools/build/lib/packages:autoload_symbols", "//src/main/java/com/google/devtools/build/lib/packages/semantics", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", "//src/main/java/net/starlark/java/eval", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java index cc39a3f1a1763a..93a5bb7b143cfc 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java @@ -14,7 +14,6 @@ package com.google.devtools.build.lib.skyframe; -import com.google.common.base.Utf8; import com.google.common.collect.ImmutableMap; import com.google.common.hash.HashFunction; import com.google.devtools.build.lib.actions.FileValue; @@ -24,6 +23,7 @@ import com.google.devtools.build.lib.packages.AutoloadSymbols; import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment; import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.RootedPath; @@ -150,7 +150,7 @@ static BzlCompileValue computeInline( switch (semantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_UTF8)) { case OFF -> {} case WARNING -> { - if (!Utf8.isWellFormed(bytes)) { + if (!StringEncoding.isValidUtf8(bytes)) { env.getListener() .handle( Event.warn( @@ -159,7 +159,7 @@ static BzlCompileValue computeInline( } } case ERROR -> { - if (!Utf8.isWellFormed(bytes)) { + if (!StringEncoding.isValidUtf8(bytes)) { return BzlCompileValue.noFile( "compilation of module '%s' failed: not a valid UTF-8 encoded file", inputName); } diff --git a/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java b/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java index aeea4784ef807a..76148ae9c773af 100644 --- a/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java +++ b/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java @@ -18,6 +18,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Preconditions; +import com.google.common.base.Utf8; import com.google.devtools.build.lib.unsafe.StringUnsafe; import java.lang.reflect.Field; import java.nio.charset.Charset; @@ -141,6 +142,16 @@ public static String unicodeToInternal(String s) { : s; } + /** Whether the given string is a valid UTF-8 string in the internal encoding. */ + public static boolean isValidInternalUtf8(String s) { + return isValidUtf8(STRING_UNSAFE.getInternalStringBytes(s)); + } + + /** Whether the given bytes encode a valid UTF-8 string. */ + public static boolean isValidUtf8(byte[] bytes) { + return Utf8.isWellFormed(bytes); + } + private static final StringUnsafe STRING_UNSAFE = StringUnsafe.getInstance(); /** diff --git a/src/test/shell/bazel/client_test.sh b/src/test/shell/bazel/client_test.sh index 26da0e615e99fc..9383375262c10c 100755 --- a/src/test/shell/bazel/client_test.sh +++ b/src/test/shell/bazel/client_test.sh @@ -93,4 +93,22 @@ function test_install_base_garbage_collection() { fi } +function test_unicode_invalid_option_value_enforce_utf8_off { + bazel info --repo_env=FOO="$(printf '\x80')" --noincompatible_enforce_utf8 \ + &> "$TEST_log" || fail "Expected success" + expect_not_log "not valid UTF-8" +} + +function test_unicode_invalid_option_value_enforce_utf8_warning { + bazel info --repo_env=FOO="$(printf '\x80')" --incompatible_enforce_utf8=warning \ + &> "$TEST_log" || fail "Expected success" + expect_log "WARNING: Option '--repo_env=FOO=.*' is not valid UTF-8; this can lead to inconsistent behavior and will be disallowed in a future version of Bazel." +} + +function test_unicode_invalid_option_value_enforce_utf8_error { + bazel info --repo_env=FOO="$(printf '\x80')" --incompatible_enforce_utf8 \ + &> "$TEST_log" && fail "Expected failure" + expect_log "ERROR: Option '--repo_env=FOO=.*' is not valid UTF-8." +} + run_suite "client_test"