From d9e0b8a5bbd42edd5e7141fd5e1d407afe9e71ee Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Thu, 30 Jan 2025 14:05:55 -0800 Subject: [PATCH] Add `--incompatible_enforce_starlark_utf8` If enabled (or set to `error`), fail if Starlark files are not UTF-8 encoded. If set to `warning` (the default), emits a warning instead. Bazel already assumes that Starlark files are UTF-8 encoded for e.g. filenames in actions executed remotely. This flag doesn't affect this, it only makes encoding failures more visible. Work towards #374 Closes #24944. PiperOrigin-RevId: 721513249 Change-Id: I1d3363168c6cd5d37abf96e0401e34866b6679d7 --- .../devtools/build/lib/bazel/bzlmod/BUILD | 3 + .../lib/bazel/bzlmod/CompiledModuleFile.java | 20 ++++- .../lib/bazel/bzlmod/VendorFileFunction.java | 24 +++++- .../semantics/BuildLanguageOptions.java | 38 +++++++++ .../com/google/devtools/build/lib/rules/BUILD | 1 + .../repository/ResolvedFileFunction.java | 19 ++++- .../google/devtools/build/lib/skyframe/BUILD | 16 ++++ .../lib/skyframe/BzlCompileFunction.java | 15 +++- .../build/lib/skyframe/PackageFunction.java | 30 +++++++- .../build/lib/skyframe/RepoFileFunction.java | 23 +++++- .../build/lib/skyframe/StarlarkUtil.java | 77 +++++++++++++++++++ .../lib/skyframe/WorkspaceFileFunction.java | 25 ++++-- .../net/starlark/java/syntax/ParserInput.java | 5 +- .../packages/semantics/ConsistencyTest.java | 10 +++ .../lib/skyframe/BzlCompileFunctionTest.java | 52 +++++++++++++ 15 files changed, 334 insertions(+), 24 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/skyframe/StarlarkUtil.java diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD index bc92a911b515e5..d1a314ac2621d3 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD @@ -185,6 +185,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions", + "//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization:visible-for-serialization", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec:serialization-constant", @@ -251,6 +252,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/packages", + "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_directory_value", @@ -260,6 +262,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function", "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", + "//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util", "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java index 14c67ca65a6457..0d178332688693 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java @@ -20,7 +20,9 @@ import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment; import com.google.devtools.build.lib.packages.DotBazelFileSyntaxChecker; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.server.FailureDetails.ExternalDeps.Code; +import com.google.devtools.build.lib.skyframe.StarlarkUtil; import net.starlark.java.eval.EvalException; import net.starlark.java.eval.Module; import net.starlark.java.eval.Starlark; @@ -62,9 +64,21 @@ public static CompiledModuleFile parseAndCompile( BazelStarlarkEnvironment starlarkEnv, ExtendedEventHandler eventHandler) throws ExternalDepsException { - StarlarkFile starlarkFile = - StarlarkFile.parse( - ParserInput.fromLatin1(moduleFile.getContent(), moduleFile.getLocation())); + ParserInput parserInput; + try { + parserInput = + StarlarkUtil.createParserInput( + moduleFile.getContent(), + moduleFile.getLocation(), + starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + eventHandler); + } catch ( + @SuppressWarnings("UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + throw ExternalDepsException.withMessage( + Code.BAD_MODULE, "error reading MODULE.bazel file for %s", moduleKey); + } + StarlarkFile starlarkFile = StarlarkFile.parse(parserInput); if (!starlarkFile.ok()) { Event.replayEventsOn(eventHandler, starlarkFile.errors()); throw ExternalDepsException.withMessage( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java index aaba70a5ffdf89..612b7cfdc69059 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java @@ -23,8 +23,10 @@ import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment; import com.google.devtools.build.lib.packages.DotBazelFileSyntaxChecker; import com.google.devtools.build.lib.packages.VendorThreadContext; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction; import com.google.devtools.build.lib.skyframe.PrecomputedValue; +import com.google.devtools.build.lib.skyframe.StarlarkUtil; import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -115,7 +117,7 @@ private VendorThreadContext getVendorFileContext( Environment env, SkyKey skyKey, Path vendorFilePath, StarlarkSemantics starlarkSemantics) throws VendorFileFunctionException, InterruptedException { try (Mutability mu = Mutability.create("vendor file")) { - StarlarkFile vendorFile = readAndParseVendorFile(vendorFilePath, env); + StarlarkFile vendorFile = readAndParseVendorFile(vendorFilePath, env, starlarkSemantics); new DotBazelFileSyntaxChecker("VENDOR.bazel files", /* canLoadBzl= */ false) .check(vendorFile); net.starlark.java.eval.Module predeclaredEnv = @@ -147,7 +149,8 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath) } } - private static StarlarkFile readAndParseVendorFile(Path path, Environment env) + private static StarlarkFile readAndParseVendorFile( + Path path, Environment env, StarlarkSemantics starlarkSemantics) throws VendorFileFunctionException { byte[] contents; try { @@ -156,8 +159,21 @@ private static StarlarkFile readAndParseVendorFile(Path path, Environment env) throw new VendorFileFunctionException( new IOException("error reading VENDOR.bazel file", e), Transience.TRANSIENT); } - StarlarkFile starlarkFile = - StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString())); + ParserInput parserInput; + try { + parserInput = + StarlarkUtil.createParserInput( + contents, + path.getPathString(), + starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + env.getListener()); + } catch ( + @SuppressWarnings("UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + throw new VendorFileFunctionException( + new BadVendorFileException("error reading VENDOR.bazel file"), Transience.PERSISTENT); + } + StarlarkFile starlarkFile = StarlarkFile.parse(parserInput); if (!starlarkFile.ok()) { Event.replayEventsOn(env.getListener(), starlarkFile.errors()); throw new VendorFileFunctionException( diff --git a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java index 0f1673f5120bcc..1bf3f9329a94b3 100644 --- a/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java +++ b/src/main/java/com/google/devtools/build/lib/packages/semantics/BuildLanguageOptions.java @@ -20,6 +20,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Interner; import com.google.devtools.build.lib.concurrent.BlazeInterners; +import com.google.devtools.common.options.BoolOrEnumConverter; import com.google.devtools.common.options.Converters.CommaSeparatedNonEmptyOptionListConverter; import com.google.devtools.common.options.Converters.CommaSeparatedOptionListConverter; import com.google.devtools.common.options.Converters.CommaSeparatedOptionSetConverter; @@ -824,6 +825,38 @@ public final class BuildLanguageOptions extends OptionsBase { + " number of files is not 1.") public boolean incompatibleLocationsPrefersExecutable; + /** An enum for specifying different modes for UTF-8 checking of Starlark files. */ + public enum Utf8EnforcementMode { + OFF, + WARNING, + ERROR; + + /** Converts to {@link Utf8EnforcementMode}. */ + public static class Converter extends BoolOrEnumConverter { + public Converter() { + super( + Utf8EnforcementMode.class, + "UTF-8 enforcement mode", + Utf8EnforcementMode.ERROR, + Utf8EnforcementMode.OFF); + } + } + } + + @Option( + name = "incompatible_enforce_starlark_utf8", + defaultValue = "warning", + converter = Utf8EnforcementMode.Converter.class, + documentationCategory = OptionDocumentationCategory.INPUT_STRICTNESS, + effectTags = {OptionEffectTag.LOADING_AND_ANALYSIS}, + metadataTags = {OptionMetadataTag.INCOMPATIBLE_CHANGE}, + help = + "If enabled (or set to 'error'), fail if Starlark files are not UTF-8 encoded. If set to" + + " 'warning', emit a warning instead. If set to 'off', Bazel assumes that Starlark" + + " files are UTF-8 encoded but does not verify this assumption. Note that Starlark" + + " files which are not UTF-8 encoded can cause Bazel to behave inconsistently.") + public Utf8EnforcementMode incompatibleEnforceStarlarkUtf8; + /** * An interner to reduce the number of StarlarkSemantics instances. A single Blaze instance should * never accumulate a large number of these and being able to shortcut on object identity makes a @@ -894,6 +927,7 @@ public StarlarkSemantics toStarlarkSemantics() { .setBool(StarlarkSemantics.PRINT_TEST_MARKER, internalStarlarkFlagTestCanary) .setBool( INCOMPATIBLE_DO_NOT_SPLIT_LINKING_CMDLINE, incompatibleDoNotSplitLinkingCmdline) + .set(INCOMPATIBLE_ENFORCE_STARLARK_UTF8, incompatibleEnforceStarlarkUtf8) .setBool( INCOMPATIBLE_USE_CC_CONFIGURE_FROM_RULES_CC, incompatibleUseCcConfigureFromRulesCc) .setBool( @@ -1066,6 +1100,10 @@ public StarlarkSemantics toStarlarkSemantics() { new StarlarkSemantics.Key<>("repositories_without_autoloads", ImmutableList.of()); public static final StarlarkSemantics.Key> EXPERIMENTAL_BUILTINS_INJECTION_OVERRIDE = new StarlarkSemantics.Key<>("experimental_builtins_injection_override", ImmutableList.of()); + public static final StarlarkSemantics.Key + INCOMPATIBLE_ENFORCE_STARLARK_UTF8 = + new StarlarkSemantics.Key<>( + "incompatible_enforce_starlark_utf8", Utf8EnforcementMode.WARNING); public static final StarlarkSemantics.Key MAX_COMPUTATION_STEPS = new StarlarkSemantics.Key<>("max_computation_steps", 0L); public static final StarlarkSemantics.Key NESTED_SET_DEPTH_LIMIT = diff --git a/src/main/java/com/google/devtools/build/lib/rules/BUILD b/src/main/java/com/google/devtools/build/lib/rules/BUILD index 202cae413a4d0a..f7464afeb90c5e 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/BUILD @@ -415,6 +415,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_function", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_functions", "//src/main/java/com/google/devtools/build/lib/skyframe:sky_value_dirtiness_checker", + "//src/main/java/com/google/devtools/build/lib/skyframe:starlark_util", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", diff --git a/src/main/java/com/google/devtools/build/lib/rules/repository/ResolvedFileFunction.java b/src/main/java/com/google/devtools/build/lib/rules/repository/ResolvedFileFunction.java index 5e81081a1792d4..aa49f345d127b8 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/repository/ResolvedFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/repository/ResolvedFileFunction.java @@ -21,8 +21,10 @@ import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.packages.BuildFileContainsErrorsException; import com.google.devtools.build.lib.packages.NoSuchThingException; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.rules.repository.ResolvedFileValue.ResolvedFileKey; import com.google.devtools.build.lib.skyframe.PrecomputedValue; +import com.google.devtools.build.lib.skyframe.StarlarkUtil; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.skyframe.SkyFunction; import com.google.devtools.build.skyframe.SkyFunctionException; @@ -72,8 +74,21 @@ public SkyValue compute(SkyKey skyKey, Environment env) key.getPath().asPath(), key.getPath().asPath().getFileSize()); // parse - StarlarkFile file = - StarlarkFile.parse(ParserInput.fromLatin1(bytes, key.getPath().asPath().toString())); + ParserInput parserInput; + try { + parserInput = + StarlarkUtil.createParserInput( + bytes, + key.getPath().asPath().toString(), + starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + env.getListener()); + } catch ( + @SuppressWarnings( + "UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + throw resolvedValueError("Failed to read resolved file " + key.getPath()); + } + StarlarkFile file = StarlarkFile.parse(parserInput); if (!file.ok()) { Event.replayEventsOn(env.getListener(), file.errors()); throw resolvedValueError("Failed to parse resolved file " + key.getPath()); diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index d75e34851349fb..5ef958b1db4a90 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -192,6 +192,7 @@ java_library( ":skyframe_stats", ":skyvalue_retriever_utils", ":starlark_builtins_value", + ":starlark_util", ":state_informing_sky_function_environment", ":target_completion_value", ":target_cycle_reporter", @@ -793,11 +794,13 @@ java_library( ":bzl_compile_value", ":precomputed_value", ":starlark_builtins_value", + ":starlark_util", "//src/main/java/com/google/devtools/build/lib/actions:file_metadata", "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/packages", "//src/main/java/com/google/devtools/build/lib/packages:autoload_symbols", + "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", "//src/main/java/net/starlark/java/eval", @@ -2337,10 +2340,12 @@ java_library( deps = [ ":precomputed_value", ":repo_file_value", + ":starlark_util", "//src/main/java/com/google/devtools/build/lib/actions:file_metadata", "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/events", "//src/main/java/com/google/devtools/build/lib/packages", + "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_directory_value", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", @@ -3269,3 +3274,14 @@ java_library( "//third_party:guava", ], ) + +java_library( + name = "starlark_util", + srcs = ["StarlarkUtil.java"], + deps = [ + "//src/main/java/com/google/devtools/build/lib/events", + "//src/main/java/com/google/devtools/build/lib/packages/semantics", + "//src/main/java/net/starlark/java/syntax", + "//third_party:guava", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java index f5bc53498b3c17..e4bb1ab9eb09ab 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BzlCompileFunction.java @@ -22,6 +22,7 @@ import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.packages.AutoloadSymbols; import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.RootedPath; @@ -169,7 +170,19 @@ static BzlCompileValue computeInline( } // We have all deps. Parse, resolve, and return. - ParserInput input = ParserInput.fromLatin1(bytes, inputName); + ParserInput input; + try { + input = + StarlarkUtil.createParserInput( + bytes, + inputName, + semantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + env.getListener()); + } catch ( + @SuppressWarnings("UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + return BzlCompileValue.noFile("compilation of '%s' failed", inputName); + } FileOptions options = FileOptions.builder() // By default, Starlark load statements create file-local bindings. diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java index c5fb1bbca4f8ad..aa1773136139ec 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/PackageFunction.java @@ -32,6 +32,8 @@ import com.google.devtools.build.lib.cmdline.RepositoryMapping; import com.google.devtools.build.lib.cmdline.RepositoryName; import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.ExtendedEventHandler; +import com.google.devtools.build.lib.events.StoredEventHandler; import com.google.devtools.build.lib.io.FileSymlinkException; import com.google.devtools.build.lib.io.InconsistentFilesystemException; import com.google.devtools.build.lib.packages.AutoloadSymbols; @@ -1050,7 +1052,8 @@ private LoadedPackage loadPackage( buildFileRootedPath, buildFileValue, starlarkBuiltinsValue, - preludeBindings); + preludeBindings, + env.getListener()); state.compiledBuildFile = compiled; } @@ -1202,7 +1205,8 @@ private CompiledBuildFile compileBuildFile( RootedPath buildFilePath, FileValue buildFileValue, StarlarkBuiltinsValue starlarkBuiltinsValue, - @Nullable Map preludeBindings) + @Nullable Map preludeBindings, + ExtendedEventHandler reporter) throws PackageFunctionException { // Though it could be in principle, `cpuBoundSemaphore` is not held here as this method does // not show up in profiles as being significantly impacted by thrashing. It could be worth doing @@ -1236,7 +1240,27 @@ private CompiledBuildFile compileBuildFile( // If control flow reaches here, we're in territory that is deliberately unsound. // See the javadoc for ActionOnIOExceptionReadingBuildFile. } - ParserInput input = ParserInput.fromLatin1(buildFileBytes, inputFile.toString()); + StoredEventHandler handler = new StoredEventHandler(); + ParserInput input; + try { + input = + StarlarkUtil.createParserInput( + buildFileBytes, + inputFile.toString(), + semantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + handler); + } catch (StarlarkUtil.InvalidUtf8Exception e) { + handler.replayOn(reporter); + throw PackageFunctionException.builder() + .setType(PackageFunctionException.Type.BUILD_FILE_CONTAINS_ERRORS) + .setPackageIdentifier(packageId) + .setTransience(Transience.PERSISTENT) + .setException(e) + .setMessage("error reading " + inputFile.toString()) + .setPackageLoadingCode(PackageLoading.Code.STARLARK_EVAL_ERROR) + .build(); + } + handler.replayOn(reporter); // Options for processing BUILD files. FileOptions options = diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java index e41f2eae14db28..5c825db1725fdb 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java @@ -25,6 +25,7 @@ import com.google.devtools.build.lib.packages.BazelStarlarkEnvironment; import com.google.devtools.build.lib.packages.DotBazelFileSyntaxChecker; import com.google.devtools.build.lib.packages.RepoThreadContext; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; import com.google.devtools.build.lib.rules.repository.RepositoryDirectoryValue; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -98,11 +99,12 @@ public SkyValue compute(SkyKey skyKey, Environment env) return null; } - StarlarkFile repoFile = readAndParseRepoFile(repoFilePath.asPath(), env); + StarlarkFile repoFile = readAndParseRepoFile(repoFilePath.asPath(), env, starlarkSemantics); return evalRepoFile(repoFile, repoName, starlarkSemantics, env.getListener()); } - private static StarlarkFile readAndParseRepoFile(Path path, Environment env) + private static StarlarkFile readAndParseRepoFile( + Path path, Environment env, StarlarkSemantics starlarkSemantics) throws RepoFileFunctionException { byte[] contents; try { @@ -111,8 +113,21 @@ private static StarlarkFile readAndParseRepoFile(Path path, Environment env) throw new RepoFileFunctionException( new IOException("error reading REPO.bazel file at " + path, e), Transience.TRANSIENT); } - StarlarkFile starlarkFile = - StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString())); + ParserInput parserInput; + try { + parserInput = + StarlarkUtil.createParserInput( + contents, + path.getPathString(), + starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + env.getListener()); + } catch ( + @SuppressWarnings("UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + throw new RepoFileFunctionException( + new BadRepoFileException("error reading REPO.bazel file at " + path)); + } + StarlarkFile starlarkFile = StarlarkFile.parse(parserInput); if (!starlarkFile.ok()) { Event.replayEventsOn(env.getListener(), starlarkFile.errors()); throw new RepoFileFunctionException( diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/StarlarkUtil.java b/src/main/java/com/google/devtools/build/lib/skyframe/StarlarkUtil.java new file mode 100644 index 00000000000000..8e63585c825780 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/StarlarkUtil.java @@ -0,0 +1,77 @@ +// Copyright 2025 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe; + +import com.google.common.base.Utf8; +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.EventHandler; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions; +import com.google.devtools.build.lib.packages.semantics.BuildLanguageOptions.Utf8EnforcementMode; +import net.starlark.java.syntax.Location; +import net.starlark.java.syntax.ParserInput; + +/** Helper functions for Bazel's use of Starlark. */ +public final class StarlarkUtil { + + public static final String INVALID_UTF_8_MESSAGE = + "not a valid UTF-8 encoded file; this can lead to inconsistent behavior and" + + " will be disallowed in a future version of Bazel"; + + /** + * Produces a {@link ParserInput} from the raw bytes of a file while optionally enforcing that the + * contents are valid UTF-8. + * + *

Warnings and errors are reported to the {@link EventHandler}. + * + * @throws InvalidUtf8Exception if the bytes are not valid UTF-8 and the enforcement mode is + * {@link Utf8EnforcementMode#ERROR}. + */ + // This method is the only one that is supposed to use the deprecated ParserInput.fromLatin1 + // method. + @SuppressWarnings("deprecation") // See https://github.com/bazelbuild/bazel/issues/374 + public static ParserInput createParserInput( + byte[] bytes, String file, Utf8EnforcementMode utf8EnforcementMode, EventHandler reporter) + throws InvalidUtf8Exception { + switch (utf8EnforcementMode) { + case OFF -> {} + case WARNING -> { + if (!Utf8.isWellFormed(bytes)) { + reporter.handle(Event.warn(Location.fromFile(file), INVALID_UTF_8_MESSAGE)); + } + } + case ERROR -> { + if (!Utf8.isWellFormed(bytes)) { + reporter.handle( + Event.error( + Location.fromFile(file), + String.format( + "%s. For a temporary workaround, see the --%s flag.", + INVALID_UTF_8_MESSAGE, + BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8))); + throw new InvalidUtf8Exception(file + ": " + INVALID_UTF_8_MESSAGE); + } + } + } + return ParserInput.fromLatin1(bytes, file); + } + + /** Exception thrown when a Starlark file is not valid UTF-8. */ + public static final class InvalidUtf8Exception extends Exception { + public InvalidUtf8Exception(String message) { + super(message); + } + } + + private StarlarkUtil() {} +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/WorkspaceFileFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/WorkspaceFileFunction.java index b6539bd4ae2067..74b975ac71c803 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/WorkspaceFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/WorkspaceFileFunction.java @@ -187,10 +187,10 @@ public SkyValue compute(SkyKey skyKey, Environment env) } else if (useWorkspaceBzlmodFile) { // If Bzlmod is enabled and WORKSPACE.bzlmod exists, then use this file instead of the // original WORKSPACE file. - userWorkspaceFile = parseWorkspaceFile(workspaceBzlmodFile, options, env); + userWorkspaceFile = parseWorkspaceFile(workspaceBzlmodFile, options, starlarkSemantics, env); } else if (workspaceFileValue.exists()) { // normal WORKSPACE file - userWorkspaceFile = parseWorkspaceFile(workspaceFile, options, env); + userWorkspaceFile = parseWorkspaceFile(workspaceFile, options, starlarkSemantics, env); } boolean shouldSkipWorkspacePrefix = useWorkspaceResolvedFile || useWorkspaceBzlmodFile; @@ -414,7 +414,10 @@ public SkyValue compute(SkyKey skyKey, Environment env) } private static StarlarkFile parseWorkspaceFile( - RootedPath workspaceFile, FileOptions options, Environment env) + RootedPath workspaceFile, + FileOptions options, + StarlarkSemantics starlarkSemantics, + Environment env) throws WorkspaceFileFunctionException { Path workspacePath = workspaceFile.asPath(); byte[] bytes; @@ -423,8 +426,20 @@ private static StarlarkFile parseWorkspaceFile( } catch (IOException ex) { throw new WorkspaceFileFunctionException(ex, Transience.TRANSIENT); } - StarlarkFile file = - StarlarkFile.parse(ParserInput.fromLatin1(bytes, workspacePath.toString()), options); + ParserInput parserInput; + try { + parserInput = + StarlarkUtil.createParserInput( + bytes, + workspacePath.toString(), + starlarkSemantics.get(BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8), + env.getListener()); + } catch ( + @SuppressWarnings("UnusedException") // createParserInput() reports its own error message + StarlarkUtil.InvalidUtf8Exception e) { + throw resolvedValueError("Failed to read WORKSPACE file"); + } + StarlarkFile file = StarlarkFile.parse(parserInput); if (!file.ok()) { Event.replayEventsOn(env.getListener(), file.errors()); throw resolvedValueError("Failed to parse WORKSPACE file"); diff --git a/src/main/java/net/starlark/java/syntax/ParserInput.java b/src/main/java/net/starlark/java/syntax/ParserInput.java index 33f6a750f834cf..2bcdab9c29b26c 100644 --- a/src/main/java/net/starlark/java/syntax/ParserInput.java +++ b/src/main/java/net/starlark/java/syntax/ParserInput.java @@ -83,9 +83,10 @@ public static ParserInput fromUTF8(byte[] bytes, String file) { * Returns an input source that reads from a Latin1-encoded byte array. The caller is free to * subsequently mutate the array. * - *

This function exists to support legacy uses of Latin1 in Bazel. Do not use Latin1 in new - * applications. (Consider this deprecated, without the fussy warnings.) + * @deprecated This function exists to support legacy uses of Latin1 in Bazel. Do not use Latin1 + * in new applications. */ + @Deprecated public static ParserInput fromLatin1(byte[] bytes, String file) { char[] chars = new char[bytes.length]; for (int i = 0; i < bytes.length; i++) { diff --git a/src/test/java/com/google/devtools/build/lib/packages/semantics/ConsistencyTest.java b/src/test/java/com/google/devtools/build/lib/packages/semantics/ConsistencyTest.java index aa353e5671ad8a..6ecb0b20f1738c 100644 --- a/src/test/java/com/google/devtools/build/lib/packages/semantics/ConsistencyTest.java +++ b/src/test/java/com/google/devtools/build/lib/packages/semantics/ConsistencyTest.java @@ -23,6 +23,7 @@ import com.google.devtools.common.options.Options; import com.google.devtools.common.options.OptionsParser; import java.util.Arrays; +import java.util.Locale; import java.util.Random; import net.starlark.java.eval.StarlarkSemantics; import org.junit.Test; @@ -142,6 +143,11 @@ private static BuildLanguageOptions buildRandomOptions(Random rand) throws Excep "--incompatible_do_not_split_linking_cmdline=" + rand.nextBoolean(), "--incompatible_enable_deprecated_label_apis=" + rand.nextBoolean(), "--incompatible_java_common_parameters=" + rand.nextBoolean(), + "--incompatible_enforce_starlark_utf8=" + + BuildLanguageOptions.Utf8EnforcementMode.values()[ + rand.nextInt(BuildLanguageOptions.Utf8EnforcementMode.values().length)] + .toString() + .toLowerCase(Locale.ROOT), "--incompatible_locations_prefers_executable=" + rand.nextBoolean(), "--incompatible_merge_fixed_and_default_shell_env=" + rand.nextBoolean(), "--incompatible_no_attr_license=" + rand.nextBoolean(), @@ -194,6 +200,10 @@ private static StarlarkSemantics buildRandomSemantics(Random rand) { .setBool(BuildLanguageOptions.INCOMPATIBLE_DO_NOT_SPLIT_LINKING_CMDLINE, rand.nextBoolean()) .setBool(BuildLanguageOptions.INCOMPATIBLE_ENABLE_DEPRECATED_LABEL_APIS, rand.nextBoolean()) .setBool(BuildLanguageOptions.INCOMPATIBLE_JAVA_COMMON_PARAMETERS, rand.nextBoolean()) + .set( + BuildLanguageOptions.INCOMPATIBLE_ENFORCE_STARLARK_UTF8, + BuildLanguageOptions.Utf8EnforcementMode.values()[ + rand.nextInt(BuildLanguageOptions.Utf8EnforcementMode.values().length)]) .setBool(BuildLanguageOptions.INCOMPATIBLE_LOCATIONS_PREFERS_EXECUTABLE, rand.nextBoolean()) .setBool( BuildLanguageOptions.INCOMPATIBLE_MERGE_FIXED_AND_DEFAULT_SHELL_ENV, rand.nextBoolean()) diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/BzlCompileFunctionTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/BzlCompileFunctionTest.java index 846911cfa12ba8..487db9e8dd9ede 100644 --- a/src/test/java/com/google/devtools/build/lib/skyframe/BzlCompileFunctionTest.java +++ b/src/test/java/com/google/devtools/build/lib/skyframe/BzlCompileFunctionTest.java @@ -168,4 +168,56 @@ public void testBigIntegerLiterals() throws Exception { assertThat(val.toString()).isEqualTo("[-9223372036854775809, 9223372036854775808]"); } } + + @Test + public void testInvalidUtf8_enforcementOff() throws Exception { + setBuildLanguageOptions("--noincompatible_enforce_starlark_utf8"); + + scratch.file("pkg/BUILD"); + scratch.file("pkg/foo.bzl", new byte[] {'#', ' ', (byte) 0x80}); + + SkyKey skyKey = BzlCompileValue.key(root, Label.parseCanonicalUnchecked("//pkg:foo.bzl")); + EvaluationResult result = + SkyframeExecutorTestUtils.evaluate( + getSkyframeExecutor(), skyKey, /* keepGoing= */ false, reporter); + assertThat(result.get(skyKey).lookupSuccessful()).isTrue(); + assertNoEvents(); + } + + @Test + public void testInvalidUtf8_enforcementWarning() throws Exception { + setBuildLanguageOptions("--incompatible_enforce_starlark_utf8=warning"); + + scratch.file("pkg/BUILD"); + scratch.file("pkg/foo.bzl", new byte[] {'#', ' ', (byte) 0x80}); + + SkyKey skyKey = BzlCompileValue.key(root, Label.parseCanonicalUnchecked("//pkg:foo.bzl")); + EvaluationResult result = + SkyframeExecutorTestUtils.evaluate( + getSkyframeExecutor(), skyKey, /* keepGoing= */ false, reporter); + assertThat(result.get(skyKey).lookupSuccessful()).isTrue(); + assertContainsEvent( + "WARNING /workspace/pkg/foo.bzl: not a valid UTF-8 encoded file; this can lead to" + + " inconsistent behavior and will be disallowed in a future version of Bazel"); + } + + @Test + public void testInvalidUtf8_enforcementError() throws Exception { + reporter.removeHandler(failFastHandler); + setBuildLanguageOptions("--incompatible_enforce_starlark_utf8"); + + scratch.file("pkg/BUILD"); + scratch.file("pkg/foo.bzl", new byte[] {'#', ' ', (byte) 0x80}); + + SkyKey skyKey = BzlCompileValue.key(root, Label.parseCanonicalUnchecked("//pkg:foo.bzl")); + EvaluationResult result = + SkyframeExecutorTestUtils.evaluate( + getSkyframeExecutor(), skyKey, /* keepGoing= */ false, reporter); + assertThat(result.get(skyKey).lookupSuccessful()).isFalse(); + assertThat(result.get(skyKey).getError()) + .isEqualTo("compilation of '/workspace/pkg/foo.bzl' failed"); + assertContainsEvent( + "ERROR /workspace/pkg/foo.bzl: not a valid UTF-8 encoded file; this can lead to" + + " inconsistent behavior and will be disallowed in a future version of Bazel"); + } }