Skip to content

Commit

Permalink
Default to a UTF-8 locale in Java stub template
Browse files Browse the repository at this point in the history
On non-macOS Unix, without any locale variable set, OpenJDK defaults
to using ASCII rather than UTF-8 as the encoding for file system paths
(i.e., the value of the `sun.jnu.encoding` property).

Fixes bazelbuild#15106

Closes bazelbuild#15159.

PiperOrigin-RevId: 445520597
  • Loading branch information
fmeum authored and copybara-github committed Apr 29, 2022
1 parent 793ae52 commit 17cfa01
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,16 @@ function is_macos() {
[[ "${OSTYPE}" =~ darwin* ]]
}

function available_utf8_locale() {
  # Prints the name of the first candidate locale for which `locale charmap`
  # reports UTF-8 when it is set as LC_CTYPE; prints nothing if none does.
  #
  # The candidates, C.UTF-8 and en_US.UTF-8, are tried in that order. Neither
  # causes language-specific effects when used as LC_CTYPE, but neither is
  # guaranteed to exist on every system, hence the probing.
  local candidate
  for candidate in C.UTF-8 en_US.UTF-8; do
    # Errors from `locale` (e.g. unknown locale) are deliberately discarded;
    # only the reported charmap matters.
    if [[ "$(LC_CTYPE="$candidate" locale charmap 2>/dev/null)" == "UTF-8" ]]; then
      echo "$candidate"
      return
    fi
  done
}

# Parse arguments sequentially until the first unrecognized arg is encountered.
# Scan the remaining args for --wrapper_script_flag=X options and process them.
ARGS=()
Expand Down Expand Up @@ -362,6 +372,17 @@ if [ -z "$CLASSPATH_LIMIT" ]; then
is_windows && CLASSPATH_LIMIT=7000 || CLASSPATH_LIMIT=120000
fi

# On non-macOS Unix, when none of LC_CTYPE, LC_ALL and LANG is set, the JVM
# would use ASCII rather than UTF-8 as the encoding for file system paths.
# In that situation, export LC_CTYPE with a UTF-8 locale if one is available;
# an already-set locale variable (even an empty one) is left untouched.
if ! is_macos && [[ -z "${LC_CTYPE+x}" && -z "${LC_ALL+x}" && -z "${LANG+x}" ]]; then
  UTF8_LOCALE=$(available_utf8_locale)
  # An empty result means no known UTF-8 locale exists; keep the environment as is.
  [[ -z "$UTF8_LOCALE" ]] || export LC_CTYPE="$UTF8_LOCALE"
fi

if (("${#CLASSPATH}" > ${CLASSPATH_LIMIT})); then
export JACOCO_IS_JAR_WRAPPED=1
create_and_run_classpath_jar
Expand Down
31 changes: 31 additions & 0 deletions src/test/shell/bazel/unicode_filenames_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,35 @@ function test_utf8_source_artifact_in_bep() {
expect_log '"name":"pkg/srcs/ünïcödë fïlë.txt"'
}

# Builds and runs a java_test whose main class creates a temporary file with a
# non-ASCII prefix, and fails the test case if `bazel test` does not succeed.
# Reads the TEST_log variable as the path that receives bazel's stderr.
function test_utf8_filename_in_java_test() {
  # Intentionally do not check for available locales: Either C.UTF-8 or
  # en_US.UTF-8 should exist on all CI machines - if not, we want to learn about
  # this so that the Java stub template can be adapted accordingly.

  touch WORKSPACE
  mkdir pkg

  cat >pkg/BUILD <<'EOF'
java_test(
name = "Test",
srcs = ["Test.java"],
main_class = "Test",
use_testrunner = False,
)
EOF

  cat >pkg/Test.java <<'EOF'
import java.nio.file.Files;
import java.io.IOException;
class Test {
public static void main(String[] args) throws IOException {
Files.createTempFile("æøå", null);
}
}
EOF

  # The redirect target is quoted: an unquoted target that expands to several
  # words (e.g. a log path containing spaces) makes bash fail with
  # "ambiguous redirect", which would fail the test for the wrong reason.
  bazel test //pkg:Test --test_output=errors 2>"$TEST_log" || fail "Test should pass"
}

# Call run_suite (not defined in the visible part of this file) with the
# suite's title. NOTE(review): presumably this runs the test_* functions
# defined above - confirm against the test framework.
run_suite "Tests for handling of Unicode filenames"

0 comments on commit 17cfa01

Please sign in to comment.