From cb4346da2d90414442cb7d3c555c8d493a216414 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Tue, 16 Jan 2024 21:10:51 +0100 Subject: [PATCH] Print stack traces without panicking on test timeout --- go/tools/bzltestutil/timeout.go | 15 ++++---- go/tools/bzltestutil/wrap.go | 21 ++++-------- tests/core/go_test/BUILD.bazel | 9 +++++ tests/core/go_test/sigterm_handler_test.go | 40 ++++++++++++++++++++++ tests/core/go_test/timeout_test.go | 6 ++-- 5 files changed, 67 insertions(+), 24 deletions(-) create mode 100644 tests/core/go_test/sigterm_handler_test.go diff --git a/go/tools/bzltestutil/timeout.go b/go/tools/bzltestutil/timeout.go index 70d26d516b..85de879811 100644 --- a/go/tools/bzltestutil/timeout.go +++ b/go/tools/bzltestutil/timeout.go @@ -15,22 +15,25 @@ package bzltestutil import ( + "fmt" "os" "os/signal" - "runtime/debug" + "runtime" "syscall" ) func RegisterTimeoutHandler() { - // When the Bazel test timeout is reached, Bazel sends a SIGTERM. We - // panic just like native go test would so that the user gets stack - // traces of all running go routines. + // When the Bazel test timeout is reached, Bazel sends a SIGTERM. We print stack traces for all + // goroutines just like native go test would. We do not panic (like native go test does) because + // users may legitimately want to use SIGTERM in tests and prints are less disruptive than + // panics in that case. // See https://github.com/golang/go/blob/e816eb50140841c524fd07ecb4eaa078954eb47c/src/testing/testing.go#L2351 c := make(chan os.Signal, 1) signal.Notify(c, syscall.SIGTERM) go func() { <-c - debug.SetTraceback("all") - panic("test timed out") + buf := make([]byte, 1<<24) + stacklen := runtime.Stack(buf, true) + fmt.Printf("Received SIGTERM, printing stack traces of all goroutines:\n%s", buf[:stacklen]) }() } diff --git a/go/tools/bzltestutil/wrap.go b/go/tools/bzltestutil/wrap.go index 511768c82f..effe47b136 100644 --- a/go/tools/bzltestutil/wrap.go +++ b/go/tools/bzltestutil/wrap.go @@ -125,27 +125,18 @@ func Wrap(pkg string) error { exePath = filepath.Join(chdir.TestExecDir, exePath) } - c := make(chan os.Signal, 1) - signal.Notify(c, syscall.SIGTERM) + // If Bazel sends a SIGTERM because the test timed out, it sends it to all child processes. As + // a result, the child process will print stack traces of all Go routines and we want the + // wrapper to be around to capute and forward this output. Thus, we need to ignore the signal + // and will be killed by Bazel after the grace period instead. + signal.Ignore(syscall.SIGTERM) cmd := exec.Command(exePath, args...) cmd.Env = append(os.Environ(), "GO_TEST_WRAP=0") cmd.Stderr = io.MultiWriter(os.Stderr, streamMerger.ErrW) cmd.Stdout = io.MultiWriter(os.Stdout, streamMerger.OutW) streamMerger.Start() - err := cmd.Start() - if err == nil { - go func() { - // When the Bazel test timeout is reached, Bazel sends a SIGTERM that - // we need to forward to the inner process. - // TODO: This never triggers on Windows, even though Go should simulate - // a SIGTERM when Windows asks the process to close. It is not clear - // whether Bazel uses the required graceful shutdown mechanism. - <-c - cmd.Process.Signal(syscall.SIGTERM) - }() - err = cmd.Wait() - } + err := cmd.Run() streamMerger.ErrW.Close() streamMerger.OutW.Close() streamMerger.Wait() diff --git a/tests/core/go_test/BUILD.bazel b/tests/core/go_test/BUILD.bazel index e44564cca9..9a4fe21bad 100644 --- a/tests/core/go_test/BUILD.bazel +++ b/tests/core/go_test/BUILD.bazel @@ -250,6 +250,15 @@ go_test( shard_count = 2, ) +go_test( + name = "sigterm_handler_test", + srcs = ["sigterm_handler_test.go"], + target_compatible_with = select({ + "@platforms//os:windows": ["@platforms//:incompatible"], + "//conditions:default": [], + }), +) + go_bazel_test( name = "env_inherit_test", srcs = ["env_inherit_test.go"], diff --git a/tests/core/go_test/sigterm_handler_test.go b/tests/core/go_test/sigterm_handler_test.go new file mode 100644 index 0000000000..2b0c8275dd --- /dev/null +++ b/tests/core/go_test/sigterm_handler_test.go @@ -0,0 +1,40 @@ +package sigterm_handler_test + +import ( + "os" + "os/signal" + "sync" + "syscall" + "testing" + "time" +) + +func TestRegisterSignalHandler(t *testing.T) { + called := false + var wg sync.WaitGroup + + wg.Add(1) + + c := make(chan os.Signal, 1) + signal.Notify(c, syscall.SIGTERM) + + go func() { + switch <-c { + case syscall.SIGTERM: + called = true + wg.Done() + } + }() + + if err := syscall.Kill(os.Getpid(), syscall.SIGTERM); err != nil { + t.Fatalf("Failed to send SIGTERM: %v", err) + } + wg.Wait() + + // Give any signal handlers registered by rules_go a chance to run. + time.Sleep(1 * time.Second) + + if !called { + t.Fatal("Our handler has not run") + } +} diff --git a/tests/core/go_test/timeout_test.go b/tests/core/go_test/timeout_test.go index 3c05206bf0..8d56feee55 100644 --- a/tests/core/go_test/timeout_test.go +++ b/tests/core/go_test/timeout_test.go @@ -38,7 +38,7 @@ func neverTerminates() { func TestTimeout(t *testing.T) { if runtime.GOOS == "windows" { - t.Skip("panics on timeouts are not yet supported on Windows") + t.Skip("stack traces on timeouts are not yet supported on Windows") } if err := bazel_testing.RunBazel("test", "//:timeout_test", "--test_timeout=3"); err == nil { @@ -57,8 +57,8 @@ func TestTimeout(t *testing.T) { } testLog := string(b) - if !strings.Contains(testLog, "panic: test timed out") { - t.Fatalf("test log does not contain expected panic:\n%s", testLog) + if !strings.Contains(testLog, "Received SIGTERM, printing stack traces of all goroutines:") { + t.Fatalf("test log does not contain expected header:\n%s", testLog) } if !strings.Contains(testLog, "timeout_test.neverTerminates(") { t.Fatalf("test log does not contain expected stack trace:\n%s", testLog)