From ceb2d66e91c273eef01a688cea4490e41f27ca7e Mon Sep 17 00:00:00 2001
From: Paul Holzinger
Date: Thu, 15 Aug 2024 15:58:01 +0200
Subject: [PATCH] test/e2e: on test failures dump server stack trace

To debug #22246

Signed-off-by: Paul Holzinger
---
 test/e2e/common_test.go              | 29 ++++++++++++++++++++++++++--
 test/e2e/libpod_suite_remote_test.go |  6 +++++-
 test/e2e/libpod_suite_test.go        |  3 +++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/test/e2e/common_test.go b/test/e2e/common_test.go
index ebd1f2d99481..3800cff3952f 100644
--- a/test/e2e/common_test.go
+++ b/test/e2e/common_test.go
@@ -688,11 +688,36 @@ func (p *PodmanTestIntegration) Cleanup() {
 	// first stop everything, rm -fa is unreliable
 	// https://github.com/containers/podman/issues/18180
 	stop := p.Podman([]string{"stop", "--all", "-t", "0"})
-	stop.WaitWithDefaultTimeout()
+	Eventually(stop, DefaultWaitTimeout).Should(Exit(), func() string {
+		p.stopRemoteService(syscall.SIGABRT)
+
+		// Note: Eventually does not kill the command on timeout, so the command would be leaked forever unless we kill it here.
+		// Also let's use SIGABRT to create a go stack trace so in case there is a deadlock we see it.
+		stop.Signal(syscall.SIGABRT)
+		// Give some time to let the command print the output so it is not printed much later
+		// in the log at the wrong place.
+		time.Sleep(1 * time.Second)
+
+		// As the output is logged by default there is no need to dump it here.
+		return fmt.Sprintf("command timed out after %ds: %v",
+			DefaultWaitTimeout, stop.Command.Args)
+	})
 
 	// Remove all pods...
 	podrm := p.Podman([]string{"pod", "rm", "-fa", "-t", "0"})
-	podrm.WaitWithDefaultTimeout()
+	Eventually(podrm, DefaultWaitTimeout).Should(Exit(), func() string {
+		p.stopRemoteService(syscall.SIGABRT)
+
+		// Note: Eventually does not kill the command on timeout, so the command would be leaked forever unless we kill it here.
+		// Also let's use SIGABRT to create a go stack trace so in case there is a deadlock we see it.
+		podrm.Signal(syscall.SIGABRT)
+		// Give some time to let the command print the output so it is not printed much later
+		// in the log at the wrong place.
+		time.Sleep(1 * time.Second)
+		// As the output is logged by default there is no need to dump it here.
+		return fmt.Sprintf("command timed out after %ds: %v",
+			DefaultWaitTimeout, podrm.Command.Args)
+	})
 
 	// ...and containers
 	rmall := p.Podman([]string{"rm", "-fa", "-t", "0"})
diff --git a/test/e2e/libpod_suite_remote_test.go b/test/e2e/libpod_suite_remote_test.go
index 44a79eaf3621..524790ef21ad 100644
--- a/test/e2e/libpod_suite_remote_test.go
+++ b/test/e2e/libpod_suite_remote_test.go
@@ -103,7 +103,11 @@ func (p *PodmanTestIntegration) StartRemoteService() {
 }
 
 func (p *PodmanTestIntegration) StopRemoteService() {
-	if err := p.RemoteSession.Signal(syscall.SIGTERM); err != nil {
+	p.stopRemoteService(syscall.SIGTERM)
+}
+
+func (p *PodmanTestIntegration) stopRemoteService(signal syscall.Signal) {
+	if err := p.RemoteSession.Signal(signal); err != nil {
 		GinkgoWriter.Printf("unable to clean up service %d, %v\n", p.RemoteSession.Pid, err)
 	}
 	if _, err := p.RemoteSession.Wait(); err != nil {
diff --git a/test/e2e/libpod_suite_test.go b/test/e2e/libpod_suite_test.go
index 549777e79675..b6e5d420ae0a 100644
--- a/test/e2e/libpod_suite_test.go
+++ b/test/e2e/libpod_suite_test.go
@@ -5,6 +5,7 @@ package integration
 import (
 	"os"
 	"path/filepath"
+	"syscall"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -74,6 +75,8 @@ func (p *PodmanTestIntegration) RestoreArtifact(image string) error {
 
 func (p *PodmanTestIntegration) StopRemoteService() {}
 
+func (p *PodmanTestIntegration) stopRemoteService(signal syscall.Signal) {}
+
 // We don't support running API service when local
 func (p *PodmanTestIntegration) StartRemoteService() {
 }