Skip to content

Commit

Permalink
Fix flakiness in TestFirecrackerRun_ReapOrphanedZombieProcess (#8294)
Browse files Browse the repository at this point in the history
Instead of using sleeps, use a named pipe to synchronize between
processes. This reduces flakiness in this test method from 24/100 to
0/100.
  • Loading branch information
vanja-p authored Feb 3, 2025
1 parent e6c1c33 commit 0ff4ad1
Showing 1 changed file with 21 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1376,31 +1376,34 @@ func TestFirecrackerRun_ReapOrphanedZombieProcess(t *testing.T) {
})
testfs.MakeExecutable(t, workDir, "procinfo")

// Run a shell subprocess that spawns a "sleep 1" child process in the
// background, then exits immediately.
// The sleep process should be orphaned once the parent shell exits,
// then reparented to pid 1 (init).
// Once the sleep process exits, it should be reaped by the init process.
// Run a shell subprocess that spawns child process (block) in the
// background, and exits immediately. "block" waits for a read on a named
// pipe before it exits. "block" should be orphaned once the parent shell
// exits, then reparented to pid 1 (init).
// Once "block" exits, it should be reaped by the init process.

cmd := &repb.Command{
Arguments: []string{"bash", "-e", "-c", `
mkfifo sync_pipe
sh -c '
sleep 0.1 &
printf "%s" "$!" > sleep.pid
sh -c "message=''; read message <sync_pipe" &
printf "%s" "$!" > block.pid
echo "Before reparent:"
./procinfo "$(cat sleep.pid)"
./procinfo "$(cat block.pid)"
' &
printf "%s" "$!" > sh.pid
wait
echo "After reparent:"
./procinfo "$(cat sleep.pid)"
./procinfo "$(cat block.pid)"
# Tell the child it can exit
echo "" >sync_pipe
sleep 0.2
echo "After exit:"
# Note: procinfo is expected to fail here.
./procinfo "$(cat sleep.pid)" || true
./procinfo "$(cat block.pid)" || true
`},
}

Expand All @@ -1418,7 +1421,7 @@ func TestFirecrackerRun_ReapOrphanedZombieProcess(t *testing.T) {
}
c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{
Command: &repb.Command{
OutputPaths: []string{"sh.pid", "sleep.pid"},
OutputPaths: []string{"sh.pid", "block.pid"},
},
}, opts)
if err != nil {
Expand All @@ -1431,25 +1434,25 @@ func TestFirecrackerRun_ReapOrphanedZombieProcess(t *testing.T) {
t.Fatal(res.Error)
}
assert.Empty(t, string(res.Stderr))
require.Equal(t, 0, res.ExitCode)
assert.Equal(t, 0, res.ExitCode)

initPID := 1
shPID := testfs.ReadFileAsString(t, opts.ActionWorkingDirectory, "sh.pid")
sleepPID := testfs.ReadFileAsString(t, opts.ActionWorkingDirectory, "sleep.pid")
blockPID := testfs.ReadFileAsString(t, opts.ActionWorkingDirectory, "block.pid")

// Note, state codes are documented here:
// https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES

expectedOutput := "Before reparent:\n" +
// Just after starting, the sleep process should be in state "S"
// Just after starting, the block process should be in state "S"
// (sleeping) and still parented to the sh process that spawned it.
fmt.Sprintf("%s %s S sleep\n", sleepPID, shPID) +
fmt.Sprintf("%s %s S sh\n", blockPID, shPID) +
"After reparent:\n" +
// After the sh process exits it should have been reparented to pid 1
// (init) and still in sleeping state.
fmt.Sprintf("%s %d S sleep\n", sleepPID, initPID) +
fmt.Sprintf("%s %d S sh\n", blockPID, initPID) +
"After exit:\n" +
// ps output should be empty after the sleep proces exits.
// ps output should be empty after the block proces exits.
// If it shows state "Z" ("zombie"), it wasn't properly reaped.
""

Expand Down

0 comments on commit 0ff4ad1

Please sign in to comment.