From 15188dce0566ffcba9e7da6fbde69625f49e0e16 Mon Sep 17 00:00:00 2001
From: Valentin Rothberg
Date: Fri, 10 Jun 2022 12:50:27 +0200
Subject: [PATCH 1/3] conmon: silence json-file error

We should just silently fall through. The log was flooding the system-service logs when running the Gitlab runner.

Signed-off-by: Valentin Rothberg
---
 libpod/oci_conmon_linux.go | 4 ++--
 test/system/030-run.bats | 12 +-----------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index fde8624b0a7f..b07660937550 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -1354,8 +1354,6 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
 logDriverArg = define.NoLogging
 case define.PassthroughLogging:
 logDriverArg = define.PassthroughLogging
- case define.JSONLogging:
- fallthrough
 //lint:ignore ST1015 the default case has to be here
 default: //nolint:stylecheck,gocritic
 // No case here should happen except JSONLogging, but keep this here in case the options are extended
@@ -1365,6 +1363,8 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
 // to get here, either a user would specify `--log-driver ""`, or this came from another place in libpod
 // since the former case is obscure, and the latter case isn't an error, let's silently fallthrough
 fallthrough
+ case define.JSONLogging:
+ fallthrough
 case define.KubernetesLogging:
 logDriverArg = fmt.Sprintf("%s:%s", define.KubernetesLogging, logPath)
 }
diff --git a/test/system/030-run.bats b/test/system/030-run.bats
index 117d791d65b3..56cf4f266ecf 100644
--- a/test/system/030-run.bats
+++ b/test/system/030-run.bats
@@ -376,17 +376,7 @@ json-file | f
 while read driver do_check; do
 msg=$(random_string 15)
 run_podman run --name myctr --log-driver $driver $IMAGE echo $msg
-
- # Simple output check
- # Special case: 'json-file' emits a warning, the rest do not
- # ...but with podman-remote the warning is on the server only
- if [[ $do_check == 'f' ]] && ! is_remote; then # 'f' for 'fallback'
- is "${lines[0]}" ".* level=error msg=\"json-file logging specified but not supported. Choosing k8s-file logging instead\"" \
- "Fallback warning emitted"
- is "${lines[1]}" "$msg" "basic output sanity check (driver=$driver)"
- else
- is "$output" "$msg" "basic output sanity check (driver=$driver)"
- fi
+ is "$output" "$msg" "basic output sanity check (driver=$driver)"

 # Simply confirm that podman preserved our argument as-is
 run_podman inspect --format '{{.HostConfig.LogConfig.Type}}' myctr

From 30e7cbccc1942d4f8e3b4f489b9f33f30dec7233 Mon Sep 17 00:00:00 2001
From: Valentin Rothberg
Date: Fri, 10 Jun 2022 12:38:28 +0200
Subject: [PATCH 2/3] libpod: fix wait and exit-code logic

This commit addresses three intertwined bugs to fix an issue when using the Gitlab runner on Podman. The three bug fixes are not split into separate commits since the tests would not pass otherwise; keeping them together avoids noise when bisecting future issues.

1) Podman conflated states: even when asked to wait for the `exited` state, Podman returned as soon as a container transitioned to `stopped`. The issue surfaced as failing Gitlab tests [1], since `conmon`'s buffers had not (yet) been emptied when attaching to a container right after a wait. The race window was extremely narrow, and I only managed to reproduce it with the Gitlab runner [1] unit tests.

2) The clearer separation between `exited` and `stopped` revealed a race condition predating the changes.
If a container is configured for autoremoval (e.g., via `run --rm`), the "run" process competes with the "cleanup" process running in the background. The window of the race condition was sufficiently large that the "cleanup" process had already removed the container and storage before the "run" process could read the exit code, so the "run" process waited indefinitely.

Address the exit-code race condition by recording exit codes in the main libpod database. Exit codes can now be read from the database. When waiting for a container to exit, Podman first waits for the container to transition to `exited` and will then query the database for its exit code. Outdated exit codes are pruned during cleanup (i.e., outside the performance-critical path) and when refreshing the database after a reboot. An exit code is considered outdated when it is older than 5 minutes.

While the race condition predates this change, the waiting process has apparently always been fast enough to catch the exit code, thanks to issue 1): `exited` and `stopped` were conflated. The waiting process hence caught the exit code after the container transitioned to `stopped` but before it `exited` and got removed.

3) With 1) and 2), Podman now waits for a container to properly transition to the `exited` state. Some tests did not pass after 1) and 2), which revealed the third bug: `conmon` was executed with its working directory pointing to the OCI runtime bundle of the container. The changed working directory broke resolving relative paths in the "cleanup" process. The "cleanup" process errored out before actually cleaning up the container, and the waiting "main" process ran indefinitely - or until hitting a timeout. Fix the issue by executing `conmon` with the same working directory as Podman.

Note that fixing 3) *may* address a number of issues we have seen in the past where for *some* reason cleanup processes did not fire.

[1] https://gitlab.com/gitlab-org/gitlab-runner/-/issues/27119#note_970712864

Signed-off-by: Valentin Rothberg
[MH: Minor reword of commit message]
Signed-off-by: Matthew Heon
---
 libpod/boltdb_state.go | 175 +++++++++++++++++++++++++++
 libpod/boltdb_state_internal.go | 22 ++++
 libpod/container_api.go | 97 +++++++++++----
 libpod/container_internal.go | 52 +++-----
 libpod/define/errors.go | 4 +
 libpod/events.go | 3 +
 libpod/oci_conmon_linux.go | 24 +---
 libpod/state.go | 9 ++
 pkg/api/handlers/utils/containers.go | 1 -
 pkg/domain/infra/abi/containers.go | 22 +---
 10 files changed, 311 insertions(+), 98 deletions(-)

diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go
index c3db6152a994..e5219ab8c946 100644
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@@ -5,8 +5,10 @@ import (
 "fmt"
 "net"
 "os"
+ "strconv"
 "strings"
 "sync"
+ "time"

 "github.com/containers/common/libnetwork/types"
 "github.com/containers/podman/v4/libpod/define"
@@ -63,6 +65,13 @@ type BoltState struct {
 // initially created the database. This must match for any further instances
 // that access the database, to ensure that state mismatches with
 // containers/storage do not occur.
+// - exitCodeBucket/exitCodeTimeStampBucket: (#14559) exit codes must be part
+// of the database to resolve a previous race condition when one process waits
+// for the exit file to be written and another process removes it along with
+// the container during auto-removal. The same race would happen trying to
+// read the exit code from the containers bucket. Hence, exit codes go into
+// their own bucket.
To avoid the rather expensive JSON (un)marshaling, we +// have two buckets: one for the exit codes, the other for the timestamps. // NewBoltState creates a new bolt-backed state database func NewBoltState(path string, runtime *Runtime) (State, error) { @@ -98,6 +107,8 @@ func NewBoltState(path string, runtime *Runtime) (State, error) { allVolsBkt, execBkt, runtimeConfigBkt, + exitCodeBkt, + exitCodeTimeStampBkt, } // Does the DB need an update? @@ -150,6 +161,10 @@ func (s *BoltState) Refresh() error { return define.ErrDBClosed } + if err := s.PruneContainerExitCodes(); err != nil { + return err + } + db, err := s.getDBCon() if err != nil { return err @@ -1341,6 +1356,166 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) { return config, nil } +// AddContainerExitCode adds the exit code for the specified container to the database. +func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { + db, err := s.getDBCon() + if err != nil { + return err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + rawExitCode := []byte(strconv.Itoa(int(exitCode))) + rawTimeStamp, err := time.Now().MarshalText() + if err != nil { + return fmt.Errorf("marshaling exit-code time stamp: %w", err) + } + + return db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + if err := exitCodeBucket.Put(rawID, rawExitCode); err != nil { + return fmt.Errorf("adding exit code of container %s to DB: %w", id, err) + } + if err := timeStampBucket.Put(rawID, rawTimeStamp); err != nil { + if rmErr := exitCodeBucket.Delete(rawID); rmErr != nil { + logrus.Errorf("Removing exit code of container %s from DB: %v", id, rmErr) + } + return fmt.Errorf("adding exit-code time stamp of container %s to DB: %w", id, err) + } + + return nil + }) +} + +// GetContainerExitCode returns the exit code for the specified container. +func (s *BoltState) GetContainerExitCode(id string) (int32, error) { + db, err := s.getDBCon() + if err != nil { + return -1, err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + result := int32(-1) + return result, db.View(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + + rawExitCode := exitCodeBucket.Get(rawID) + if rawExitCode == nil { + return fmt.Errorf("getting exit code of container %s from DB: %w", id, define.ErrNoSuchExitCode) + } + + exitCode, err := strconv.Atoi(string(rawExitCode)) + if err != nil { + return fmt.Errorf("converting raw exit code %v of container %s: %w", rawExitCode, id, err) + } + + result = int32(exitCode) + return nil + }) +} + +// GetContainerExitCodeTimeStamp returns the time stamp when the exit code of +// the specified container was added to the database. 
+func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) { + db, err := s.getDBCon() + if err != nil { + return nil, err + } + defer s.deferredCloseDBCon(db) + + rawID := []byte(id) + var result time.Time + return &result, db.View(func(tx *bolt.Tx) error { + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + rawTimeStamp := timeStampBucket.Get(rawID) + if rawTimeStamp == nil { + return fmt.Errorf("getting exit-code time stamp of container %s from DB: %w", id, define.ErrNoSuchExitCode) + } + + if err := result.UnmarshalText(rawTimeStamp); err != nil { + return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, id, err) + } + + return nil + }) +} + +// PrunExitCodes removes exit codes older than 5 minutes. +func (s *BoltState) PruneContainerExitCodes() error { + db, err := s.getDBCon() + if err != nil { + return err + } + defer s.deferredCloseDBCon(db) + + threshold := time.Minute * 5 + return db.View(func(tx *bolt.Tx) error { + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error { + var timeStamp time.Time + if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil { + return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) + } + if time.Since(timeStamp) > threshold { + // Since the DB connection is locked, pass it + // to remove the exit codes to avoid race + // conditions. + return s.removeContainerExitCode(rawID, db) + } + return nil + }) + }) +} + +// removeContainerExitCode removes the exit code and time stamp of the specified container. +func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error { + return db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + var finalErr error + if err := exitCodeBucket.Delete(rawID); err != nil { + finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err) + } + if err := timeStampBucket.Delete(rawID); err != nil { + err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err) + if finalErr != nil { + logrus.Error(err) + } else { + finalErr = err + } + } + + return finalErr + }) +} + // AddExecSession adds an exec session to the state. 
func (s *BoltState) AddExecSession(ctr *Container, session *ExecSession) error { if !s.valid { diff --git a/libpod/boltdb_state_internal.go b/libpod/boltdb_state_internal.go index d6f035af9367..edba78d6dc87 100644 --- a/libpod/boltdb_state_internal.go +++ b/libpod/boltdb_state_internal.go @@ -29,6 +29,9 @@ const ( aliasesName = "aliases" runtimeConfigName = "runtime-config" + exitCodeName = "exit-code" + exitCodeTimeStampName = "exit-code-time-stamp" + configName = "config" stateName = "state" dependenciesName = "dependencies" @@ -65,6 +68,9 @@ var ( volDependenciesBkt = []byte(volCtrDependencies) networksBkt = []byte(networksName) + exitCodeBkt = []byte(exitCodeName) + exitCodeTimeStampBkt = []byte(exitCodeTimeStampName) + configKey = []byte(configName) stateKey = []byte(stateName) netNSKey = []byte(netNSName) @@ -362,6 +368,22 @@ func getRuntimeConfigBucket(tx *bolt.Tx) (*bolt.Bucket, error) { return bkt, nil } +func getExitCodeBucket(tx *bolt.Tx) (*bolt.Bucket, error) { + bkt := tx.Bucket(exitCodeBkt) + if bkt == nil { + return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code container bucket not found in DB") + } + return bkt, nil +} + +func getExitCodeTimeStampBucket(tx *bolt.Tx) (*bolt.Bucket, error) { + bkt := tx.Bucket(exitCodeTimeStampBkt) + if bkt == nil { + return nil, errors.Wrapf(define.ErrDBBadConfig, "exit-code time stamp bucket not found in DB") + } + return bkt, nil +} + func (s *BoltState) getContainerConfigFromDB(id []byte, config *ContainerConfig, ctrsBkt *bolt.Bucket) error { ctrBkt := ctrsBkt.Bucket(id) if ctrBkt == nil { diff --git a/libpod/container_api.go b/libpod/container_api.go index b064d35281b9..c14fe95b09c1 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -2,6 +2,7 @@ package libpod import ( "context" + "fmt" "io" "io/ioutil" "net/http" @@ -490,41 +491,84 @@ func (c *Container) RemoveArtifact(name string) error { // Wait blocks until the container exits and returns its exit code. func (c *Container) Wait(ctx context.Context) (int32, error) { - return c.WaitWithInterval(ctx, DefaultWaitInterval) + return c.WaitForExit(ctx, DefaultWaitInterval) } -// WaitWithInterval blocks until the container to exit and returns its exit -// code. The argument is the interval at which checks the container's status. -func (c *Container) WaitWithInterval(ctx context.Context, waitTimeout time.Duration) (int32, error) { +// WaitForExit blocks until the container exits and returns its exit code. The +// argument is the interval at which checks the container's status. +func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) (int32, error) { if !c.valid { return -1, define.ErrCtrRemoved } - exitFile, err := c.exitFilePath() - if err != nil { - return -1, err - } - chWait := make(chan error, 1) + id := c.ID() + var conmonTimer time.Timer + conmonTimerSet := false - go func() { - <-ctx.Done() - chWait <- define.ErrCanceled - }() + getExitCode := func() (bool, int32, error) { + containerRemoved := false + if !c.batched { + c.lock.Lock() + defer c.lock.Unlock() + } - for { - // ignore errors here (with exception of cancellation), it is only used to avoid waiting - // too long. 
- _, e := WaitForFile(exitFile, chWait, waitTimeout) - if e == define.ErrCanceled { - return -1, define.ErrCanceled + if err := c.syncContainer(); err != nil { + if !errors.Is(err, define.ErrNoSuchCtr) { + return false, -1, err + } + containerRemoved = true + } + + // If conmon is not alive anymore set a timer to make sure + // we're returning even if conmon has forcefully been killed. + if !conmonTimerSet && !containerRemoved { + conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) + switch { + case errors.Is(err, define.ErrNoSuchCtr): + containerRemoved = true + case err != nil: + return false, -1, err + case !conmonAlive: + timerDuration := time.Second * 20 + conmonTimer = *time.NewTimer(timerDuration) + conmonTimerSet = true + } + } + + if !containerRemoved { + // If conmon is dead for more than $timerDuration or if the + // container has exited properly, try to look up the exit code. + select { + case <-conmonTimer.C: + logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id) + default: + if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) { + return false, -1, nil + } + } } - stopped, code, err := c.isStopped() + exitCode, err := c.runtime.state.GetContainerExitCode(id) + if err != nil { + return true, -1, err + } + + return true, exitCode, nil + } + + for { + hasExited, exitCode, err := getExitCode() + if hasExited { + return exitCode, err + } if err != nil { return -1, err } - if stopped { - return code, nil + select { + case <-ctx.Done(): + return -1, fmt.Errorf("waiting for exit code of container %s canceled", id) + default: + time.Sleep(pollInterval) } } } @@ -551,11 +595,12 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou wantedStates := make(map[define.ContainerStatus]bool, len(conditions)) for _, condition := range conditions { - if condition == define.ContainerStateStopped || condition == define.ContainerStateExited { + switch condition { + case define.ContainerStateExited, define.ContainerStateStopped: waitForExit = true - continue + default: + wantedStates[condition] = true } - wantedStates[condition] = true } trySend := func(code int32, err error) { @@ -572,7 +617,7 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou go func() { defer wg.Done() - code, err := c.WaitWithInterval(ctx, waitTimeout) + code, err := c.WaitForExit(ctx, waitTimeout) trySend(code, err) }() } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index ce48987f61d2..ae61298f376a 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -219,7 +219,7 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { // Write an event for the container's death c.newContainerExitedEvent(c.state.ExitCode) - return nil + return c.runtime.state.AddContainerExitCode(c.ID(), c.state.ExitCode) } func (c *Container) shouldRestart() bool { @@ -784,20 +784,6 @@ func (c *Container) getArtifactPath(name string) string { return filepath.Join(c.config.StaticDir, artifactsDir, name) } -// Used with Wait() to determine if a container has exited -func (c *Container) isStopped() (bool, int32, error) { - if !c.batched { - c.lock.Lock() - defer c.lock.Unlock() - } - err := c.syncContainer() - if err != nil { - return true, -1, err - } - - return !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping), c.state.ExitCode, nil -} - // save container state to the database func (c *Container) save() error { if 
err := c.runtime.state.SaveContainer(c); err != nil { @@ -1282,13 +1268,6 @@ func (c *Container) stop(timeout uint) error { } } - // Check if conmon is still alive. - // If it is not, we won't be getting an exit file. - conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) - if err != nil { - return err - } - // Set the container state to "stopping" and unlock the container // before handing it over to conmon to unblock other commands. #8501 // demonstrates nicely that a high stop timeout will block even simple @@ -1341,21 +1320,18 @@ func (c *Container) stop(timeout uint) error { } c.newContainerEvent(events.Stop) - - c.state.PID = 0 - c.state.ConmonPID = 0 c.state.StoppedByUser = true + conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) + if err != nil { + return err + } if !conmonAlive { - // Conmon is dead, so we can't expect an exit code. - c.state.ExitCode = -1 - c.state.FinishedTime = time.Now() - c.state.State = define.ContainerStateStopped - if err := c.save(); err != nil { - logrus.Errorf("Saving container %s status: %v", c.ID(), err) + if err := c.checkExitFile(); err != nil { + return err } - return errors.Wrapf(define.ErrConmonDead, "container %s conmon process missing, cannot retrieve exit code", c.ID()) + return c.save() } if err := c.save(); err != nil { @@ -1939,6 +1915,18 @@ func (c *Container) cleanup(ctx context.Context) error { } } + // Prune the exit codes of other container during clean up. + // Since Podman is no daemon, we have to clean them up somewhere. + // Cleanup seems like a good place as it's not performance + // critical. + if err := c.runtime.state.PruneContainerExitCodes(); err != nil { + if lastError == nil { + lastError = err + } else { + logrus.Errorf("Pruning container exit codes: %v", err) + } + } + return lastError } diff --git a/libpod/define/errors.go b/libpod/define/errors.go index f5a7c73e5dd1..9757a85b13b9 100644 --- a/libpod/define/errors.go +++ b/libpod/define/errors.go @@ -24,6 +24,10 @@ var ( // not exist. ErrNoSuchExecSession = errors.New("no such exec session") + // ErrNoSuchExitCode indicates that the requested container exit code + // does not exist. + ErrNoSuchExitCode = errors.New("no such exit code") + // ErrDepExists indicates that the current object has dependencies and // cannot be removed before them. ErrDepExists = errors.New("dependency exists") diff --git a/libpod/events.go b/libpod/events.go index f09d8402affd..021b3b53c76e 100644 --- a/libpod/events.go +++ b/libpod/events.go @@ -151,6 +151,9 @@ func (r *Runtime) GetEvents(ctx context.Context, filters []string) ([]*events.Ev // GetLastContainerEvent takes a container name or ID and an event status and returns // the last occurrence of the container event func (r *Runtime) GetLastContainerEvent(ctx context.Context, nameOrID string, containerEvent events.Status) (*events.Event, error) { + // FIXME: events should be read in reverse order! + // https://github.com/containers/podman/issues/14579 + // check to make sure the event.Status is valid if _, err := events.StringToStatus(containerEvent.String()); err != nil { return nil, err diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index b07660937550..d417626dc8cf 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -264,11 +264,6 @@ func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *Conta // status, but will instead only check for the existence of the conmon exit file // and update state to stopped if it exists. 
func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { - exitFile, err := r.ExitFilePath(ctr) - if err != nil { - return err - } - runtimeDir, err := util.GetRuntimeDir() if err != nil { return err @@ -340,22 +335,10 @@ func (r *ConmonOCIRuntime) UpdateContainerStatus(ctr *Container) error { // Only grab exit status if we were not already stopped // If we were, it should already be in the database if ctr.state.State == define.ContainerStateStopped && oldState != define.ContainerStateStopped { - var fi os.FileInfo - chWait := make(chan error) - defer close(chWait) - - _, err := WaitForFile(exitFile, chWait, time.Second*5) - if err == nil { - fi, err = os.Stat(exitFile) + if _, err := ctr.Wait(context.Background()); err != nil { + logrus.Errorf("Waiting for container %s to exit: %v", ctr.ID(), err) } - if err != nil { - ctr.state.ExitCode = -1 - ctr.state.FinishedTime = time.Now() - logrus.Errorf("No exit file for container %s found: %v", ctr.ID(), err) - return nil - } - - return ctr.handleExitFile(exitFile, fi) + return nil } // Handle ContainerStateStopping - keep it unless the container @@ -1166,7 +1149,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co }).Debugf("running conmon: %s", r.conmonPath) cmd := exec.Command(r.conmonPath, args...) - cmd.Dir = ctr.bundlePath() cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } diff --git a/libpod/state.go b/libpod/state.go index 4710237693e5..4fbd3c302afb 100644 --- a/libpod/state.go +++ b/libpod/state.go @@ -111,6 +111,15 @@ type State interface { // Return a container config from the database by full ID GetContainerConfig(id string) (*ContainerConfig, error) + // Add the exit code for the specified container to the database. + AddContainerExitCode(id string, exitCode int32) error + + // Return the exit code for the specified container. + GetContainerExitCode(id string) (int32, error) + + // Remove exit codes older than 5 minutes. + PruneContainerExitCodes() error + // Add creates a reference to an exec session in the database. // The container the exec session is attached to will be recorded. // The container state will not be modified. 
diff --git a/pkg/api/handlers/utils/containers.go b/pkg/api/handlers/utils/containers.go index 8588b49ba22a..1795f6ce1131 100644 --- a/pkg/api/handlers/utils/containers.go +++ b/pkg/api/handlers/utils/containers.go @@ -191,7 +191,6 @@ func waitDockerCondition(ctx context.Context, containerName string, interval tim var notRunningStates = []define.ContainerStatus{ define.ContainerStateCreated, define.ContainerStateRemoving, - define.ContainerStateStopped, define.ContainerStateExited, define.ContainerStateConfigured, } diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 1934533cf66f..281e448f631a 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -16,7 +16,6 @@ import ( "github.com/containers/image/v5/manifest" "github.com/containers/podman/v4/libpod" "github.com/containers/podman/v4/libpod/define" - "github.com/containers/podman/v4/libpod/events" "github.com/containers/podman/v4/libpod/logs" "github.com/containers/podman/v4/pkg/checkpoint" "github.com/containers/podman/v4/pkg/domain/entities" @@ -939,6 +938,7 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri } return reports, errors.Wrapf(err, "unable to start container %s", ctr.ID()) } + exitCode = ic.GetContainerExitCode(ctx, ctr) reports = append(reports, &entities.ContainerStartReport{ Id: ctr.ID(), @@ -1099,25 +1099,11 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int { exitCode, err := ctr.Wait(ctx) - if err == nil { - return int(exitCode) - } - if errors.Cause(err) != define.ErrNoSuchCtr { - logrus.Errorf("Could not retrieve exit code: %v", err) + if err != nil { + logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err) return define.ExecErrorCodeNotFound } - // Make 4 attempt with 0.25s backoff between each for 1 second total - var event *events.Event - for i := 0; i < 4; i++ { - event, err = ic.Libpod.GetLastContainerEvent(ctx, ctr.ID(), events.Exited) - if err != nil { - time.Sleep(250 * time.Millisecond) - continue - } - return event.ContainerExitCode - } - logrus.Errorf("Could not retrieve exit code from event: %v", err) - return define.ExecErrorCodeNotFound + return int(exitCode) } func (ic *ContainerEngine) ContainerLogs(ctx context.Context, containers []string, options entities.ContainerLogsOptions) error { From 3a810b8d2ad61a7bdca9b10e183ea7f6708474eb Mon Sep 17 00:00:00 2001 From: Matthew Heon Date: Wed, 22 Jun 2022 16:37:00 -0400 Subject: [PATCH 3/3] Two fixes for DB exit code handling Firstly: don't prune exit codes after a refresh - instead, clear the table entirely. We are guaranteed that all containers are gone after a refresh, we should not worry about exit codes given this. Secondly: alter the way pruning was done. We were updating the DB by calling Update from within an existing View, and stacking an RW transaction on top of an existing RO one seems dodgy; further, modifying a bucket while iterating over it with ForEach is undefined behavior. Hopefully this will resolve our CI issues. 
Signed-off-by: Matthew Heon --- libpod/boltdb_state.go | 141 +++++++++++++++++++++++++++++++---------- 1 file changed, 107 insertions(+), 34 deletions(-) diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index e5219ab8c946..471f64b8425f 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -161,10 +161,6 @@ func (s *BoltState) Refresh() error { return define.ErrDBClosed } - if err := s.PruneContainerExitCodes(); err != nil { - return err - } - db, err := s.getDBCon() if err != nil { return err @@ -207,6 +203,45 @@ func (s *BoltState) Refresh() error { return err } + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } + + // Clear all exec exit codes + toRemoveExitCodes := []string{} + err = exitCodeBucket.ForEach(func(id, _ []byte) error { + toRemoveExitCodes = append(toRemoveExitCodes, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "error reading exit codes bucket") + } + for _, id := range toRemoveExitCodes { + if err := exitCodeBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "error removing exit code for ID %s", id) + } + } + + toRemoveTimeStamps := []string{} + err = timeStampBucket.ForEach(func(id, _ []byte) error { + toRemoveTimeStamps = append(toRemoveTimeStamps, string(id)) + return nil + }) + if err != nil { + return errors.Wrapf(err, "reading timestamps bucket") + } + for _, id := range toRemoveTimeStamps { + if err := timeStampBucket.Delete([]byte(id)); err != nil { + return errors.Wrapf(err, "removing timestamp for ID %s", id) + } + } + // Iterate through all IDs. Check if they are containers. // If they are, unmarshal their state, and then clear // PID, mountpoint, and state for all of them @@ -1358,6 +1393,14 @@ func (s *BoltState) GetContainerConfig(id string) (*ContainerConfig, error) { // AddContainerExitCode adds the exit code for the specified container to the database. func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { + if len(id) == 0 { + return define.ErrEmptyID + } + + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err @@ -1397,6 +1440,14 @@ func (s *BoltState) AddContainerExitCode(id string, exitCode int32) error { // GetContainerExitCode returns the exit code for the specified container. func (s *BoltState) GetContainerExitCode(id string) (int32, error) { + if len(id) == 0 { + return -1, define.ErrEmptyID + } + + if !s.valid { + return -1, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return -1, err @@ -1429,6 +1480,14 @@ func (s *BoltState) GetContainerExitCode(id string) (int32, error) { // GetContainerExitCodeTimeStamp returns the time stamp when the exit code of // the specified container was added to the database. func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) { + if len(id) == 0 { + return nil, define.ErrEmptyID + } + + if !s.valid { + return nil, define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return nil, err @@ -1456,16 +1515,22 @@ func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) }) } -// PrunExitCodes removes exit codes older than 5 minutes. +// PruneExitCodes removes exit codes older than 5 minutes. 
func (s *BoltState) PruneContainerExitCodes() error { + if !s.valid { + return define.ErrDBClosed + } + db, err := s.getDBCon() if err != nil { return err } defer s.deferredCloseDBCon(db) + toRemoveIDs := []string{} + threshold := time.Minute * 5 - return db.View(func(tx *bolt.Tx) error { + err = db.View(func(tx *bolt.Tx) error { timeStampBucket, err := getExitCodeTimeStampBucket(tx) if err != nil { return err @@ -1477,43 +1542,51 @@ func (s *BoltState) PruneContainerExitCodes() error { return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) } if time.Since(timeStamp) > threshold { - // Since the DB connection is locked, pass it - // to remove the exit codes to avoid race - // conditions. - return s.removeContainerExitCode(rawID, db) + toRemoveIDs = append(toRemoveIDs, string(rawID)) } return nil }) }) -} + if err != nil { + return errors.Wrapf(err, "reading exit codes to prune") + } -// removeContainerExitCode removes the exit code and time stamp of the specified container. -func (s *BoltState) removeContainerExitCode(rawID []byte, db *bolt.DB) error { - return db.Update(func(tx *bolt.Tx) error { - exitCodeBucket, err := getExitCodeBucket(tx) - if err != nil { - return err - } - timeStampBucket, err := getExitCodeTimeStampBucket(tx) - if err != nil { - return err - } + if len(toRemoveIDs) > 0 { + err = db.Update(func(tx *bolt.Tx) error { + exitCodeBucket, err := getExitCodeBucket(tx) + if err != nil { + return err + } + timeStampBucket, err := getExitCodeTimeStampBucket(tx) + if err != nil { + return err + } - var finalErr error - if err := exitCodeBucket.Delete(rawID); err != nil { - finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", string(rawID), err) - } - if err := timeStampBucket.Delete(rawID); err != nil { - err = fmt.Errorf("removing exit-code time stamp of container %s from DB: %w", string(rawID), err) - if finalErr != nil { - logrus.Error(err) - } else { - finalErr = err + var finalErr error + for _, id := range toRemoveIDs { + rawID := []byte(id) + if err := exitCodeBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code of container %s from DB: %w", id, err) + } + if err := timeStampBucket.Delete(rawID); err != nil { + if finalErr != nil { + logrus.Error(finalErr) + } + finalErr = fmt.Errorf("removing exit code timestamp of container %s from DB: %w", id, err) + } } + + return finalErr + }) + if err != nil { + return errors.Wrapf(err, "pruning exit codes") } + } - return finalErr - }) + return nil } // AddExecSession adds an exec session to the state.
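
For readers who have not worked with bolt before, the two-bucket exit-code store introduced in patch 2 (and hardened in patch 3) can be sketched in isolation. The following is a minimal, self-contained Go sketch against go.etcd.io/bbolt, not Podman's actual implementation: the bucket names mirror the patch, but the helper functions, error handling, and demo flow are illustrative assumptions.

// Minimal sketch of the two-bucket exit-code store described above, written
// against go.etcd.io/bbolt. Bucket names follow the patch; everything else
// (helpers, demo flow, threshold wiring) is illustrative only.
package main

import (
	"fmt"
	"strconv"
	"time"

	bolt "go.etcd.io/bbolt"
)

var (
	exitCodeBkt  = []byte("exit-code")
	timeStampBkt = []byte("exit-code-time-stamp")
)

// addExitCode records a container's exit code together with the time it was written.
func addExitCode(db *bolt.DB, id string, code int32) error {
	now, err := time.Now().MarshalText()
	if err != nil {
		return err
	}
	return db.Update(func(tx *bolt.Tx) error {
		codes, err := tx.CreateBucketIfNotExists(exitCodeBkt)
		if err != nil {
			return err
		}
		stamps, err := tx.CreateBucketIfNotExists(timeStampBkt)
		if err != nil {
			return err
		}
		if err := codes.Put([]byte(id), []byte(strconv.Itoa(int(code)))); err != nil {
			return err
		}
		return stamps.Put([]byte(id), now)
	})
}

// pruneExitCodes collects stale IDs in a read-only transaction and deletes them
// in a separate write transaction, so no bucket is modified while it is being
// iterated and no write transaction is nested inside a read-only one.
func pruneExitCodes(db *bolt.DB, threshold time.Duration) error {
	var stale [][]byte
	err := db.View(func(tx *bolt.Tx) error {
		stamps := tx.Bucket(timeStampBkt)
		if stamps == nil {
			return nil // nothing recorded yet
		}
		return stamps.ForEach(func(id, raw []byte) error {
			var ts time.Time
			if err := ts.UnmarshalText(raw); err != nil {
				return err
			}
			if time.Since(ts) > threshold {
				// Keys are only valid for the lifetime of the transaction; copy them.
				stale = append(stale, append([]byte(nil), id...))
			}
			return nil
		})
	})
	if err != nil || len(stale) == 0 {
		return err
	}
	return db.Update(func(tx *bolt.Tx) error {
		codes, err := tx.CreateBucketIfNotExists(exitCodeBkt)
		if err != nil {
			return err
		}
		stamps, err := tx.CreateBucketIfNotExists(timeStampBkt)
		if err != nil {
			return err
		}
		for _, id := range stale {
			if err := codes.Delete(id); err != nil {
				return err
			}
			if err := stamps.Delete(id); err != nil {
				return err
			}
		}
		return nil
	})
}

func main() {
	db, err := bolt.Open("exit-codes.db", 0o600, nil)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	if err := addExitCode(db, "abc123", 0); err != nil {
		panic(err)
	}
	if err := pruneExitCodes(db, 5*time.Minute); err != nil {
		panic(err)
	}
	fmt.Println("exit code recorded; stale entries pruned")
}

Collecting the stale IDs in a read-only View and deleting them in a separate Update mirrors the reasoning given in patch 3: deleting from a bucket while iterating it with ForEach is undefined behavior, and stacking a write transaction on top of an open read transaction is unsafe.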