Skip to content

Commit

Permalink
fix(executor/docker): re-revert -- fix random errors with message "No…
Browse files Browse the repository at this point in the history
… such container:path". Fixes #6352 (#6508)

* Revert "Revert "fix(executor/docker): fix random errors with message "No such container:path". Fixes #6352 (#6483)""

This reverts commit a3fd704.

Signed-off-by: Yuan Gong <gongyuan94@gmail.com>

* fix(executor/docker): handle corner cases for containers in "Created" state

Signed-off-by: Yuan Gong <gongyuan94@gmail.com>
  • Loading branch information
Bobgy authored Aug 9, 2021
1 parent e2e822d commit 24bb1b7
Showing 1 changed file with 58 additions and 1 deletion.
59 changes: 58 additions & 1 deletion workflow/executor/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,72 @@ func (d *DockerExecutor) Wait(ctx context.Context, containerNames []string) erro
if err != nil && strings.Contains(err.Error(), "No such container") {
// e.g. reason could be ContainerCannotRun
log.WithError(err).Info("ignoring error as container may have been re-created and therefore container ID may have changed")
time.Sleep(time.Second)
continue
}
return err
if err != nil {
return err
}
completed, err := d.validateCompleted(ctx, containerNames)
if err != nil {
return err
}
if completed {
// We waited until everything completed!
return nil
}
}
time.Sleep(time.Second)
}
}
}

// validateCompleted reports whether the named containers have really finished.
//
// Even after docker wait returns, a container can still be listed in the
// "Created" state, see https://github.com/argoproj/argo-workflows/issues/6352.
// To work around that, every named container is re-checked here. The result
// is false (with a nil error) as soon as one container is found that has
// apparently not started yet; a non-nil error is returned when listing the
// containers or reading an exit code fails.
func (d *DockerExecutor) validateCompleted(ctx context.Context, containerNames []string) (bool, error) {
	listed, err := d.listContainers()
	if err != nil {
		return false, err
	}
	for _, name := range containerNames {
		entry, found := listed[name]
		// Containers that are no longer listed are ignored, and any state
		// other than "Created" needs no further inspection.
		if !found || entry.status != "Created" {
			continue
		}
		// A container in "Created" state is in one of two situations:
		//   1. it is about to start running, or
		//   2. it failed to start (for example, its entrypoint is an
		//      invalid path) and its state is stuck at "Created".
		// Situation 2 can be reproduced with:
		//   docker inspect $(docker run -d --entrypoint invalid argoproj/argosay:v2) | less
		// The exit code tells the two apart.
		exitCode, err := d.GetExitCode(ctx, entry.containerID)
		if err != nil {
			return false, err
		}
		if exitCode != "0" {
			// Non-zero exit code: situation 2. The container will never
			// run, so it counts as completed.
			continue
		}
		// Zero exit code: situation 1. The container has yet to start, so
		// the caller must keep waiting.
		log.Infof("unexpected: container %q still has state %q after docker wait", name, entry.status)
		return false, nil
	}
	return true, nil
}

func (d *DockerExecutor) listContainers() (map[string]ctr, error) {
output, err := common.RunCommand(
"docker",
Expand Down

0 comments on commit 24bb1b7

Please sign in to comment.