From d1f9611cb044d17f6f678d88673d35362064de75 Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Thu, 11 Apr 2019 16:36:44 -0700 Subject: [PATCH] Use wait instead of `TaskExit`. Signed-off-by: Lantao Liu --- pkg/server/container_remove.go | 3 + pkg/server/container_remove_test.go | 9 ++ pkg/server/container_start.go | 114 ++++++++++++++++---------- pkg/server/container_start_test.go | 98 ++++++++++++++++++++++ pkg/server/container_stop.go | 57 ++++++------- pkg/server/events.go | 63 ++++++++++---- pkg/server/events_test.go | 18 ++-- pkg/server/helpers.go | 28 ------- pkg/server/restart.go | 38 +++++++-- pkg/server/sandbox_run.go | 113 ++++++++++--------------- pkg/server/sandbox_stop.go | 46 ++++++----- pkg/store/container/container_test.go | 2 +- pkg/store/container/status.go | 4 + pkg/store/container/status_test.go | 2 + pkg/store/sandbox/sandbox.go | 20 +---- pkg/store/sandbox/sandbox_test.go | 13 +-- pkg/store/sandbox/status.go | 20 ++--- pkg/store/sandbox/status_test.go | 2 +- 18 files changed, 387 insertions(+), 263 deletions(-) create mode 100644 pkg/server/container_start_test.go diff --git a/pkg/server/container_remove.go b/pkg/server/container_remove.go index fe37a8ac7..9b70cac00 100644 --- a/pkg/server/container_remove.go +++ b/pkg/server/container_remove.go @@ -105,6 +105,9 @@ func setContainerRemoving(container containerstore.Container) error { if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN { return status, errors.New("container state is unknown, to stop first") } + if status.Starting { + return status, errors.New("container is in starting state, can't be removed") + } if status.Removing { return status, errors.New("container is already in removing state") } diff --git a/pkg/server/container_remove_test.go b/pkg/server/container_remove_test.go index 880b143e4..54508b8bf 100644 --- a/pkg/server/container_remove_test.go +++ b/pkg/server/container_remove_test.go @@ -40,6 +40,13 @@ func TestSetContainerRemoving(t *testing.T) { }, expectErr: true, }, + "should return error when container is in starting state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Starting: true, + }, + expectErr: true, + }, "should return error when container is in removing state": { status: containerstore.Status{ CreatedAt: time.Now().UnixNano(), @@ -71,6 +78,8 @@ func TestSetContainerRemoving(t *testing.T) { } else { assert.NoError(t, err) assert.True(t, container.Status.Get().Removing, "removing should be set") + assert.NoError(t, resetContainerRemoving(container)) + assert.False(t, container.Status.Get().Removing, "removing should be reset") } } } diff --git a/pkg/server/container_start.go b/pkg/server/container_start.go index 59fa5ed69..cd75f2a1b 100644 --- a/pkg/server/container_start.go +++ b/pkg/server/container_start.go @@ -38,64 +38,48 @@ import ( // StartContainer starts the container. func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { - container, err := c.containerStore.Get(r.GetContainerId()) + cntr, err := c.containerStore.Get(r.GetContainerId()) if err != nil { return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId()) } - var startErr error - // update container status in one transaction to avoid race with event monitor. - if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { - // Always apply status change no matter startContainer fails or not. Because startContainer - // may change container state no matter it fails or succeeds. - startErr = c.startContainer(ctx, container, &status) - return status, nil - }); startErr != nil { - return nil, startErr - } else if err != nil { - return nil, errors.Wrapf(err, "failed to update container %q metadata", container.ID) - } - return &runtime.StartContainerResponse{}, nil -} - -// startContainer actually starts the container. The function needs to be run in one transaction. Any updates -// to the status passed in will be applied no matter the function returns error or not. -func (c *criService) startContainer(ctx context.Context, - cntr containerstore.Container, - status *containerstore.Status) (retErr error) { id := cntr.ID meta := cntr.Metadata container := cntr.Container config := meta.Config - // Return error if container is not in created state. - if status.State() != runtime.ContainerState_CONTAINER_CREATED { - return errors.Errorf("container %q is in %s state", id, criContainerStateToString(status.State())) - } - // Do not start the container when there is a removal in progress. - if status.Removing { - return errors.Errorf("container %q is in removing state", id) + // Set starting state to prevent other start/remove operations against this container + // while it's being started. + if err := setContainerStarting(cntr); err != nil { + return nil, errors.Wrapf(err, "failed to set starting state for container %q", id) } - defer func() { if retErr != nil { // Set container to exited if fail to start. - status.Pid = 0 - status.FinishedAt = time.Now().UnixNano() - status.ExitCode = errorStartExitCode - status.Reason = errorStartReason - status.Message = retErr.Error() + if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + status.Pid = 0 + status.FinishedAt = time.Now().UnixNano() + status.ExitCode = errorStartExitCode + status.Reason = errorStartReason + status.Message = retErr.Error() + return status, nil + }); err != nil { + logrus.WithError(err).Errorf("failed to set start failure state for container %q", id) + } + } + if err := resetContainerStarting(cntr); err != nil { + logrus.WithError(err).Errorf("failed to reset starting state for container %q", id) } }() // Get sandbox config from sandbox store. sandbox, err := c.sandboxStore.Get(meta.SandboxID) if err != nil { - return errors.Wrapf(err, "sandbox %q not found", meta.SandboxID) + return nil, errors.Wrapf(err, "sandbox %q not found", meta.SandboxID) } sandboxID := meta.SandboxID if sandbox.Status.Get().State != sandboxstore.StateReady { - return errors.Errorf("sandbox container %q is not running", sandboxID) + return nil, errors.Errorf("sandbox container %q is not running", sandboxID) } ioCreation := func(id string) (_ containerdio.IO, err error) { @@ -110,7 +94,7 @@ func (c *criService) startContainer(ctx context.Context, ctrInfo, err := container.Info(ctx) if err != nil { - return errors.Wrap(err, "failed to get container info") + return nil, errors.Wrap(err, "failed to get container info") } var taskOpts []containerd.NewTaskOpts @@ -120,7 +104,7 @@ func (c *criService) startContainer(ctx context.Context, } task, err := container.NewTask(ctx, ioCreation, taskOpts...) if err != nil { - return errors.Wrap(err, "failed to create containerd task") + return nil, errors.Wrap(err, "failed to create containerd task") } defer func() { if retErr != nil { @@ -133,15 +117,61 @@ func (c *criService) startContainer(ctx context.Context, } }() + // wait is a long running background request, no timeout needed. + exitCh, err := task.Wait(ctrdutil.NamespacedContext()) + if err != nil { + return nil, errors.Wrap(err, "failed to wait for containerd task") + } + // Start containerd task. if err := task.Start(ctx); err != nil { - return errors.Wrapf(err, "failed to start containerd task %q", id) + return nil, errors.Wrapf(err, "failed to start containerd task %q", id) } // Update container start timestamp. - status.Pid = task.Pid() - status.StartedAt = time.Now().UnixNano() - return nil + if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) { + status.Pid = task.Pid() + status.StartedAt = time.Now().UnixNano() + return status, nil + }); err != nil { + return nil, errors.Wrapf(err, "failed to update container %q state", id) + } + + // start the monitor after updating container state, this ensures that + // event monitor receives the TaskExit event and update container state + // after this. + c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh) + + return &runtime.StartContainerResponse{}, nil +} + +// setContainerStarting sets the container into starting state. In starting state, the +// container will not be removed or started again. +func setContainerStarting(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + // Return error if container is not in created state. + if status.State() != runtime.ContainerState_CONTAINER_CREATED { + return status, errors.Errorf("container is in %s state", criContainerStateToString(status.State())) + } + // Do not start the container when there is a removal in progress. + if status.Removing { + return status, errors.New("container is in removing state, can't be started") + } + if status.Starting { + return status, errors.New("container is already in starting state") + } + status.Starting = true + return status, nil + }) +} + +// resetContainerStarting resets the container starting state on start failure. So +// that we could remove the container later. +func resetContainerStarting(container containerstore.Container) error { + return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) { + status.Starting = false + return status, nil + }) } // createContainerLoggers creates container loggers and return write closer for stdout and stderr. diff --git a/pkg/server/container_start_test.go b/pkg/server/container_start_test.go new file mode 100644 index 000000000..2b9784de3 --- /dev/null +++ b/pkg/server/container_start_test.go @@ -0,0 +1,98 @@ +/* +Copyright 2019 The containerd Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package server + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + containerstore "github.com/containerd/cri/pkg/store/container" +) + +// TestSetContainerStarting tests setContainerStarting sets removing +// state correctly. +func TestSetContainerStarting(t *testing.T) { + testID := "test-id" + for desc, test := range map[string]struct { + status containerstore.Status + expectErr bool + }{ + + "should not return error when container is in created state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + }, + expectErr: false, + }, + "should return error when container is in running state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + }, + expectErr: true, + }, + "should return error when container is in exited state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + StartedAt: time.Now().UnixNano(), + FinishedAt: time.Now().UnixNano(), + }, + expectErr: true, + }, + "should return error when container is in unknown state": { + status: containerstore.Status{ + CreatedAt: 0, + StartedAt: 0, + FinishedAt: 0, + }, + expectErr: true, + }, + "should return error when container is in starting state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Starting: true, + }, + expectErr: true, + }, + "should return error when container is in removing state": { + status: containerstore.Status{ + CreatedAt: time.Now().UnixNano(), + Removing: true, + }, + expectErr: true, + }, + } { + t.Logf("TestCase %q", desc) + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: testID}, + containerstore.WithFakeStatus(test.status), + ) + assert.NoError(t, err) + err = setContainerStarting(container) + if test.expectErr { + assert.Error(t, err) + assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated") + } else { + assert.NoError(t, err) + assert.True(t, container.Status.Get().Starting, "starting should be set") + assert.NoError(t, resetContainerStarting(container)) + assert.False(t, container.Status.Get().Starting, "starting should be reset") + } + } +} diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go index 0ad59deb6..a1e53a5ff 100644 --- a/pkg/server/container_stop.go +++ b/pkg/server/container_stop.go @@ -28,6 +28,7 @@ import ( "golang.org/x/sys/unix" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" + ctrdutil "github.com/containerd/cri/pkg/containerd/util" "github.com/containerd/cri/pkg/store" containerstore "github.com/containerd/cri/pkg/store/container" ) @@ -74,36 +75,34 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore return errors.Wrapf(err, "failed to get task for container %q", id) } // Don't return for unknown state, some cleanup needs to be done. - if state != runtime.ContainerState_CONTAINER_UNKNOWN { - return nil + if state == runtime.ContainerState_CONTAINER_UNKNOWN { + return cleanupUnknownContainer(ctx, id, container) } - // Task is an interface, explicitly set it to nil just in case. - task = nil + return nil } // Handle unknown state. if state == runtime.ContainerState_CONTAINER_UNKNOWN { - status, err := getTaskStatus(ctx, task) + // Start an exit handler for containers in unknown state. + waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) + defer waitCancel() + exitCh, err := task.Wait(waitCtx) if err != nil { - return errors.Wrapf(err, "failed to get task status for %q", id) - } - switch status.Status { - case containerd.Running, containerd.Created: - // The task is still running, continue stopping the task. - case containerd.Stopped: - // The task has exited. If the task exited after containerd - // started, the event monitor will receive its exit event; if it - // exited before containerd started, the event monitor will never - // receive its exit event. - // However, we can't tell that because the task state was not - // successfully loaded during containerd start (container is - // in UNKNOWN state). - // So always do cleanup here, just in case that we've missed the - // exit event. - return cleanupUnknownContainer(ctx, id, status, container) - default: - return errors.Wrapf(err, "unsupported task status %q", status.Status) + if !errdefs.IsNotFound(err) { + return errors.Wrapf(err, "failed to wait for task for %q", id) + } + return cleanupUnknownContainer(ctx, id, container) } + + exitCtx, exitCancel := context.WithCancel(context.Background()) + stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh) + defer func() { + exitCancel() + // This ensures that exit monitor is stopped before + // `Wait` is cancelled, so no exit event is generated + // because of the `Wait` cancellation. + <-stopCh + }() } // We only need to kill the task. The event handler will Delete the @@ -176,19 +175,13 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers } // cleanupUnknownContainer cleanup stopped container in unknown state. -func cleanupUnknownContainer(ctx context.Context, id string, status containerd.Status, - cntr containerstore.Container) error { +func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error { // Reuse handleContainerExit to do the cleanup. - // NOTE(random-liu): If the task did exit after containerd started, both - // the event monitor and the cleanup function would update the container - // state. The final container state will be whatever being updated first. - // There is no way to completely avoid this race condition, and for best - // effort unknown state container cleanup, this seems acceptable. return handleContainerExit(ctx, &eventtypes.TaskExit{ ContainerID: id, ID: id, Pid: 0, - ExitStatus: status.ExitStatus, - ExitedAt: status.ExitTime, + ExitStatus: unknownExitCode, + ExitedAt: time.Now(), }, cntr) } diff --git a/pkg/server/events.go b/pkg/server/events.go index bc384f754..e4536a559 100644 --- a/pkg/server/events.go +++ b/pkg/server/events.go @@ -50,13 +50,17 @@ const ( // Add a timeout for each event handling, events that timeout will be requeued and // handled again in the future. handleEventTimeout = 10 * time.Second + + exitChannelSize = 1024 ) // eventMonitor monitors containerd event and updates internal state correspondingly. // TODO(random-liu): Handle event for each container in a separate goroutine. type eventMonitor struct { - c *criService - ch <-chan *events.Envelope + c *criService + ch <-chan *events.Envelope + // exitCh receives container/sandbox exit events from exit monitors. + exitCh chan *eventtypes.TaskExit errCh <-chan error ctx context.Context cancel context.CancelFunc @@ -89,6 +93,7 @@ func newEventMonitor(c *criService) *eventMonitor { c: c, ctx: ctx, cancel: cancel, + exitCh: make(chan *eventtypes.TaskExit, exitChannelSize), backOff: newBackOff(), } } @@ -98,13 +103,38 @@ func (em *eventMonitor) subscribe(subscriber events.Subscriber) { // note: filters are any match, if you want any match but not in namespace foo // then you have to manually filter namespace foo filters := []string{ - `topic=="/tasks/exit"`, `topic=="/tasks/oom"`, `topic~="/images/"`, } em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...) } +// startExitMonitor starts an exit monitor for a given container/sandbox. +func (em *eventMonitor) startExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} { + stopCh := make(chan struct{}) + go func() { + defer close(stopCh) + select { + case exitRes := <-exitCh: + exitStatus, exitedAt, err := exitRes.Result() + if err != nil { + logrus.WithError(err).Errorf("Failed to get task exit status for %q", id) + exitStatus = unknownExitCode + exitedAt = time.Now() + } + em.exitCh <- &eventtypes.TaskExit{ + ContainerID: id, + ID: id, + Pid: pid, + ExitStatus: exitStatus, + ExitedAt: exitedAt, + } + case <-ctx.Done(): + } + }() + return stopCh +} + func convertEvent(e *gogotypes.Any) (string, interface{}, error) { id := "" evt, err := typeurl.UnmarshalAny(e) @@ -113,8 +143,6 @@ func convertEvent(e *gogotypes.Any) (string, interface{}, error) { } switch e := evt.(type) { - case *eventtypes.TaskExit: - id = e.ContainerID case *eventtypes.TaskOOM: id = e.ContainerID case *eventtypes.ImageCreate: @@ -142,6 +170,18 @@ func (em *eventMonitor) start() <-chan error { defer close(errCh) for { select { + case e := <-em.exitCh: + logrus.Debugf("Received exit event %+v", e) + id := e.ID + if em.backOff.isInBackOff(id) { + logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, e) + em.backOff.enBackOff(id, e) + break + } + if err := em.handleEvent(e); err != nil { + logrus.WithError(err).Errorf("Failed to handle exit event %+v for %s", e, id) + em.backOff.enBackOff(id, e) + } case e := <-em.ch: logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic) if e.Namespace != constants.K8sContainerdNamespace { @@ -213,8 +253,7 @@ func (em *eventMonitor) handleEvent(any interface{}) error { } else if err != store.ErrNotExist { return errors.Wrap(err, "can't find container for TaskExit event") } - // Use GetAll to include sandbox in init state. - sb, err := em.c.sandboxStore.GetAll(e.ID) + sb, err := em.c.sandboxStore.Get(e.ID) if err == nil { if err := handleSandboxExit(ctx, e, sb); err != nil { return errors.Wrap(err, "failed to handle sandbox TaskExit event") @@ -322,15 +361,7 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst } } err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { - // NOTE(random-liu): We SHOULD NOT change INIT state here. - // If sandbox state is INIT when event monitor receives an TaskExit event, - // it means that sandbox start has failed. In that case, `RunPodSandbox` will - // cleanup everything immediately. - // Once sandbox state goes out of INIT, it becomes visable to the user, which - // is not what we want. - if status.State != sandboxstore.StateInit { - status.State = sandboxstore.StateNotReady - } + status.State = sandboxstore.StateNotReady status.Pid = 0 return status, nil }) diff --git a/pkg/server/events_test.go b/pkg/server/events_test.go index 836dd2771..1a9e2d816 100644 --- a/pkg/server/events_test.go +++ b/pkg/server/events_test.go @@ -21,9 +21,7 @@ import ( "time" eventtypes "github.com/containerd/containerd/api/events" - //"github.com/containerd/containerd/api/services/events/v1" "github.com/containerd/typeurl" - //gogotypes "github.com/gogo/protobuf/types" "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/util/clock" ) @@ -35,22 +33,22 @@ func TestBackOff(t *testing.T) { inputQueues := map[string]*backOffQueue{ "container1": { events: []interface{}{ - &eventtypes.TaskExit{ContainerID: "container1", ID: "1"}, - &eventtypes.TaskExit{ContainerID: "container1", ID: "2"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, }, }, "container2": { events: []interface{}{ - &eventtypes.TaskExit{ContainerID: "container2", ID: "1"}, - &eventtypes.TaskExit{ContainerID: "container2", ID: "2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, }, }, } expectedQueues := map[string]*backOffQueue{ "container2": { events: []interface{}{ - &eventtypes.TaskExit{ContainerID: "container2", ID: "1"}, - &eventtypes.TaskExit{ContainerID: "container2", ID: "2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, + &eventtypes.TaskOOM{ContainerID: "container2"}, }, expireTime: testClock.Now().Add(backOffInitDuration), duration: backOffInitDuration, @@ -58,8 +56,8 @@ func TestBackOff(t *testing.T) { }, "container1": { events: []interface{}{ - &eventtypes.TaskExit{ContainerID: "container1", ID: "1"}, - &eventtypes.TaskExit{ContainerID: "container1", ID: "2"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, + &eventtypes.TaskOOM{ContainerID: "container1"}, }, expireTime: testClock.Now().Add(backOffInitDuration), duration: backOffInitDuration, diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go index 23606a687..3acda66fb 100644 --- a/pkg/server/helpers.go +++ b/pkg/server/helpers.go @@ -23,12 +23,9 @@ import ( "regexp" "strconv" "strings" - "time" "github.com/BurntSushi/toml" - "github.com/containerd/containerd" "github.com/containerd/containerd/containers" - "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime/linux/runctypes" runcoptions "github.com/containerd/containerd/runtime/v2/runc/options" "github.com/containerd/typeurl" @@ -477,31 +474,6 @@ func unknownSandboxStatus() sandboxstore.Status { } } -// unknownExitStatus generates containerd.Status for container exited with unknown exit code. -func unknownExitStatus() containerd.Status { - return containerd.Status{ - Status: containerd.Stopped, - ExitStatus: unknownExitCode, - ExitTime: time.Now(), - } -} - -// getTaskStatus returns status for a given task. It returns unknown exit status if -// the task is nil or not found. -func getTaskStatus(ctx context.Context, task containerd.Task) (containerd.Status, error) { - if task == nil { - return unknownExitStatus(), nil - } - status, err := task.Status(ctx) - if err != nil { - if !errdefs.IsNotFound(err) { - return containerd.Status{}, err - } - return unknownExitStatus(), nil - } - return status, nil -} - // getPassthroughAnnotations filters requested pod annotations by comparing // against permitted annotations for the given runtime. func getPassthroughAnnotations(podAnnotations map[string]string, diff --git a/pkg/server/restart.go b/pkg/server/restart.go index 7d1c1c8f7..335f3175f 100644 --- a/pkg/server/restart.go +++ b/pkg/server/restart.go @@ -34,6 +34,7 @@ import ( "golang.org/x/net/context" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" + ctrdutil "github.com/containerd/cri/pkg/containerd/util" "github.com/containerd/cri/pkg/netns" cio "github.com/containerd/cri/pkg/server/io" containerstore "github.com/containerd/cri/pkg/store/container" @@ -57,7 +58,7 @@ func (c *criService) recover(ctx context.Context) error { return errors.Wrap(err, "failed to list sandbox containers") } for _, sandbox := range sandboxes { - sb, err := loadSandbox(ctx, sandbox) + sb, err := c.loadSandbox(ctx, sandbox) if err != nil { logrus.WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID()) continue @@ -275,6 +276,22 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe status.StartedAt = time.Now().UnixNano() status.Pid = t.Pid() } + // Wait for the task for exit monitor. + // wait is a long running background request, no timeout needed. + exitCh, err := t.Wait(ctrdutil.NamespacedContext()) + if err != nil { + if !errdefs.IsNotFound(err) { + return errors.Wrap(err, "failed to wait for task") + } + // Container was in running state, but its task has been deleted, + // set unknown exited state. + status.FinishedAt = time.Now().UnixNano() + status.ExitCode = unknownExitCode + status.Reason = unknownExitReason + } else { + // Start exit monitor. + c.eventMonitor.startExitMonitor(context.Background(), id, status.Pid, exitCh) + } case containerd.Stopped: // Task is stopped. Updata status and delete the task. if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { @@ -304,7 +321,7 @@ func (c *criService) loadContainer(ctx context.Context, cntr containerd.Containe } // loadSandbox loads sandbox from containerd. -func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) { +func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) { ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout) defer cancel() var sandbox sandboxstore.Sandbox @@ -358,9 +375,20 @@ func loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.S status.State = sandboxstore.StateNotReady } else { if taskStatus.Status == containerd.Running { - // Task is running, set sandbox state as READY. - status.State = sandboxstore.StateReady - status.Pid = t.Pid() + // Wait for the task for sandbox monitor. + // wait is a long running background request, no timeout needed. + exitCh, err := t.Wait(ctrdutil.NamespacedContext()) + if err != nil { + if !errdefs.IsNotFound(err) { + return status, errors.Wrap(err, "failed to wait for task") + } + status.State = sandboxstore.StateNotReady + } else { + // Task is running, set sandbox state as READY. + status.State = sandboxstore.StateReady + status.Pid = t.Pid() + c.eventMonitor.startExitMonitor(context.Background(), meta.ID, status.Pid, exitCh) + } } else { // Task is not running. Delete the task and set sandbox state as NOTREADY. if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go index aaddfd1db..3aeeda3a8 100644 --- a/pkg/server/sandbox_run.go +++ b/pkg/server/sandbox_run.go @@ -87,7 +87,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox RuntimeHandler: r.GetRuntimeHandler(), }, sandboxstore.Status{ - State: sandboxstore.StateInit, + State: sandboxstore.StateUnknown, }, ) @@ -253,89 +253,66 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err != nil { return nil, errors.Wrap(err, "failed to get sandbox container info") } - if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { - status.CreatedAt = info.CreatedAt - return status, nil - }); err != nil { - return nil, errors.Wrap(err, "failed to update sandbox created timestamp") - } - // Add sandbox into sandbox store in INIT state. - sandbox.Container = container - if err := c.sandboxStore.Add(sandbox); err != nil { - return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox) + // Create sandbox task in containerd. + log.Tracef("Create sandbox container (id=%q, name=%q).", + id, name) + + var taskOpts []containerd.NewTaskOpts + // TODO(random-liu): Remove this after shim v1 is deprecated. + if c.config.NoPivot && ociRuntime.Type == linuxRuntime { + taskOpts = append(taskOpts, containerd.WithNoPivotRoot) + } + // We don't need stdio for sandbox container. + task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...) + if err != nil { + return nil, errors.Wrap(err, "failed to create containerd task") } defer func() { - // Delete sandbox from sandbox store if there is an error. if retErr != nil { - c.sandboxStore.Delete(id) + deferCtx, deferCancel := ctrdutil.DeferContext() + defer deferCancel() + // Cleanup the sandbox container if an error is returned. + if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { + logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id) + } } }() - // NOTE(random-liu): Sandbox state only stay in INIT state after this point - // and before the end of this function. - // * If `Update` succeeds, sandbox state will become READY in one transaction. - // * If `Update` fails, sandbox will be removed from the store in the defer above. - // * If containerd stops at any point before `Update` finishes, because sandbox - // state is not checkpointed, it will be recovered from corresponding containerd task - // status during restart: - // * If the task is running, sandbox state will be READY, - // * Or else, sandbox state will be NOTREADY. - // - // In any case, sandbox will leave INIT state, so it's safe to ignore sandbox - // in INIT state in other functions. - - // Start sandbox container in one transaction to avoid race condition with - // event monitor. - if err := sandbox.Status.Update(func(status sandboxstore.Status) (_ sandboxstore.Status, retErr error) { - // NOTE(random-liu): We should not change the sandbox state to NOTREADY - // if `Update` fails. - // - // If `Update` fails, the sandbox will be cleaned up by all the defers - // above. We should not let user see this sandbox, or else they will - // see the sandbox disappear after the defer clean up, which may confuse - // them. - // - // Given so, we should keep the sandbox in INIT state if `Update` fails, - // and ignore sandbox in INIT state in all the inspection functions. - - // Create sandbox task in containerd. - log.Tracef("Create sandbox container (id=%q, name=%q).", - id, name) - var taskOpts []containerd.NewTaskOpts - // TODO(random-liu): Remove this after shim v1 is deprecated. - if c.config.NoPivot && ociRuntime.Type == linuxRuntime { - taskOpts = append(taskOpts, containerd.WithNoPivotRoot) - } - // We don't need stdio for sandbox container. - task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...) - if err != nil { - return status, errors.Wrap(err, "failed to create containerd task") - } - defer func() { - if retErr != nil { - deferCtx, deferCancel := ctrdutil.DeferContext() - defer deferCancel() - // Cleanup the sandbox container if an error is returned. - // It's possible that task is deleted by event monitor. - if _, err := task.Delete(deferCtx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { - logrus.WithError(err).Errorf("Failed to delete sandbox container %q", id) - } - } - }() + // wait is a long running background request, no timeout needed. + exitCh, err := task.Wait(ctrdutil.NamespacedContext()) + if err != nil { + return nil, errors.Wrap(err, "failed to wait for sandbox container task") + } - if err := task.Start(ctx); err != nil { - return status, errors.Wrapf(err, "failed to start sandbox container task %q", id) - } + if err := task.Start(ctx); err != nil { + return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id) + } + if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) { // Set the pod sandbox as ready after successfully start sandbox container. status.Pid = task.Pid() status.State = sandboxstore.StateReady + status.CreatedAt = info.CreatedAt return status, nil }); err != nil { - return nil, errors.Wrap(err, "failed to start sandbox container") + return nil, errors.Wrap(err, "failed to update sandbox status") + } + + // Add sandbox into sandbox store in INIT state. + sandbox.Container = container + + if err := c.sandboxStore.Add(sandbox); err != nil { + return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox) } + // start the monitor after adding sandbox into the store, this ensures + // that sandbox is in the store, when event monitor receives the TaskExit event. + // + // TaskOOM from containerd may come before sandbox is added to store, + // but we don't care about sandbox TaskOOM right now, so it is fine. + c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh) + return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil } diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index a648f6fa2..d04118dd3 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -19,7 +19,6 @@ package server import ( "time" - "github.com/containerd/containerd" eventtypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/errdefs" cni "github.com/containerd/go-cni" @@ -29,6 +28,7 @@ import ( "golang.org/x/sys/unix" runtime "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" + ctrdutil "github.com/containerd/cri/pkg/containerd/util" sandboxstore "github.com/containerd/cri/pkg/store/sandbox" ) @@ -97,6 +97,7 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb // `task.Delete` is not called here because it will be called when // the event monitor handles the `TaskExit` event. func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error { + id := sandbox.ID container := sandbox.Container state := sandbox.Status.Get().State task, err := container.Task(ctx, nil) @@ -105,29 +106,35 @@ func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxst return errors.Wrap(err, "failed to get sandbox container") } // Don't return for unknown state, some cleanup needs to be done. - if state != sandboxstore.StateUnknown { - return nil + if state == sandboxstore.StateUnknown { + return cleanupUnknownSandbox(ctx, id, sandbox) } - // Task is an interface, explicitly set it to nil just in case. - task = nil + return nil } // Handle unknown state. // The cleanup logic is the same with container unknown state. if state == sandboxstore.StateUnknown { - status, err := getTaskStatus(ctx, task) + // Start an exit handler for containers in unknown state. + waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext()) + defer waitCancel() + exitCh, err := task.Wait(waitCtx) if err != nil { - return errors.Wrapf(err, "failed to get task status for %q", sandbox.ID) - } - switch status.Status { - case containerd.Running, containerd.Created: - // The task is still running, continue stopping the task. - case containerd.Stopped: - // The task has exited, explicitly cleanup. - return cleanupUnknownSandbox(ctx, sandbox.ID, status, sandbox) - default: - return errors.Wrapf(err, "unsupported task status %q", status.Status) + if !errdefs.IsNotFound(err) { + return errors.Wrap(err, "failed to wait for task") + } + return cleanupUnknownSandbox(ctx, id, sandbox) } + + exitCtx, exitCancel := context.WithCancel(context.Background()) + stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh) + defer func() { + exitCancel() + // This ensures that exit monitor is stopped before + // `Wait` is cancelled, so no exit event is generated + // because of the `Wait` cancellation. + <-stopCh + }() } // Kill the sandbox container. @@ -166,14 +173,13 @@ func (c *criService) teardownPod(id string, path string, config *runtime.PodSand } // cleanupUnknownSandbox cleanup stopped sandbox in unknown state. -func cleanupUnknownSandbox(ctx context.Context, id string, status containerd.Status, - sandbox sandboxstore.Sandbox) error { +func cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error { // Reuse handleSandboxExit to do the cleanup. return handleSandboxExit(ctx, &eventtypes.TaskExit{ ContainerID: id, ID: id, Pid: 0, - ExitStatus: status.ExitStatus, - ExitedAt: status.ExitTime, + ExitStatus: unknownExitCode, + ExitedAt: time.Now(), }, sandbox) } diff --git a/pkg/store/container/container_test.go b/pkg/store/container/container_test.go index 44cff4f08..adfebf8b1 100644 --- a/pkg/store/container/container_test.go +++ b/pkg/store/container/container_test.go @@ -112,7 +112,7 @@ func TestContainerStore(t *testing.T) { ExitCode: 3, Reason: "TestReason-4a333", Message: "TestMessage-4a333", - Removing: true, + Starting: true, }, "4abcd": { Pid: 4, diff --git a/pkg/store/container/status.go b/pkg/store/container/status.go index 5d040ffa4..2fe2bfb5f 100644 --- a/pkg/store/container/status.go +++ b/pkg/store/container/status.go @@ -88,6 +88,10 @@ type Status struct { // Human-readable message indicating details about why container is in its // current state. Message string + // Starting indicates that the container is in starting state. + // This field doesn't need to be checkpointed. + // TODO(now): Add unit test. + Starting bool `json:"-"` // Removing indicates that the container is in removing state. // This field doesn't need to be checkpointed. Removing bool `json:"-"` diff --git a/pkg/store/container/status_test.go b/pkg/store/container/status_test.go index ea36e5247..774bb23c3 100644 --- a/pkg/store/container/status_test.go +++ b/pkg/store/container/status_test.go @@ -75,6 +75,7 @@ func TestStatusEncodeDecode(t *testing.T) { Reason: "test-reason", Message: "test-message", Removing: true, + Starting: true, } assert := assertlib.New(t) data, err := s.encode() @@ -82,6 +83,7 @@ func TestStatusEncodeDecode(t *testing.T) { newS := &Status{} assert.NoError(newS.decode(data)) s.Removing = false // Removing should not be encoded. + s.Starting = false // Starting should not be encoded. assert.Equal(s, newS) unsupported, err := json.Marshal(&versionedStatus{ diff --git a/pkg/store/sandbox/sandbox.go b/pkg/store/sandbox/sandbox.go index 69189069f..2fc01c5b3 100644 --- a/pkg/store/sandbox/sandbox.go +++ b/pkg/store/sandbox/sandbox.go @@ -86,22 +86,9 @@ func (s *Store) Add(sb Sandbox) error { return nil } -// Get returns the sandbox with specified id. Returns store.ErrNotExist -// if the sandbox doesn't exist. +// Get returns the sandbox with specified id. +// Returns store.ErrNotExist if the sandbox doesn't exist. func (s *Store) Get(id string) (Sandbox, error) { - sb, err := s.GetAll(id) - if err != nil { - return sb, err - } - if sb.Status.Get().State == StateInit { - return Sandbox{}, store.ErrNotExist - } - return sb, nil -} - -// GetAll returns the sandbox with specified id, including sandbox in unknown -// state. Returns store.ErrNotExist if the sandbox doesn't exist. -func (s *Store) GetAll(id string) (Sandbox, error) { s.lock.RLock() defer s.lock.RUnlock() id, err := s.idIndex.Get(id) @@ -123,9 +110,6 @@ func (s *Store) List() []Sandbox { defer s.lock.RUnlock() var sandboxes []Sandbox for _, sb := range s.sandboxes { - if sb.Status.Get().State == StateInit { - continue - } sandboxes = append(sandboxes, sb) } return sandboxes diff --git a/pkg/store/sandbox/sandbox_test.go b/pkg/store/sandbox/sandbox_test.go index 23523e292..2719c96d3 100644 --- a/pkg/store/sandbox/sandbox_test.go +++ b/pkg/store/sandbox/sandbox_test.go @@ -106,7 +106,7 @@ func TestSandboxStore(t *testing.T) { }, NetNSPath: "TestNetNS-3defg", }, - Status{State: StateInit}, + Status{State: StateUnknown}, ) assert := assertlib.New(t) s := NewStore() @@ -125,21 +125,16 @@ func TestSandboxStore(t *testing.T) { assert.Equal(sb, got) } - t.Logf("should not be able to get unknown sandbox") + t.Logf("should be able to get sandbox in unknown state with Get") got, err := s.Get(unknown.ID) - assert.Equal(store.ErrNotExist, err) - assert.Equal(Sandbox{}, got) - - t.Logf("should be able to get unknown sandbox with GetAll") - got, err = s.GetAll(unknown.ID) assert.NoError(err) assert.Equal(unknown, got) t.Logf("should be able to list sandboxes") + sbNum := len(sandboxes) + 1 sbs := s.List() - assert.Len(sbs, len(sandboxes)) + assert.Len(sbs, sbNum) - sbNum := len(sandboxes) for testID, v := range sandboxes { truncID := genTruncIndex(testID) diff --git a/pkg/store/sandbox/status.go b/pkg/store/sandbox/status.go index 201b19ba4..c5480bb5d 100644 --- a/pkg/store/sandbox/status.go +++ b/pkg/store/sandbox/status.go @@ -26,11 +26,11 @@ import ( // | | // | Create(Run) | Load // | | -// Start +----v----+ | -// (failed) | | | -// +-------------+ INIT | +-----------+ -// | | | | | -// | +----+----+ | | +// Start | | +// (failed) | | +// +------------------+ +-----------+ +// | | | | +// | | | | // | | | | // | | Start(Run) | | // | | | | @@ -53,23 +53,17 @@ import ( // +-------------> DELETED // State is the sandbox state we use in containerd/cri. -// It includes init and unknown, which are internal states not defined in CRI. +// It includes unknown, which is internal states not defined in CRI. // The state mapping from internal states to CRI states: // * ready -> ready // * not ready -> not ready -// * init -> not exist // * unknown -> not ready type State uint32 const ( - // StateInit is init state of sandbox. Sandbox - // is in init state before its corresponding sandbox container - // is created. Sandbox in init state should be ignored by most - // functions, unless the caller needs to update sandbox state. - StateInit State = iota // StateReady is ready state, it means sandbox container // is running. - StateReady + StateReady = iota // StateNotReady is notready state, it ONLY means sandbox // container is not running. // StopPodSandbox should still be called for NOTREADY sandbox to diff --git a/pkg/store/sandbox/status_test.go b/pkg/store/sandbox/status_test.go index 31e9253ee..6973ac66c 100644 --- a/pkg/store/sandbox/status_test.go +++ b/pkg/store/sandbox/status_test.go @@ -28,7 +28,7 @@ func TestStatus(t *testing.T) { testStatus := Status{ Pid: 123, CreatedAt: time.Now(), - State: StateInit, + State: StateUnknown, } updateStatus := Status{ Pid: 456,