Skip to content
This repository has been archived by the owner on Mar 9, 2022. It is now read-only.

Commit

Permalink
Use wait instead of TaskExit.
Browse files Browse the repository at this point in the history
Signed-off-by: Lantao Liu <lantaol@google.com>
  • Loading branch information
Random-Liu committed Apr 12, 2019
1 parent ebb0928 commit 3b49d3f
Show file tree
Hide file tree
Showing 15 changed files with 495 additions and 181 deletions.
3 changes: 3 additions & 0 deletions pkg/server/container_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ func setContainerRemoving(container containerstore.Container) error {
if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
return status, errors.New("container state is unknown, to stop first")
}
if status.Starting {
return status, errors.New("container is in starting state, can't be removed")
}
if status.Removing {
return status, errors.New("container is already in removing state")
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/server/container_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ func TestSetContainerRemoving(t *testing.T) {
},
expectErr: true,
},
"should return error when container is in starting state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
"should return error when container is in removing state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Expand Down Expand Up @@ -71,6 +78,8 @@ func TestSetContainerRemoving(t *testing.T) {
} else {
assert.NoError(t, err)
assert.True(t, container.Status.Get().Removing, "removing should be set")
assert.NoError(t, resetContainerRemoving(container))
assert.False(t, container.Status.Get().Removing, "removing should be reset")
}
}
}
115 changes: 73 additions & 42 deletions pkg/server/container_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,64 +38,49 @@ import (

// StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
}

var startErr error
// update container status in one transaction to avoid race with event monitor.
if err := container.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
// Always apply status change no matter startContainer fails or not. Because startContainer
// may change container state no matter it fails or succeeds.
startErr = c.startContainer(ctx, container, &status)
return status, nil
}); startErr != nil {
return nil, startErr
} else if err != nil {
return nil, errors.Wrapf(err, "failed to update container %q metadata", container.ID)
}
return &runtime.StartContainerResponse{}, nil
}

// startContainer actually starts the container. The function needs to be run in one transaction. Any updates
// to the status passed in will be applied no matter the function returns error or not.
func (c *criService) startContainer(ctx context.Context,
cntr containerstore.Container,
status *containerstore.Status) (retErr error) {
id := cntr.ID
meta := cntr.Metadata
container := cntr.Container
config := meta.Config

// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return errors.Errorf("container %q is in %s state", id, criContainerStateToString(status.State()))
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return errors.Errorf("container %q is in removing state", id)
// Set starting state to prevent other start/remove operations against this container
// while it's being started.
if err := setContainerStarting(cntr); err != nil {
return nil, errors.Wrapf(err, "failed to set starting state for container %q", id)
}

defer func() {
if retErr != nil {
// Set container to exited if fail to start.
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = 0
status.FinishedAt = time.Now().UnixNano()
status.ExitCode = errorStartExitCode
status.Reason = errorStartReason
status.Message = retErr.Error()
return status, nil
}); err != nil {
logrus.WithError(err).Errorf("failed to set start failure state for container %q", id)
}
}
// Reset starting if start failed.
if err := resetContainerStarting(cntr); err != nil {
logrus.WithError(err).Errorf("failed to reset starting state for container %q", id)
}
}()

// Get sandbox config from sandbox store.
sandbox, err := c.sandboxStore.Get(meta.SandboxID)
if err != nil {
return errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
return nil, errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
}
sandboxID := meta.SandboxID
if sandbox.Status.Get().State != sandboxstore.StateReady {
return errors.Errorf("sandbox container %q is not running", sandboxID)
return nil, errors.Errorf("sandbox container %q is not running", sandboxID)
}

ioCreation := func(id string) (_ containerdio.IO, err error) {
Expand All @@ -110,7 +95,7 @@ func (c *criService) startContainer(ctx context.Context,

ctrInfo, err := container.Info(ctx)
if err != nil {
return errors.Wrap(err, "failed to get container info")
return nil, errors.Wrap(err, "failed to get container info")
}

var taskOpts []containerd.NewTaskOpts
Expand All @@ -120,7 +105,7 @@ func (c *criService) startContainer(ctx context.Context,
}
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil {
return errors.Wrap(err, "failed to create containerd task")
return nil, errors.Wrap(err, "failed to create containerd task")
}
defer func() {
if retErr != nil {
Expand All @@ -133,15 +118,61 @@ func (c *criService) startContainer(ctx context.Context,
}
}()

// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
return nil, errors.Wrap(err, "failed to wait for containerd task")
}

// Start containerd task.
if err := task.Start(ctx); err != nil {
return errors.Wrapf(err, "failed to start containerd task %q", id)
return nil, errors.Wrapf(err, "failed to start containerd task %q", id)
}

// Update container start timestamp.
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return nil
if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
status.Pid = task.Pid()
status.StartedAt = time.Now().UnixNano()
return status, nil
}); err != nil {
return nil, errors.Wrapf(err, "failed to update container %q state", id)
}

// start the monitor after updating container state, this ensures that
// event monitor receives the TaskExit event and update container state
// after this.
c.eventMonitor.startExitMonitor(id, task.Pid(), exitCh)

return &runtime.StartContainerResponse{}, nil
}

// setContainerStarting sets the container into starting state. In starting state, the
// container will not be removed or started again.
func setContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
// Return error if container is not in created state.
if status.State() != runtime.ContainerState_CONTAINER_CREATED {
return status, errors.Errorf("container is in %s state", criContainerStateToString(status.State()))
}
// Do not start the container when there is a removal in progress.
if status.Removing {
return status, errors.New("container is in removing state, can't be started")
}
if status.Starting {
return status, errors.New("container is already in starting state")
}
status.Starting = true
return status, nil
})
}

// resetContainerStarting resets the container starting state on start failure. So
// that we could remove the container later.
func resetContainerStarting(container containerstore.Container) error {
return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
status.Starting = false
return status, nil
})
}

// createContainerLoggers creates container loggers and return write closer for stdout and stderr.
Expand Down
98 changes: 98 additions & 0 deletions pkg/server/container_start_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
Copyright 2019 The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package server

import (
"testing"
"time"

"github.com/stretchr/testify/assert"

containerstore "github.com/containerd/cri/pkg/store/container"
)

// TestSetContainerStarting tests setContainerStarting sets removing
// state correctly.
func TestSetContainerStarting(t *testing.T) {
testID := "test-id"
for desc, test := range map[string]struct {
status containerstore.Status
expectErr bool
}{

"should not return error when container is in created state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
},
expectErr: false,
},
"should return error when container is in running state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
},
expectErr: true,
},
"should return error when container is in exited state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
},
expectErr: true,
},
"should return error when container is in unknown state": {
status: containerstore.Status{
CreatedAt: 0,
StartedAt: 0,
FinishedAt: 0,
},
expectErr: true,
},
"should return error when container is in starting state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Starting: true,
},
expectErr: true,
},
"should return error when container is in removing state": {
status: containerstore.Status{
CreatedAt: time.Now().UnixNano(),
Removing: true,
},
expectErr: true,
},
} {
t.Logf("TestCase %q", desc)
container, err := containerstore.NewContainer(
containerstore.Metadata{ID: testID},
containerstore.WithFakeStatus(test.status),
)
assert.NoError(t, err)
err = setContainerStarting(container)
if test.expectErr {
assert.Error(t, err)
assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
} else {
assert.NoError(t, err)
assert.True(t, container.Status.Get().Starting, "starting should be set")
assert.NoError(t, resetContainerStarting(container))
assert.False(t, container.Status.Get().Starting, "starting should be reset")
}
}
}
Loading

0 comments on commit 3b49d3f

Please sign in to comment.