From 798c75f1577b6375ab60a6b39c896001cb33dbfa Mon Sep 17 00:00:00 2001 From: David Porter Date: Mon, 25 Apr 2022 14:53:53 -0700 Subject: [PATCH] Retry containerd task api request when task is in unknown state In some cases, the containerd task API appears to return a zero PID when the task is not ready yet. When this happens, the status of the task's process is reported as unknown. We should then retry the containerd task API request, since the task can take some time to leave the unknown state and finish initializing. Signed-off-by: David Porter --- container/containerd/client.go | 9 +++++++++ container/containerd/handler.go | 9 +++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/container/containerd/client.go b/container/containerd/client.go index 02fd8ce782..c4921a5b7a 100644 --- a/container/containerd/client.go +++ b/container/containerd/client.go @@ -16,6 +16,7 @@ package containerd import ( "context" + "errors" "fmt" "net" "sync" @@ -24,6 +25,7 @@ import ( containersapi "github.com/containerd/containerd/api/services/containers/v1" tasksapi "github.com/containerd/containerd/api/services/tasks/v1" versionapi "github.com/containerd/containerd/api/services/version/v1" + tasktypes "github.com/containerd/containerd/api/types/task" ptypes "github.com/gogo/protobuf/types" "github.com/google/cadvisor/container/containerd/containers" "github.com/google/cadvisor/container/containerd/errdefs" @@ -44,6 +46,10 @@ type ContainerdClient interface { Version(ctx context.Context) (string, error) } +var ( + ErrTaskIsInUnknownState = errors.New("containerd task is in unknown state") // used when process reported in containerd task is in Unknown State +) + var once sync.Once var ctrdClient ContainerdClient = nil @@ -114,6 +120,9 @@ func (c *client) TaskPid(ctx context.Context, id string) (uint32, error) { if err != nil { return 0, errdefs.FromGRPC(err) } + if response.Process.Status == tasktypes.StatusUnknown { + return 0, ErrTaskIsInUnknownState + } 
return response.Process.Pid, nil } diff --git a/container/containerd/handler.go b/container/containerd/handler.go index 0fc8803542..848550ec6e 100644 --- a/container/containerd/handler.go +++ b/container/containerd/handler.go @@ -17,6 +17,7 @@ package containerd import ( "encoding/json" + "errors" "fmt" "strings" "time" @@ -101,10 +102,14 @@ func newContainerdContainerHandler( if err == nil { break } - retry-- - if !errdefs.IsNotFound(err) || retry == 0 { + + // Retry when task is not created yet or task is in unknown state (likely in process of initializing) + isRetriableError := errdefs.IsNotFound(err) || errors.Is(err, ErrTaskIsInUnknownState) + if !isRetriableError || retry == 0 { return nil, err } + + retry-- time.Sleep(backoff) backoff *= 2 }