From d3b96f4e31c4d915d66c7aa028a9e79c8ebd9219 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Fri, 26 Mar 2021 17:12:06 -0700 Subject: [PATCH] Add retry on image push 5xx errors Some registries can be flaky and return intermittent 5xx errors. This change allows those errors to be retried, similarly to network-level errors. Note that this needs the upstream containerd fix https://github.com/containerd/containerd/pull/5276 to work reliably. This was tested with a registry that was modified to return 504 on every other manifest PUT. Without the change, exports to the registry fail every other attempt. With the change and the related containerd change, exports to the registry always succeed. Signed-off-by: Aaron Lehmann --- util/contentutil/copy.go | 2 +- util/imageutil/config.go | 2 +- util/progress/logs/logs.go | 1 + util/resolver/retryhandler/retry.go | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/util/contentutil/copy.go b/util/contentutil/copy.go index 08c604730061..b471d8b94850 100644 --- a/util/contentutil/copy.go +++ b/util/contentutil/copy.go @@ -65,7 +65,7 @@ func CopyChain(ctx context.Context, ingester content.Ingester, provider content. handlers := []images.Handler{ images.ChildrenHandler(provider), filterHandler, - retryhandler.New(remotes.FetchHandler(ingester, &localFetcher{provider}), nil), + retryhandler.New(remotes.FetchHandler(ingester, &localFetcher{provider}), func(_ []byte) {}), } if err := images.Dispatch(ctx, images.Handlers(handlers...), nil, desc); err != nil { diff --git a/util/imageutil/config.go b/util/imageutil/config.go index c1ea0214523e..0be587058a6c 100644 --- a/util/imageutil/config.go +++ b/util/imageutil/config.go @@ -101,7 +101,7 @@ func Config(ctx context.Context, str string, resolver remotes.Resolver, cache Co children := childrenConfigHandler(cache, platform) handlers := []images.Handler{ - retryhandler.New(remotes.FetchHandler(cache, fetcher), nil), + retryhandler.New(remotes.FetchHandler(cache, fetcher), func(_ []byte) {}), children, } if err := images.Dispatch(ctx, images.Handlers(handlers...), nil, desc); err != nil { diff --git a/util/progress/logs/logs.go b/util/progress/logs/logs.go index fe30145e5eb0..15944d8c6205 100644 --- a/util/progress/logs/logs.go +++ b/util/progress/logs/logs.go @@ -133,6 +133,7 @@ func (sw *streamWriter) Close() error { func LoggerFromContext(ctx context.Context) func([]byte) { return func(dt []byte) { pw, _, _ := progress.FromContext(ctx) + defer pw.Close() pw.Write(identity.NewID(), client.VertexLog{ Stream: stderr, Data: []byte(dt), diff --git a/util/resolver/retryhandler/retry.go b/util/resolver/retryhandler/retry.go index 147aff5ce3e6..e30f83d7f9f1 100644 --- a/util/resolver/retryhandler/retry.go +++ b/util/resolver/retryhandler/retry.go @@ -10,6 +10,7 @@ import ( "time" "github.com/containerd/containerd/images" + remoteserrors "github.com/containerd/containerd/remotes/errors" ocispec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" ) @@ -48,6 +49,14 @@ func New(f images.HandlerFunc, logger func([]byte)) images.HandlerFunc { } func retryError(err error) bool { + // Retry on 5xx errors + var errUnexpectedStatus remoteserrors.ErrUnexpectedStatus + if errors.As(err, &errUnexpectedStatus) && + errUnexpectedStatus.StatusCode >= 500 && + errUnexpectedStatus.StatusCode <= 599 { + return true + } + if errors.Is(err, io.EOF) || errors.Is(err, syscall.ECONNRESET) || errors.Is(err, syscall.EPIPE) { return true }