Skip to content

Commit

Permalink
ssa: Improve wait error reporting
Browse files Browse the repository at this point in the history
If fail fast is enable, include only the failed resources
in the returned error.

Signed-off-by: Stefan Prodan <stefan.prodan@gmail.com>
  • Loading branch information
stefanprodan committed Jan 20, 2024
1 parent 404873e commit acf4807
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 74 deletions.
55 changes: 29 additions & 26 deletions ssa/manager_wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package ssa

import (
"context"
"errors"
"fmt"
"strings"
"time"
Expand Down Expand Up @@ -83,7 +84,6 @@ func (m *ResourceManager) WaitForSet(set object.ObjMetadataSet, opts WaitOptions
eventsChan := m.poller.Poll(ctx, set, pollingOpts)

lastStatus := make(map[object.ObjMetadata]*event.ResourceStatus)
var failedResources int

done := statusCollector.ListenWithObserver(eventsChan, collector.ObserverFunc(
func(statusCollector *collector.ResourceStatusCollector, e event.Event) {
Expand All @@ -96,7 +96,7 @@ func (m *ResourceManager) WaitForSet(set object.ObjMetadataSet, opts WaitOptions
// skip DeadlineExceeded errors because kstatus emits that error
// for every resource it's monitoring even when only one of them
// actually fails.
if rs.Error != context.DeadlineExceeded {
if !errors.Is(rs.Error, context.DeadlineExceeded) {
lastStatus[rs.Identifier] = rs
}

Expand All @@ -105,7 +105,6 @@ func (m *ResourceManager) WaitForSet(set object.ObjMetadataSet, opts WaitOptions
}
rss = append(rss, rs)
}
failedResources = countFailed

desired := status.CurrentStatus
aggStatus := aggregator.AggregateStatus(rss, desired)
Expand All @@ -122,32 +121,36 @@ func (m *ResourceManager) WaitForSet(set object.ObjMetadataSet, opts WaitOptions
return statusCollector.Error
}

if ctx.Err() == context.DeadlineExceeded || (opts.FailFast && failedResources > 0) {
msg := "failed early due to stalled resources"
if ctx.Err() == context.DeadlineExceeded {
msg = "timeout waiting for"
}

var errors = []string{}
for id, rs := range statusCollector.ResourceStatuses {
if rs == nil {
errors = append(errors, fmt.Sprintf("can't determine status for %s", utils.FmtObjMetadata(id)))
continue
var errs []string
for id, rs := range statusCollector.ResourceStatuses {
switch {
case rs == nil || lastStatus[id] == nil:
errs = append(errs, fmt.Sprintf("can't determine status for %s", utils.FmtObjMetadata(id)))
case lastStatus[id].Status == status.FailedStatus:
var builder strings.Builder
builder.WriteString(fmt.Sprintf("%s status: '%s'",
utils.FmtObjMetadata(rs.Identifier), lastStatus[id].Status))
if rs.Error != nil {
builder.WriteString(fmt.Sprintf(": %s", rs.Error))
}
if lastStatus[id] == nil {
// this is only nil in the rare case where no status can be determined for the resource at all
errors = append(errors, fmt.Sprintf("%s (unknown status)", utils.FmtObjMetadata(rs.Identifier)))
} else if lastStatus[id].Status != status.CurrentStatus {
var builder strings.Builder
builder.WriteString(fmt.Sprintf("%s status: '%s'",
utils.FmtObjMetadata(rs.Identifier), lastStatus[id].Status))
if rs.Error != nil {
builder.WriteString(fmt.Sprintf(": %s", rs.Error))
}
errors = append(errors, builder.String())
errs = append(errs, builder.String())
case errors.Is(ctx.Err(), context.DeadlineExceeded) && lastStatus[id].Status != status.CurrentStatus:
var builder strings.Builder
builder.WriteString(fmt.Sprintf("%s status: '%s'",
utils.FmtObjMetadata(rs.Identifier), lastStatus[id].Status))
if rs.Error != nil {
builder.WriteString(fmt.Sprintf(": %s", rs.Error))
}
errs = append(errs, builder.String())
}
}

if len(errs) > 0 {
msg := "failed early due to stalled resources"
if errors.Is(ctx.Err(), context.DeadlineExceeded) {
msg = "timeout waiting for"
}
return fmt.Errorf("%s: [%s]", msg, strings.Join(errors, ", "))
return fmt.Errorf("%s: [%s]", msg, strings.Join(errs, ", "))
}

return nil
Expand Down
63 changes: 15 additions & 48 deletions ssa/manager_wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func TestWaitForSet_failFast(t *testing.T) {
t.Fatal(err)
}

// Set Progressing Condition to false and reason to ProgressDeadlineExceeded
// Set Progressing Condition to false and reason to ProgressDeadlineExceeded.
// This tells kstatus that the deployment has stalled.
cond := appsv1.DeploymentCondition{
Type: appsv1.DeploymentProgressing,
Expand All @@ -160,6 +160,8 @@ func TestWaitForSet_failFast(t *testing.T) {
t.Fatal(err)
}

// Set PVC phase to Pending.
// This tells kstatus that the PVC is in progress.
clusterPvc := &unstructured.Unstructured{}
clusterPvc.SetGroupVersionKind(schema.GroupVersionKind{
Group: "",
Expand All @@ -170,7 +172,7 @@ func TestWaitForSet_failFast(t *testing.T) {
t.Fatal(err)
}

if err := unstructured.SetNestedField(clusterPvc.Object, "Bound", "status", "phase"); err != nil {
if err := unstructured.SetNestedField(clusterPvc.Object, "Pending", "status", "phase"); err != nil {
t.Fatal(err)
}

Expand All @@ -185,7 +187,7 @@ func TestWaitForSet_failFast(t *testing.T) {
t.Fatal(err)
}

t.Run("timeout when failfast is set to false", func(t *testing.T) {
t.Run("timeout when fail fast is disabled", func(t *testing.T) {
err = manager.WaitForSet(cs.ToObjMetadataSet(), WaitOptions{
Interval: interval,
Timeout: timeout,
Expand All @@ -199,54 +201,15 @@ func TestWaitForSet_failFast(t *testing.T) {
}

if !strings.Contains(err.Error(), deployFailedMsg) {
t.Fatal("expected error to contain status of failed deployment")
}
})

t.Run("return early when failfast is set to true", func(t *testing.T) {
err = manager.WaitForSet(cs.ToObjMetadataSet(), WaitOptions{
Interval: interval,
Timeout: timeout,
FailFast: true,
})

deployFailedMsg := fmt.Sprintf("%s status: '%s'", utils.FmtObjMetadata(deployObjMeta), status.FailedStatus)

if err == nil || !strings.Contains(err.Error(), "failed early") {
t.Fatal("expected WaitForSet to fail early due to stalled deployment")
t.Fatal("expected error to contain status of failed deployment", err.Error())
}

if !strings.Contains(err.Error(), deployFailedMsg) {
t.Fatal("expected error to contain status of failed deployment")
if !strings.Contains(err.Error(), "InProgress") {
t.Fatal("expected error to contain InProgress deployment", err.Error())
}
})

t.Run("fail early even if there are still Progressing resources", func(t *testing.T) {
// change status to Pending to have an 'InProgress' resource
clusterPvc := &unstructured.Unstructured{}
clusterPvc.SetGroupVersionKind(schema.GroupVersionKind{
Group: "",
Kind: "PersistentVolumeClaim",
Version: "v1",
})
if err := manager.client.Get(ctx, client.ObjectKeyFromObject(pvc), clusterPvc); err != nil {
t.Fatal(err)
}

if err := unstructured.SetNestedField(clusterPvc.Object, "Pending", "status", "phase"); err != nil {
t.Fatal(err)
}
opts := &client.SubResourcePatchOptions{
PatchOptions: client.PatchOptions{
FieldManager: manager.owner.Field,
},
}

clusterPvc.SetManagedFields(nil)
if err := manager.client.Status().Patch(ctx, clusterPvc, client.Apply, opts); err != nil {
t.Fatal(err)
}

t.Run("fail early even if there are still progressing resources", func(t *testing.T) {
err = manager.WaitForSet(cs.ToObjMetadataSet(), WaitOptions{
Interval: interval,
Timeout: timeout,
Expand All @@ -256,11 +219,15 @@ func TestWaitForSet_failFast(t *testing.T) {
deployFailedMsg := fmt.Sprintf("%s status: '%s'", utils.FmtObjMetadata(deployObjMeta), status.FailedStatus)

if err == nil || !strings.Contains(err.Error(), "failed early") {
t.Fatal("expected WaitForSet to fail early due to stalled deployment")
t.Fatal("expected to fail early due to stalled deployment", err.Error())
}

if !strings.Contains(err.Error(), deployFailedMsg) {
t.Fatal("expected error to contain status of failed deployment")
t.Fatal("expected error to contain status of failed deployment", err.Error())
}

if strings.Contains(err.Error(), "InProgress") {
t.Fatal("expected error to not contain InProgress resources", err.Error())
}
})
}

0 comments on commit acf4807

Please sign in to comment.