diff --git a/api/v2beta2/helmrelease_types.go b/api/v2beta2/helmrelease_types.go
index 56cf9709d..eaea9c01c 100644
--- a/api/v2beta2/helmrelease_types.go
+++ b/api/v2beta2/helmrelease_types.go
@@ -37,6 +37,11 @@ const (
 	HelmReleaseFinalizer = "finalizers.fluxcd.io"
 )
 
+const (
+	// defaultMaxHistory is the default number of Helm release versions to keep.
+	defaultMaxHistory = 5
+)
+
 // Kustomize Helm PostRenderer specification.
 type Kustomize struct {
 	// Strategic merge and JSON patches, defined as inline YAML objects,
@@ -1200,7 +1205,7 @@ func (in HelmRelease) GetTimeout() metav1.Duration {
 // GetMaxHistory returns the configured MaxHistory, or the default of 5.
 func (in HelmRelease) GetMaxHistory() int {
 	if in.Spec.MaxHistory == nil {
-		return 5
+		return defaultMaxHistory
 	}
 	return *in.Spec.MaxHistory
 }
diff --git a/api/v2beta2/snapshot_types.go b/api/v2beta2/snapshot_types.go
index 2258acc58..587667665 100644
--- a/api/v2beta2/snapshot_types.go
+++ b/api/v2beta2/snapshot_types.go
@@ -81,11 +81,13 @@ func (in Snapshots) Previous(ignoreTests bool) *Snapshot {
 }
 
 // Truncate removes all Snapshots up to the Previous deployed Snapshot.
-// If there is no previous-deployed Snapshot, no Snapshots are removed.
+// If there is no previous-deployed Snapshot, the most recent 5 Snapshots are
+// retained.
 func (in *Snapshots) Truncate(ignoreTests bool) {
 	if in.Len() < 2 {
 		return
 	}
+
 	in.SortByVersion()
 	for i := range (*in)[1:] {
 		s := (*in)[i+1]
@@ -96,6 +98,13 @@ func (in *Snapshots) Truncate(ignoreTests bool) {
 			}
 		}
 	}
+
+	if in.Len() > defaultMaxHistory {
+		// If none of the Snapshots are deployed or superseded, and there
+		// are more than the defaultMaxHistory, truncate to the most recent
+		// Snapshots.
+		*in = (*in)[:defaultMaxHistory]
+	}
 }
 
 // Snapshot captures a point-in-time copy of the status information for a Helm release,
diff --git a/api/v2beta2/snapshot_types_test.go b/api/v2beta2/snapshot_types_test.go
index 8447868b9..32911877f 100644
--- a/api/v2beta2/snapshot_types_test.go
+++ b/api/v2beta2/snapshot_types_test.go
@@ -258,6 +258,24 @@ func TestSnapshots_Truncate(t *testing.T) {
 				}},
 			},
 		},
+		{
+			name: "retains most recent snapshots when all have failed",
+			in: Snapshots{
+				{Version: 6, Status: "deployed"},
+				{Version: 5, Status: "failed"},
+				{Version: 4, Status: "failed"},
+				{Version: 3, Status: "failed"},
+				{Version: 2, Status: "failed"},
+				{Version: 1, Status: "failed"},
+			},
+			want: Snapshots{
+				{Version: 6, Status: "deployed"},
+				{Version: 5, Status: "failed"},
+				{Version: 4, Status: "failed"},
+				{Version: 3, Status: "failed"},
+				{Version: 2, Status: "failed"},
+			},
+		},
 		{
 			name: "without previous snapshot",
 			in: Snapshots{
diff --git a/internal/controller/helmrelease_controller.go b/internal/controller/helmrelease_controller.go
index d10911653..0df4d8dd7 100644
--- a/internal/controller/helmrelease_controller.go
+++ b/internal/controller/helmrelease_controller.go
@@ -25,9 +25,11 @@ import (
 
 	"github.com/hashicorp/go-retryablehttp"
 	corev1 "k8s.io/api/core/v1"
+	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/types"
 	apierrutil "k8s.io/apimachinery/pkg/util/errors"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/cli-runtime/pkg/genericclioptions"
 	"k8s.io/client-go/rest"
 	kuberecorder "k8s.io/client-go/tools/record"
@@ -59,6 +61,7 @@ import (
 	"github.com/fluxcd/helm-controller/internal/action"
 	"github.com/fluxcd/helm-controller/internal/chartutil"
 	"github.com/fluxcd/helm-controller/internal/digest"
+	interrors "github.com/fluxcd/helm-controller/internal/errors"
 	"github.com/fluxcd/helm-controller/internal/features"
 	"github.com/fluxcd/helm-controller/internal/kube"
 	"github.com/fluxcd/helm-controller/internal/loader"
@@ -97,6 +100,11 @@ type HelmReleaseReconcilerOptions struct {
 	RateLimiter ratelimiter.RateLimiter
 }
 
+var (
+	errWaitForDependency = errors.New("must wait for dependency")
+	errWaitForChart      = errors.New("must wait for chart")
+)
+
 func (r *HelmReleaseReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opts HelmReleaseReconcilerOptions) error {
 	// Index the HelmRelease by the HelmChart references they point at
 	if err := mgr.GetFieldIndexer().IndexField(ctx, &v2.HelmRelease{}, v2.SourceIndexKey,
@@ -167,6 +175,13 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
 		}
 
+		// We do not want to return these errors, but rather wait for the
+		// designated RequeueAfter to expire and try again.
+		// However, not returning an error will cause the patch helper to
+		// patch the observed generation, which we do not want. So we only
+		// clear these errors here, after the patch options have been set.
+		retErr = interrors.Ignore(retErr, errWaitForDependency, errWaitForChart)
+
 		if err := patchHelper.Patch(ctx, obj, patchOpts...); err != nil {
 			if !obj.DeletionTimestamp.IsZero() {
 				err = apierrutil.FilterOut(err, func(e error) bool { return apierrors.IsNotFound(e) })
@@ -174,9 +189,15 @@ func (r *HelmReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 			retErr = apierrutil.Reduce(apierrutil.NewAggregate([]error{retErr, err}))
 		}
 
-		// Always record suspend, readiness and duration metrics.
-		r.Metrics.RecordSuspend(ctx, obj, obj.Spec.Suspend)
-		r.Metrics.RecordReadiness(ctx, obj)
+		// Wait for the object to have synced in-cache after patching.
+		// This is required to ensure that the next reconciliation will
+		// operate on the patched object when an immediate reconcile is
+		// requested.
+		if err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 10*time.Second, true, r.waitForHistoryCacheSync(obj)); err != nil {
+			log.Error(err, "failed to wait for object to sync in-cache after patching")
+		}
+
+		// Record the duration of the reconciliation.
 		r.Metrics.RecordDuration(ctx, obj, start)
 	}()
 
@@ -213,7 +234,7 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
 	// Mark the resource as under reconciliation.
 	conditions.MarkReconciling(obj, meta.ProgressingReason, "Fulfilling prerequisites")
 
-	if err := patchHelper.Patch(ctx, obj); err != nil {
+	if err := patchHelper.Patch(ctx, obj, patch.WithOwnedConditions{Conditions: intreconcile.OwnedConditions}, patch.WithFieldOwner(r.FieldManager)); err != nil {
 		return ctrl.Result{}, err
 	}
 
@@ -230,7 +251,7 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
 
 		// Exponential backoff would cause execution to be prolonged too much,
 		// instead we requeue on a fixed interval.
-		return ctrl.Result{RequeueAfter: r.requeueDependency}, nil
+		return ctrl.Result{RequeueAfter: r.requeueDependency}, errWaitForDependency
 	}
 	log.Info("all dependencies are ready")
 
@@ -262,7 +283,7 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
 		conditions.MarkFalse(obj, meta.ReadyCondition, "HelmChartNotReady", msg)
 		// Do not requeue immediately, when the artifact is created
 		// the watcher should trigger a reconciliation.
-		return jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: obj.GetRequeueAfter()}), nil
+		return jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: obj.GetRequeueAfter()}), errWaitForChart
 	}
 
 	// Compose values based from the spec and references.
@@ -276,10 +297,10 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
 	loadedChart, err := loader.SecureLoadChartFromURL(r.httpClient, hc.GetArtifact().URL, hc.GetArtifact().Digest)
 	if err != nil {
 		if errors.Is(err, loader.ErrFileNotFound) {
-			msg := fmt.Sprintf("Chart not ready: artifact not found. Retrying in %s", r.requeueDependency)
+			msg := fmt.Sprintf("Chart not ready: artifact not found. Retrying in %s", r.requeueDependency.String())
 			conditions.MarkFalse(obj, meta.ReadyCondition, v2.ArtifactFailedReason, msg)
 			log.Info(msg)
-			return ctrl.Result{RequeueAfter: r.requeueDependency}, nil
+			return ctrl.Result{RequeueAfter: r.requeueDependency}, errWaitForDependency
 		}
 
 		conditions.MarkFalse(obj, meta.ReadyCondition, v2.ArtifactFailedReason, fmt.Sprintf("Could not load chart: %s", err.Error()))
@@ -352,7 +373,7 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
 		if errors.Is(err, intreconcile.ErrMustRequeue) {
 			return ctrl.Result{Requeue: true}, nil
 		}
-		if errors.Is(err, intreconcile.ErrExceededMaxRetries) {
+		if interrors.IsOneOf(err, intreconcile.ErrExceededMaxRetries, intreconcile.ErrMissingRollbackTarget) {
 			err = reconcile.TerminalError(err)
 		}
 		return ctrl.Result{}, err
@@ -633,6 +654,24 @@ func (r *HelmReleaseReconciler) getHelmChart(ctx context.Context, obj *v2.HelmRe
 	return &hc, nil
 }
 
+// waitForHistoryCacheSync returns a function that can be used to wait for the
+// cache backing the Kubernetes client to be in sync with the current state of
+// the v2beta2.HelmRelease.
+// This is a trade-off between not caching at all, and introducing a slight
+// delay to ensure we always have the latest history state.
+func (r *HelmReleaseReconciler) waitForHistoryCacheSync(obj *v2.HelmRelease) wait.ConditionWithContextFunc {
+	newObj := &v2.HelmRelease{}
+	return func(ctx context.Context) (bool, error) {
+		if err := r.Get(ctx, client.ObjectKeyFromObject(obj), newObj); err != nil {
+			if apierrors.IsNotFound(err) {
+				return true, nil
+			}
+			return false, err
+		}
+		return apiequality.Semantic.DeepEqual(obj.Status.History, newObj.Status.History), nil
+	}
+}
+
 func (r *HelmReleaseReconciler) requestsForHelmChartChange(ctx context.Context, o client.Object) []reconcile.Request {
 	hc, ok := o.(*sourcev1.HelmChart)
 	if !ok {
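Two of the changes above are easier to see in isolation. First, the new retention rule in Snapshots.Truncate from the api/v2beta2 change earlier in this diff: when none of the older snapshots is deployed or superseded, the history is now capped at defaultMaxHistory instead of being left untouched. A standalone sketch, not part of the change set, assuming the api module's v2beta2 import path and the plain-string Status values used by the tests:

package main

import (
	"fmt"

	v2 "github.com/fluxcd/helm-controller/api/v2beta2"
)

func main() {
	// Six snapshots with no deployed or superseded snapshot among the
	// older entries: before this change, Truncate would keep all of them.
	history := v2.Snapshots{
		{Version: 6, Status: "deployed"},
		{Version: 5, Status: "failed"},
		{Version: 4, Status: "failed"},
		{Version: 3, Status: "failed"},
		{Version: 2, Status: "failed"},
		{Version: 1, Status: "failed"},
	}

	// With the new cap, only the defaultMaxHistory (5) most recent
	// snapshots are retained: versions 6 down to 2.
	history.Truncate(false)
	fmt.Println(len(history)) // 5
}

Second, the sentinel errors errWaitForDependency and errWaitForChart: reconcileRelease returns them together with a populated RequeueAfter, the deferred block in Reconcile uses the non-nil error to decide that the observed generation must not be recorded, and interrors.Ignore then clears the sentinel so controller-runtime honours the requeue instead of reporting a failure. A simplified, self-contained sketch of that control flow (plain Go with local stand-ins; errors.Is stands in for interrors.Ignore):

package main

import (
	"errors"
	"fmt"
	"time"
)

// Local stand-in for the sentinel introduced in the controller.
var errWaitForDependency = errors.New("must wait for dependency")

type result struct{ RequeueAfter time.Duration }

// reconcile mimics the flow in Reconcile/reconcileRelease: the inner logic
// returns a fixed RequeueAfter together with a sentinel error, the deferred
// block uses the non-nil error to skip recording the observed generation,
// and only then clears it so the caller sees a clean requeue.
func reconcile() (res result, retErr error) {
	defer func() {
		if retErr == nil {
			fmt.Println("recording observed generation")
		} else {
			fmt.Println("skipping observed generation:", retErr)
		}
		// Stand-in for interrors.Ignore(retErr, errWaitForDependency, errWaitForChart).
		if errors.Is(retErr, errWaitForDependency) {
			retErr = nil
		}
	}()

	// Dependencies are not ready yet: requeue on a fixed interval.
	return result{RequeueAfter: 30 * time.Second}, errWaitForDependency
}

func main() {
	res, err := reconcile()
	fmt.Println(res.RequeueAfter, err) // 30s <nil>
}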
diff --git a/internal/controller/helmrelease_controller_test.go b/internal/controller/helmrelease_controller_test.go
index e432a210b..1376770c4 100644
--- a/internal/controller/helmrelease_controller_test.go
+++ b/internal/controller/helmrelease_controller_test.go
@@ -108,7 +108,7 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
 		}
 
 		res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, r.Client), obj)
-		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(err).To(Equal(errWaitForDependency))
 		g.Expect(res.RequeueAfter).To(Equal(r.requeueDependency))
 
 		g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
@@ -222,7 +222,7 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
 		}
 
 		res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, r.Client), obj)
-		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(err).To(Equal(errWaitForChart))
 		g.Expect(res.RequeueAfter).To(Equal(obj.Spec.Interval.Duration))
 
 		g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
@@ -274,7 +274,7 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
 		}
 
 		res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, r.Client), obj)
-		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(err).To(Equal(errWaitForChart))
 		g.Expect(res.RequeueAfter).To(Equal(obj.Spec.Interval.Duration))
 
 		g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
@@ -326,7 +326,7 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
 		}
 
 		res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, r.Client), obj)
-		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(err).To(Equal(errWaitForChart))
 		g.Expect(res.RequeueAfter).To(Equal(obj.Spec.Interval.Duration))
 
 		g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
@@ -438,7 +438,7 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
 		}
 
 		res, err := r.reconcileRelease(context.TODO(), patch.NewSerialPatcher(obj, r.Client), obj)
-		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(err).To(Equal(errWaitForDependency))
 		g.Expect(res.RequeueAfter).To(Equal(r.requeueDependency))
 
 		g.Expect(obj.Status.Conditions).To(conditions.MatchConditions([]metav1.Condition{
@@ -1931,6 +1931,116 @@ func TestHelmReleaseReconciler_getHelmChart(t *testing.T) {
 	}
 }
 
+func Test_waitForHistoryCacheSync(t *testing.T) {
+	tests := []struct {
+		name     string
+		rel      *v2.HelmRelease
+		cacheRel *v2.HelmRelease
+		want     bool
+	}{
+		{
+			name: "different history",
+			rel: &v2.HelmRelease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "some-name",
+				},
+				Status: v2.HelmReleaseStatus{
+					History: v2.Snapshots{
+						{
+							Version: 2,
+							Status:  "deployed",
+						},
+						{
+							Version: 1,
+							Status:  "failed",
+						},
+					},
+				},
+			},
+			cacheRel: &v2.HelmRelease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "some-name",
+				},
+				Status: v2.HelmReleaseStatus{
+					History: v2.Snapshots{
+						{
+							Version: 1,
+							Status:  "deployed",
+						},
+					},
+				},
+			},
+			want: false,
+		},
+		{
+			name: "same history",
+			rel: &v2.HelmRelease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "some-name",
+				},
+				Status: v2.HelmReleaseStatus{
+					History: v2.Snapshots{
+						{
+							Version: 2,
+							Status:  "deployed",
+						},
+						{
+							Version: 1,
+							Status:  "failed",
+						},
+					},
+				},
+			},
+			cacheRel: &v2.HelmRelease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "some-name",
+				},
+				Status: v2.HelmReleaseStatus{
+					History: v2.Snapshots{
+						{
+							Version: 2,
+							Status:  "deployed",
+						},
+						{
+							Version: 1,
+							Status:  "failed",
+						},
+					},
+				},
+			},
+			want: true,
+		},
+		{
+			name: "does not exist",
+			rel: &v2.HelmRelease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "some-name",
+				},
+			},
+			cacheRel: nil,
+			want:     true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			c := fake.NewClientBuilder()
+			c.WithScheme(NewTestScheme())
+			if tt.cacheRel != nil {
+				c.WithObjects(tt.cacheRel)
+			}
+			r := &HelmReleaseReconciler{
+				Client: c.Build(),
+			}
+
+			got, err := r.waitForHistoryCacheSync(tt.rel)(context.Background())
+			g.Expect(err).ToNot(HaveOccurred())
+			g.Expect(got == tt.want).To(BeTrue())
+		})
+	}
+}
+
 func TestValuesReferenceValidation(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/internal/errors/ignore.go b/internal/errors/ignore.go
new file mode 100644
index 000000000..8cd247c92
--- /dev/null
+++ b/internal/errors/ignore.go
@@ -0,0 +1,25 @@
+/*
+Copyright 2023 The Flux authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package errors
+
+// Ignore returns nil if err is equal to any of the errs.
+func Ignore(err error, errs ...error) error {
+	if IsOneOf(err, errs...) {
+		return nil
+	}
+	return err
+}
diff --git a/internal/errors/ignore_test.go b/internal/errors/ignore_test.go
new file mode 100644
index 000000000..bcf204ea3
--- /dev/null
+++ b/internal/errors/ignore_test.go
@@ -0,0 +1,40 @@
+/*
+Copyright 2023 The Flux authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package errors
+
+import (
+	"errors"
+	"testing"
+)
+
+func TestIgnore(t *testing.T) {
+	err1 := errors.New("error1")
+	err2 := errors.New("error2")
+
+	if err := Ignore(err1, err1, err2); err != nil {
+		t.Errorf("Expected Ignore to return nil when the error is in the list, but got %v", err)
+	}
+
+	err3 := errors.New("error3")
+	if err := Ignore(err3, err1, err2); !errors.Is(err, err3) {
+		t.Errorf("Expected Ignore to return the error when it is not in the list, but got %v", err)
+	}
+
+	if err := Ignore(err1); !errors.Is(err, err1) {
+		t.Errorf("Expected Ignore to return the error with an empty list of errors, but got %v", err)
+	}
+}
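Ignore delegates to the package's existing IsOneOf helper, whose body is not part of this diff; conceptually it is errors.Is folded over the candidate list. A standalone sketch of that matching logic, under the assumption that the real helper behaves this way:

package main

import (
	"errors"
	"fmt"
)

// isOneOf sketches the matching logic Ignore builds on: err matches when
// errors.Is reports true for any of the candidates.
func isOneOf(err error, errs ...error) bool {
	for _, e := range errs {
		if errors.Is(err, e) {
			return true
		}
	}
	return false
}

func main() {
	errA := errors.New("a")
	errB := errors.New("b")

	fmt.Println(isOneOf(fmt.Errorf("wrapped: %w", errA), errA, errB)) // true
	fmt.Println(isOneOf(errors.New("c"), errA, errB))                 // false
}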
diff --git a/internal/reconcile/atomic_release.go b/internal/reconcile/atomic_release.go
index 1e6a9e8af..893dc317b 100644
--- a/internal/reconcile/atomic_release.go
+++ b/internal/reconcile/atomic_release.go
@@ -59,6 +59,9 @@ var (
 	// to continue the reconciliation process.
 	ErrMustRequeue = errors.New("must requeue")
 
+	// ErrMissingRollbackTarget is returned when the rollback target is missing.
+	ErrMissingRollbackTarget = errors.New("missing target release for rollback")
+
 	// ErrUnknownReleaseStatus is returned when the release status is unknown
 	// and cannot be acted upon.
 	ErrUnknownReleaseStatus = errors.New("unknown release status")
@@ -168,7 +171,7 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
 			// last observation before returning. If the patch fails, we
 			// log the error and return the original context cancellation
 			// error.
-			if err := r.patchHelper.Patch(ctx, req.Object); err != nil {
+			if err := r.patchHelper.Patch(ctx, req.Object, patch.WithOwnedConditions{Conditions: OwnedConditions}, patch.WithFieldOwner(r.fieldManager)); err != nil {
 				log.Error(err, "failed to patch HelmRelease after context cancellation")
 			}
 			cancel()
@@ -189,6 +192,11 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
 				if errors.Is(err, ErrExceededMaxRetries) {
 					conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)",
 						req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object))
+					return err
+				}
+				if errors.Is(err, ErrMissingRollbackTarget) {
+					conditions.MarkStalled(req.Object, "MissingRollbackTarget", "Failed to perform remediation: %s", err.Error())
+					return err
 				}
 				return err
 			}
@@ -209,11 +217,13 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
 			log.V(logger.DebugLevel).Info(
 				fmt.Sprintf("instructed to stop before running %s action reconciler %s", next.Type(), next.Name()),
 			)
-			conditions.Delete(req.Object, meta.ReconcilingCondition)
 			if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
+				conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
 				return ErrMustRequeue
 			}
+
+			conditions.Delete(req.Object, meta.ReconcilingCondition)
 			return nil
 		}
 
@@ -221,14 +231,14 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
 		// This to show continuous progress, as Helm actions can be long-running.
 		reconcilingMsg := fmt.Sprintf("Running '%s' action with timeout of %s",
 			next.Name(), timeoutForAction(next, req.Object).String())
-		conditions.MarkTrue(req.Object, meta.ReconcilingCondition, "Progressing", reconcilingMsg)
+		conditions.MarkReconciling(req.Object, meta.ProgressingReason, reconcilingMsg)
 
 		// If the next action is a release action, we can mark the release
 		// as progressing in terms of readiness as well. Doing this for any
 		// other action type is not useful, as it would potentially
 		// overwrite more important failure state from an earlier action.
 		if next.Type() == ReconcilerTypeRelease {
-			conditions.MarkUnknown(req.Object, meta.ReadyCondition, "Progressing", reconcilingMsg)
+			conditions.MarkUnknown(req.Object, meta.ReadyCondition, meta.ProgressingReason, reconcilingMsg)
 		}
 
 		// Patch the object to reflect the new condition.
@@ -250,11 +260,13 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
 			log.V(logger.DebugLevel).Info(fmt.Sprintf(
 				"instructed to stop after running %s action reconciler %s", next.Type(), next.Name()),
 			)
-			conditions.Delete(req.Object, meta.ReconcilingCondition)
 			if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
+				conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
 				return ErrMustRequeue
 			}
+
+			conditions.Delete(req.Object, meta.ReconcilingCondition)
 			return nil
 		}
 
@@ -412,10 +424,15 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
 		// before instructing to roll back to it.
 		prev := req.Object.Status.History.Previous(remediation.MustIgnoreTestFailures(req.Object.GetTest().IgnoreFailures))
 		if _, err := action.VerifySnapshot(r.configFactory.Build(nil), prev); err != nil {
-			if interrors.IsOneOf(err, action.ErrReleaseNotFound, action.ErrReleaseDisappeared, action.ErrReleaseNotObserved, action.ErrReleaseDigest) {
-				// If the rollback target is not found or is in any other
-				// way corrupt, the most correct remediation is to
-				// reattempt the upgrade.
+			if errors.Is(err, action.ErrReleaseNotFound) {
+				// If the rollback target is missing, we cannot roll back
+				// to it and must fail.
+				return nil, fmt.Errorf("%w: cannot remediate failed release", ErrMissingRollbackTarget)
+			}
+
+			if interrors.IsOneOf(err, action.ErrReleaseDisappeared, action.ErrReleaseNotObserved, action.ErrReleaseDigest) {
+				// If the rollback target is in any way corrupt,
+				// the most correct remediation is to reattempt the upgrade.
 				log.Info(msgWithReason("unable to verify previous release in storage to roll back to", err.Error()))
 				return NewUpgrade(r.configFactory, r.eventRecorder), nil
 			}
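The actionForState change above splits the old catch-all verification branch in two: a rollback target that is gone from storage is now terminal (ErrMissingRollbackTarget, surfaced as a Stalled condition and turned into a terminal error by the controller), while a target that is present but cannot be verified still falls back to re-attempting the upgrade. A standalone sketch of that decision, using local stand-ins for the internal sentinel errors (the real code also matches ErrReleaseNotObserved and ErrReleaseDigest):

package main

import (
	"errors"
	"fmt"
)

// Local stand-ins for the sentinel errors used in internal/action and
// internal/reconcile, which cannot be imported from outside the module.
var (
	errReleaseNotFound       = errors.New("release not found")
	errReleaseDisappeared    = errors.New("release disappeared")
	errMissingRollbackTarget = errors.New("missing target release for rollback")
)

// remediationFor mirrors the new branching: a missing rollback target is
// terminal, while a corrupt-but-present target is retried with an upgrade.
func remediationFor(verifyErr error) (string, error) {
	switch {
	case verifyErr == nil:
		return "rollback", nil
	case errors.Is(verifyErr, errReleaseNotFound):
		return "", fmt.Errorf("%w: cannot remediate failed release", errMissingRollbackTarget)
	case errors.Is(verifyErr, errReleaseDisappeared):
		return "upgrade", nil
	default:
		return "", verifyErr
	}
}

func main() {
	fmt.Println(remediationFor(nil))
	fmt.Println(remediationFor(errReleaseNotFound))
	fmt.Println(remediationFor(errReleaseDisappeared))
}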
diff --git a/internal/reconcile/atomic_release_test.go b/internal/reconcile/atomic_release_test.go
index d2050e425..4e7fba159 100644
--- a/internal/reconcile/atomic_release_test.go
+++ b/internal/reconcile/atomic_release_test.go
@@ -1349,6 +1349,36 @@ func TestAtomicRelease_actionForState(t *testing.T) {
 			},
 			want: &RollbackRemediation{},
 		},
+		{
+			name:  "failed release with active upgrade remediation and no previous release triggers error",
+			state: ReleaseState{Status: ReleaseStatusFailed},
+			releases: []*helmrelease.Release{
+				testutil.BuildRelease(&helmrelease.MockReleaseOptions{
+					Name:      mockReleaseName,
+					Namespace: mockReleaseNamespace,
+					Version:   2,
+					Status:    helmrelease.StatusFailed,
+					Chart:     testutil.BuildChart(),
+				}),
+			},
+			spec: func(spec *v2.HelmReleaseSpec) {
+				spec.Upgrade = &v2.Upgrade{
+					Remediation: &v2.UpgradeRemediation{
+						Retries: 2,
+					},
+				}
+			},
+			status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
+				return v2.HelmReleaseStatus{
+					History: v2.Snapshots{
+						release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
+					},
+					LastAttemptedReleaseAction: v2.ReleaseActionUpgrade,
+					UpgradeFailures:            1,
+				}
+			},
+			wantErr: ErrMissingRollbackTarget,
+		},
 		{
 			name:  "failed release with active upgrade remediation and unverified previous triggers upgrade",
 			state: ReleaseState{Status: ReleaseStatusFailed},
 			releases: []*helmrelease.Release{
 				testutil.BuildRelease(&helmrelease.MockReleaseOptions{
@@ -1357,7 +1387,7 @@ func TestAtomicRelease_actionForState(t *testing.T) {
 					Name:      mockReleaseName,
 					Namespace: mockReleaseNamespace,
 					Version:   2,
-					Status:    helmrelease.StatusSuperseded,
+					Status:    helmrelease.StatusFailed,
 					Chart:     testutil.BuildChart(),
 				}),
 			},
@@ -1371,6 +1401,7 @@ func TestAtomicRelease_actionForState(t *testing.T) {
 			status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
 				return v2.HelmReleaseStatus{
 					History: v2.Snapshots{
+						release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
 						release.ObservedToSnapshot(release.ObserveRelease(
 							testutil.BuildRelease(&helmrelease.MockReleaseOptions{
 								Name:      mockReleaseName,
diff --git a/internal/reconcile/install.go b/internal/reconcile/install.go
index 02adae7f9..a390b2a5e 100644
--- a/internal/reconcile/install.go
+++ b/internal/reconcile/install.go
@@ -84,6 +84,9 @@ func (r *Install) Reconcile(ctx context.Context, req *Request) error {
 	// before the install.
 	req.Object.Status.ClearHistory()
 
+	// If we are installing, we are no longer remediated.
+	conditions.Delete(req.Object, v2.RemediatedCondition)
+
 	// Run the Helm install action.
 	_, err := action.Install(ctx, cfg, req.Object, req.Chart, req.Values)
diff --git a/internal/reconcile/release.go b/internal/reconcile/release.go
index 5e89aeab2..ce0a74d50 100644
--- a/internal/reconcile/release.go
+++ b/internal/reconcile/release.go
@@ -36,9 +36,6 @@ var (
 	// ErrNoLatest is returned when the HelmRelease has no latest release
 	// but this is required by the ActionReconciler.
 	ErrNoLatest = errors.New("no latest release")
-	// ErrNoPrevious is returned when the HelmRelease has no previous release
-	// but this is required by the ActionReconciler.
-	ErrNoPrevious = errors.New("no previous release")
 	// ErrReleaseMismatch is returned when the resulting release after running
 	// an action does not match the expected latest and/or previous release.
 	// This can happen for actions where targeting a release by version is not
@@ -128,10 +125,6 @@ func summarize(req *Request) {
 		conditions.Delete(req.Object, v2.TestSuccessCondition)
 	}
 
-	// Remove any stale Remediation observation as soon as the release is
-	// Released and (optionally) has TestSuccess.
-	conditionallyDeleteRemediated(req)
-
 	conds := req.Object.Status.Conditions
 	if len(conds) == 0 {
 		// Nothing to summarize if there are no conditions.
@@ -172,51 +165,6 @@ func summarize(req *Request) {
 	})
 }
 
-// conditionallyDeleteRemediated removes the Remediated condition if the
-// release is Released and (optionally) has TestSuccess. But only if
-// the observed generation of these conditions is equal or higher than
-// the generation of the Remediated condition.
-func conditionallyDeleteRemediated(req *Request) {
-	remediated := conditions.Get(req.Object, v2.RemediatedCondition)
-	if remediated == nil {
-		// If the object is not marked as Remediated, there is nothing to
-		// remove.
-		return
-	}
-
-	released := conditions.Get(req.Object, v2.ReleasedCondition)
-	if released == nil || released.Status != metav1.ConditionTrue {
-		// If the release is not marked as Released, we must still be
-		// Remediated.
-		return
-	}
-
-	if !req.Object.GetTest().Enable || req.Object.GetTest().IgnoreFailures {
-		// If tests are not enabled, or failures are ignored, and the
-		// generation is equal or higher than the generation of the
-		// Remediated condition, we are not in a Remediated state anymore.
-		if released.Status == metav1.ConditionTrue && released.ObservedGeneration >= remediated.ObservedGeneration {
-			conditions.Delete(req.Object, v2.RemediatedCondition)
-		}
-		return
-	}
-
-	testSuccess := conditions.Get(req.Object, v2.TestSuccessCondition)
-	if testSuccess == nil || testSuccess.Status != metav1.ConditionTrue {
-		// If the release is not marked as TestSuccess, we must still be
-		// Remediated.
-		return
-	}
-
-	if testSuccess.Status == metav1.ConditionTrue && testSuccess.ObservedGeneration >= remediated.ObservedGeneration {
-		// If the release is marked as TestSuccess, and the generation of
-		// the TestSuccess condition is equal or higher than the generation
-		// of the Remediated condition, we are not in a Remediated state.
-		conditions.Delete(req.Object, v2.RemediatedCondition)
-		return
-	}
-}
-
 // eventMessageWithLog returns an event message composed out of the given
 // message and any log messages by appending them to the message.
 func eventMessageWithLog(msg string, log *action.LogBuffer) string {
diff --git a/internal/reconcile/release_test.go b/internal/reconcile/release_test.go
index 117820323..167b8df72 100644
--- a/internal/reconcile/release_test.go
+++ b/internal/reconcile/release_test.go
@@ -424,60 +424,6 @@ func Test_summarize(t *testing.T) {
 				},
 			},
 		},
-		{
-			name: "with stale remediation",
-			spec: &v2.HelmReleaseSpec{
-				Test: &v2.Test{
-					Enable: true,
-				},
-			},
-			conditions: []metav1.Condition{
-				{
-					Type:               v2.RemediatedCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.RollbackSucceededReason,
-					Message:            "Rollback finished",
-					ObservedGeneration: 2,
-				},
-				{
-					Type:               v2.ReleasedCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.UpgradeSucceededReason,
-					Message:            "Upgrade finished",
-					ObservedGeneration: 2,
-				},
-				{
-					Type:               v2.TestSuccessCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.TestSucceededReason,
-					Message:            "test hooks succeeded",
-					ObservedGeneration: 2,
-				},
-			},
-			expect: []metav1.Condition{
-				{
-					Type:               meta.ReadyCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.TestSucceededReason,
-					Message:            "test hooks succeeded",
-					ObservedGeneration: 2,
-				},
-				{
-					Type:               v2.ReleasedCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.UpgradeSucceededReason,
-					Message:            "Upgrade finished",
-					ObservedGeneration: 2,
-				},
-				{
-					Type:               v2.TestSuccessCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.TestSucceededReason,
-					Message:            "test hooks succeeded",
-					ObservedGeneration: 2,
-				},
-			},
-		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -501,145 +447,6 @@ func Test_summarize(t *testing.T) {
 	}
 }
 
-func Test_conditionallyDeleteRemediated(t *testing.T) {
-	tests := []struct {
-		name         string
-		spec         v2.HelmReleaseSpec
-		conditions   []metav1.Condition
-		expectDelete bool
-	}{
-		{
-			name: "no Remediated condition",
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.InstallSucceededReason, "Install finished"),
-			},
-			expectDelete: false,
-		},
-		{
-			name: "no Released condition",
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-			},
-			expectDelete: false,
-		},
-		{
-			name: "Released=True without tests enabled",
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-			},
-			expectDelete: true,
-		},
-		{
-			name: "Stale Released=True with newer Remediated",
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-				{
-					Type:               v2.RemediatedCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.RollbackSucceededReason,
-					Message:            "Rollback finished",
-					ObservedGeneration: 2,
-				},
-			},
-			expectDelete: false,
-		},
-		{
-			name: "Released=False",
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-				*conditions.FalseCondition(v2.ReleasedCondition, v2.UpgradeFailedReason, "Upgrade failed"),
-			},
-			expectDelete: false,
-		},
-		{
-			name: "TestSuccess=True with tests enabled",
-			spec: v2.HelmReleaseSpec{
-				Test: &v2.Test{
-					Enable: true,
-				},
-			},
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-				*conditions.TrueCondition(v2.TestSuccessCondition, v2.TestSucceededReason, "Test hooks succeeded"),
-			},
-			expectDelete: true,
-		},
-		{
-			name: "TestSuccess=False with tests enabled",
-			spec: v2.HelmReleaseSpec{
-				Test: &v2.Test{
-					Enable: true,
-				},
-			},
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-				*conditions.FalseCondition(v2.TestSuccessCondition, v2.TestSucceededReason, "Test hooks succeeded"),
-			},
-			expectDelete: false,
-		},
-		{
-			name: "TestSuccess=False with tests ignored",
-			spec: v2.HelmReleaseSpec{
-				Test: &v2.Test{
-					Enable:         true,
-					IgnoreFailures: true,
-				},
-			},
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished"),
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-				*conditions.FalseCondition(v2.TestSuccessCondition, v2.TestFailedReason, "Test hooks failed"),
-			},
-			expectDelete: true,
-		},
-		{
-			name: "Stale TestSuccess=True with newer Remediated",
-			spec: v2.HelmReleaseSpec{
-				Test: &v2.Test{
-					Enable: true,
-				},
-			},
-			conditions: []metav1.Condition{
-				*conditions.TrueCondition(v2.ReleasedCondition, v2.UpgradeSucceededReason, "Upgrade finished"),
-				*conditions.TrueCondition(v2.TestSuccessCondition, v2.TestSucceededReason, "Test hooks succeeded"),
-				{
-					Type:               v2.RemediatedCondition,
-					Status:             metav1.ConditionTrue,
-					Reason:             v2.RollbackSucceededReason,
-					Message:            "Rollback finished",
-					ObservedGeneration: 2,
-				},
-			},
-			expectDelete: false,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			g := NewWithT(t)
-
-			obj := &v2.HelmRelease{
-				Spec: tt.spec,
-				Status: v2.HelmReleaseStatus{
-					Conditions: tt.conditions,
-				},
-			}
-			isRemediated := conditions.Has(obj, v2.RemediatedCondition)
-
-			conditionallyDeleteRemediated(&Request{Object: obj})
-
-			if tt.expectDelete {
-				g.Expect(isRemediated).ToNot(Equal(conditions.Has(obj, v2.RemediatedCondition)))
-				return
-			}
-
-			g.Expect(conditions.Has(obj, v2.RemediatedCondition)).To(Equal(isRemediated))
-		})
-	}
-}
-
 func mockLogBuffer(size int, lines int) *action.LogBuffer {
 	log := action.NewLogBuffer(action.NewDebugLog(logr.Discard()), size)
 	for i := 0; i < lines; i++ {
diff --git a/internal/reconcile/rollback_remediation.go b/internal/reconcile/rollback_remediation.go
index 755bf7434..e614f5a30 100644
--- a/internal/reconcile/rollback_remediation.go
+++ b/internal/reconcile/rollback_remediation.go
@@ -49,8 +49,8 @@ import (
 // Remediated=False and a warning event is emitted.
 //
 // When the Request.Object does not have a (successful) previous deployed
-// release, it returns an error of type ErrNoPrevious. In addition, it
-// returns ErrReleaseMismatch if the name and/or namespace of the latest and
+// release, it returns an error of type ErrMissingRollbackTarget. In addition,
+// it returns ErrReleaseMismatch if the name and/or namespace of the latest and
 // previous release do not match. Any other returned error indicates the caller
 // should retry as it did not cause a change to the Helm storage.
 //
@@ -85,7 +85,7 @@ func (r *RollbackRemediation) Reconcile(ctx context.Context, req *Request) error
 	// Previous is required to determine what version to roll back to.
 	prev := req.Object.Status.History.Previous(req.Object.GetUpgrade().GetRemediation().MustIgnoreTestFailures(req.Object.GetTest().IgnoreFailures))
 	if prev == nil {
-		return fmt.Errorf("%w: required to rollback", ErrNoPrevious)
+		return fmt.Errorf("%w: required to rollback", ErrMissingRollbackTarget)
 	}
 
 	// Confirm previous and current point to the same release.
diff --git a/internal/reconcile/rollback_remediation_test.go b/internal/reconcile/rollback_remediation_test.go
index cf8e23906..2300aefc3 100644
--- a/internal/reconcile/rollback_remediation_test.go
+++ b/internal/reconcile/rollback_remediation_test.go
@@ -121,7 +121,7 @@ func TestRollbackRemediation_Reconcile(t *testing.T) {
 			},
 		},
 		{
-			name: "rollback without previous",
+			name: "rollback without previous target release",
 			releases: func(namespace string) []*helmrelease.Release {
 				return []*helmrelease.Release{
 					testutil.BuildRelease(&helmrelease.MockReleaseOptions{
@@ -147,7 +147,7 @@ func TestRollbackRemediation_Reconcile(t *testing.T) {
 					},
 				}
 			},
-			wantErr: ErrNoPrevious,
+			wantErr: ErrMissingRollbackTarget,
 			expectHistory: func(releases []*helmrelease.Release) v2.Snapshots {
 				return v2.Snapshots{
 					release.ObservedToSnapshot(release.ObserveRelease(releases[1])),
diff --git a/internal/reconcile/upgrade.go b/internal/reconcile/upgrade.go
index 06e617491..95b345913 100644
--- a/internal/reconcile/upgrade.go
+++ b/internal/reconcile/upgrade.go
@@ -75,6 +75,9 @@ func (r *Upgrade) Reconcile(ctx context.Context, req *Request) error {
 	// Mark upgrade attempt on object.
 	req.Object.Status.LastAttemptedReleaseAction = v2.ReleaseActionUpgrade
 
+	// If we are upgrading, we are no longer remediated.
+	conditions.Delete(req.Object, v2.RemediatedCondition)
+
 	// Run the Helm upgrade action.
 	_, err := action.Upgrade(ctx, cfg, req.Object, req.Chart, req.Values)
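Taken together with the removal of conditionallyDeleteRemediated from summarize, the Install and Upgrade changes move the Remediated condition lifecycle to the start of each new release attempt. A minimal sketch of the resulting behaviour, assuming the public v2beta2 API and the fluxcd/pkg/runtime/conditions helpers used throughout this diff:

package main

import (
	"fmt"

	"github.com/fluxcd/pkg/runtime/conditions"

	v2 "github.com/fluxcd/helm-controller/api/v2beta2"
)

func main() {
	obj := &v2.HelmRelease{}

	// A previously failed release was remediated by a rollback.
	conditions.MarkTrue(obj, v2.RemediatedCondition, v2.RollbackSucceededReason, "Rollback finished")

	// A new install or upgrade attempt now clears the stale Remediated
	// condition up front, instead of summarize() removing it afterwards.
	conditions.Delete(obj, v2.RemediatedCondition)

	fmt.Println(conditions.Has(obj, v2.RemediatedCondition)) // false
}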