From 728b8dce888370079a1c4610d7251834fce40521 Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Mon, 4 May 2020 08:57:28 -0500 Subject: [PATCH 1/2] Revert "Expose HelmRelease phases as a Prometheus gauge" This reverts commit dfe842a4ca3e098826095c1698c1d44ca4970b72. See #383, the intention is to replace this with a condition based gauge in the future. --- pkg/status/conditions.go | 3 +-- pkg/status/metrics.go | 45 ---------------------------------------- 2 files changed, 1 insertion(+), 47 deletions(-) delete mode 100644 pkg/status/metrics.go diff --git a/pkg/status/conditions.go b/pkg/status/conditions.go index 1dae948e5..5e378a012 100644 --- a/pkg/status/conditions.go +++ b/pkg/status/conditions.go @@ -7,7 +7,7 @@ import ( "k8s.io/client-go/util/retry" "k8s.io/utils/clock" - v1 "github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1" + "github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1" v1client "github.com/fluxcd/helm-operator/pkg/client/clientset/versioned/typed/helm.fluxcd.io/v1" ) @@ -67,7 +67,6 @@ func SetStatusPhase(client v1client.HelmReleaseInterface, hr *v1.HelmRelease, ph } return SetCondition(client, hr, condition, func(cHr *v1.HelmRelease) { cHr.Status.Phase = phase - SetReleasePhaseGauge(phase, hr.Namespace, hr.Name) }) } diff --git a/pkg/status/metrics.go b/pkg/status/metrics.go deleted file mode 100644 index 201e45283..000000000 --- a/pkg/status/metrics.go +++ /dev/null @@ -1,45 +0,0 @@ -package status - -import ( - v1 "github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1" - "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" -) - -const ( - LabelNamespace = "namespace" - LabelReleaseName = "release_name" -) - -var phaseToGaugeValue = map[v1.HelmReleasePhase]float64{ - // Unknown is mapped to 0 - v1.HelmReleasePhaseChartFetchFailed: -4, - v1.HelmReleasePhaseFailed: -3, - v1.HelmReleasePhaseRollbackFailed: -2, - v1.HelmReleasePhaseRolledBack: -1, - 
v1.HelmReleasePhaseRollingBack: 1, - v1.HelmReleasePhaseInstalling: 2, - v1.HelmReleasePhaseUpgrading: 3, - v1.HelmReleasePhaseChartFetched: 4, - v1.HelmReleasePhaseSucceeded: 5, -} - -var ( - releasePhase = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Namespace: "flux", - Subsystem: "helm_operator", - Name: "release_phase_info", - Help: "Current HelmRelease phase.", - }, []string{LabelNamespace, LabelReleaseName}) -) - -func SetReleasePhaseGauge(phase v1.HelmReleasePhase, namespace, releaseName string) { - value, ok := phaseToGaugeValue[phase] - if !ok { - value = 0 - } - releasePhase.With( - LabelNamespace, namespace, - LabelReleaseName, releaseName, - ).Set(value) -} From aaae029deb4f55a16ad47423a5a2d24607e4a656 Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Mon, 4 May 2020 16:12:10 -0500 Subject: [PATCH 2/2] metrics: Add condition gauge Fixes #395 --- docs/references/monitoring.md | 33 +++++++++++++------------ pkg/status/conditions.go | 1 + pkg/status/metrics.go | 46 +++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 pkg/status/metrics.go diff --git a/docs/references/monitoring.md b/docs/references/monitoring.md index 21003e364..a7dc1d99e 100644 --- a/docs/references/monitoring.md +++ b/docs/references/monitoring.md @@ -11,27 +11,28 @@ in Prometheus format. | `release_count` | Count of releases managed by the operator. | | `release_duration_seconds` | Release synchronization duration in seconds. This duration includes one or many `release_phase_durations`. | | `release_phase_duration_seconds` | Release phase synchronization duration in seconds. | -| `release_phase_info` | The (negative) integer equaling the current phase of a release. Negative values are failed phases, `0` equals to unknown. See [release phases](#release-phases). +| `release_condition_info` | Release condition status gauge, see [release conditions](#release-conditions). 
| `release_queue_length_count` | Count of release jobs waiting in the queue to be processed. | +### Release conditions -### Release phases +#### Labels -The following is a table of the values the `release_phase_info` metric exposes, -and the phase they represent: +| Label | Label Value | +|--------------------|--- +| `target_namespace` | `targetNamespace` of `HelmRelease` +| `release_name` | `releaseName` of `HelmRelease` +| `condition` | [condition type](helmrelease-custom-resource.md#helm.fluxcd.io/v1.HelmReleaseConditionType) -| Value | Phase | +#### Values + +Values represent the [condition status](helmrelease-custom-resource.md#helm.fluxcd.io/v1.ConditionStatus). + +| Value | Condition Status | |-------|--- -| `-4` | `ChartFetchFailed` -| `-3` | `Failed` -| `-2` | `RollbackFailed` -| `-1 ` | `RolledBack` +| `-1` | `False` | `0` | `Unknown` -| `1` | `RollingBack` -| `2` | `Installing` -| `3` | `Upgrading` -| `4` | `ChartFetched` -| `5` | `Succeeded` +| `1` | `True` ## Prometheus alert rules examples @@ -51,12 +52,12 @@ for: 30m ```yaml alert: HelmReleaseRolledBack -expr: flux_helm_operator_release_phase_info == -1 +expr: flux_helm_operator_release_condition_info{condition="RolledBack"} == 1 ``` ### `HelmRelease` subject to an error ```yaml alert: HelmReleaseError -expr: flux_helm_operator_release_phase_info < -1 +expr: flux_helm_operator_release_condition_info{condition="Released"} == -1 ``` diff --git a/pkg/status/conditions.go b/pkg/status/conditions.go index 5e378a012..9661f46c2 100644 --- a/pkg/status/conditions.go +++ b/pkg/status/conditions.go @@ -53,6 +53,7 @@ func SetCondition(client v1client.HelmReleaseInterface, hr *v1.HelmRelease, cond set(cHr) } + ObserveReleaseConditions(*hr, *cHr) _, err = client.UpdateStatus(cHr) firstTry = false return diff --git a/pkg/status/metrics.go b/pkg/status/metrics.go new file mode 100644 index 000000000..212f2ebb5 --- /dev/null +++ b/pkg/status/metrics.go @@ -0,0 +1,46 @@ +package status + +import ( + v1 
"github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1" + "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +const ( + LabelTargetNamespace = "target_namespace" + LabelReleaseName = "release_name" + LabelCondition = "condition" +) + +var ( + conditionStatusToGaugeValue = map[v1.ConditionStatus]float64{ + v1.ConditionFalse: -1, + v1.ConditionUnknown: 0, + v1.ConditionTrue: 1, + } + releaseCondition = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: "flux", + Subsystem: "helm_operator", + Name: "release_condition_info", + Help: "Current HelmRelease condition status. Values are -1 (false), 0 (unknown or absent), 1 (true)", + }, []string{LabelTargetNamespace, LabelReleaseName, LabelCondition}) +) + +func ObserveReleaseConditions(old v1.HelmRelease, new v1.HelmRelease) { + conditions := make(map[v1.HelmReleaseConditionType]v1.ConditionStatus) + for _, condition := range old.Status.Conditions { + // Initialize conditions from old status to unknown, so that if + // they are removed in new status, they do not contain stale data. + conditions[condition.Type] = v1.ConditionUnknown + } + for _, condition := range new.Status.Conditions { + conditions[condition.Type] = condition.Status + } + for conditionType, conditionStatus := range conditions { + releaseCondition.With( + LabelTargetNamespace, new.Namespace, + LabelReleaseName, new.Name, + LabelCondition, string(conditionType), + ).Set(conditionStatusToGaugeValue[conditionStatus]) + } +}