From 5bda168bcaf566adee175572608882f796dde53c Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Mon, 4 May 2020 16:12:10 -0500 Subject: [PATCH] metrics: Add condition gauge Fixes #395 --- docs/references/monitoring.md | 33 +++++++++++++------------ pkg/status/conditions.go | 1 + pkg/status/metrics.go | 46 +++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 pkg/status/metrics.go diff --git a/docs/references/monitoring.md b/docs/references/monitoring.md index 21003e364..a7dc1d99e 100644 --- a/docs/references/monitoring.md +++ b/docs/references/monitoring.md @@ -11,27 +11,28 @@ in Prometheus format. | `release_count` | Count of releases managed by the operator. | | `release_duration_seconds` | Release synchronization duration in seconds. This duration includes one or many `release_phase_durations`. | | `release_phase_duration_seconds` | Release phase synchronization duration in seconds. | -| `release_phase_info` | The (negative) integer equaling the current phase of a release. Negative values are failed phases, `0` equals to unknown. See [release phases](#release-phases). +| `release_condition_info` | Release condition status gauge, see [release conditions](#release-conditions). | `release_queue_length_count` | Count of release jobs waiting in the queue to be processed. | +### Release conditions -### Release phases +#### Labels -The following is a table of the values the `release_phase_info` metric exposes, -and the phase they represent: +| Label | Label Value | +|--------------------|--- +| `target_namespace` | `targetNamespace` of `HelmRelease` +| `release_name` | `releaseName` of `HelmRelease` +| `condition` | [condition type](helmrelease-custom-resource.md#helm.fluxcd.io/v1.HelmReleaseConditionType) -| Value | Phase | +#### Values + +Values represent the [condition status](helmrelease-custom-resource.md#helm.fluxcd.io/v1.ConditionStatus). 
+ | Value | Condition Status | |-------|--- -| `-4` | `ChartFetchFailed` -| `-3` | `Failed` -| `-2` | `RollbackFailed` -| `-1 ` | `RolledBack` +| `-1` | `False` | `0` | `Unknown` -| `1` | `RollingBack` -| `2` | `Installing` -| `3` | `Upgrading` -| `4` | `ChartFetched` -| `5` | `Succeeded` +| `1` | `True` ## Prometheus alert rules examples @@ -51,12 +52,12 @@ for: 30m ```yaml alert: HelmReleaseRolledBack -expr: flux_helm_operator_release_phase_info == -1 +expr: flux_helm_operator_release_condition_info{condition="RolledBack"} == 1 ``` ### `HelmRelease` subject to an error ```yaml alert: HelmReleaseError -expr: flux_helm_operator_release_phase_info < -1 +expr: flux_helm_operator_release_condition_info{condition="Released"} == -1 ``` diff --git a/pkg/status/conditions.go b/pkg/status/conditions.go index 5e378a012..9661f46c2 100644 --- a/pkg/status/conditions.go +++ b/pkg/status/conditions.go @@ -53,6 +53,7 @@ func SetCondition(client v1client.HelmReleaseInterface, hr *v1.HelmRelease, cond set(cHr) } + ObserveReleaseConditions(*hr, *cHr) _, err = client.UpdateStatus(cHr) firstTry = false return diff --git a/pkg/status/metrics.go b/pkg/status/metrics.go new file mode 100644 index 000000000..212f2ebb5 --- /dev/null +++ b/pkg/status/metrics.go @@ -0,0 +1,46 @@ +package status + +import ( + v1 "github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1" + "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" +) + +const ( + LabelTargetNamespace = "target_namespace" + LabelReleaseName = "release_name" + LabelCondition = "condition" +) + +var ( + conditionStatusToGaugeValue = map[v1.ConditionStatus]float64{ + v1.ConditionFalse: -1, + v1.ConditionUnknown: 0, + v1.ConditionTrue: 1, + } + releaseCondition = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: "flux", + Subsystem: "helm_operator", + Name: "release_condition_info", + Help: "Current HelmRelease condition status. 
Values are -1 (false), 0 (unknown or absent), 1 (true)",
+	}, []string{LabelTargetNamespace, LabelReleaseName, LabelCondition})
+)
+
+// ObserveReleaseConditions records the condition statuses of a HelmRelease
+// in the release condition gauge, one series per condition type.
+//
+// previous is the resource before the status change and current is the
+// resource after it. Every condition type that appears in previous but is
+// no longer present in current is set to 0 (unknown), so that its series
+// does not keep reporting a stale value.
+//
+// NOTE(review): the target_namespace and release_name label values are
+// taken from current.Namespace and current.Name, while the monitoring docs
+// describe these labels as the targetNamespace and releaseName of the
+// HelmRelease. Confirm which values are intended.
+func ObserveReleaseConditions(previous v1.HelmRelease, current v1.HelmRelease) {
+	// Both lengths are known up front, so size the map once.
+	statuses := make(
+		map[v1.HelmReleaseConditionType]v1.ConditionStatus,
+		len(previous.Status.Conditions)+len(current.Status.Conditions),
+	)
+	for _, condition := range previous.Status.Conditions {
+		// Seed with unknown; overwritten below if the condition still exists.
+		statuses[condition.Type] = v1.ConditionUnknown
+	}
+	for _, condition := range current.Status.Conditions {
+		statuses[condition.Type] = condition.Status
+	}
+	for conditionType, status := range statuses {
+		// A status missing from conditionStatusToGaugeValue yields the map's
+		// zero value, 0, which is the same value used for unknown.
+		releaseCondition.With(
+			LabelTargetNamespace, current.Namespace,
+			LabelReleaseName, current.Name,
+			LabelCondition, string(conditionType),
+		).Set(conditionStatusToGaugeValue[status])
+	}
+}