Skip to content
This repository has been archived by the owner on Nov 1, 2022. It is now read-only.

Commit

Permalink
Merge pull request #403 from seaneagan/condition_gauge
Browse files Browse the repository at this point in the history
  • Loading branch information
hiddeco authored May 11, 2020
2 parents 3db3a24 + aaae029 commit 5f9e440
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 45 deletions.
33 changes: 17 additions & 16 deletions docs/references/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,28 @@ in Prometheus format.
| `release_count` | Count of releases managed by the operator. |
| `release_duration_seconds` | Release synchronization duration in seconds. This duration includes one or many `release_phase_durations`. |
| `release_phase_duration_seconds` | Release phase synchronization duration in seconds. |
| `release_phase_info` | The (negative) integer equaling the current phase of a release. Negative values are failed phases, `0` equals unknown. See [release phases](#release-phases). |
| `release_condition_info` | Release condition status gauge, see [release conditions](#release-conditions). |
| `release_queue_length_count` | Count of release jobs waiting in the queue to be processed. |

### Release conditions

### Release phases
#### Labels

The following is a table of the values the `release_phase_info` metric exposes,
and the phase they represent:
| Label | Label Value |
|--------------------|---
| `target_namespace` | `targetNamespace` of `HelmRelease`
| `release_name` | `releaseName` of `HelmRelease`
| `condition` | [condition type](helmrelease-custom-resource.md#helm.fluxcd.io/v1.HelmReleaseConditionType)

| Value | Phase |
#### Values

Values represent the [condition status](helmrelease-custom-resource.md#helm.fluxcd.io/v1.ConditionStatus).

| Value | Condition Status |
|-------|---
| `-4` | `ChartFetchFailed`
| `-3` | `Failed`
| `-2` | `RollbackFailed`
| `-1 ` | `RolledBack`
| `-1` | `False`
| `0` | `Unknown`
| `1` | `RollingBack`
| `2` | `Installing`
| `3` | `Upgrading`
| `4` | `ChartFetched`
| `5` | `Succeeded`
| `1` | `True`

## Prometheus alert rules examples

Expand All @@ -51,12 +52,12 @@ for: 30m

```yaml
alert: HelmReleaseRolledBack
expr: flux_helm_operator_release_phase_info == -1
expr: flux_helm_operator_release_condition_info{condition="RolledBack"} == 1
```

### `HelmRelease` subject to an error

```yaml
alert: HelmReleaseError
expr: flux_helm_operator_release_phase_info < -1
expr: flux_helm_operator_release_condition_info{condition="Released"} == -1
```
4 changes: 2 additions & 2 deletions pkg/status/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"k8s.io/client-go/util/retry"
"k8s.io/utils/clock"

v1 "github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1"
"github.com/fluxcd/helm-operator/pkg/apis/helm.fluxcd.io/v1"
v1client "github.com/fluxcd/helm-operator/pkg/client/clientset/versioned/typed/helm.fluxcd.io/v1"
)

Expand Down Expand Up @@ -53,6 +53,7 @@ func SetCondition(client v1client.HelmReleaseInterface, hr *v1.HelmRelease, cond
set(cHr)
}

ObserveReleaseConditions(*hr, *cHr)
_, err = client.UpdateStatus(cHr)
firstTry = false
return
Expand All @@ -67,7 +68,6 @@ func SetStatusPhase(client v1client.HelmReleaseInterface, hr *v1.HelmRelease, ph
}
return SetCondition(client, hr, condition, func(cHr *v1.HelmRelease) {
cHr.Status.Phase = phase
SetReleasePhaseGauge(phase, hr.Namespace, hr.Name)
})
}

Expand Down
55 changes: 28 additions & 27 deletions pkg/status/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,40 @@ import (
)

// Label names attached to the release gauges defined in this file.
const (
LabelNamespace = "namespace"
LabelReleaseName = "release_name"
// LabelTargetNamespace, LabelReleaseName and LabelCondition are the
// labels of the release_condition_info gauge.
LabelTargetNamespace = "target_namespace"
LabelReleaseName = "release_name"
LabelCondition = "condition"
)

// phaseToGaugeValue maps a HelmRelease phase to the number exposed by the
// release_phase_info gauge.
var phaseToGaugeValue = map[v1.HelmReleasePhase]float64{
// Unknown is mapped to 0
v1.HelmReleasePhaseChartFetchFailed: -4,
v1.HelmReleasePhaseFailed: -3,
v1.HelmReleasePhaseRollbackFailed: -2,
v1.HelmReleasePhaseRolledBack: -1,
v1.HelmReleasePhaseRollingBack: 1,
v1.HelmReleasePhaseInstalling: 2,
v1.HelmReleasePhaseUpgrading: 3,
v1.HelmReleasePhaseChartFetched: 4,
v1.HelmReleasePhaseSucceeded: 5,
}

var (
releasePhase = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
// conditionStatusToGaugeValue maps a condition status to the number set
// on the release_condition_info gauge.
conditionStatusToGaugeValue = map[v1.ConditionStatus]float64{
v1.ConditionFalse: -1,
v1.ConditionUnknown: 0,
v1.ConditionTrue: 1,
}
// releaseCondition is the gauge holding one series per
// (target namespace, release name, condition type) combination.
releaseCondition = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "flux",
Subsystem: "helm_operator",
Name: "release_phase_info",
Help: "Current HelmRelease phase.",
}, []string{LabelNamespace, LabelReleaseName})
Name: "release_condition_info",
Help: "Current HelmRelease condition status. Values are -1 (false), 0 (unknown or absent), 1 (true)",
}, []string{LabelTargetNamespace, LabelReleaseName, LabelCondition})
)

func SetReleasePhaseGauge(phase v1.HelmReleasePhase, namespace, releaseName string) {
value, ok := phaseToGaugeValue[phase]
if !ok {
value = 0
// ObserveReleaseConditions sets the releaseCondition gauge for every
// condition type found in the status of old or new. Types present in new
// take the status from new; types present only in old are reported as
// v1.ConditionUnknown.
func ObserveReleaseConditions(old v1.HelmRelease, new v1.HelmRelease) {
conditions := make(map[v1.HelmReleaseConditionType]v1.ConditionStatus)
for _, condition := range old.Status.Conditions {
// Initialize conditions from old status to unknown, so that if
// they are removed in new status, they do not contain stale data.
conditions[condition.Type] = v1.ConditionUnknown
}
// Statuses from new overwrite the unknown placeholders set above.
for _, condition := range new.Status.Conditions {
conditions[condition.Type] = condition.Status
}
// Set one gauge series per condition type, labelled with the namespace
// and name of new.
// NOTE(review): the target_namespace label is filled from new.Namespace;
// confirm this matches the documented `targetNamespace` of the HelmRelease.
for conditionType, conditionStatus := range conditions {
releaseCondition.With(
LabelTargetNamespace, new.Namespace,
LabelReleaseName, new.Name,
LabelCondition, string(conditionType),
).Set(conditionStatusToGaugeValue[conditionStatus])
}
releasePhase.With(
LabelNamespace, namespace,
LabelReleaseName, releaseName,
).Set(value)
}

0 comments on commit 5f9e440

Please sign in to comment.