Merge pull request #100 from sighupio/hotfix/alertrules-description
Backport fixes to Alertmanager template and alerts annotations to v1.13.x
lnovara authored Oct 17, 2022
2 parents 03a535e + 726d425 commit 0af11fd
Showing 3 changed files with 19 additions and 18 deletions.
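In short: the bundled Slack template (katalog/alertmanager-operated/alertmanager.tmpl) now renders the description annotation (plus an optional runbook_url), so every rule shipped in this katalog switches its alert annotation key from message to description. A minimal sketch of a rule written against the updated template follows; the alert name, expression, and runbook URL are illustrative only, not part of this commit:

    - alert: MyAppDown                      # illustrative name, mirrors examples/prometheus-rules/add-alert.yml
      expr: up{job="myapp-metrics"} == 0    # assumed expression, for the sketch only
      for: 3m
      labels:
        severity: critical
      annotations:
        description: 'MyApp instance {{ $labels.instance }} has disappeared from Prometheus target discovery.'  # key renamed from message
        runbook_url: 'https://example.com/runbooks/myapp-down'  # rendered by the new *Runbook* line in the Slack template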
examples/prometheus-rules/add-alert.yml (4 changes: 2 additions & 2 deletions)
@@ -16,7 +16,7 @@ spec:
rules:
- alert: MyAppDown
annotations:
- message: 'MyApp instance {{ $labels.instance }} has disappered from
+ description: 'MyApp instance {{ $labels.instance }} has disappered from
Prometheus target discovery.'
doc: "This alert fires if Prometheus target discovery was not able to
reach myapp-metrics in the last 3 minutes."
@@ -27,7 +27,7 @@ spec:
severity: critical
- alert: MyAppFailureRate
annotations:
- message: 'MyApp failure rate is {{ prints "%.2f" $value }}%.'
+ description: 'MyApp failure rate is {{ prints "%.2f" $value }}%.'
doc: "This alert fires if the failure rate (the rate of 4xx and 5xx
responses) measured on a time window of 2 minutes was higher than 10%
in the last 10 minutes."
katalog/alertmanager-operated/alertmanager.tmpl (5 changes: 3 additions & 2 deletions)
@@ -4,7 +4,8 @@

{{ define "__text" }}{{ range .Alerts }}
*Alert:* {{ .Labels.alertname }} - `{{ .Labels.severity }}`
- *Description:* {{ .Annotations.message }}
+ *Description:* {{ .Annotations.description }}
+ *Runbook*: {{ .Annotations.runbook_url }}
*Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
@@ -20,4 +21,4 @@
{{ define "slack.default.text" }}{{ template "__text" . }}{{ end }}
{{ define "slack.default.footer" }}{{ end }}

{{ define "email.default.subject" }}{{ template "__subject" .}} - {{ template "slack.default.username" . }}{{ end }}
{{ define "email.default.subject" }}{{ template "__subject" .}} - {{ template "slack.default.username" . }}{{ end }}
katalog/configs/kubeadm/rules.yml (28 changes: 14 additions & 14 deletions)
@@ -56,7 +56,7 @@ spec:
rules:
- alert: EtcdInsufficientMembers
annotations:
- message: 'If one more etcd member goes down the cluster will be
+ description: 'If one more etcd member goes down the cluster will be
unavailable.'
doc: "This alert fires if less than half of Etcd cluster members were
online in the last 3 minutes."
@@ -67,7 +67,7 @@ spec:
severity: critical
- alert: EtcdNoLeader
annotations:
- message: 'Etcd member {{ $labels.instance }} has no leader.'
+ description: 'Etcd member {{ $labels.instance }} has no leader.'
doc: "This alert fires if the Etcd cluster had no leader in the last
minute."
expr: |
@@ -77,7 +77,7 @@ spec:
severity: critical
- alert: EtcdHighNumberOfLeaderChanges
annotations:
- message: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
+ description: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
leader changes within the last hour.'
doc: "This alert fires if the Etcd cluster changed leader more than 3
times in the last hour."
@@ -87,7 +87,7 @@ spec:
severity: warning
# - alert: EtcdHighNumberOfFailedGRPCRequests
# annotations:
- # message: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
+ # description: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
# expr: |
# 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd-metrics"}[5m])) by (grpc_service, grpc_method, instance)
# /
@@ -97,7 +97,7 @@ spec:
# severity: warning
# - alert: EtcdHighNumberOfFailedGRPCRequests
# annotations:
- # message: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
+ # description: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
# expr: |
# 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd-metrics"}[5m])) by (grpc_service, grpc_method, instance)
# /
@@ -107,7 +107,7 @@ spec:
# severity: critical
# - alert: EtcdGRPCRequestsSlow
# annotations:
- # message: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method
+ # description: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method
# }} are slow
# expr: |
# histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd-metrics",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le)) > 0.15
@@ -116,15 +116,15 @@ spec:
# severity: critical
# - alert: EtcdMemberCommunicationSlow
# annotations:
- # message: etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow
+ # description: etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow
# expr: |
# histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) > 0.15
# for: 10m
# labels:
# severity: warning
- alert: EtcdHighNumberOfFailedProposals
annotations:
- message: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
+ description: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
proposal failures within the last hour.'
doc: "This alert fires if there were more than 5 proposal failure in the
last hour."
@@ -134,7 +134,7 @@ spec:
severity: warning
- alert: EtcdHighFsyncDurations
annotations:
- message: 'Etcd instance {{ $labels.instance }} WAL fsync latency too
+ description: 'Etcd instance {{ $labels.instance }} WAL fsync latency too
high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if the WAL fsync 99th percentile latency was
higher than 0.5s in the last 10 minutes."
@@ -145,7 +145,7 @@ spec:
severity: warning
- alert: EtcdHighCommitDurations
annotations:
- message: 'Etcd instance {{ $labels.instance }} commit latency too high,
+ description: 'Etcd instance {{ $labels.instance }} commit latency too high,
current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if the backend commit 99th percentile latency was
higher than 0.25s in the last 10 minutes."
@@ -158,7 +158,7 @@ spec:
rules:
- alert: CoreDNSPanic
annotations:
- messages: 'CoreDNS instance {{ $labels.instance }} panic count
+ description: 'CoreDNS instance {{ $labels.instance }} panic count
increased by {{ $value }}.'
doc: "This alert fires if CoreDNS total panic count increased by at
least 1 in the last 10 minutes."
@@ -168,7 +168,7 @@ spec:
severity: critical
- alert: CoreDNSRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} requests latency too
+ description: 'CoreDNS instance {{ $labels.instance }} requests latency too
high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile requests latency was
higher than 100ms in the last 10 minutes."
@@ -179,7 +179,7 @@ spec:
severity: warning
- alert: CoreDNSHealthRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} health requests
+ description: 'CoreDNS instance {{ $labels.instance }} health requests
latency too high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile health requests
latency was higher than 10ms in the last 10 minutes."
@@ -190,7 +190,7 @@ spec:
severity: warning
- alert: CoreDNSProxyRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} proxy requests
+ description: 'CoreDNS instance {{ $labels.instance }} proxy requests
latency too high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile proxy requests
latency was higher than 500ms in the last 10 minutes."
