Backport fixes to Alertmanager template and alerts annotations to v1.13.x #100

Merged · 3 commits · Oct 17, 2022
examples/prometheus-rules/add-alert.yml (4 changes: 2 additions & 2 deletions)
@@ -16,7 +16,7 @@ spec:
rules:
- alert: MyAppDown
annotations:
- message: 'MyApp instance {{ $labels.instance }} has disappered from
+ description: 'MyApp instance {{ $labels.instance }} has disappered from
Prometheus target discovery.'
doc: "This alert fires if Prometheus target discovery was not able to
reach myapp-metrics in the last 3 minutes."
@@ -27,7 +27,7 @@ spec:
severity: critical
- alert: MyAppFailureRate
annotations:
- message: 'MyApp failure rate is {{ prints "%.2f" $value }}%.'
+ description: 'MyApp failure rate is {{ prints "%.2f" $value }}%.'
doc: "This alert fires if the failure rate (the rate of 4xx and 5xx
responses) measured on a time window of 2 minutes was higher than 10%
in the last 10 minutes."
katalog/alertmanager-operated/alertmanager.tmpl (5 changes: 3 additions & 2 deletions)
@@ -4,7 +4,8 @@

{{ define "__text" }}{{ range .Alerts }}
*Alert:* {{ .Labels.alertname }} - `{{ .Labels.severity }}`
- *Description:* {{ .Annotations.message }}
+ *Description:* {{ .Annotations.description }}
+ *Runbook*: {{ .Annotations.runbook_url }}
*Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:>
*Details:*
{{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
@@ -20,4 +21,4 @@
{{ define "slack.default.text" }}{{ template "__text" . }}{{ end }}
{{ define "slack.default.footer" }}{{ end }}

{{ define "email.default.subject" }}{{ template "__subject" .}} - {{ template "slack.default.username" . }}{{ end }}
{{ define "email.default.subject" }}{{ template "__subject" .}} - {{ template "slack.default.username" . }}{{ end }}
katalog/configs/kubeadm/rules.yml (28 changes: 14 additions & 14 deletions)
@@ -56,7 +56,7 @@ spec:
rules:
- alert: EtcdInsufficientMembers
annotations:
- message: 'If one more etcd member goes down the cluster will be
+ description: 'If one more etcd member goes down the cluster will be
unavailable.'
doc: "This alert fires if less than half of Etcd cluster members were
online in the last 3 minutes."
@@ -67,7 +67,7 @@ spec:
severity: critical
- alert: EtcdNoLeader
annotations:
- message: 'Etcd member {{ $labels.instance }} has no leader.'
+ description: 'Etcd member {{ $labels.instance }} has no leader.'
doc: "This alert fires if the Etcd cluster had no leader in the last
minute."
expr: |
@@ -77,7 +77,7 @@ spec:
severity: critical
- alert: EtcdHighNumberOfLeaderChanges
annotations:
- message: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
+ description: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
leader changes within the last hour.'
doc: "This alert fires if the Etcd cluster changed leader more than 3
times in the last hour."
@@ -87,7 +87,7 @@ spec:
severity: warning
# - alert: EtcdHighNumberOfFailedGRPCRequests
# annotations:
- # message: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
+ # description: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
# expr: |
# 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd-metrics"}[5m])) by (grpc_service, grpc_method, instance)
# /
@@ -97,7 +97,7 @@ spec:
# severity: warning
# - alert: EtcdHighNumberOfFailedGRPCRequests
# annotations:
- # message: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
+ # description: '{{ $value | printf "%.2f" }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
# expr: |
# 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd-metrics"}[5m])) by (grpc_service, grpc_method, instance)
# /
@@ -107,7 +107,7 @@ spec:
# severity: critical
# - alert: EtcdGRPCRequestsSlow
# annotations:
- # message: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method
+ # description: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method
# }} are slow
# expr: |
# histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd-metrics",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le)) > 0.15
@@ -116,15 +116,15 @@ spec:
# severity: critical
# - alert: EtcdMemberCommunicationSlow
# annotations:
- # message: etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow
+ # description: etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow
# expr: |
# histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) > 0.15
# for: 10m
# labels:
# severity: warning
- alert: EtcdHighNumberOfFailedProposals
annotations:
- message: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
+ description: 'Etcd instance {{ $labels.instance }} has seen {{ $value }}
proposal failures within the last hour.'
doc: "This alert fires if there were more than 5 proposal failure in the
last hour."
@@ -134,7 +134,7 @@ spec:
severity: warning
- alert: EtcdHighFsyncDurations
annotations:
- message: 'Etcd instance {{ $labels.instance }} WAL fsync latency too
+ description: 'Etcd instance {{ $labels.instance }} WAL fsync latency too
high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if the WAL fsync 99th percentile latency was
higher than 0.5s in the last 10 minutes."
@@ -145,7 +145,7 @@ spec:
severity: warning
- alert: EtcdHighCommitDurations
annotations:
- message: 'Etcd instance {{ $labels.instance }} commit latency too high,
+ description: 'Etcd instance {{ $labels.instance }} commit latency too high,
current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if the backend commit 99th percentile latency was
higher than 0.25s in the last 10 minutes."
@@ -158,7 +158,7 @@ spec:
rules:
- alert: CoreDNSPanic
annotations:
- messages: 'CoreDNS instance {{ $labels.instance }} panic count
+ description: 'CoreDNS instance {{ $labels.instance }} panic count
increased by {{ $value }}.'
doc: "This alert fires if CoreDNS total panic count increased by at
least 1 in the last 10 minutes."
@@ -168,7 +168,7 @@ spec:
severity: critical
- alert: CoreDNSRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} requests latency too
+ description: 'CoreDNS instance {{ $labels.instance }} requests latency too
high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile requests latency was
higher than 100ms in the last 10 minutes."
@@ -179,7 +179,7 @@ spec:
severity: warning
- alert: CoreDNSHealthRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} health requests
+ description: 'CoreDNS instance {{ $labels.instance }} health requests
latency too high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile health requests
latency was higher than 10ms in the last 10 minutes."
@@ -190,7 +190,7 @@ spec:
severity: warning
- alert: CoreDNSProxyRequestsLatency
annotations:
- message: 'CoreDNS instance {{ $labels.instance }} proxy requests
+ description: 'CoreDNS instance {{ $labels.instance }} proxy requests
latency too high, current latency is {{ $value | printf "%.2f" }}.'
doc: "This alert fires if CoreDNS 99th percentile proxy requests
latency was higher than 500ms in the last 10 minutes."
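
A template file like katalog/alertmanager-operated/alertmanager.tmpl only takes effect once Alertmanager loads it. Below is a minimal, hypothetical Alertmanager configuration sketch (the webhook URL, channel, and mount path are placeholders): because the file redefines slack.default.text and slack.default.username, Alertmanager's built-in Slack defaults are overridden without any explicit template references in the receiver.

templates:
  - '/etc/alertmanager/config/*.tmpl'  # assumed mount path for alertmanager.tmpl
route:
  receiver: slack-notifications
receivers:
  - name: slack-notifications
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/XXX/YYY/ZZZ'  # placeholder webhook URL
        channel: '#alerts'  # placeholder channel
        send_resolved: true
        # title and text are not set here, so they fall back to the
        # slack.default.* templates, which the loaded .tmpl file redefines.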