Skip to content

Commit

Permalink
Improve error rate and relayer activity metrics/alerts (#197)
Browse files (browse the repository at this point in the history)
  • Loading branch information
dzmitryhil authored Mar 18, 2024
1 parent 7e8327a commit db4e2c4
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions infra/composer/grafana/dashboards/relayer.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -1293,15 +1293,15 @@
},
"editorMode": "code",
"exemplar": false,
- "expr": "increase(relayer_errors_total[$__range])",
+ "expr": "increase(relayer_errors_total[5m])",
"format": "time_series",
"instant": false,
"legendFormat": "Errors",
"range": true,
"refId": "A"
}
],
- "title": "Errors count",
+ "title": "Errors increase 5m",
"type": "timeseries"
},
{
Expand Down
10 changes: 5 additions & 5 deletions infra/composer/prometheus/alert.rules
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,13 +10,13 @@ groups:
labels:
severity: "critical"

- - alert: High relayer error rate
- expr: rate(relayer_errors_total[5m]) * 5 * 60 > 10
- for: 30s
+ - alert: High relayer error increase
+ expr: increase(relayer_errors_total[5m]) > 10
+ for: 10m
labels:
severity: major
annotations:
- description: "High relayer error rate"
+ description: "High relayer error increase"

- alert: Low contract XRPL base fee
expr: contract_config_xrpl_base_fee < xrpl_chain_base_fee
Expand Down Expand Up @@ -76,7 +76,7 @@ groups:
description: "Detected malicious behaviour: {{ $labels.malicious_behaviour_key }}"

- alert: No relayer activity for more than 24h
- expr: increase(relayer_activity{action=~"save_evidence|save_signature"}[24h]) == 0
+ expr: (relayer_activity{action="save_evidence"} + on (relayer_coreum_address) relayer_activity{action="save_signature"}) == 0
for: 10m # to let the relayer provide the metric after the restart
labels:
severity: "major"
Expand Down

0 comments on commit db4e2c4

Please sign in to comment.