Skip to content

Commit

Permalink
adding alert for exhausted PVC (#7200)
Browse files Browse the repository at this point in the history
* adding alert for exhausted PVC

* adding hard boundary for > 90% usage
  • Loading branch information
viennaa authored Oct 15, 2024
1 parent 5ca92c0 commit 5ec2345
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
2 changes: 1 addition & 1 deletion prometheus-rules/prometheus-controlplane-rules/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v2
name: prometheus-controlplane-rules
version: 1.0.24
version: 1.1.0
description: A collection of Prometheus alerting and aggregation rules for controlplane.
dependencies: []
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,32 @@ groups:
summary: Interface {{ $labels.device }} is down. Node network connectivity is degraded. Check ESX node state in vCenter.

### PVC usage ###
- alert: KubernetesPVCNoSpaceLeft
expr: kubelet_volume_stats_available_percent < 10
for: 10m
labels:
tier: k8s
support_group: '{{ if $labels.label_ccloud_support_group }}{{ $labels.label_ccloud_support_group }}{{ else }}containers{{ end }}'
service: '{{ if $labels.label_ccloud_service }}{{ $labels.label_ccloud_service }}{{ else }}resources{{ end }}'
severity: info
context: storage
meta: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} free space is less than 10%."
playbook: 'docs/support/playbook/kubernetes/pvc_usage'
annotations:
description: "The PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is almost full. Increase or delete files."
summary: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} free space is less than 10%."

- alert: KubernetesHighPVCUsagePredicted
# NOTE: The labels for support-group and service are already present in `kubelet_volume_stats_available_percent`, cf. the aggregation rule that defines it.
expr: sum((kubelet_volume_stats_available_percent < 30) and (predict_linear(kubelet_volume_stats_available_percent[1d], 7 * 24 * 3600) < 10)) by (label_ccloud_support_group, label_ccloud_service, namespace, persistentvolumeclaim)
for: 1h
- alert: KubernetesPVCNoSpaceLeft
expr: kubelet_volume_stats_available_percent < 2
for: 10m
labels:
tier: k8s
support_group: '{{ if $labels.label_ccloud_support_group }}{{ $labels.label_ccloud_support_group }}{{ else }}containers{{ end }}'
service: '{{ if $labels.label_ccloud_service }}{{ $labels.label_ccloud_service }}{{ else }}resources{{ end }}'
severity: warning
context: storage
meta: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is set to exceed 90% usage soon"
meta: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} usage is over 98%."
playbook: 'docs/support/playbook/kubernetes/pvc_usage'
annotations:
description: "The PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is predicted to exceed 90% storage consumption in the next 7 days."
summary: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is set to exceed 90% usage soon"
description: "The PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is full. Programs will stop working if relying upon free storage."
summary: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} usage is over 98%."

0 comments on commit 5ec2345

Please sign in to comment.