Skip to content

Commit

Permalink
Merge pull request #66 from thaum-xyz/use-kube-prometheus
Browse files Browse the repository at this point in the history
  • Loading branch information
paulfantom authored Feb 12, 2021
2 parents 54afb17 + 7249a77 commit aa0899a
Show file tree
Hide file tree
Showing 132 changed files with 7,874 additions and 3,465 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/kubeconform.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# Validates the Kubernetes manifests in apps/ and base/ with kubeconform on
# every push and pull request that targets master.
name: kubeconform

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  # Checks everything under apps/ against the upstream Kubernetes JSON schemas.
  apps:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-go@v2
        with:
          go-version: '1.15'
      - run: go get -u github.com/yannh/kubeconform/cmd/kubeconform
      # - run: ./hack/generate-schemas.sh
      # Disabled options, kept for reference:
      #   -schema-location 'crdschemas/{{ .ResourceKind }}.json'
      #   -skip CustomResourceDefinition,SealedSecret,ConfigMapSecret
      # They must live out here as YAML comments. Inside the folded scalar
      # below, '#' is literal text: the lines fold into one shell command and
      # the first ' #' turns everything after it (the real -skip, the ignore
      # patterns, -summary and the target directory) into a shell comment.
      - run: >
          kubeconform
          -schema-location 'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{{ .NormalizedKubernetesVersion }}-standalone{{ .StrictSuffix }}/{{ .ResourceKind }}.json'
          -skip CustomResourceDefinition,SealedSecret,ConfigMapSecret,ServiceMonitor,PodMonitor,Probe,Prometheus,Alertmanager
          -ignore-filename-pattern 'vendor/*'
          -ignore-filename-pattern 'jsonnet/*'
          -summary
          apps/
  # Checks everything under base/ against the upstream Kubernetes JSON schemas.
  base:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-go@v2
        with:
          go-version: '1.15'
      - run: go get -u github.com/yannh/kubeconform/cmd/kubeconform
      # - run: ./hack/generate-schemas.sh
      # Disabled options, kept for reference (see the note in the apps job on
      # why they cannot be placed inside the folded scalar):
      #   -schema-location 'crdschemas/{{ .ResourceKind }}.json'
      #   -skip CustomResourceDefinition,SealedSecret,ConfigMapSecret,Plan,Application,AppProject,ClusterIssuer
      - run: >
          kubeconform
          -schema-location 'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{{ .NormalizedKubernetesVersion }}-standalone{{ .StrictSuffix }}/{{ .ResourceKind }}.json'
          -skip CustomResourceDefinition,SealedSecret,ConfigMapSecret,Plan,Application,AppProject,ClusterIssuer,ServiceMonitor,PodMonitor
          -ignore-filename-pattern 'vendor/*'
          -ignore-filename-pattern 'jsonnet/*'
          -summary
          base/
28 changes: 0 additions & 28 deletions .github/workflows/kubeval.yml

This file was deleted.

2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ master.key
.kube
bin/
templates/
vendor/
crdschemas/
17 changes: 17 additions & 0 deletions apps/monitoring/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# What is this?

Customized kube-prometheus stack for @paulfantom's personal homelab. This is also one of the few public usages of kube-prometheus.

## How does this work?

### Short version

1. `./generate.sh`
2. Commit and push
3. Profit

### Long version

`kube-prometheus` is used as a library and installed with `jb`. Next, the customization stored in `jsonnet/main.jsonnet` is
applied. After this, `jsonnet` is used to generate the `manifests/` directory, and ConfigMapSecrets are copied into `manifests/`
from the `configmapsecrets/` directory.
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,6 @@ spec:
labels:
node: 'DESKTOP-ODOR2KB'
# https://github.com/prometheus-pve/prometheus-pve-exporter
#- job_name: 'pve'
# static_configs:
# - targets:
# - 192.168.2.40 # Proxmox VE node.
# metrics_path: /pve
# params:
# module: [default]
# relabel_configs:
# - source_labels: [__address__]
# target_label: __param_target
# - source_labels: [__param_target]
# target_label: instance
# - target_label: __address__
# replacement: 127.0.0.1:9221 # PVE exporter.
- job_name: lancre
scrape_interval: 30s
scrape_timeout: 30s
Expand Down
29 changes: 29 additions & 0 deletions apps/monitoring/generate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Regenerates the manifests/ directory from jsonnet/main.jsonnet.
#
# Steps: install jsonnet dependencies with jb (only when jsonnet/vendor is
# missing), wipe manifests/, render it with jsonnet, reformat every rendered
# YAML file with yamlfmt, then copy the ConfigMapSecrets in.
#
# All paths are relative, so run this from the directory that contains
# jsonnet/ and configmapsecrets/. Needs jb, jsonnet and yamlfmt on PATH.

set -euo pipefail

# Install dependencies
# The subshell keeps the working directory of the rest of the script untouched.
if [ ! -d 'jsonnet/vendor' ]; then
  (cd jsonnet && jb install)
fi

# Remove old manifests
rm -rf manifests || :

# Generate manifests
jsonnet -J jsonnet/vendor -c -m manifests -S jsonnet/main.jsonnet

# Next step is just an eye-candy and only beautifies yaml files.
# The pattern is quoted so the shell passes it to find unexpanded, and
# NUL-delimited names keep paths containing whitespace in one piece.
# Output goes to a scratch file first so a yamlfmt failure never truncates
# the manifest itself; pipefail propagates a failure inside the loop.
find manifests/ -name '*.yaml' -print0 | while IFS= read -r -d '' manifest; do
  yamlfmt < "$manifest" > "$manifest.bak"
  mv "$manifest.bak" "$manifest"
done

# Copy ConfigMapSecrets
# The first '-' of each file name becomes '/', e.g. foo-bar.yaml is copied to
# manifests/foo/bar.yaml. cp does not create manifests/foo/ — presumably the
# jsonnet run above already did; verify when adding a new ConfigMapSecret.
for src in configmapsecrets/*.yaml; do
  dest="$(basename "$src" | sed 's/-/\//')"
  cp "$src" "manifests/$dest"
done
70 changes: 70 additions & 0 deletions apps/monitoring/jsonnet/ext/blackboxExporterConfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"modules": {
"http_2xx": {
"http": {
"preferred_ip_protocol": "ip4"
},
"prober": "http"
},
"http_post_2xx": {
"http": {
"method": "POST",
"preferred_ip_protocol": "ip4"
},
"prober": "http"
},
"irc_banner": {
"prober": "tcp",
"tcp": {
"preferred_ip_protocol": "ip4",
"query_response": [
{
"send": "NICK prober"
},
{
"send": "USER prober prober prober :prober"
},
{
"expect": "PING :([^ ]+)",
"send": "PONG ${1}"
},
{
"expect": "^:[^ ]+ 001"
}
]
}
},
"pop3s_banner": {
"prober": "tcp",
"tcp": {
"preferred_ip_protocol": "ip4",
"query_response": [
{
"expect": "^+OK"
}
],
"tls": true,
"tls_config": {
"insecure_skip_verify": false
}
}
},
"ssh_banner": {
"prober": "tcp",
"tcp": {
"preferred_ip_protocol": "ip4",
"query_response": [
{
"expect": "^SSH-2.0-"
}
]
}
},
"tcp_connect": {
"prober": "tcp",
"tcp": {
"preferred_ip_protocol": "ip4"
}
}
}
}
22 changes: 22 additions & 0 deletions apps/monitoring/jsonnet/ext/rules/testing.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"groups": [
{
"name": "testing.rules",
"rules": [
{
"alert": "CPUStealTimeHigh",
"annotations": {
"description": "CPU Steal Time is very high on {{ $labels.instance }} hypervisor. This can lead to VM being stalled.",
"runbook_url": "https://github.com/thaum-xyz/ankhmorpork/blob/master/docs/runbooks/CPUStealTimeHigh.md",
"summary": "High CPU Steal Time"
},
"expr": "sum by (instance) (rate(node_cpu_seconds_total{mode=\"steal\"}[3m])) / count by (instance) (node_cpu_seconds_total{mode=\"steal\"}) > 0.1\n",
"for": "20m",
"labels": {
"severity": "warning"
}
}
]
}
]
}
100 changes: 100 additions & 0 deletions apps/monitoring/jsonnet/ext/rules/thaum.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{
"groups": [
{
"name": "custom node alert rules",
"rules": [
{
"alert": "PackagesAvailable",
"annotations": {
"description": "{{ $value }} packages are available for upgrade. Maybe it is time to upgrade?",
"runbook_url": "https://github.com/thaum-xyz/ankhmorpork/blob/master/docs/runbooks/PackagesAvailable.md",
"summary": "Packages are available for upgrade"
},
"expr": "sum by (node,instance) (yum_upgrades_pending) > 200\nor\nsum by (node,instance) (apt_upgrades_pending) > 200\n",
"for": "48h",
"labels": {
"severity": "info"
}
},
{
"alert": "RebootRequired",
"annotations": {
"description": "Instance '{{ $labels.instance }}' was upgraded and now requires a reboot.",
"runbook_url": "https://github.com/thaum-xyz/ankhmorpork/blob/master/docs/runbooks/RebootRequired.md",
"summary": "Reboot is required to finish package upgrade"
},
"expr": "node_reboot_required > 0",
"for": "4h",
"labels": {
"severity": "info"
}
}
]
},
{
"name": "alert rules specific to thaum.xyz",
"rules": [
{
"alert": "FederatedPrometheusDown",
"annotations": {
"description": "Remote Prometheus server {{ $labels.instance }} has been down for more than 10 minutes.",
"runbook_url": "https://github.com/thaum-xyz/ankhmorpork/blob/master/docs/runbooks/FederatedPrometheusDown.md",
"summary": "Federated prometheus is down"
},
"expr": "up{job=\"lancre\"} == 0",
"for": "20m",
"labels": {
"severity": "warning"
}
},
{
"alert": "FilesystemReadOnly",
"annotations": {
"description": "Filesystem went read-only on {{ $labels.instance }}. Check FS for possible corruption.",
"summary": "Filesystem went read-only possibly due to device error."
},
"expr": "node_filesystem_readonly{fstype=~\"(vfat|ext4|xfs)\"} != 0\n",
"labels": {
"severity": "critical"
}
},
{
"alert": "TouchscreenNotAvailable",
"annotations": {
"description": "Powercycle device {{ $labels.instance }} to bring touchscreen up",
"summary": "Touchscreen not available"
},
"expr": "devices_input_touchscreen_up == 0 or absent(devices_input_touchscreen_up)\n",
"for": "10m",
"labels": {
"severity": "warning"
}
},
{
"alert": "TouchscreenNotAvailable",
"annotations": {
"description": "Powercycle device {{ $labels.instance }}",
"summary": "Touchscreen not available and automatic remediation failed to restore it"
},
"expr": "devices_input_touchscreen_up == 0 or absent(devices_input_touchscreen_up)\n",
"for": "1h",
"labels": {
"severity": "critical"
}
},
{
"alert": "TemperaturesNotAvailable",
"annotations": {
"description": "Temperature data is gone. Immediately switch off all relays and check OW bus.",
"summary": "Cannot obtain temperature data"
},
"expr": "absent(evok_temperature_celsius)\n",
"for": "15m",
"labels": {
"severity": "critical"
}
}
]
}
]
}
42 changes: 42 additions & 0 deletions apps/monitoring/jsonnet/jsonnetfile.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/kube-prometheus",
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "master"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "release-2.0"
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "release-2.0"
},
{

"source": {
"git": {
"remote": "https://github.com/povilasv/coredns-mixin"
}
},
"version": "master"
}
],
"legacyImports": true
}
Loading

0 comments on commit aa0899a

Please sign in to comment.