From 318550e77f613c2940ba7f1e5c921fb0d24b80aa Mon Sep 17 00:00:00 2001 From: David Andersson <51036209+jdkandersson@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:32:20 +1000 Subject: [PATCH] Update idle runners after reconciliation (#421) * Update idle runners after reconciliation Adds the difference between expected and active plus idle * update docs * update changelog --------- Co-authored-by: Christopher Bartz --- docs/changelog.md | 4 + docs/reference/cos.md | 2 +- .../src-docs/openstack_cloud.health_checks.md | 10 +- ...penstack_cloud.openstack_runner_manager.md | 21 +- src/grafana_dashboards/metrics.json | 1246 +++++++++-------- 5 files changed, 657 insertions(+), 626 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 3caf325e0..2d61c349f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +### 2024-12-13 + +- Add the difference between expected and actual runners to the "Runners after reconciliation" dashboard panel. + ### 2024-12-05 - Bugfix to no longer stop the reconciliation when a runner's health check fails. diff --git a/docs/reference/cos.md b/docs/reference/cos.md index e1a0c4d26..fd21a5b30 100644 --- a/docs/reference/cos.md +++ b/docs/reference/cos.md @@ -13,7 +13,7 @@ The "GitHub Self-Hosted Runner Metrics" metrics dashboard presents the following - General: Displays general metrics about the charm and runners, such as: - Lifecycle counters: Tracks the frequency of Runner initialisation, start, stop, and crash events. - Available runners: A horizontal bar graph showing the number of runners available (and max expected) during the last reconciliation event. Note: This data is updated after each reconciliation event and is not real-time. - - Runners after reconciliation: A time series graph showing the number of runners marked as active/idle and the number of expected runners during the last reconciliation event over time. 
Note: This data is updated after each reconciliation event and is not real-time. + - Runners after reconciliation: A time series graph showing the number of runners marked as active/idle, the number of expected runners, and the difference between the expected runners and the sum of active and idle runners (shown as "Unknown") during the last reconciliation event over time. Note: This data is updated after each reconciliation event and is not real-time. - Duration observations: Each data point aggregates the last hour and shows the 50th, 90th, 95th percentile and maximum durations for: - Runner installation - Runner idle duration diff --git a/github-runner-manager/src-docs/openstack_cloud.health_checks.md b/github-runner-manager/src-docs/openstack_cloud.health_checks.md index 97e9e191b..7acb050c4 100644 --- a/github-runner-manager/src-docs/openstack_cloud.health_checks.md +++ b/github-runner-manager/src-docs/openstack_cloud.health_checks.md @@ -13,7 +13,7 @@ Collection of functions related to health checks for a runner VM. --- - + ## function `check_runner` @@ -43,7 +43,7 @@ This check applies to runners in any OpenStack state (ACTIVE, STOPPED, etc). --- - + ## function `check_active_runner` @@ -67,6 +67,12 @@ Run a health check for a runner whose openstack instance is ACTIVE. +**Raises:** + + - `OpenstackHealthCheckError`: If the health check could not be completed. + + + **Returns:** Whether the runner should be considered healthy. diff --git a/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md b/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md index 33b53390c..ac3181fc0 100644 --- a/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md +++ b/github-runner-manager/src-docs/openstack_cloud.openstack_runner_manager.md @@ -14,11 +14,12 @@ Manager for self-hosted runner on OpenStack. 
- **MAX_METRICS_FILE_SIZE** - **RUNNER_STARTUP_PROCESS** - **OUTDATED_METRICS_STORAGE_IN_SECONDS** +- **HEALTH_CHECK_ERROR_LOG_MSG** --- - + ## class `OpenStackServerConfig` Configuration for OpenStack server. @@ -49,7 +50,7 @@ __init__(image: str, flavor: str, network: str) → None --- - + ## class `OpenStackRunnerManagerConfig` Configuration for OpenStack runner manager. @@ -92,7 +93,7 @@ __init__( --- - + ## class `OpenStackRunnerManager` Manage self-hosted runner on OpenStack cloud. @@ -103,7 +104,7 @@ Manage self-hosted runner on OpenStack cloud. - `name_prefix`: The name prefix of the runners created. - + ### method `__init__` @@ -135,7 +136,7 @@ The prefix of runner names. --- - + ### method `cleanup` @@ -158,7 +159,7 @@ Cleanup runner and resource on the cloud. --- - + ### method `create_runner` @@ -188,7 +189,7 @@ Create a self-hosted runner. --- - + ### method `delete_runner` @@ -212,7 +213,7 @@ Delete self-hosted runners. --- - + ### method `flush_runners` @@ -235,7 +236,7 @@ Remove idle and/or busy runners. --- - + ### method `get_runner` @@ -258,7 +259,7 @@ Get a self-hosted runner by instance id. 
--- - + ### method `get_runners` diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 02ffd253d..79ef2307e 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1034, + "id": 1085, "links": [], "liveNow": false, "panels": [ @@ -797,6 +797,18 @@ "legendFormat": "Expected", "queryType": "range", "refId": "B" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "code", + "expr": "sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",expected_runners=\"expected_runners\",flavor=\"flavor\" | __error__=\"\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap expected_runners[60m])) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",idle_runners=\"idle_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap idle_runners[60m])) - sum by(filename)(last_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",active_runners=\"active_runners\",flavor=\"flavor\" | event=\"reconciliation\" | flavor=~\"$flavor\" | unwrap active_runners[60m]))", + "hide": false, + "legendFormat": "Unknown", + "queryType": "range", + "refId": "C" } ], "title": "Runners after Reconciliation", @@ -1104,7 +1116,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1192,7 
+1205,7 @@ "type": "timeseries" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -1200,676 +1213,683 @@ "y": 41 }, "id": 12, - "panels": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" + "panels": [], + "title": "Jobs", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the result from the runner's perspective.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "Visualises the result from the runner's perspective.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 34 - }, - "id": 14, - "interval": "1h", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "sum by(status)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json status=\"status\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", - "legendFormat": "{{status}}", - "queryType": "range", - "refId": "A" - } - ], - "title": "Completion Status", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 14, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "loki", "uid": "${lokids}" }, - "description": "Visualises the result from the GitHub Actions workflow perspective.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 34 + "editorMode": "code", + "expr": "sum by(status)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json status=\"status\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", + "legendFormat": "{{status}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Completion Status", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the result from the GitHub Actions workflow perspective.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 20, - "interval": "1h", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "code", - "expr": "sum by(job_conclusion)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json 
job_conclusion=\"job_conclusion\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", - "legendFormat": "{{job_conclusion}}", - "queryType": "range", - "refId": "A" - } - ], - "title": "Job Conclusion", - "type": "timeseries" - }, - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "description": "Visualises the proportion of applications in use.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 18, - "interval": "1h", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "sum 
by(flavor)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json flavor=\"flavor\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", - "legendFormat": "{{flavor}}", - "queryType": "range", - "refId": "A" - } - ], - "title": "Application proportion", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 20, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "loki", "uid": "${lokids}" }, - "description": "Visualises the http codes for repo policy check failures.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 42 + "editorMode": "code", + "expr": "sum by(job_conclusion)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", 
juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json job_conclusion=\"job_conclusion\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", + "legendFormat": "{{job_conclusion}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Job Conclusion", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the proportion of applications in use.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 15, - "interval": "1h", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "code", - "expr": "sum by(http_code)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json status=\"status\",repo=\"repo\" | status=\"repo-policy-check-failure\" | repo=~\"$repository\" | flavor=~\"$flavor\" | json http_code=\"status_info.code\"[1h]))", - "legendFormat": "{{http_code}}", - "queryType": "range", - "refId": "A" - } - ], - "title": "Repo Policy Check Failures", - "type": "timeseries" - }, - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "description": "Visualises the GitHub events that 
triggered a workflow run.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 19, - "interval": "1h", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "code", - "expr": "sum by(github_event)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json github_event=\"github_event\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", - "legendFormat": "{{github_event}}", - "queryType": "range", - "refId": "A" - } - ], - "title": "GitHub Event", - "type": "timeseries" + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 18, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "loki", "uid": "${lokids}" }, - "description": "All aggregations are based on a 1-hour time period.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 50 + "editorMode": "code", + "expr": "sum by(flavor)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json flavor=\"flavor\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", + "legendFormat": "{{flavor}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Application 
proportion", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the http codes for repo policy check failures.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "builder", - "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", - "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", - "legendFormat": "50%", - "queryType": "range", - "refId": "A" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 15, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "code", + "expr": "sum by(http_code)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_stop\" | json status=\"status\",repo=\"repo\" | status=\"repo-policy-check-failure\" | repo=~\"$repository\" | flavor=~\"$flavor\" | json http_code=\"status_info.code\"[1h]))", + "legendFormat": "{{http_code}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Repo Policy Check Failures", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the GitHub events that triggered a workflow run.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "builder", - "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", - "hide": false, - "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", - "legendFormat": "95%", - "queryType": "range", - "refId": "D" + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "builder", - "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", - "hide": false, - "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", - "legendFormat": "99%", - "queryType": "range", - "refId": "E" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" }, - { - "datasource": { - "type": "loki", - "uid": "${lokids}" - }, - "editorMode": "builder", - "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", - "hide": false, - "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", - "legendFormat": "Max", - "queryType": "range", - "refId": "C" + "thresholdsStyle": { + "mode": "off" } - ], - "title": "Job Duration (Percentile)", - "type": "timeseries" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 19, + "interval": "1h", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "loki", "uid": "${lokids}" }, - "description": "Visualises the number of jobs per repository.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" + "editorMode": "code", + "expr": "sum by(github_event)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json github_event=\"github_event\",repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[1h]))", + "legendFormat": "{{github_event}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "GitHub Event", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "All aggregations are based on a 1-hour time period.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + { + "color": "red", + "value": 80 } - }, - "overrides": [] + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${lokids}" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 58 + "editorMode": "builder", + "expr": "quantile_over_time(0.5,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", + "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", + "legendFormat": "50%", + "queryType": "range", + "refId": "A" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" }, - "id": 21, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": true - }, - "showUnfilled": true - }, - "pluginVersion": "9.2.1", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "${lokids}" + "editorMode": "builder", + "expr": "quantile_over_time(0.95,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", + "hide": false, + "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", + "legendFormat": "95%", + "queryType": "range", + "refId": "D" + }, + { + "datasource": { + "type": "loki", + "uid": 
"${lokids}" + }, + "editorMode": "builder", + "expr": "quantile_over_time(0.99,{filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", + "hide": false, + "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", + "legendFormat": "99%", + "queryType": "range", + "refId": "E" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "builder", + "expr": "max_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",job_duration=\"job_duration\",status=\"status\",flavor=\"flavor\" | event=\"runner_stop\" | status=\"normal\" | flavor=~\"$flavor\" | unwrap job_duration[1h]) by(filename)", + "hide": false, + "key": "Q-9302bc4d-cce0-4674-bad5-353257fdd2f4-0", + "legendFormat": "Max", + "queryType": "range", + "refId": "C" + } + ], + "title": "Job Duration (Percentile)", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "description": "Visualises the number of jobs per repository.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "sum by(repo)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json repo=\"repo\" | 
flavor=~\"$flavor\" | repo=~\"$repository\"[$__range]))", - "legendFormat": "", - "queryType": "instant", - "refId": "A" - } + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 21, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" ], - "title": "Repositories", - "type": "bargauge" + "fields": "", + "values": true + }, + "showUnfilled": true, + "valueMode": "color" + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${lokids}" + }, + "editorMode": "code", + "expr": "sum by(repo)(count_over_time({filename=\"/var/log/github-runner-metrics.log\", juju_application=~\"$juju_application\", juju_model=~\"$juju_model\", juju_model_uuid=~\"$juju_model_uuid\", juju_unit=~\"$juju_unit\"} | json event=\"event\",flavor=\"flavor\" | event=\"runner_start\" | json repo=\"repo\" | flavor=~\"$flavor\" | repo=~\"$repository\"[$__range]))", + "legendFormat": "", + "queryType": "instant", + "refId": "A" } ], - "title": "Jobs", - "type": "row" + "title": "Repositories", + "type": "bargauge" } ], - "refresh": "", + "refresh": "1m", "schemaVersion": 38, "style": "dark", "tags": [], @@ -2114,12 +2134,12 @@ ] }, "time": { - "from": "now-6h", + "from": "now-24h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "GitHub Self-Hosted Runner Metrics", - "version": 18, + "version": 19, "weekStart": "" }