From bbab5ba08b3e6940fd0712117ab2d7449637df5f Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 24 Oct 2024 10:42:26 +0200 Subject: [PATCH] dashboards: include block level on queried blocks (#9706) * dashboards: include block level on queried blocks Signed-off-by: Dimitar Dimitrov * Add CHANGELOG.md entry Signed-off-by: Dimitar Dimitrov * Group higher levels Signed-off-by: Dimitar Dimitrov * Reorder queries Signed-off-by: Dimitar Dimitrov * Format jsonnet Signed-off-by: Dimitar Dimitrov --------- Signed-off-by: Dimitar Dimitrov --- CHANGELOG.md | 1 + .../metamonitoring/grafana-dashboards.yaml | 19 ++++++++++++------ .../dashboards/mimir-queries.json | 19 ++++++++++++------ .../dashboards/mimir-queries.json | 19 ++++++++++++------ .../mimir-mixin/dashboards/queries.libsonnet | 20 ++++++++++++++++--- 5 files changed, 57 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f06efdc5655..effcfea0195 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ * [CHANGE] Remove backwards compatibility for `thanos_memcached_` prefixed metrics in dashboards and alerts removed in 2.12. #9674 * [ENHANCEMENT] Unify ingester autoscaling panels on 'Mimir / Writes' dashboard to work for both ingest-storage and non-ingest-storage autoscaling. #9617 * [ENHANCEMENT] Dashboards: visualize the age of source blocks in the "Mimir / Compactor" dashboard. #9697 +* [ENHANCEMENT] Dashboards: Include block compaction level on queried blocks in 'Mimir / Queries' dashboard. #9706 * [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432 * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412 * [BUGFIX] Alerts: Exclude failed cache "add" operations from alerting since failures are expected in normal operation. 
#9658 diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index 826a0ad5cbd..9cd2287c7d7 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -12730,18 +12730,19 @@ data: "panels": [ { "datasource": "$datasource", + "description": "### Blocks queried / sec by compaction level\nIncreased volume of lower levels (for example levels 1 and 2) can indicate that the compactor is not keeping up.\nIn that case the store-gateway will start serving more blocks which aren't that well compacted.\n\n", "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, + "fillOpacity": 100, + "lineWidth": 0, "pointSize": 5, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" } }, "min": 0, @@ -12767,13 +12768,19 @@ data: "span": 4, "targets": [ { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum by (level) (rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level=~\"[0-4]\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": "blocks", + "legendFormat": "{{level}}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level!~\"[0-4]\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "5+", "legendLink": null } ], - "title": "Blocks queried / sec", + "title": "Blocks queried / sec by compaction level", "type": "timeseries" }, { diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json index 3f96818784b..c4c917b21cd 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json @@ -1510,18 +1510,19 @@ "panels": [ { "datasource": "$datasource", + "description": "### Blocks queried / sec by compaction level\nIncreased volume of lower levels (for example levels 1 and 2) can indicate that the compactor is not keeping up.\nIn that case the store-gateway will start serving more blocks which aren't that well compacted.\n\n", "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, + "fillOpacity": 100, + "lineWidth": 0, "pointSize": 5, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" } }, "min": 0, @@ -1547,13 +1548,19 @@ "span": 4, "targets": [ { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum by (level) (rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level=~\"[0-4]\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{level}}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level!~\"[0-4]\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": "blocks", + "legendFormat": "5+", "legendLink": null } ], - "title": "Blocks queried / sec", + "title": "Blocks queried / sec by compaction level", "type": "timeseries" }, { diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-queries.json b/operations/mimir-mixin-compiled/dashboards/mimir-queries.json index bd71fcabe50..792e4635327 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-queries.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-queries.json @@ -1510,18 +1510,19 @@ "panels": [ { "datasource": "$datasource", + "description": "### Blocks queried / sec by compaction level\nIncreased volume of lower levels (for example levels 1 and 2) can indicate that the compactor is not keeping up.\nIn that case the store-gateway will start serving more blocks which aren't that well compacted.\n\n", "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", - "fillOpacity": 1, - "lineWidth": 1, + "fillOpacity": 100, + "lineWidth": 0, "pointSize": 5, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" } }, "min": 0, @@ -1547,13 +1548,19 @@ "span": 4, "targets": [ { - "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum by (level) (rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level=~\"[0-4]\",cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{level}}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",level!~\"[0-4]\",cluster=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": "blocks", + "legendFormat": "5+", "legendLink": null } ], - "title": "Blocks queried / sec", + "title": "Blocks queried / sec by compaction level", "type": "timeseries" }, { diff --git a/operations/mimir-mixin/dashboards/queries.libsonnet b/operations/mimir-mixin/dashboards/queries.libsonnet index f20a318e32d..6950d3357c7 100644 --- a/operations/mimir-mixin/dashboards/queries.libsonnet +++ b/operations/mimir-mixin/dashboards/queries.libsonnet @@ -338,9 +338,23 @@ local filename = 'mimir-queries.json'; .addRow( $.row('Store-gateway') .addPanel( - $.timeseriesPanel('Blocks queried / sec') + - $.queryPanel('sum(rate(cortex_bucket_store_series_blocks_queried_sum{component="store-gateway",%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.store_gateway), 'blocks') + - { fieldConfig+: { defaults+: { unit: 'ops' } } }, + $.timeseriesPanel('Blocks queried / sec by compaction level') + + $.panelDescription( + 'Blocks queried / sec by compaction level', + ||| + Increased volume of lower levels (for example levels 1 and 2) can indicate that the compactor is not keeping up. + In that case the store-gateway will start serving more blocks which aren't that well compacted. + ||| + ) + + $.queryPanel( + [ + 'sum by (level) (rate(cortex_bucket_store_series_blocks_queried_sum{component="store-gateway",level=~"[0-4]",%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.store_gateway), + 'sum(rate(cortex_bucket_store_series_blocks_queried_sum{component="store-gateway",level!~"[0-4]",%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.store_gateway), + ], + ['{{level}}', '5+'], + ) + + { fieldConfig+: { defaults+: { unit: 'ops' } } } + + $.stack, ) .addPanel( $.timeseriesPanel('Data fetched / sec') +