diff --git a/CHANGELOG.md b/CHANGELOG.md index ccbb2ea..2372182 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ * [ENHANCEMENT] Add recording rules to improve responsiveness of Alertmanager dashboard. #387 * [ENHANCEMENT] Add `CortexRolloutStuck` alert. #405 * [ENHANCEMENT] Added `CortexKVStoreFailure` alert. #406 +* [ENHANCEMENT] Add ability to override `datasource` for generated dashboards. #407 * [ENHANCEMENT] Use alertmanager jobname for alertmanager dashboard panels #411 * [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308 * [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329 diff --git a/cortex-mixin/config.libsonnet b/cortex-mixin/config.libsonnet index 5424123..4482958 100644 --- a/cortex-mixin/config.libsonnet +++ b/cortex-mixin/config.libsonnet @@ -69,5 +69,8 @@ // The routes to exclude from alerts. alert_excluded_routes: [], + + // Name of the datasource that the generated dashboards should attach to + dashboard_datasource: 'default', }, } diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index 981614a..e426819 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -8,7 +8,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; // - default tags, // - some links that propagate the selectred cluster. 
dashboard(title):: - super.dashboard(title) + { + super.dashboard(title=title, datasource=$._config.dashboard_datasource) + { addRowIf(condition, row):: if condition then self.addRow(row) diff --git a/cortex-mixin/dashboards/ruler.libsonnet b/cortex-mixin/dashboards/ruler.libsonnet index bfa231b..d106258 100644 --- a/cortex-mixin/dashboards/ruler.libsonnet +++ b/cortex-mixin/dashboards/ruler.libsonnet @@ -68,19 +68,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; }) .addPanel( $.panel('Active Configurations') + - $.statPanel('sum(cortex_ruler_managers_total{%s})' % $.jobMatcher('ruler'), format='short') + $.statPanel('sum(cortex_ruler_managers_total{%s})' % $.jobMatcher($._config.job_names.ruler), format='short') ) .addPanel( $.panel('Total Rules') + - $.statPanel('sum(cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher('ruler'), format='short') + $.statPanel('sum(cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher($._config.job_names.ruler), format='short') ) .addPanel( $.panel('Read from Ingesters - QPS') + - $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}[5m]))' % $.jobMatcher('ruler'), format='reqps') + $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}[5m]))' % $.jobMatcher($._config.job_names.ruler), format='reqps') ) .addPanel( $.panel('Write to Ingesters - QPS') + - $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}[5m]))' % $.jobMatcher('ruler'), format='reqps') + $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}[5m]))' % $.jobMatcher($._config.job_names.ruler), format='reqps') ) ) .addRow( @@ -89,8 +89,8 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.panel('EPS') + $.queryPanel( [ - $.rulerQueries.ruleEvaluations.success % [$.jobMatcher('ruler'), 
$.jobMatcher('ruler')], - $.rulerQueries.ruleEvaluations.failure % $.jobMatcher('ruler'), + $.rulerQueries.ruleEvaluations.success % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], + $.rulerQueries.ruleEvaluations.failure % $.jobMatcher($._config.job_names.ruler), ], ['success', 'failed'], ), @@ -98,7 +98,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addPanel( $.panel('Latency') + $.queryPanel( - $.rulerQueries.ruleEvaluations.latency % [$.jobMatcher('ruler'), $.jobMatcher('ruler')], + $.rulerQueries.ruleEvaluations.latency % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], 'average' ), ) @@ -126,22 +126,22 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Writes (Ingesters)') .addPanel( $.panel('QPS') + - $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher('ruler')) + $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher($._config.job_names.ruler)) ) .addPanel( $.panel('Latency') + - $.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher('ruler')) + $.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher($._config.job_names.ruler)) ) ) .addRow( $.row('Reads (Ingesters)') .addPanel( $.panel('QPS') + - $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher('ruler')) + $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher($._config.job_names.ruler)) ) .addPanel( $.panel('Latency') + - $.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher('ruler')) + 
$.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher($._config.job_names.ruler)) ) ) .addRowIf( @@ -208,34 +208,34 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Notifications') .addPanel( $.panel('Delivery Errors') + - $.queryPanel($.rulerQueries.notifications.failure % [$.jobMatcher('ruler'), $.jobMatcher('ruler')], '{{ user }}') + $.queryPanel($.rulerQueries.notifications.failure % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], '{{ user }}') ) .addPanel( $.panel('Queue Length') + - $.queryPanel($.rulerQueries.notifications.queue % [$.jobMatcher('ruler'), $.jobMatcher('ruler')], '{{ user }}') + $.queryPanel($.rulerQueries.notifications.queue % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], '{{ user }}') ) .addPanel( $.panel('Dropped') + - $.queryPanel($.rulerQueries.notifications.dropped % $.jobMatcher('ruler'), '{{ user }}') + $.queryPanel($.rulerQueries.notifications.dropped % $.jobMatcher($._config.job_names.ruler), '{{ user }}') ) ) .addRow( ($.row('Group Evaluations') + { collapse: true }) .addPanel( $.panel('Missed Iterations') + - $.queryPanel($.rulerQueries.groupEvaluations.missedIterations % $.jobMatcher('ruler'), '{{ user }}'), + $.queryPanel($.rulerQueries.groupEvaluations.missedIterations % $.jobMatcher($._config.job_names.ruler), '{{ user }}'), ) .addPanel( $.panel('Latency') + $.queryPanel( - $.rulerQueries.groupEvaluations.latency % [$.jobMatcher('ruler'), $.jobMatcher('ruler')], + $.rulerQueries.groupEvaluations.latency % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], '{{ user }}' ), ) .addPanel( $.panel('Failures') + $.queryPanel( - $.rulerQueries.perUserPerGroupEvaluations.failure % [$.jobMatcher('ruler')], '{{ rule_group }}' + $.rulerQueries.perUserPerGroupEvaluations.failure % [$.jobMatcher($._config.job_names.ruler)], '{{ rule_group }}' ) ) ) 
@@ -244,7 +244,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addPanel( $.panel('Latency') + $.queryPanel( - $.rulerQueries.perUserPerGroupEvaluations.latency % [$.jobMatcher('ruler'), $.jobMatcher('ruler')], + $.rulerQueries.perUserPerGroupEvaluations.latency % [$.jobMatcher($._config.job_names.ruler), $.jobMatcher($._config.job_names.ruler)], '{{ user }}' ) )