From 72905cfe7c6d695c6aa56d743a377edb8608258b Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Tue, 27 Oct 2020 18:00:38 +0000
Subject: [PATCH 1/3] Add a dashboard for the Alertmanager

Includes:
- Notification
- Alerts received
- Configuration API / UI

Signed-off-by: gotjosh
---
Review notes (text in this area is not part of the commit):
- Registers dashboards/alertmanager.libsonnet in dashboards.libsonnet and
  adds that file, which defines 'alertmanager.json' with four rows:
  Headlines (Total Alerts, Total Silences), Alerts Received (APS),
  Alert Notifications (NPS, NPS by integration, Latency) and
  Configuration API (gateway) + Alertmanager UI (QPS, Latency).
- The "success" series in APS and NPS are computed as the total rate minus
  the invalid/failed rate. The per-integration variant keeps only positive
  differences and uses `or on () vector(0)` as a fallback series.
- Also corrects the legend label 'sucess' -> 'success' in ruler.libsonnet.
- NOTE(review): every rate() window here is [$__interval]; confirm that is
  wide enough relative to the scrape interval for the target Grafana setup.
- NOTE(review): leading whitespace was collapsed in the copy under review.
  Indentation and blank context lines below were reconstructed from the
  hunk line counts and usual jsonnetfmt layout; confirm against the
  original commit before applying.

 jsonnet/mimir-mixin/dashboards.libsonnet | 1 +
 .../dashboards/alertmanager.libsonnet | 88 +++++++++++++++++++
 .../mimir-mixin/dashboards/ruler.libsonnet | 2 +-
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet

diff --git a/jsonnet/mimir-mixin/dashboards.libsonnet b/jsonnet/mimir-mixin/dashboards.libsonnet
index 06e739b776d..6f5e44f4870 100644
--- a/jsonnet/mimir-mixin/dashboards.libsonnet
+++ b/jsonnet/mimir-mixin/dashboards.libsonnet
@@ -4,6 +4,7 @@
     (import 'dashboards/queries.libsonnet') +
     (import 'dashboards/reads.libsonnet') +
     (import 'dashboards/ruler.libsonnet') +
+    (import 'dashboards/alertmanager.libsonnet') +
     (import 'dashboards/scaling.libsonnet') +
     (import 'dashboards/writes.libsonnet') +
 
diff --git a/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
new file mode 100644
index 00000000000..0b5c77a4c44
--- /dev/null
+++ b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
@@ -0,0 +1,88 @@
+local utils = import 'mixin-utils/utils.libsonnet';
+
+(import 'dashboard-utils.libsonnet') {
+
+  'alertmanager.json':
+    $.dashboard('Cortex / Alertmanager')
+    .addClusterSelectorTemplates()
+    .addRow(
+      ($.row('Headlines') + {
+         height: '100px',
+         showTitle: false,
+       })
+      .addPanel(
+        $.panel('Total Alerts') +
+        $.statPanel('sum(cortex_alertmanager_alerts{%s})' % $.jobMatcher('alertmanager'), format='short')
+      )
+      .addPanel(
+        $.panel('Total Silences') +
+        $.statPanel('sum(cortex_alertmanager_silences{%s})' % $.jobMatcher('alertmanager'), format='short')
+      )
+    )
+    .addRow(
+      $.row('Alerts Received')
+      .addPanel(
+        $.panel('APS') +
+        $.queryPanel(
+          [
+            |||
+              sum(rate(cortex_alertmanager_alerts_received_total{%s}[$__interval]))
+              -
+              sum(rate(cortex_alertmanager_alerts_invalid_total{%s}[$__interval]))
+            ||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
+            'sum(rate(cortex_alertmanager_alerts_invalid_total{%s}[$__interval]))' % $.jobMatcher('alertmanager'),
+          ],
+          ['success', 'failed']
+        )
+      )
+    )
+    .addRow(
+      $.row('Alert Notifications')
+      .addPanel(
+        $.panel('NPS') +
+        $.queryPanel(
+          [
+            |||
+              sum(rate(cortex_alertmanager_notifications_total{%s}[$__interval]))
+              -
+              sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__interval]))
+            ||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
+            'sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__interval]))' % $.jobMatcher('alertmanager'),
+          ],
+          ['success', 'failed']
+        )
+      )
+      .addPanel(
+        $.panel('NPS by integration') +
+        $.queryPanel(
+          [
+            |||
+              (
+              sum(rate(cortex_alertmanager_notifications_total{%s}[$__interval])) by(integration)
+              -
+              sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__interval])) by(integration)
+              ) > 0
+              or on () vector(0)
+            ||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
+            'sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__interval])) by(integration)' % $.jobMatcher('alertmanager'),
+          ],
+          ['success - {{ integration }}', 'failed - {{ integration }}']
+        )
+      )
+      .addPanel(
+        $.panel('Latency') +
+        $.latencyPanel('cortex_alertmanager_notification_latency_seconds', '{%s}' % $.jobMatcher('alertmanager'))
+      )
+    )
+    .addRow(
+      $.row('Configuration API (gateway) + Alertmanager UI')
+      .addPanel(
+        $.panel('QPS') +
+        $.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"api_v1_alerts|alertmanager"}' % $.jobMatcher($._config.job_names.gateway))
+      )
+      .addPanel(
+        $.panel('Latency') +
+        utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_v1_alerts|alertmanager')])
+      )
+    ),
+}
diff --git a/jsonnet/mimir-mixin/dashboards/ruler.libsonnet b/jsonnet/mimir-mixin/dashboards/ruler.libsonnet
index 8fc83cfbcd4..9c81e8221f4 100644
--- a/jsonnet/mimir-mixin/dashboards/ruler.libsonnet
+++ b/jsonnet/mimir-mixin/dashboards/ruler.libsonnet
@@ -92,7 +92,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
             $.rulerQueries.ruleEvaluations.success % [$.jobMatcher('ruler'), $.jobMatcher('ruler')],
             $.rulerQueries.ruleEvaluations.failure % $.jobMatcher('ruler'),
           ],
-          ['sucess', 'failed'],
+          ['success', 'failed'],
         ),
       )
       .addPanel(
From 012fa096262c0e220058699b48ad021173635345 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Tue, 27 Oct 2020 18:10:58 +0000
Subject: [PATCH 2/3] md5 dashboard name

---
Review notes (text in this area is not part of the commit):
- Merges a fixed `uid` field into the object returned by $.dashboard(...).
- NOTE(review): going by the subject, the uid is presumably an MD5 digest of
  the dashboard name; that cannot be verified from this patch alone.
- NOTE(review): the added line ends with a `)` that has no matching opening
  parenthesis, so the file most likely does not parse at this commit.
  Patch 3/3 adds the missing `(`; consider squashing the two.

 jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
index 0b5c77a4c44..6f8d8a4c8dc 100644
--- a/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
+++ b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
@@ -3,7 +3,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
 (import 'dashboard-utils.libsonnet') {
 
   'alertmanager.json':
-    $.dashboard('Cortex / Alertmanager')
+    $.dashboard('Cortex / Alertmanager') + { uid: 'a76bee5913c97c918d9e56a3cc88cc28' })
     .addClusterSelectorTemplates()
     .addRow(
       ($.row('Headlines') + {
From 590792702d97a9e8e1238fd93681c07e7c4dca40 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Mon, 2 Nov 2020 11:03:50 +0000
Subject: [PATCH 3/3] Fix wrapping

---
Review notes (text in this area is not part of the commit):
- Adds the opening `(` so that `$.dashboard(...) + { uid: ... }` is wrapped
  in parentheses before the chained .addClusterSelectorTemplates() call,
  balancing the `)` introduced by patch 2/3.

 jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
index 6f8d8a4c8dc..1f3bdbc47d2 100644
--- a/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
+++ b/jsonnet/mimir-mixin/dashboards/alertmanager.libsonnet
@@ -3,7 +3,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
 (import 'dashboard-utils.libsonnet') {
 
   'alertmanager.json':
-    $.dashboard('Cortex / Alertmanager') + { uid: 'a76bee5913c97c918d9e56a3cc88cc28' })
+    ($.dashboard('Cortex / Alertmanager') + { uid: 'a76bee5913c97c918d9e56a3cc88cc28' })
     .addClusterSelectorTemplates()
     .addRow(
       ($.row('Headlines') + {