1local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
2local dashboard = grafana.dashboard;
3local row = grafana.row;
4local prometheus = grafana.prometheus;
5local template = grafana.template;
6local graphPanel = grafana.graphPanel;
7
8{
9 grafanaDashboards+:: {
10
// PromQL label matchers binding every query to the dashboard's cluster
// template variables: one `<label>=~"$<label>"` matcher per entry of the
// comma-separated $._config.alertmanagerClusterLabels, joined with ','.
local amQuerySelector = std.join(
  ',',
  [
    '%(l)s=~"$%(l)s"' % { l: clusterLabel }
    for clusterLabel in std.split($._config.alertmanagerClusterLabels, ',')
  ]
),
// Legend template identifying a series: one `{{<label>}}` placeholder per
// entry of the comma-separated $._config.alertmanagerNameLabels, joined
// with '/'.
local amNameDashboardLegend = std.join(
  '/',
  [
    '{{%(l)s}}' % { l: nameLabel }
    for nameLabel in std.split($._config.alertmanagerNameLabels, ',')
  ]
),
13
// One dashboard template variable per cluster label listed in
// $._config.alertmanagerClusterLabels. Each variable is named after its
// label and is populated from the values that label takes on the
// `alertmanager_alerts` metric. The "All" option is disabled.
local alertmanagerClusterSelectorTemplates =
  local clusterLabels = std.split($._config.alertmanagerClusterLabels, ',');
  // Builds the template variable for a single cluster label.
  local selectorFor(clusterLabel) =
    template.new(
      name=clusterLabel,
      label=clusterLabel,
      datasource='$datasource',
      query='label_values(alertmanager_alerts, %s)' % clusterLabel,
      current='',
      refresh=2,
      includeAll=false,
      sort=1
    );
  [selectorFor(clusterLabel) for clusterLabel in clusterLabels],
28
// Template variable `integration`, used by the repeating notification
// panels. Its values are the `integration` label values of
// `alertmanager_notifications_total` that match
// $._config.alertmanagerCriticalIntegrationsRegEx. "All" is enabled and
// selected by default.
local integrationTemplate =
  local integrationsQuery =
    'label_values(alertmanager_notifications_total{integration=~"%s"}, integration)'
    % $._config.alertmanagerCriticalIntegrationsRegEx;
  template.new(
    name='integration',
    datasource='$datasource',
    query=integrationsQuery,
    current='all',
    hide='2',  // Always hide
    refresh=2,
    includeAll=true,
    sort=1
  ),
40
// Alertmanager overview dashboard.
//
// Layout:
//   - row "Alerts":        stored alerts, and receive rate (received / invalid)
//   - row "Notifications": send rate (total / failed) and latency
//                          (p99 / median / average), repeated per value of the
//                          `integration` template variable.
'alertmanager-overview.json':
  // Lookup object for the '%(...)s' placeholders in the queries below:
  // every field of $._config plus the computed cluster selector. Built once
  // instead of being re-created for each target.
  local queryParams = $._config { amQuerySelector: amQuerySelector };

  // Builds a Prometheus target from a query template. The legend is the
  // per-Alertmanager legend followed by `legendSuffix` (e.g. ' Failed').
  local amTarget(exprTemplate, legendSuffix='') =
    prometheus.target(
      exprTemplate % queryParams,
      legendFormat=amNameDashboardLegend + legendSuffix
    );

  local alerts =
    graphPanel.new(
      'Alerts',
      description='current set of alerts stored in the Alertmanager',
      datasource='$datasource',
      span=6,
      format='none',
      stack=true,
      fill=1,
      legend_show=false,
    )
    .addTarget(amTarget('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)'));

  local alertsRate =
    graphPanel.new(
      'Alerts receive rate',
      description='rate of successful and invalid alerts received by the Alertmanager',
      datasource='$datasource',
      span=6,
      format='ops',
      stack=true,
      fill=1,
      legend_show=false,
    )
    .addTarget(amTarget('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)', ' Received'))
    .addTarget(amTarget('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)', ' Invalid'));

  local notifications =
    graphPanel.new(
      '$integration: Notifications Send Rate',
      // The targets plot the total and the failed notification counters, so
      // the description names those (it previously repeated the wording of
      // the alerts receive-rate panel).
      description='rate of total and failed notifications sent by the Alertmanager',
      datasource='$datasource',
      format='ops',
      stack=true,
      fill=1,
      legend_show=false,
      repeat='integration'
    )
    .addTarget(amTarget('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)', ' Total'))
    .addTarget(amTarget('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)', ' Failed'));

  local notificationDuration =
    graphPanel.new(
      '$integration: Notification Duration',
      description='latency of notifications sent by the Alertmanager',
      datasource='$datasource',
      format='s',
      stack=false,
      fill=1,
      legend_show=false,
      repeat='integration'
    )
    .addTarget(amTarget(
      |||
        histogram_quantile(0.99,
          sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
        )
      |||,
      ' 99th Percentile'
    ))
    .addTarget(amTarget(
      |||
        histogram_quantile(0.50,
          sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
        )
      |||,
      ' Median'
    ))
    // Average latency: rate of the histogram's sum divided by rate of its count.
    .addTarget(amTarget(
      |||
        sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
        /
        sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
      |||,
      ' Average'
    ));

  dashboard.new(
    '%sOverview' % $._config.dashboardNamePrefix,
    time_from='now-1h',
    tags=($._config.dashboardTags),
    timezone='utc',
    refresh='30s',
    graphTooltip='shared_crosshair',
    uid='alertmanager-overview'
  )
  // Data source picker, written as a raw template object; every panel and
  // template variable above refers to it as '$datasource'.
  .addTemplate(
    {
      current: {
        text: 'Prometheus',
        value: 'Prometheus',
      },
      hide: 0,
      label: 'Data Source',
      name: 'datasource',
      options: [],
      query: 'prometheus',
      refresh: 1,
      regex: '',
      type: 'datasource',
    },
  )
  .addTemplates(alertmanagerClusterSelectorTemplates)
  .addTemplate(integrationTemplate)
  .addRow(
    row.new('Alerts')
    .addPanel(alerts)
    .addPanel(alertsRate)
  )
  .addRow(
    row.new('Notifications')
    .addPanel(notifications)
    .addPanel(notificationDuration)
  ),
153 },
154}
View as plain text