package kube

import "encoding/yaml"

// Prometheus ConfigMap data: two files rendered to YAML strings.
//   - alert.rules:   alerting rules mounted at /etc/prometheus/alert.rules
//   - prometheus.yml: the main Prometheus server configuration
configMap: prometheus: {
	"alert.rules": yaml.Marshal(alert_rules)
	let alert_rules = {
		groups: [{
			name: "rules.yaml"
			rules: [{
				alert: "InstanceDown"
				expr:  "up == 0"
				for:   "30s"
				labels: severity: "page"
				annotations: {
					description: "{{$labels.app}} of job {{ $labels.job }} has been down for more than 30 seconds."
					summary:     "Instance {{$labels.app}} down"
				}
			}, {
				alert: "InsufficientPeers"
				expr:  "count(up{job=\"etcd\"} == 0) > (count(up{job=\"etcd\"}) / 2 - 1)"
				for:   "3m"
				labels: severity: "page"
				annotations: {
					description: "If one more etcd peer goes down the cluster will be unavailable"
					summary:     "etcd cluster small"
				}
			}, {
				alert: "EtcdNoMaster"
				expr:  "sum(etcd_server_has_leader{app=\"etcd\"}) == 0"
				for:   "1s"
				labels: severity: "page"
				annotations: summary: "No ETCD master elected."
			}, {
				alert: "PodRestart"
				expr:  "(max_over_time(pod_container_status_restarts_total[5m]) - min_over_time(pod_container_status_restarts_total[5m])) > 2"
				for:   "1m"
				labels: severity: "page"
				annotations: {
					// Fixed typo: "resturted" -> "restarted".
					description: "{{$labels.app}} {{ $labels.container }} restarted {{ $value }} times in 5m."
					summary:     "Pod for {{$labels.container}} restarts too often"
				}
			}]
		}]
	}
	"prometheus.yml": yaml.Marshal(prometheus_yml)
	let prometheus_yml = {
		global: scrape_interval: "15s"
		rule_files: ["/etc/prometheus/alert.rules"]
		alerting: alertmanagers: [{
			scheme: "http"
			static_configs: [{
				targets: ["alertmanager:9093"]
			}]
		}]
		scrape_configs: [{
			job_name: "kubernetes-apiservers"
			kubernetes_sd_configs: [{
				role: "endpoints"
			}]
			// Default to scraping over https. If required, just disable this or change to
			// `http`.
			scheme: "https"
			// This TLS & bearer token file config is used to connect to the actual scrape
			// endpoints for cluster components. This is separate to discovery auth
			// configuration because discovery & scraping are two separate concerns in
			// Prometheus. The discovery auth config is automatic if Prometheus runs inside
			// the cluster. Otherwise, more config options have to be provided within the
			// <kubernetes_sd_config>.
			tls_config: ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
			// If your node certificates are self-signed or use a different CA to the
			// master CA, then disable certificate verification below. Note that
			// certificate verification is an integral part of a secure infrastructure
			// so this should only be disabled in a controlled environment. You can
			// disable certificate verification by uncommenting the line below.
			//
			// insecure_skip_verify: true
			bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
			// Keep only the default/kubernetes service endpoints for the https port. This
			// will add targets for each API server which Kubernetes adds an endpoint to
			// the default/kubernetes service.
			relabel_configs: [{
				source_labels: ["__meta_kubernetes_namespace", "__meta_kubernetes_service_name", "__meta_kubernetes_endpoint_port_name"]
				action: "keep"
				regex:  "default;kubernetes;https"
			}]
		}, {
			// Scrape config for nodes (kubelet).
			//
			// Rather than connecting directly to the node, the scrape is proxied though the
			// Kubernetes apiserver. This means it will work if Prometheus is running out of
			// cluster, or can't connect to nodes for some other reason (e.g. because of
			// firewalling).
			job_name: "kubernetes-nodes"
			// Default to scraping over https. If required, just disable this or change to
			// `http`.
			scheme: "https"
			// This TLS & bearer token file config is used to connect to the actual scrape
			// endpoints for cluster components. This is separate to discovery auth
			// configuration because discovery & scraping are two separate concerns in
			// Prometheus. The discovery auth config is automatic if Prometheus runs inside
			// the cluster. Otherwise, more config options have to be provided within the
			// <kubernetes_sd_config>.
			tls_config: ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
			bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
			kubernetes_sd_configs: [{
				role: "node"
			}]
			relabel_configs: [{
				action: "labelmap"
				regex:  "__meta_kubernetes_node_label_(.+)"
			}, {
				target_label: "__address__"
				replacement:  "kubernetes.default.svc:443"
			}, {
				source_labels: ["__meta_kubernetes_node_name"]
				regex:        "(.+)"
				target_label: "__metrics_path__"
				replacement:  "/api/v1/nodes/${1}/proxy/metrics"
			}]
		}, {
			// Scrape config for Kubelet cAdvisor.
			//
			// This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
			// (those whose names begin with 'container_') have been removed from the
			// Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
			// retrieve those metrics.
			//
			// In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
			// HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
			// in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
			// the --cadvisor-port=0 Kubelet flag).
			//
			// This job is not necessary and should be removed in Kubernetes 1.6 and
			// earlier versions, or it will cause the metrics to be scraped twice.
			job_name: "kubernetes-cadvisor"
			// Default to scraping over https. If required, just disable this or change to
			// `http`.
			scheme: "https"
			// This TLS & bearer token file config is used to connect to the actual scrape
			// endpoints for cluster components. This is separate to discovery auth
			// configuration because discovery & scraping are two separate concerns in
			// Prometheus. The discovery auth config is automatic if Prometheus runs inside
			// the cluster. Otherwise, more config options have to be provided within the
			// <kubernetes_sd_config>.
			tls_config: ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
			bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
			kubernetes_sd_configs: [{
				role: "node"
			}]
			relabel_configs: [{
				action: "labelmap"
				regex:  "__meta_kubernetes_node_label_(.+)"
			}, {
				target_label: "__address__"
				replacement:  "kubernetes.default.svc:443"
			}, {
				source_labels: ["__meta_kubernetes_node_name"]
				regex:        "(.+)"
				target_label: "__metrics_path__"
				replacement:  "/api/v1/nodes/${1}/proxy/metrics/cadvisor"
			}]
		}, {
			// Scrape config for service endpoints.
			//
			// The relabeling allows the actual service scrape endpoint to be configured
			// via the following annotations:
			//
			// * `prometheus.io/scrape`: Only scrape services that have a value of `true`
			// * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
			// to set this to `https` & most likely set the `tls_config` of the scrape config.
			// * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
			// * `prometheus.io/port`: If the metrics are exposed on a different port to the
			// service then set this appropriately.
			job_name: "kubernetes-service-endpoints"
			kubernetes_sd_configs: [{
				role: "endpoints"
			}]
			relabel_configs: [{
				source_labels: ["__meta_kubernetes_service_annotation_prometheus_io_scrape"]
				action: "keep"
				regex:  true
			}, {
				source_labels: ["__meta_kubernetes_service_annotation_prometheus_io_scheme"]
				action:       "replace"
				target_label: "__scheme__"
				regex:        "(https?)"
			}, {
				source_labels: ["__meta_kubernetes_service_annotation_prometheus_io_path"]
				action:       "replace"
				target_label: "__metrics_path__"
				regex:        "(.+)"
			}, {
				source_labels: ["__address__", "__meta_kubernetes_service_annotation_prometheus_io_port"]
				action:       "replace"
				target_label: "__address__"
				regex:        "([^:]+)(?::\\d+)?;(\\d+)"
				replacement:  "$1:$2"
			}, {
				action: "labelmap"
				regex:  "__meta_kubernetes_service_label_(.+)"
			}, {
				source_labels: ["__meta_kubernetes_namespace"]
				action:       "replace"
				target_label: "kubernetes_namespace"
			}, {
				source_labels: ["__meta_kubernetes_service_name"]
				action:       "replace"
				target_label: "kubernetes_name"
			}]
		}, {
			// Example scrape config for probing services via the Blackbox Exporter.
			//
			// The relabeling allows the actual service scrape endpoint to be configured
			// via the following annotations:
			//
			// * `prometheus.io/probe`: Only probe services that have a value of `true`
			job_name:     "kubernetes-services"
			metrics_path: "/probe"
			params: module: ["http_2xx"]
			kubernetes_sd_configs: [{
				role: "service"
			}]
			relabel_configs: [{
				source_labels: ["__meta_kubernetes_service_annotation_prometheus_io_probe"]
				action: "keep"
				regex:  true
			}, {
				source_labels: ["__address__"]
				target_label: "__param_target"
			}, {
				target_label: "__address__"
				replacement:  "blackbox-exporter.example.com:9115"
			}, {
				source_labels: ["__param_target"]
				target_label: "app"
			}, {
				action: "labelmap"
				regex:  "__meta_kubernetes_service_label_(.+)"
			}, {
				source_labels: ["__meta_kubernetes_namespace"]
				target_label: "kubernetes_namespace"
			}, {
				source_labels: ["__meta_kubernetes_service_name"]
				target_label: "kubernetes_name"
			}]
		}, {
			// Example scrape config for probing ingresses via the Blackbox Exporter.
			//
			// The relabeling allows the actual ingress scrape endpoint to be configured
			// via the following annotations:
			//
			// * `prometheus.io/probe`: Only probe services that have a value of `true`
			job_name:     "kubernetes-ingresses"
			metrics_path: "/probe"
			params: module: ["http_2xx"]
			kubernetes_sd_configs: [{
				role: "ingress"
			}]
			relabel_configs: [{
				source_labels: ["__meta_kubernetes_ingress_annotation_prometheus_io_probe"]
				action: "keep"
				regex:  true
			}, {
				source_labels: ["__meta_kubernetes_ingress_scheme", "__address__", "__meta_kubernetes_ingress_path"]
				regex:        "(.+);(.+);(.+)"
				replacement:  "${1}://${2}${3}"
				target_label: "__param_target"
			}, {
				target_label: "__address__"
				replacement:  "blackbox-exporter.example.com:9115"
			}, {
				source_labels: ["__param_target"]
				target_label: "app"
			}, {
				action: "labelmap"
				regex:  "__meta_kubernetes_ingress_label_(.+)"
			}, {
				source_labels: ["__meta_kubernetes_namespace"]
				target_label: "kubernetes_namespace"
			}, {
				source_labels: ["__meta_kubernetes_ingress_name"]
				target_label: "kubernetes_name"
			}]
		}, {
			// Example scrape config for pods
			//
			// The relabeling allows the actual pod scrape endpoint to be configured via the
			// following annotations:
			//
			// * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
			// * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
			// * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
			// pod's declared ports (default is a port-free target if none are declared).
			job_name: "kubernetes-pods"
			kubernetes_sd_configs: [{
				role: "pod"
			}]
			relabel_configs: [{
				source_labels: ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
				action: "keep"
				regex:  true
			}, {
				source_labels: ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
				action:       "replace"
				target_label: "__metrics_path__"
				regex:        "(.+)"
			}, {
				source_labels: ["__address__", "__meta_kubernetes_pod_annotation_prometheus_io_port"]
				action:       "replace"
				regex:        "([^:]+)(?::\\d+)?;(\\d+)"
				replacement:  "$1:$2"
				target_label: "__address__"
			}, {
				action: "labelmap"
				regex:  "__meta_kubernetes_pod_label_(.+)"
			}, {
				source_labels: ["__meta_kubernetes_namespace"]
				action:       "replace"
				target_label: "kubernetes_namespace"
			}, {
				source_labels: ["__meta_kubernetes_pod_name"]
				action:       "replace"
				target_label: "kubernetes_pod_name"
			}]
		}]
	}
}