...
{
  "displayName": "ALK Test AP 1",
  "documentation": {
    "content": "Test alert for service routing in PagerDuty",
    "mimeType": "text/markdown"
  },
  "userLabels": {
    "managed": "true",
    "negative": "plus",
    "policy-owner": "observability",
    "severity": "critical",
    "validation": "test"
  },
  "conditions": [
    {
      "displayName": "Pod Restarts PD Test Critical",
      "conditionMonitoringQueryLanguage": {
        "query": "{\n {\n t_0: prometheus_target :: prometheus.googleapis.com/kube_pod_container_status_restarts_total/counter\n| filter (metric.cluster_name == 'observability-gke-cluster-july-25')\n| group_by 1m,\n [value_kube_pod_container_status_restarts_total_aggregate:\n aggregate(value.kube_pod_container_status_restarts_total)]\n| every 1m\n ;\nt_1: prometheus_target :: prometheus.googleapis.com/kube_pod_annotations/gauge\n| filter (metric.cluster_name == 'observability-gke-cluster-july-25')\n| group_by 1m,\n [value_kube_pod_annotations_mean: mean(value.kube_pod_annotations)]\n| every 1m\n}\n| join\n} | condition t_0.value_kube_pod_container_status_restarts_total_aggregate >= 2",
        "duration": "0s",
        "trigger": {
          "count": 1
        }
      }
    }
  ],
  "combiner": "OR",
  "enabled": false,
  "notificationChannels": [
    "email TH"
  ],
  "alertStrategy": {
    "autoClose": "1800s"
  }
}
View as plain text