---
# Namespace that the namespaced monitoring resources in this file are placed in
# (they all set `namespace: prometheus`).
apiVersion: v1
kind: Namespace
metadata:
  name: prometheus
  labels:
    workload.edge.ncr.com: platform
  annotations:
    # pallet.edge.ncr.com/* annotations record creation time, package name,
    # source revision/URL, owning team and version of this manifest.
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
---
# ServiceAccount named by the Prometheus resource (spec.serviceAccountName) and
# used as the subject of the `prometheus` ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    # GKE Workload Identity annotation naming the Google service account this
    # ServiceAccount maps to. The ${...} placeholders are filled in outside
    # this file (templating tool not visible here — confirm).
    iam.gke.io/gcp-service-account: "o11y-${cluster_hash}@${gcp_project_id}.iam.gserviceaccount.com"
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
---
# Cluster-wide, read-only permissions for the `prometheus` ServiceAccount
# (granted through the ClusterRoleBinding of the same name).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
rules:
# Core-group objects: get/list/watch only.
- resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  apiGroups: [""]
  verbs: ["get", "list", "watch"]
# ConfigMaps may be read individually but not listed or watched.
- resources:
  - configmaps
  apiGroups: [""]
  verbs: ["get"]
- resources:
  - ingresses
  apiGroups:
  - networking.k8s.io
  verbs: ["get", "list", "watch"]
# Non-resource endpoint: allows GET on the API server's /metrics path.
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Binds the `prometheus` ClusterRole to the `prometheus` ServiceAccount in the
# `prometheus` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
roleRef:
  name: prometheus
  kind: ClusterRole
  apiGroup: rbac.authorization.k8s.io
subjects:
- name: prometheus
  namespace: prometheus
  kind: ServiceAccount
---
# Prometheus Operator Alertmanager instance in the `prometheus` namespace.
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: alertmanager
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels:
    # Quoted so a substituted value that looks numeric/boolean still parses as
    # a string (label values must be strings).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
spec:
  imagePullSecrets:
  - name: edge-docker-pull-secret
  resources:
    limits:
      cpu: "200m"
      memory: 256Mi
    requests:
      cpu: 10m
      memory: 64Mi
  # Empty label selector: no label filtering on AlertmanagerConfig objects.
  # No alertmanagerConfigNamespaceSelector is set in this spec.
  alertmanagerConfigSelector:
    matchLabels: {}
  logFormat: json
---
# Prometheus Operator Prometheus instance: single replica, runs as the
# `prometheus` ServiceAccount, scrapes every 60s with a 15s timeout.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels:
    # Quoted so a substituted value that looks numeric/boolean still parses as
    # a string (label values must be strings).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
spec:
  replicas: 1
  serviceAccountName: prometheus
  imagePullSecrets:
  - name: edge-docker-pull-secret
  resources:
    limits:
      cpu: "500m"
      memory: 2048Mi
    requests:
      cpu: 100m
      memory: 128Mi
  # Alerts are sent to the `web` port of the `alertmanager-operated` Service
  # in this namespace.
  alerting:
    alertmanagers:
    - name: alertmanager-operated
      namespace: prometheus
      port: web
  # Labels attached to data leaving this Prometheus (alerts, remote write,
  # federation); values are template placeholders filled in outside this file.
  externalLabels:
    cluster: "${cluster_uuid}"
    location: "${gcp_region}-${gcp_zone}"
    project_id: "${gcp_project_id}"
  # Empty selectors ({}) apply no filtering: PodMonitors and ServiceMonitors
  # are picked up from all namespaces regardless of labels.
  podMonitorNamespaceSelector: {}
  podMonitorSelector: {}
  # No label filtering on PrometheusRule objects; no ruleNamespaceSelector is
  # set in this spec.
  ruleSelector:
    matchLabels: {}
  scrapeInterval: 60s
  scrapeTimeout: 15s
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  # Annotations copied onto the Prometheus pods.
  podMetadata:
    annotations:
      pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
      pallet.edge.ncr.com/name: prometheus
      pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
      pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
      pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
      pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
---
# Recording rules derived from node_cpu_info and node_network_address_info.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-metrics-rules
  namespace: prometheus
  labels:
    prometheus: prometheus
    role: metrics-rules
    # Quoted so a substituted value that looks numeric/boolean still parses as
    # a string (label values must be strings).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
spec:
  groups:
  - name: node-exporter-rules
    rules:
    # Count of node_cpu_info series after removing the per-CPU and
    # scrape-related labels listed in `without`.
    - expr: count(node_cpu_info) without (cpu,core,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)
      record: node_logical_cores_total
    # Inner count keeps `core` (one series per distinct core); the outer count
    # then counts those distinct cores.
    - expr: count(count(node_cpu_info) without (cpu,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)) without (core)
      record: node_physical_cores_total
    # Copy of node_network_address_info restricted to scope="global".
    - expr: node_network_address_info{scope="global"}
      record: node_network_global_address_info
---
# Scrapes Services labelled component=apiserver,provider=kubernetes in the
# `default` namespace over HTTPS every 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-apiserver
  labels:
    app.kubernetes.io/name: apiserver
    # Quoted so a substituted value that looks numeric/boolean still parses as
    # a string (label values must be strings).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  namespace: prometheus
  annotations:
    # Newline-separated metric names. NOTE(review): presumably an allow-list
    # read by separate edge tooling; nothing in this file consumes it — confirm.
    monitoring.edge.ncr.com/allowed-metrics: |
      apiserver_current_inflight_requests
      apiserver_current_inqueue_requests
      apiserver_flowcontrol_current_executing_requests
      apiserver_flowcontrol_current_inqueue_requests
      apiserver_flowcontrol_dispatched_requests_total
      apiserver_flowcontrol_request_execution_seconds_bucket
      apiserver_flowcontrol_request_execution_seconds_count
      apiserver_flowcontrol_request_execution_seconds_sum
      apiserver_flowcontrol_request_wait_duration_seconds_bucket
      apiserver_flowcontrol_request_wait_duration_seconds_count
      apiserver_flowcontrol_request_wait_duration_seconds_sum
      apiserver_request_duration_seconds_bucket
      apiserver_request_duration_seconds_count
      apiserver_request_duration_seconds_sum
      apiserver_request_terminations_total
      apiserver_request_total
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
spec:
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
  - port: https
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    interval: 5m
    # Each rule drops samples whose metric name (__name__) matches `regex`.
    # The regexes are deliberately kept on a single line: folding them would
    # insert whitespace into the pattern.
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(debugging|disk|server).*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_controller_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_step_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    # Matches "<__name__>;<le>" (source labels are joined with the default
    # ";" separator): drops the listed histogram buckets of
    # apiserver_request_duration_seconds.
    - action: drop
      regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
      sourceLabels:
      - __name__
      - le
    scheme: https
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      serverName: kubernetes
  # The value of the Service's `component` label is used as the job name.
  jobLabel: component
  namespaceSelector:
    matchNames:
    - default
293 - default
294---
295apiVersion: monitoring.coreos.com/v1
296kind: ServiceMonitor
297metadata:
298 name: kubelet
299 labels:
300 app.kubernetes.io/name: kubelet
301 cluster_hash: ${cluster_hash}
302 cluster_uuid: ${cluster_uuid}
303 namespace: prometheus
304 annotations:
305 monitoring.edge.ncr.com/allowed-metrics: |
306 container_cpu_cfs_throttled_seconds_total
307 container_cpu_usage_seconds_total
308 container_fs_reads_bytes_total
309 container_fs_writes_bytes_total
310 container_memory_usage_bytes
311 container_memory_working_set_bytes
312 container_network_receive_bytes_total
313 container_network_receive_errors_total
314 container_network_receive_packets_dropped_total
315 container_network_transmit_bytes_total
316 container_network_transmit_errors_total
317 container_network_transmit_packets_dropped_total
318 container_oom_events_total
319 container_processes
320 kubernetes_build_info
321 machine_cpu_cores
322 machine_memory_bytes
323 pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
324 pallet.edge.ncr.com/name: prometheus
325 pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
326 pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
327 pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
328 pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
329spec:
330 selector:
331 matchLabels:
332 app.kubernetes.io/name: kubelet
333 endpoints:
334 - port: https-metrics
335 bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
336 honorLabels: true
337 interval: 60s
338 metricRelabelings:
339 - action: drop
340 regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
341 sourceLabels:
342 - __name__
343 - action: drop
344 regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
345 sourceLabels:
346 - __name__
347 - action: drop
348 regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
349 sourceLabels:
350 - __name__
351 - action: drop
352 regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
353 sourceLabels:
354 - __name__
355 - action: drop
356 regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
357 sourceLabels:
358 - __name__
359 - action: drop
360 regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
361 sourceLabels:
362 - __name__
363 - action: drop
364 regex: transformation_(transformation_latencies_microseconds|failures_total)
365 sourceLabels:
366 - __name__
367 - action: drop
368 regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablish
ing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
369 sourceLabels:
370 - __name__
371 relabelings:
372 - sourceLabels:
373 - __metrics_path__
374 targetLabel: metrics_path
375 scheme: https
376 tlsConfig:
377 insecureSkipVerify: true
378 - port: https-metrics
379 bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
380 honorLabels: false
381 honorTimestamps: false
382 interval: 60s
383 metricRelabelings:
384 - action: drop
385 regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
386 sourceLabels:
387 - __name__
388 - action: drop
389 regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
390 sourceLabels:
391 - __name__
392 - pod
393 - namespace
394 - action: drop
395 regex: (container_blkio_device_usage_total);.+
396 sourceLabels:
397 - __name__
398 - container
399 path: /metrics/cadvisor
400 relabelings:
401 - sourceLabels:
402 - __metrics_path__
403 targetLabel: metrics_path
404 scheme: https
405 tlsConfig:
406 insecureSkipVerify: true
407 - port: https-metrics
408 bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
409 honorLabels: true
410 interval: 60s
411 path: /metrics/probes
412 relabelings:
413 - sourceLabels:
414 - __metrics_path__
415 targetLabel: metrics_path
416 scheme: https
417 tlsConfig:
418 insecureSkipVerify: true
419 jobLabel: app.kubernetes.io/name
420 namespaceSelector:
421 matchNames:
422 - kube-system
423---
424apiVersion: monitoring.coreos.com/v1
425kind: ServiceMonitor
426metadata:
427 name: prometheus
428 namespace: prometheus
429 annotations:
430 monitoring.edge.ncr.com/allowed-metrics: |
431 prometheus_http_requests_total
432 promhttp_metric_handler_requests_total
433 prometheus_http_request_duration_seconds
434 prometheus_http_request_duration_seconds_sum
435 prometheus_http_request_duration_seconds_count
436 prometheus_http_request_duration_seconds_bucket
437 prometheus_http_response_size_bytes
438 prometheus_http_response_size_bytes_bucket
439 prometheus_http_response_size_bytes_count
440 prometheus_http_response_size_bytes_sum
441 prometheus_rule_evaluation_failures_total
442 prometheus_rule_evaluations_total
443 scrape_duration_seconds
444 scrape_samples_post_metric_relabeling
445 scrape_samples_scraped
446 up
447 pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
448 pallet.edge.ncr.com/name: prometheus
449 pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
450 pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
451 pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
452 pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
453 labels:
454 cluster_hash: ${cluster_hash}
455 cluster_uuid: ${cluster_uuid}
456spec:
457 selector:
458 matchLabels:
459 operated-prometheus: 'true'
460 endpoints:
461 - port: web
View as plain text