# ...
# Namespace "prometheus"; labelled as a platform workload.
apiVersion: v1
kind: Namespace
metadata:
  name: prometheus
  labels:
    workload.edge.ncr.com: platform
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
---
# ServiceAccount "prometheus" in the "prometheus" namespace. It is the subject
# of the "prometheus" ClusterRoleBinding and is referenced by the Prometheus
# resource through spec.serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
---
# ClusterRole "prometheus": read-only access used for target discovery and
# metric scraping. No rule grants a mutating verb.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
rules:
# Core API group: get/list/watch on nodes, node metrics, services, endpoints
# and pods.
- resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  apiGroups: [""]
  verbs: ["get", "list", "watch"]
# ConfigMaps may only be fetched by name (no list/watch).
- resources:
  - configmaps
  apiGroups: [""]
  verbs: ["get"]
- resources:
  - ingresses
  apiGroups:
  - networking.k8s.io
  verbs: ["get", "list", "watch"]
# Non-resource URL: allows GET on the /metrics path itself.
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount in the
# "prometheus" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
roleRef:
  name: prometheus
  kind: ClusterRole
  apiGroup: rbac.authorization.k8s.io
subjects:
- name: prometheus
  namespace: prometheus
  kind: ServiceAccount
---
# ExternalSecret "gcp-api-key": reads the remote entry
# "o11y-${cluster_uuid}-gcp-api-key" through the ClusterSecretStore
# "gcp-provider" and exposes it under the key "key.json" of the target Secret
# "gcp-api-key".
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: gcp-api-key
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels: {}
spec:
  data:
  - remoteRef:
      key: o11y-${cluster_uuid}-gcp-api-key
    secretKey: key.json
  # The remote value is re-read every minute.
  refreshInterval: 1m
  secretStoreRef:
    name: gcp-provider
    kind: ClusterSecretStore
  target:
    name: gcp-api-key
    # Owner: the generated Secret is owned by this ExternalSecret.
    creationPolicy: Owner
---
# Alertmanager "alertmanager" (prometheus-operator custom resource) in the
# "prometheus" namespace, logging in JSON.
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: alertmanager
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels:
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
spec:
  imagePullSecrets:
  - name: edge-docker-pull-secret
  resources:
    limits:
      cpu: "200m"
      memory: 256Mi
    requests:
      cpu: 10m
      memory: 64Mi
  # Empty matchLabels: under Kubernetes label-selector semantics this matches
  # every AlertmanagerConfig object that is in scope.
  alertmanagerConfigSelector:
    matchLabels: {}
  logFormat: json
---
# Prometheus "prometheus" (prometheus-operator custom resource): a single
# replica running as the "prometheus" ServiceAccount and sending alerts to the
# "alertmanager-operated" service on its "web" port.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels:
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
spec:
  replicas: 1
  serviceAccountName: prometheus
  imagePullSecrets:
  - name: edge-docker-pull-secret
  resources:
    limits:
      cpu: "500m"
      memory: 2048Mi
    requests:
      cpu: 100m
      memory: 128Mi
  alerting:
    alertmanagers:
    - name: alertmanager-operated
      namespace: prometheus
      port: web
  # Labels attached to data leaving this Prometheus (alerts, remote systems).
  externalLabels:
    cluster: "${cluster_uuid}"
    location: "${gcp_region}-${gcp_zone}"
    project_id: "${gcp_project_id}"
  # Empty selectors: PodMonitors and ServiceMonitors are not filtered by label
  # or by namespace.
  podMonitorNamespaceSelector: {}
  podMonitorSelector: {}
  ruleSelector:
    matchLabels: {}
  # Default scrape cadence; individual ServiceMonitor endpoints may override
  # the interval.
  scrapeInterval: 60s
  scrapeTimeout: 15s
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  # Annotations copied onto the Prometheus pods.
  podMetadata:
    annotations:
      pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
      pallet.edge.ncr.com/name: prometheus
      pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
      pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
      pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
      pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
---
# PrometheusRule "prometheus-metrics-rules": recording rules derived from
# node_cpu_info and node_network_address_info.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-metrics-rules
  namespace: prometheus
  labels:
    prometheus: prometheus
    role: metrics-rules
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  annotations:
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
spec:
  groups:
  - name: node-exporter-rules
    rules:
    # Counts node_cpu_info series after dropping the per-CPU, per-core and
    # scrape-target labels.
    - expr: count(node_cpu_info) without (cpu,core,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)
      record: node_logical_cores_total
    # Two-step count: the inner aggregation keeps the `core` label, the outer
    # one counts the distinct `core` values that remain.
    - expr: count(count(node_cpu_info) without (cpu,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)) without (core)
      record: node_physical_cores_total
    # Keeps only address series whose scope label is "global".
    - expr: node_network_address_info{scope="global"}
      record: node_network_global_address_info
---
# ServiceMonitor "kube-apiserver": scrapes the "https" port of Services in the
# "default" namespace labelled component=apiserver, provider=kubernetes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-apiserver
  labels:
    app.kubernetes.io/name: apiserver
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  namespace: prometheus
  annotations:
    # One metric name per line. Presumably an allow-list consumed by edge
    # tooling; nothing in this manifest enforces it (TODO confirm).
    monitoring.edge.ncr.com/allowed-metrics: |
      apiserver_current_inflight_requests
      apiserver_current_inqueue_requests
      apiserver_flowcontrol_current_executing_requests
      apiserver_flowcontrol_current_inqueue_requests
      apiserver_flowcontrol_dispatched_requests_total
      apiserver_flowcontrol_request_execution_seconds_bucket
      apiserver_flowcontrol_request_execution_seconds_count
      apiserver_flowcontrol_request_execution_seconds_sum
      apiserver_flowcontrol_request_wait_duration_seconds_bucket
      apiserver_flowcontrol_request_wait_duration_seconds_count
      apiserver_flowcontrol_request_wait_duration_seconds_sum
      apiserver_request_duration_seconds_bucket
      apiserver_request_duration_seconds_count
      apiserver_request_duration_seconds_sum
      apiserver_request_terminations_total
      apiserver_request_total
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
spec:
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
  - port: https
    # NOTE(review): bearerTokenFile is marked deprecated in recent
    # prometheus-operator releases; confirm against the operator version in use.
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    # NOTE(review): 5m equals Prometheus' default staleness/lookback window,
    # so a late scrape can leave gaps in queries; confirm this is intended.
    interval: 5m
    # Every rule below drops series whose joined source-label values match the
    # regex. The regexes are kept on a single line: a folded line break would
    # insert a space into the pattern.
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(debugging|disk|server).*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_controller_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_admission_step_admission_latencies_seconds_.*
      sourceLabels:
      - __name__
    # Matches "<metric name>;<le>" (relabeling joins source labels with ";" by
    # default) and drops the listed histogram buckets.
    - action: drop
      regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
      sourceLabels:
      - __name__
      - le
    scheme: https
    # The server certificate is verified against the in-pod service-account CA
    # under the name "kubernetes".
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      serverName: kubernetes
  # The job label value is taken from the Service's "component" label.
  jobLabel: component
  namespaceSelector:
    matchNames:
    - default
---
# ServiceMonitor "kubelet": scrapes three paths on the "https-metrics" port of
# Services in "kube-system" labelled app.kubernetes.io/name=kubelet: the
# default metrics path, /metrics/cadvisor and /metrics/probes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubelet
  labels:
    app.kubernetes.io/name: kubelet
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  namespace: prometheus
  annotations:
    # One metric name per line. Presumably an allow-list consumed by edge
    # tooling; nothing in this manifest enforces it (TODO confirm).
    monitoring.edge.ncr.com/allowed-metrics: |
      container_cpu_cfs_throttled_seconds_total
      container_cpu_usage_seconds_total
      container_fs_reads_bytes_total
      container_fs_writes_bytes_total
      container_memory_usage_bytes
      container_memory_working_set_bytes
      container_network_receive_bytes_total
      container_network_receive_errors_total
      container_network_receive_packets_dropped_total
      container_network_transmit_bytes_total
      container_network_transmit_errors_total
      container_network_transmit_packets_dropped_total
      container_oom_events_total
      container_processes
      kubernetes_build_info
      machine_cpu_cores
      machine_memory_bytes
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kubelet
  endpoints:
  # Endpoint 1: default metrics path (no `path` set).
  - port: https-metrics
    # NOTE(review): bearerTokenFile is marked deprecated in recent
    # prometheus-operator releases; confirm against the operator version in use.
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: true
    interval: 60s
    # Each rule drops series whose metric name matches the regex. The regexes
    # are kept on a single line: a folded line break would insert a space into
    # the pattern.
    metricRelabelings:
    - action: drop
      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
      sourceLabels:
      - __name__
    - action: drop
      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
      sourceLabels:
      - __name__
    - action: drop
      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
      sourceLabels:
      - __name__
    - action: drop
      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
      sourceLabels:
      - __name__
    - action: drop
      regex: transformation_(transformation_latencies_microseconds|failures_total)
      sourceLabels:
      - __name__
    - action: drop
      regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
      sourceLabels:
      - __name__
    # Copies the scraped path into a regular `metrics_path` label.
    relabelings:
    - sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    # NOTE(review): verification of the kubelet serving certificate is
    # disabled on all three endpoints; confirm this is acceptable.
    tlsConfig:
      insecureSkipVerify: true
  # Endpoint 2: /metrics/cadvisor (container metrics).
  - port: https-metrics
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: false
    honorTimestamps: false
    interval: 60s
    metricRelabelings:
    - action: drop
      regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
      sourceLabels:
      - __name__
    # Matches "<name>;<pod>;<namespace>" with empty pod and namespace: the
    # listed metrics are dropped only for series carrying neither label.
    - action: drop
      regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
      sourceLabels:
      - __name__
      - pod
      - namespace
    # Matches "<name>;<container>" with a non-empty container value.
    - action: drop
      regex: (container_blkio_device_usage_total);.+
      sourceLabels:
      - __name__
      - container
    path: /metrics/cadvisor
    relabelings:
    - sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    tlsConfig:
      insecureSkipVerify: true
  # Endpoint 3: /metrics/probes.
  - port: https-metrics
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    honorLabels: true
    interval: 60s
    path: /metrics/probes
    relabelings:
    - sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    tlsConfig:
      insecureSkipVerify: true
  # The job label value is taken from the Service's app.kubernetes.io/name
  # label.
  jobLabel: app.kubernetes.io/name
  namespaceSelector:
    matchNames:
    - kube-system
---
# ServiceMonitor "prometheus": scrapes the "web" port of Services labelled
# operated-prometheus="true".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    # One metric name per line. Presumably an allow-list consumed by edge
    # tooling; nothing in this manifest enforces it (TODO confirm).
    monitoring.edge.ncr.com/allowed-metrics: |
      prometheus_http_requests_total
      promhttp_metric_handler_requests_total
      prometheus_http_request_duration_seconds
      prometheus_http_request_duration_seconds_sum
      prometheus_http_request_duration_seconds_count
      prometheus_http_request_duration_seconds_bucket
      prometheus_http_response_size_bytes
      prometheus_http_response_size_bytes_bucket
      prometheus_http_response_size_bytes_count
      prometheus_http_response_size_bytes_sum
      prometheus_rule_evaluation_failures_total
      prometheus_rule_evaluations_total
      scrape_duration_seconds
      scrape_samples_post_metric_relabeling
      scrape_samples_scraped
      up
    pallet.edge.ncr.com/created: "2023-02-16T21:26:39Z"
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: 696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/source: https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: 7.7.7-rc.1676582799+commit.696897a
  labels:
    # ${...} placeholders are quoted so the substituted value is always read
    # as a string (assumes text-level substitution; confirm).
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
spec:
  selector:
    matchLabels:
      operated-prometheus: 'true'
  endpoints:
  - port: web
# View as plain text