...
# Namespace "prometheus", labelled as a platform workload.
apiVersion: v1
kind: Namespace
metadata:
  name: prometheus
  labels:
    workload.edge.ncr.com: platform
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
14---
# ServiceAccount "prometheus" in the "prometheus" namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: prometheus
  labels: {}
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
28---
# Cluster-wide read permissions granted under the name "prometheus".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  labels: {}
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
rules:
  # Core API group (""): read/watch access to nodes, node metrics,
  # services, endpoints and pods.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Core API group: ConfigMaps may only be fetched individually (get).
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  # Ingress objects of the networking.k8s.io group.
  - apiGroups:
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  # Non-resource endpoint: GET on /metrics.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
61---
# Binds ClusterRole "prometheus" to ServiceAccount "prometheus" in the
# "prometheus" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  labels: {}
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: prometheus
82---
# ExternalSecret "gcp-api-key": reads one remote entry through the
# ClusterSecretStore "gcp-provider" and exposes it as key "key.json" of the
# target Secret "gcp-api-key".
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: gcp-api-key
  namespace: prometheus
  labels: {}
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  refreshInterval: 1m
  secretStoreRef:
    kind: ClusterSecretStore
    name: gcp-provider
  target:
    name: gcp-api-key
    creationPolicy: Owner
  data:
    - secretKey: key.json
      remoteRef:
        # ${cluster_uuid} is a placeholder; presumably filled in by a
        # templating/substitution step before apply (not shown in this file).
        key: 'o11y-${cluster_uuid}-gcp-api-key'
108---
# Alertmanager "alertmanager" in the "prometheus" namespace.
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: alertmanager
  namespace: prometheus
  labels:
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  logFormat: json
  imagePullSecrets:
    - name: edge-docker-pull-secret
  # Empty matchLabels: no label restriction on AlertmanagerConfig objects.
  alertmanagerConfigSelector:
    matchLabels: {}
  resources:
    requests:
      cpu: 10m
      memory: 64Mi
    limits:
      cpu: 200m
      memory: 256Mi
  # Soft preference (not a requirement) for nodes whose
  # node.ncr.com/class label is "server".
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: node.ncr.com/class
                operator: In
                values:
                  - server
147---
# Prometheus "prometheus": a single replica running as ServiceAccount
# "prometheus" and sending alerts to the "alertmanager-operated" service.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: prometheus
  namespace: prometheus
  labels:
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  replicas: 1
  serviceAccountName: prometheus
  imagePullSecrets:
    - name: edge-docker-pull-secret
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 2048Mi
  scrapeInterval: 60s
  scrapeTimeout: 15s
  alerting:
    alertmanagers:
      - name: alertmanager-operated
        namespace: prometheus
        port: web
  # Labels attached to data leaving this Prometheus; quoted for the same
  # reason as the metadata labels above.
  externalLabels:
    cluster: "${cluster_uuid}"
    location: "${gcp_region}-${gcp_zone}"
    project_id: "${gcp_project_id}"
  # Empty selectors: no label restriction on the monitors, namespaces and
  # rules picked up by this instance.
  podMonitorNamespaceSelector: {}
  podMonitorSelector: {}
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
  ruleSelector:
    matchLabels: {}
  # Soft preference (not a requirement) for nodes whose
  # node.ncr.com/class label is "server".
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: node.ncr.com/class
                operator: In
                values:
                  - server
  # The same pallet annotations are copied onto the generated pods.
  podMetadata:
    annotations:
      pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
      pallet.edge.ncr.com/name: prometheus
      pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
      pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
      pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
      pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
210---
# Recording rules derived from node_cpu_info and node_network_address_info.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-metrics-rules
  namespace: prometheus
  labels:
    prometheus: prometheus
    role: metrics-rules
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  # Pallet annotations: creation time, pallet name, source revision/URL,
  # owning team and version.
  annotations:
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  groups:
    - name: node-exporter-rules
      rules:
        # Number of node_cpu_info series left after removing the per-CPU and
        # scrape-related labels (including `core`).
        - record: node_logical_cores_total
          expr: count(node_cpu_info) without (cpu,core,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)
        # Inner count keeps `core`; the outer count then counts the distinct
        # `core` values.
        - record: node_physical_cores_total
          expr: count(count(node_cpu_info) without (cpu,cachesize,family,microcode,model,model_name,package,stepping,vendor,namespace,container,endpoint,pod,job,service)) without (core)
        # Only the address series whose scope label is "global".
        - record: node_network_global_address_info
          expr: node_network_address_info{scope="global"}
238---
# ServiceMonitor "kube-apiserver": scrapes the service labelled
# component=apiserver / provider=kubernetes in the "default" namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-apiserver
  namespace: prometheus
  labels:
    app.kubernetes.io/name: apiserver
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  annotations:
    # Newline-separated metric names; presumably read by separate
    # edge tooling - verify against the consumer of this annotation.
    monitoring.edge.ncr.com/allowed-metrics: |
      apiserver_current_inflight_requests
      apiserver_current_inqueue_requests
      apiserver_flowcontrol_current_executing_requests
      apiserver_flowcontrol_current_inqueue_requests
      apiserver_flowcontrol_dispatched_requests_total
      apiserver_flowcontrol_request_execution_seconds_bucket
      apiserver_flowcontrol_request_execution_seconds_count
      apiserver_flowcontrol_request_execution_seconds_sum
      apiserver_flowcontrol_request_wait_duration_seconds_bucket
      apiserver_flowcontrol_request_wait_duration_seconds_count
      apiserver_flowcontrol_request_wait_duration_seconds_sum
      apiserver_request_duration_seconds_bucket
      apiserver_request_duration_seconds_count
      apiserver_request_duration_seconds_sum
      apiserver_request_terminations_total
      apiserver_request_total
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  jobLabel: component
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
    - port: https
      scheme: https
      interval: 5m
      # NOTE(review): newer prometheus-operator releases document
      # bearerTokenFile as deprecated in favour of `authorization` - confirm
      # against the operator version in use before changing.
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      tlsConfig:
        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        serverName: kubernetes
      # Every rule below drops the series whose source label value(s) match
      # the regex. Each regex must stay on ONE line: a wrapped plain scalar
      # is folded with a space, which would corrupt the pattern.
      metricRelabelings:
        - action: drop
          sourceLabels:
            - __name__
          regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
        - action: drop
          sourceLabels:
            - __name__
          regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
        - action: drop
          sourceLabels:
            - __name__
          regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
        - action: drop
          sourceLabels:
            - __name__
          regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
        - action: drop
          sourceLabels:
            - __name__
          regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
        - action: drop
          sourceLabels:
            - __name__
          regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
        - action: drop
          sourceLabels:
            - __name__
          regex: transformation_(transformation_latencies_microseconds|failures_total)
        - action: drop
          sourceLabels:
            - __name__
          regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
        - action: drop
          sourceLabels:
            - __name__
          regex: etcd_(debugging|disk|server).*
        - action: drop
          sourceLabels:
            - __name__
          regex: apiserver_admission_controller_admission_latencies_seconds_.*
        - action: drop
          sourceLabels:
            - __name__
          regex: apiserver_admission_step_admission_latencies_seconds_.*
        # Matched against "<__name__>;<le>": drops the listed `le` buckets of
        # apiserver_request_duration_seconds_bucket.
        - action: drop
          sourceLabels:
            - __name__
            - le
          regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
339---
# ServiceMonitor "kubelet": scrapes three paths of the "https-metrics" port
# on the service labelled app.kubernetes.io/name=kubelet in "kube-system":
# the default path, /metrics/cadvisor and /metrics/probes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubelet
  namespace: prometheus
  labels:
    app.kubernetes.io/name: kubelet
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  annotations:
    # Newline-separated metric names; presumably read by separate
    # edge tooling - verify against the consumer of this annotation.
    monitoring.edge.ncr.com/allowed-metrics: |
      container_cpu_cfs_throttled_seconds_total
      container_cpu_usage_seconds_total
      container_fs_reads_bytes_total
      container_fs_writes_bytes_total
      container_memory_usage_bytes
      container_memory_working_set_bytes
      container_network_receive_bytes_total
      container_network_receive_errors_total
      container_network_receive_packets_dropped_total
      container_network_transmit_bytes_total
      container_network_transmit_errors_total
      container_network_transmit_packets_dropped_total
      container_oom_events_total
      container_processes
      kubernetes_build_info
      machine_cpu_cores
      machine_memory_bytes
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  jobLabel: app.kubernetes.io/name
  namespaceSelector:
    matchNames:
      - kube-system
  selector:
    matchLabels:
      app.kubernetes.io/name: kubelet
  endpoints:
    # Endpoint 1: no explicit path is set for this entry.
    - port: https-metrics
      scheme: https
      interval: 60s
      honorLabels: true
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      # NOTE(review): certificate verification is disabled for this scrape;
      # confirm this is intentional (a caFile/serverName pair would restore
      # verification where the serving certificate allows it).
      tlsConfig:
        insecureSkipVerify: true
      # Copy the scrape path into a regular `metrics_path` label.
      relabelings:
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
      # Every rule below drops the series whose metric name matches the
      # regex. Each regex must stay on ONE line: a wrapped plain scalar is
      # folded with a space, which would corrupt the pattern.
      metricRelabelings:
        - action: drop
          sourceLabels:
            - __name__
          regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
        - action: drop
          sourceLabels:
            - __name__
          regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
        - action: drop
          sourceLabels:
            - __name__
          regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
        - action: drop
          sourceLabels:
            - __name__
          regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
        - action: drop
          sourceLabels:
            - __name__
          regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
        - action: drop
          sourceLabels:
            - __name__
          regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
        - action: drop
          sourceLabels:
            - __name__
          regex: transformation_(transformation_latencies_microseconds|failures_total)
        - action: drop
          sourceLabels:
            - __name__
          regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
    # Endpoint 2: /metrics/cadvisor.
    - port: https-metrics
      scheme: https
      path: /metrics/cadvisor
      interval: 60s
      honorLabels: false
      honorTimestamps: false
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      # NOTE(review): certificate verification is disabled - see endpoint 1.
      tlsConfig:
        insecureSkipVerify: true
      relabelings:
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
      metricRelabelings:
        - action: drop
          sourceLabels:
            - __name__
          regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
        # Matched against "<__name__>;<pod>;<namespace>": the trailing ";;"
        # only matches when both pod and namespace are empty.
        - action: drop
          sourceLabels:
            - __name__
            - pod
            - namespace
          regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
        # Matched against "<__name__>;<container>": ".+" only matches when
        # the container label is non-empty.
        - action: drop
          sourceLabels:
            - __name__
            - container
          regex: (container_blkio_device_usage_total);.+
    # Endpoint 3: /metrics/probes.
    - port: https-metrics
      scheme: https
      path: /metrics/probes
      interval: 60s
      honorLabels: true
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      # NOTE(review): certificate verification is disabled - see endpoint 1.
      tlsConfig:
        insecureSkipVerify: true
      relabelings:
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
468---
# ServiceMonitor "prometheus": scrapes the "web" port of services labelled
# operated-prometheus=true.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus
  namespace: prometheus
  labels:
    # Placeholders are quoted so the substituted text is always read as a
    # string: label values must be strings, and an unquoted all-digit or
    # "12e5"-shaped hash would otherwise be typed as a number.
    # NOTE(review): assumes substitution happens on the manifest text before
    # it is parsed - confirm against the deployment tooling.
    cluster_hash: "${cluster_hash}"
    cluster_uuid: "${cluster_uuid}"
  annotations:
    # Newline-separated metric names; presumably read by separate
    # edge tooling - verify against the consumer of this annotation.
    monitoring.edge.ncr.com/allowed-metrics: |
      prometheus_http_requests_total
      promhttp_metric_handler_requests_total
      prometheus_http_request_duration_seconds
      prometheus_http_request_duration_seconds_sum
      prometheus_http_request_duration_seconds_count
      prometheus_http_request_duration_seconds_bucket
      prometheus_http_response_size_bytes
      prometheus_http_response_size_bytes_bucket
      prometheus_http_response_size_bytes_count
      prometheus_http_response_size_bytes_sum
      prometheus_rule_evaluation_failures_total
      prometheus_rule_evaluations_total
      scrape_duration_seconds
      scrape_samples_post_metric_relabeling
      scrape_samples_scraped
      up
    pallet.edge.ncr.com/created: '2023-02-16T21:26:39Z'
    pallet.edge.ncr.com/name: prometheus
    pallet.edge.ncr.com/revision: '696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/source: 'https://github.com/ncrvoyix-swt-retail/edge-infra/tree/696897a3df910b6e84a88c9336907a17b18159c1'
    pallet.edge.ncr.com/team: '@ncrvoyix-swt-retail/edge-o11y'
    pallet.edge.ncr.com/version: '7.7.7-rc.1676582799+commit.696897a'
spec:
  selector:
    matchLabels:
      # Must stay quoted: label values are strings, and a bare true would
      # be parsed as a boolean.
      operated-prometheus: 'true'
  endpoints:
    - port: web
View as plain text