...
---
# ServiceMonitor for the Kubernetes API server.
# Selects Services labelled component=apiserver / provider=kubernetes in the
# `default` namespace and scrapes their `https` port every 5 minutes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-apiserver
  labels:
    app.kubernetes.io/name: apiserver
  annotations:
    # One metric name per line.
    # NOTE(review): presumably read as an allow-list by external tooling;
    # nothing in this manifest consumes it - confirm with the owner.
    monitoring.edge.ncr.com/allowed-metrics: |
      apiserver_current_inflight_requests
      apiserver_current_inqueue_requests
      apiserver_flowcontrol_current_executing_requests
      apiserver_flowcontrol_current_inqueue_requests
      apiserver_flowcontrol_dispatched_requests_total
      apiserver_flowcontrol_request_execution_seconds_bucket
      apiserver_flowcontrol_request_execution_seconds_count
      apiserver_flowcontrol_request_execution_seconds_sum
      apiserver_flowcontrol_request_wait_duration_seconds_bucket
      apiserver_flowcontrol_request_wait_duration_seconds_count
      apiserver_flowcontrol_request_wait_duration_seconds_sum
      apiserver_request_duration_seconds_bucket
      apiserver_request_duration_seconds_count
      apiserver_request_duration_seconds_sum
      apiserver_request_terminations_total
      apiserver_request_total
spec:
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  endpoints:
    - port: https
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      interval: 5m
      # Every rule below is a `drop` keyed on the metric name (`__name__`);
      # relabel regexes are fully anchored, so each alternative must match the
      # whole name. Regex values are single-quoted so YAML never reinterprets them.
      metricRelabelings:
        # kubelet_* series listed by name.
        - action: drop
          regex: 'kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)'
          sourceLabels:
            - __name__
        # scheduler_* series listed by name.
        - action: drop
          regex: 'scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)'
          sourceLabels:
            - __name__
        # apiserver_* series listed by name, plus every request_sli_* / request_slo_* series.
        - action: drop
          regex: 'apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|request_sli_.*|request_slo_.*)'
          sourceLabels:
            - __name__
        # kubelet_docker_* series listed by name.
        - action: drop
          regex: 'kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)'
          sourceLabels:
            - __name__
        # reflector_* series listed by name.
        - action: drop
          regex: 'reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)'
          sourceLabels:
            - __name__
        # etcd_* helper/cache/latency series, plus everything starting with etcd_request_duration_seconds.
        - action: drop
          regex: 'etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary|request_duration_seconds.*)'
          sourceLabels:
            - __name__
        # transformation_* series listed by name.
        - action: drop
          regex: 'transformation_(transformation_latencies_microseconds|failures_total)'
          sourceLabels:
            - __name__
        # Per-controller workqueue series and a few miscellaneous names.
        # The pattern must stay on ONE physical line: wrapping it splits a metric
        # name in two and the anchored regex then matches nothing.
        - action: drop
          regex: '(admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)'
          sourceLabels:
            - __name__
        # Everything under etcd_debugging*, etcd_disk* and etcd_server*.
        - action: drop
          regex: 'etcd_(debugging|disk|server).*'
          sourceLabels:
            - __name__
        # Admission-controller latency series.
        - action: drop
          regex: 'apiserver_admission_controller_admission_latencies_seconds_.*'
          sourceLabels:
            - __name__
        # Admission-step latency series.
        - action: drop
          regex: 'apiserver_admission_step_admission_latencies_seconds_.*'
          sourceLabels:
            - __name__
        # Thin out the request-duration histogram: `__name__` and `le` are joined
        # with the default `;` separator, and buckets whose upper bound is one of
        # the listed values are dropped.
        - action: drop
          regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)'
          sourceLabels:
            - __name__
            - le
      scheme: https
      tlsConfig:
        # Server certificate is verified against the in-pod service-account CA
        # bundle, using `kubernetes` as the expected server name.
        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        serverName: kubernetes
  # The value of the Service's `component` label is used as the job name.
  jobLabel: component
  namespaceSelector:
    matchNames:
      - default
---
# ServiceMonitor for the kubelet.
# Selects Services labelled app.kubernetes.io/name=kubelet in `kube-system`
# and scrapes the `https-metrics` port three times per minute-interval:
# the default path, /metrics/cadvisor and /metrics/probes.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubelet
  labels:
    app.kubernetes.io/name: kubelet
  annotations:
    # One metric name per line.
    # NOTE(review): presumably read as an allow-list by external tooling;
    # nothing in this manifest consumes it - confirm with the owner.
    monitoring.edge.ncr.com/allowed-metrics: |
      container_cpu_cfs_throttled_seconds_total
      container_cpu_usage_seconds_total
      container_fs_reads_bytes_total
      container_fs_writes_bytes_total
      container_memory_usage_bytes
      container_memory_working_set_bytes
      container_network_receive_bytes_total
      container_network_receive_errors_total
      container_network_receive_packets_dropped_total
      container_network_transmit_bytes_total
      container_network_transmit_errors_total
      container_network_transmit_packets_dropped_total
      container_processes
      kubernetes_build_info
      machine_cpu_cores
      machine_memory_bytes
      kubelet_pleg_relist_duration_seconds_bucket
      kubelet_pleg_relist_duration_seconds_count
      kubelet_pleg_relist_duration_seconds_sum
      kubelet_runtime_operations_duration_seconds_bucket
      kubelet_runtime_operations_duration_seconds_count
      kubelet_runtime_operations_duration_seconds_sum
      kubelet_volume_stats_used_bytes
      kubelet_volume_stats_capacity_bytes
      kubelet_device_plugin_alloc_duration_seconds
      kubelet_device_plugin_registration_total
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kubelet
  endpoints:
    # --- Endpoint 1: kubelet's own metrics (no `path` set, so the default path is used).
    - port: https-metrics
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      honorLabels: true
      interval: 60s
      # Every rule below is a `drop` keyed on the metric name (`__name__`);
      # relabel regexes are fully anchored, so each alternative must match the
      # whole name. Regex values are single-quoted so YAML never reinterprets them.
      metricRelabelings:
        # kubelet_* series listed by name.
        - action: drop
          regex: 'kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)'
          sourceLabels:
            - __name__
        # scheduler_* series listed by name.
        - action: drop
          regex: 'scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)'
          sourceLabels:
            - __name__
        # apiserver_* series listed by name.
        - action: drop
          regex: 'apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)'
          sourceLabels:
            - __name__
        # kubelet_docker_* series listed by name.
        - action: drop
          regex: 'kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)'
          sourceLabels:
            - __name__
        # reflector_* series listed by name.
        - action: drop
          regex: 'reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)'
          sourceLabels:
            - __name__
        # etcd_* helper/cache/latency series listed by name.
        - action: drop
          regex: 'etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)'
          sourceLabels:
            - __name__
        # transformation_* series listed by name.
        - action: drop
          regex: 'transformation_(transformation_latencies_microseconds|failures_total)'
          sourceLabels:
            - __name__
        # Per-controller workqueue series and a few miscellaneous names.
        # The pattern must stay on ONE physical line: wrapping it splits a metric
        # name in two and the anchored regex then matches nothing.
        - action: drop
          regex: '(admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)'
          sourceLabels:
            - __name__
      relabelings:
        # Copy the scrape path into a `metrics_path` target label
        # (no `action` given, so the default `replace` applies).
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
      scheme: https
      tlsConfig:
        # NOTE(review): certificate verification is disabled for this endpoint.
        insecureSkipVerify: true
    # --- Endpoint 2: cAdvisor container metrics.
    - port: https-metrics
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      honorLabels: false
      honorTimestamps: false
      interval: 60s
      metricRelabelings:
        # container_* series listed by name.
        - action: drop
          regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)'
          sourceLabels:
            - __name__
        # The trailing `;;` only matches when both `pod` and `namespace` are
        # empty, so the listed series are dropped only for samples without them.
        - action: drop
          regex: '(container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;'
          sourceLabels:
            - __name__
            - pod
            - namespace
        # `.+` requires a non-empty `container` label: blkio usage is dropped
        # for samples that carry one.
        - action: drop
          regex: '(container_blkio_device_usage_total);.+'
          sourceLabels:
            - __name__
            - container
        # Copy a non-empty `namespace` value into `exported_namespace`.
        - action: replace
          regex: '(.+)'
          replacement: '$1'
          sourceLabels:
            - namespace
          targetLabel: exported_namespace
      path: /metrics/cadvisor
      relabelings:
        # Copy the scrape path into a `metrics_path` target label
        # (no `action` given, so the default `replace` applies).
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
        # Remove the `__meta_kubernetes_namespace` and `namespace` labels from the target.
        - action: labeldrop
          regex: '(__meta_kubernetes_namespace|namespace)'
      scheme: https
      tlsConfig:
        # NOTE(review): certificate verification is disabled for this endpoint.
        insecureSkipVerify: true
    # --- Endpoint 3: probe metrics.
    - port: https-metrics
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      honorLabels: true
      interval: 60s
      path: /metrics/probes
      relabelings:
        # Copy the scrape path into a `metrics_path` target label
        # (no `action` given, so the default `replace` applies).
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
      scheme: https
      tlsConfig:
        # NOTE(review): certificate verification is disabled for this endpoint.
        insecureSkipVerify: true
  # The value of the Service's `app.kubernetes.io/name` label is used as the job name.
  jobLabel: app.kubernetes.io/name
  namespaceSelector:
    matchNames:
      - kube-system
---
# ServiceMonitor for Prometheus itself.
# Selects Services labelled operated-prometheus="true" and scrapes their `web` port.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus
  namespace: prometheus
  annotations:
    # One metric name per line.
    # NOTE(review): presumably read as an allow-list by external tooling;
    # nothing in this manifest consumes it - confirm with the owner.
    monitoring.edge.ncr.com/allowed-metrics: |
      prometheus_http_requests_total
      promhttp_metric_handler_requests_total
      prometheus_http_request_duration_seconds
      prometheus_http_request_duration_seconds_sum
      prometheus_http_request_duration_seconds_count
      prometheus_http_request_duration_seconds_bucket
      prometheus_http_response_size_bytes
      prometheus_http_response_size_bytes_bucket
      prometheus_http_response_size_bytes_count
      prometheus_http_response_size_bytes_sum
      prometheus_rule_evaluation_failures_total
      prometheus_rule_evaluations_total
      scrape_duration_seconds
      scrape_samples_post_metric_relabeling
      scrape_samples_scraped
      up
spec:
  selector:
    matchLabels:
      # Kept quoted: label values are strings, and a bare `true` would parse as a boolean.
      operated-prometheus: 'true'
  endpoints:
    - port: web
      # Rules run in order and both match on the metric name (`__name__`).
      metricRelabelings:
        # 1) Discard target-, service-discovery- and conntrack-related series.
        - action: drop
          regex: 'prometheus_target.*|prometheus_sd.*|net_conntrack_.*'
          sourceLabels:
            - __name__
        # 2) Of what remains, retain only names matching one of these patterns;
        #    every other series from this endpoint is discarded.
        - action: keep
          regex: 'prometheus_.*|scrape_.*|up|promhttp_.*'
          sourceLabels:
            - __name__
View as plain text