apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: kube-apiserver labels: app.kubernetes.io/name: apiserver annotations: monitoring.edge.ncr.com/allowed-metrics: | apiserver_current_inflight_requests apiserver_current_inqueue_requests apiserver_flowcontrol_current_executing_requests apiserver_flowcontrol_current_inqueue_requests apiserver_flowcontrol_dispatched_requests_total apiserver_flowcontrol_request_execution_seconds_bucket apiserver_flowcontrol_request_execution_seconds_count apiserver_flowcontrol_request_execution_seconds_sum apiserver_flowcontrol_request_wait_duration_seconds_bucket apiserver_flowcontrol_request_wait_duration_seconds_count apiserver_flowcontrol_request_wait_duration_seconds_sum apiserver_request_duration_seconds_bucket apiserver_request_duration_seconds_count apiserver_request_duration_seconds_sum apiserver_request_terminations_total apiserver_request_total spec: selector: matchLabels: component: apiserver provider: kubernetes endpoints: - port: https bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token interval: 5m metricRelabelings: - action: drop regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) sourceLabels: - __name__ - action: drop regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) sourceLabels: - __name__ - action: drop regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|request_sli_.*|request_slo_.*) sourceLabels: - __name__ - action: drop regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) sourceLabels: - __name__ - action: drop regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) sourceLabels: - __name__ - action: drop regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary|request_duration_seconds.*) sourceLabels: - __name__ - action: drop regex: transformation_(transformation_latencies_microseconds|failures_total) sourceLabels: - __name__ - action: drop regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count) sourceLabels: - __name__ - action: drop regex: etcd_(debugging|disk|server).* sourceLabels: - __name__ - action: drop regex: apiserver_admission_controller_admission_latencies_seconds_.* sourceLabels: - __name__ - action: drop regex: apiserver_admission_step_admission_latencies_seconds_.* sourceLabels: - __name__ - action: drop regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50) sourceLabels: - __name__ - le scheme: https tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt serverName: kubernetes jobLabel: component namespaceSelector: matchNames: - default --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: kubelet labels: app.kubernetes.io/name: kubelet annotations: monitoring.edge.ncr.com/allowed-metrics: | container_cpu_cfs_throttled_seconds_total container_cpu_usage_seconds_total container_fs_reads_bytes_total container_fs_writes_bytes_total container_memory_usage_bytes container_memory_working_set_bytes container_network_receive_bytes_total container_network_receive_errors_total container_network_receive_packets_dropped_total container_network_transmit_bytes_total container_network_transmit_errors_total container_network_transmit_packets_dropped_total container_processes kubernetes_build_info machine_cpu_cores machine_memory_bytes kubelet_pleg_relist_duration_seconds_bucket kubelet_pleg_relist_duration_seconds_count kubelet_pleg_relist_duration_seconds_sum kubelet_runtime_operations_duration_seconds_bucket kubelet_runtime_operations_duration_seconds_count kubelet_runtime_operations_duration_seconds_sum kubelet_volume_stats_used_bytes kubelet_volume_stats_capacity_bytes kubelet_device_plugin_alloc_duration_seconds kubelet_device_plugin_registration_total spec: selector: matchLabels: app.kubernetes.io/name: kubelet endpoints: - port: https-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 60s metricRelabelings: - action: drop regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) sourceLabels: - __name__ - action: drop regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) sourceLabels: - __name__ - action: drop regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs) sourceLabels: - __name__ - action: drop regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) sourceLabels: - __name__ - action: drop regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) sourceLabels: - __name__ - action: drop regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) sourceLabels: - __name__ - action: drop regex: transformation_(transformation_latencies_microseconds|failures_total) sourceLabels: - __name__ - action: drop regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count) sourceLabels: - __name__ relabelings: - sourceLabels: - __metrics_path__ targetLabel: metrics_path scheme: https tlsConfig: insecureSkipVerify: true - port: https-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: false honorTimestamps: false interval: 60s metricRelabelings: - action: drop regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) sourceLabels: - __name__ - action: drop regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);; sourceLabels: - __name__ - pod - namespace - action: drop regex: (container_blkio_device_usage_total);.+ sourceLabels: - __name__ - container - action: replace regex: (.+) replacement: $1 sourceLabels: - namespace targetLabel: exported_namespace path: /metrics/cadvisor relabelings: - sourceLabels: - __metrics_path__ targetLabel: metrics_path - action: labeldrop regex: (__meta_kubernetes_namespace|namespace) scheme: https tlsConfig: insecureSkipVerify: true - port: https-metrics bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 60s path: /metrics/probes relabelings: - sourceLabels: - __metrics_path__ targetLabel: metrics_path scheme: https tlsConfig: insecureSkipVerify: true jobLabel: app.kubernetes.io/name namespaceSelector: matchNames: - kube-system --- apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: prometheus namespace: prometheus annotations: monitoring.edge.ncr.com/allowed-metrics: | prometheus_http_requests_total promhttp_metric_handler_requests_total prometheus_http_request_duration_seconds prometheus_http_request_duration_seconds_sum prometheus_http_request_duration_seconds_count prometheus_http_request_duration_seconds_bucket prometheus_http_response_size_bytes prometheus_http_response_size_bytes_bucket prometheus_http_response_size_bytes_count prometheus_http_response_size_bytes_sum prometheus_rule_evaluation_failures_total prometheus_rule_evaluations_total scrape_duration_seconds scrape_samples_post_metric_relabeling scrape_samples_scraped up spec: selector: matchLabels: operated-prometheus: 'true' endpoints: - port: web metricRelabelings: - action: drop sourceLabels: - __name__ regex: prometheus_target.*|prometheus_sd.*|net_conntrack_.* - action: keep sourceLabels: - __name__ regex: prometheus_.*|scrape_.*|up|promhttp_.*