...

Text file src/edge-infra.dev/third_party/k8s/prometheus-operator/prometheus/base/monitoring.yaml

Documentation: edge-infra.dev/third_party/k8s/prometheus-operator/prometheus/base

     1apiVersion: monitoring.coreos.com/v1
     2kind: ServiceMonitor
     3metadata:
     4  name: kube-apiserver
     5  labels:
     6    app.kubernetes.io/name: apiserver
     7  annotations:
     8    monitoring.edge.ncr.com/allowed-metrics: |
     9      apiserver_current_inflight_requests
    10      apiserver_current_inqueue_requests
    11      apiserver_flowcontrol_current_executing_requests
    12      apiserver_flowcontrol_current_inqueue_requests
    13      apiserver_flowcontrol_dispatched_requests_total
    14      apiserver_flowcontrol_request_execution_seconds_bucket
    15      apiserver_flowcontrol_request_execution_seconds_count
    16      apiserver_flowcontrol_request_execution_seconds_sum
    17      apiserver_flowcontrol_request_wait_duration_seconds_bucket
    18      apiserver_flowcontrol_request_wait_duration_seconds_count
    19      apiserver_flowcontrol_request_wait_duration_seconds_sum
    20      apiserver_request_duration_seconds_bucket
    21      apiserver_request_duration_seconds_count
    22      apiserver_request_duration_seconds_sum
    23      apiserver_request_terminations_total
    24      apiserver_request_total
    25spec:
    26  selector:
    27    matchLabels:
    28      component: apiserver
    29      provider: kubernetes
    30  endpoints:
    31  - port: https
    32    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    33    interval: 5m
    34    metricRelabelings:
    35    - action: drop
    36      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
    37      sourceLabels:
    38      - __name__
    39    - action: drop
    40      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
    41      sourceLabels:
    42      - __name__
    43    - action: drop
    44      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|request_sli_.*|request_slo_.*)
    45      sourceLabels:
    46      - __name__
    47    - action: drop
    48      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
    49      sourceLabels:
    50      - __name__
    51    - action: drop
    52      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
    53      sourceLabels:
    54      - __name__
    55    - action: drop
    56      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary|request_duration_seconds.*)
    57      sourceLabels:
    58      - __name__
    59    - action: drop
    60      regex: transformation_(transformation_latencies_microseconds|failures_total)
    61      sourceLabels:
    62      - __name__
    63    - action: drop
    64      regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
    65      sourceLabels:
    66      - __name__
    67    - action: drop
    68      regex: etcd_(debugging|disk|server).*
    69      sourceLabels:
    70      - __name__
    71    - action: drop
    72      regex: apiserver_admission_controller_admission_latencies_seconds_.*
    73      sourceLabels:
    74      - __name__
    75    - action: drop
    76      regex: apiserver_admission_step_admission_latencies_seconds_.*
    77      sourceLabels:
    78      - __name__
    79    - action: drop
    80      regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
    81      sourceLabels:
    82      - __name__
    83      - le
    84    scheme: https
    85    tlsConfig:
    86      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    87      serverName: kubernetes
    88  jobLabel: component
    89  namespaceSelector:
    90    matchNames:
    91    - default
    92---
    93apiVersion: monitoring.coreos.com/v1
    94kind: ServiceMonitor
    95metadata:
    96  name: kubelet
    97  labels:
    98    app.kubernetes.io/name: kubelet
    99  annotations:
   100    monitoring.edge.ncr.com/allowed-metrics: |
   101      container_cpu_cfs_throttled_seconds_total
   102      container_cpu_usage_seconds_total
   103      container_fs_reads_bytes_total
   104      container_fs_writes_bytes_total
   105      container_memory_usage_bytes
   106      container_memory_working_set_bytes
   107      container_network_receive_bytes_total
   108      container_network_receive_errors_total
   109      container_network_receive_packets_dropped_total
   110      container_network_transmit_bytes_total
   111      container_network_transmit_errors_total
   112      container_network_transmit_packets_dropped_total
   113      container_processes
   114      kubernetes_build_info
   115      machine_cpu_cores
   116      machine_memory_bytes
   117      kubelet_pleg_relist_duration_seconds_bucket
   118      kubelet_pleg_relist_duration_seconds_count
   119      kubelet_pleg_relist_duration_seconds_sum
   120      kubelet_runtime_operations_duration_seconds_bucket
   121      kubelet_runtime_operations_duration_seconds_count
   122      kubelet_runtime_operations_duration_seconds_sum
   123      kubelet_volume_stats_used_bytes
   124      kubelet_volume_stats_capacity_bytes
   125      kubelet_device_plugin_alloc_duration_seconds
   126      kubelet_device_plugin_registration_total
   127spec:
   128  selector:
   129    matchLabels:
   130      app.kubernetes.io/name: kubelet
   131  endpoints:
   132  - port: https-metrics
   133    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
   134    honorLabels: true
   135    interval: 60s
   136    metricRelabelings:
   137    - action: drop
   138      regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
   139      sourceLabels:
   140      - __name__
   141    - action: drop
   142      regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
   143      sourceLabels:
   144      - __name__
   145    - action: drop
   146      regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
   147      sourceLabels:
   148      - __name__
   149    - action: drop
   150      regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
   151      sourceLabels:
   152      - __name__
   153    - action: drop
   154      regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
   155      sourceLabels:
   156      - __name__
   157    - action: drop
   158      regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
   159      sourceLabels:
   160      - __name__
   161    - action: drop
   162      regex: transformation_(transformation_latencies_microseconds|failures_total)
   163      sourceLabels:
   164      - __name__
   165    - action: drop
   166      regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
   167      sourceLabels:
   168      - __name__
   169    relabelings:
   170    - sourceLabels:
   171      - __metrics_path__
   172      targetLabel: metrics_path
   173    scheme: https
   174    tlsConfig:
   175      insecureSkipVerify: true
   176  - port: https-metrics
   177    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
   178    honorLabels: false
   179    honorTimestamps: false
   180    interval: 60s
   181    metricRelabelings:
   182    - action: drop
   183      regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
   184      sourceLabels:
   185      - __name__
   186    - action: drop
   187      regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
   188      sourceLabels:
   189      - __name__
   190      - pod
   191      - namespace
   192    - action: drop
   193      regex: (container_blkio_device_usage_total);.+
   194      sourceLabels:
   195      - __name__
   196      - container
   197    - action: replace
   198      regex: (.+)
   199      replacement: $1
   200      sourceLabels:
   201      - namespace
   202      targetLabel: exported_namespace
   203    path: /metrics/cadvisor
   204    relabelings:
   205    - sourceLabels:
   206      - __metrics_path__
   207      targetLabel: metrics_path
   208    - action: labeldrop
   209      regex: (__meta_kubernetes_namespace|namespace)
   210    scheme: https
   211    tlsConfig:
   212      insecureSkipVerify: true
   213  - port: https-metrics
   214    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
   215    honorLabels: true
   216    interval: 60s
   217    path: /metrics/probes
   218    relabelings:
   219    - sourceLabels:
   220      - __metrics_path__
   221      targetLabel: metrics_path
   222    scheme: https
   223    tlsConfig:
   224      insecureSkipVerify: true
   225  jobLabel: app.kubernetes.io/name
   226  namespaceSelector:
   227    matchNames:
   228    - kube-system
   229---
   230apiVersion: monitoring.coreos.com/v1
   231kind: ServiceMonitor
   232metadata:
   233  name: prometheus
   234  namespace: prometheus
   235  annotations:
   236    monitoring.edge.ncr.com/allowed-metrics: |
   237      prometheus_http_requests_total
   238      promhttp_metric_handler_requests_total
   239      prometheus_http_request_duration_seconds
   240      prometheus_http_request_duration_seconds_sum
   241      prometheus_http_request_duration_seconds_count
   242      prometheus_http_request_duration_seconds_bucket
   243      prometheus_http_response_size_bytes
   244      prometheus_http_response_size_bytes_bucket
   245      prometheus_http_response_size_bytes_count
   246      prometheus_http_response_size_bytes_sum
   247      prometheus_rule_evaluation_failures_total
   248      prometheus_rule_evaluations_total
   249      scrape_duration_seconds
   250      scrape_samples_post_metric_relabeling
   251      scrape_samples_scraped
   252      up
   253spec:
   254  selector:
   255    matchLabels:
   256      operated-prometheus: 'true'
   257  endpoints:
   258  - port: web
   259    metricRelabelings:
   260    - action: drop
   261      sourceLabels:
   262      - __name__
   263      regex: prometheus_target.*|prometheus_sd.*|net_conntrack_.*
   264    - action: keep
   265      sourceLabels:
   266      - __name__
   267      regex: prometheus_.*|scrape_.*|up|promhttp_.*

View as plain text