stable-metrics-list.yaml

Documentation: k8s.io/kubernetes/test/instrumentation/testdata

     1- name: current_executing_requests
     2  subsystem: flowcontrol
     3  namespace: apiserver
     4  help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
     5    stage in the API Priority and Fairness subsystem
     6  type: Gauge
     7  stabilityLevel: BETA
     8  labels:
     9  - flow_schema
    10  - priority_level
    11- name: current_executing_seats
    12  subsystem: flowcontrol
    13  namespace: apiserver
    14  help: Concurrency (number of seats) occupied by the currently executing (initial
    15    stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
    16    subsystem
    17  type: Gauge
    18  stabilityLevel: BETA
    19  labels:
    20  - flow_schema
    21  - priority_level
    22- name: current_inqueue_requests
    23  subsystem: flowcontrol
    24  namespace: apiserver
    25  help: Number of requests currently pending in queues of the API Priority and Fairness
    26    subsystem
    27  type: Gauge
    28  stabilityLevel: BETA
    29  labels:
    30  - flow_schema
    31  - priority_level
    32- name: dispatched_requests_total
    33  subsystem: flowcontrol
    34  namespace: apiserver
    35  help: Number of requests executed by API Priority and Fairness subsystem
    36  type: Counter
    37  stabilityLevel: BETA
    38  labels:
    39  - flow_schema
    40  - priority_level
    41- name: nominal_limit_seats
    42  subsystem: flowcontrol
    43  namespace: apiserver
    44  help: Nominal number of execution seats configured for each priority level
    45  type: Gauge
    46  stabilityLevel: BETA
    47  labels:
    48  - priority_level
    49- name: rejected_requests_total
    50  subsystem: flowcontrol
    51  namespace: apiserver
    52  help: Number of requests rejected by API Priority and Fairness subsystem
    53  type: Counter
    54  stabilityLevel: BETA
    55  labels:
    56  - flow_schema
    57  - priority_level
    58  - reason
    59- name: request_wait_duration_seconds
    60  subsystem: flowcontrol
    61  namespace: apiserver
    62  help: Length of time a request spent waiting in its queue
    63  type: Histogram
    64  stabilityLevel: BETA
    65  labels:
    66  - execute
    67  - flow_schema
    68  - priority_level
    69  buckets:
    70  - 0
    71  - 0.005
    72  - 0.02
    73  - 0.05
    74  - 0.1
    75  - 0.2
    76  - 0.5
    77  - 1
    78  - 2
    79  - 5
    80  - 10
    81  - 15
    82  - 30
    83- name: disabled_metrics_total
    84  help: The count of disabled metrics.
    85  type: Counter
    86  stabilityLevel: BETA
    87- name: hidden_metrics_total
    88  help: The count of hidden metrics.
    89  type: Counter
    90  stabilityLevel: BETA
    91- name: feature_enabled
    92  namespace: kubernetes
    93  help: This metric records the data about the stage and enablement of a k8s feature.
    94  type: Gauge
    95  stabilityLevel: BETA
    96  labels:
    97  - name
    98  - stage
    99- name: registered_metrics_total
   100  help: The count of registered metrics broken by stability level and deprecation
   101    version.
   102  type: Counter
   103  stabilityLevel: BETA
   104  labels:
   105  - deprecated_version
   106  - stability_level
   107- name: pod_scheduling_sli_duration_seconds
   108  subsystem: scheduler
   109  help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
   110    queue and might involve multiple scheduling attempts.
   111  type: Histogram
   112  stabilityLevel: BETA
   113  labels:
   114  - attempts
   115  buckets:
   116  - 0.01
   117  - 0.02
   118  - 0.04
   119  - 0.08
   120  - 0.16
   121  - 0.32
   122  - 0.64
   123  - 1.28
   124  - 2.56
   125  - 5.12
   126  - 10.24
   127  - 20.48
   128  - 40.96
   129  - 81.92
   130  - 163.84
   131  - 327.68
   132  - 655.36
   133  - 1310.72
   134  - 2621.44
   135  - 5242.88
   136- name: controller_admission_duration_seconds
   137  subsystem: admission
   138  namespace: apiserver
   139  help: Admission controller latency histogram in seconds, identified by name and
   140    broken out for each operation and API resource and type (validate or admit).
   141  type: Histogram
   142  stabilityLevel: STABLE
   143  labels:
   144  - name
   145  - operation
   146  - rejected
   147  - type
   148  buckets:
   149  - 0.005
   150  - 0.025
   151  - 0.1
   152  - 0.5
   153  - 1
   154  - 2.5
   155- name: step_admission_duration_seconds
   156  subsystem: admission
   157  namespace: apiserver
   158  help: Admission sub-step latency histogram in seconds, broken out for each operation
   159    and API resource and step type (validate or admit).
   160  type: Histogram
   161  stabilityLevel: STABLE
   162  labels:
   163  - operation
   164  - rejected
   165  - type
   166  buckets:
   167  - 0.005
   168  - 0.025
   169  - 0.1
   170  - 0.5
   171  - 1
   172  - 2.5
   173- name: webhook_admission_duration_seconds
   174  subsystem: admission
   175  namespace: apiserver
   176  help: Admission webhook latency histogram in seconds, identified by name and broken
   177    out for each operation and API resource and type (validate or admit).
   178  type: Histogram
   179  stabilityLevel: STABLE
   180  labels:
   181  - name
   182  - operation
   183  - rejected
   184  - type
   185  buckets:
   186  - 0.005
   187  - 0.025
   188  - 0.1
   189  - 0.5
   190  - 1
   191  - 2.5
   192  - 10
   193  - 25
   194- name: current_inflight_requests
   195  subsystem: apiserver
   196  help: Maximal number of currently used inflight request limit of this apiserver
   197    per request kind in last second.
   198  type: Gauge
   199  stabilityLevel: STABLE
   200  labels:
   201  - request_kind
   202- name: longrunning_requests
   203  subsystem: apiserver
   204  help: Gauge of all active long-running apiserver requests broken out by verb, group,
   205    version, resource, scope and component. Not all requests are tracked this way.
   206  type: Gauge
   207  stabilityLevel: STABLE
   208  labels:
   209  - component
   210  - group
   211  - resource
   212  - scope
   213  - subresource
   214  - verb
   215  - version
   216- name: request_duration_seconds
   217  subsystem: apiserver
   218  help: Response latency distribution in seconds for each verb, dry run value, group,
   219    version, resource, subresource, scope and component.
   220  type: Histogram
   221  stabilityLevel: STABLE
   222  labels:
   223  - component
   224  - dry_run
   225  - group
   226  - resource
   227  - scope
   228  - subresource
   229  - verb
   230  - version
   231  buckets:
   232  - 0.005
   233  - 0.025
   234  - 0.05
   235  - 0.1
   236  - 0.2
   237  - 0.4
   238  - 0.6
   239  - 0.8
   240  - 1
   241  - 1.25
   242  - 1.5
   243  - 2
   244  - 3
   245  - 4
   246  - 5
   247  - 6
   248  - 8
   249  - 10
   250  - 15
   251  - 20
   252  - 30
   253  - 45
   254  - 60
   255- name: request_total
   256  subsystem: apiserver
   257  help: Counter of apiserver requests broken out for each verb, dry run value, group,
   258    version, resource, scope, component, and HTTP response code.
   259  type: Counter
   260  stabilityLevel: STABLE
   261  labels:
   262  - code
   263  - component
   264  - dry_run
   265  - group
   266  - resource
   267  - scope
   268  - subresource
   269  - verb
   270  - version
   271- name: requested_deprecated_apis
   272  subsystem: apiserver
   273  help: Gauge of deprecated APIs that have been requested, broken out by API group,
   274    version, resource, subresource, and removed_release.
   275  type: Gauge
   276  stabilityLevel: STABLE
   277  labels:
   278  - group
   279  - removed_release
   280  - resource
   281  - subresource
   282  - version
   283- name: response_sizes
   284  subsystem: apiserver
   285  help: Response size distribution in bytes for each group, version, verb, resource,
   286    subresource, scope and component.
   287  type: Histogram
   288  stabilityLevel: STABLE
   289  labels:
   290  - component
   291  - group
   292  - resource
   293  - scope
   294  - subresource
   295  - verb
   296  - version
   297  buckets:
   298  - 1000
   299  - 10000
   300  - 100000
   301  - 1e+06
   302  - 1e+07
   303  - 1e+08
   304  - 1e+09
   305- name: apiserver_storage_objects
   306  help: Number of stored objects at the time of last check split by kind. In case
   307    of a fetching error, the value will be -1.
   308  type: Gauge
   309  stabilityLevel: STABLE
   310  labels:
   311  - resource
   312- name: apiserver_storage_size_bytes
   313  help: Size of the storage database file physically allocated in bytes.
   314  type: Custom
   315  stabilityLevel: STABLE
   316  labels:
   317  - storage_cluster_id
   318- name: container_cpu_usage_seconds_total
   319  help: Cumulative cpu time consumed by the container in core-seconds
   320  type: Custom
   321  stabilityLevel: STABLE
   322  labels:
   323  - container
   324  - pod
   325  - namespace
   326- name: container_memory_working_set_bytes
   327  help: Current working set of the container in bytes
   328  type: Custom
   329  stabilityLevel: STABLE
   330  labels:
   331  - container
   332  - pod
   333  - namespace
   334- name: container_start_time_seconds
   335  help: Start time of the container since unix epoch in seconds
   336  type: Custom
   337  stabilityLevel: STABLE
   338  labels:
   339  - container
   340  - pod
   341  - namespace
   342- name: job_creation_skew_duration_seconds
   343  subsystem: cronjob_controller
   344  help: Time between when a cronjob is scheduled to be run, and when the corresponding
   345    job is created
   346  type: Histogram
   347  stabilityLevel: STABLE
   348  buckets:
   349  - 1
   350  - 2
   351  - 4
   352  - 8
   353  - 16
   354  - 32
   355  - 64
   356  - 128
   357  - 256
   358  - 512
   359- name: job_pods_finished_total
   360  subsystem: job_controller
   361  help: The number of finished Pods that are fully tracked
   362  type: Counter
   363  stabilityLevel: STABLE
   364  labels:
   365  - completion_mode
   366  - result
   367- name: job_sync_duration_seconds
   368  subsystem: job_controller
   369  help: The time it took to sync a job
   370  type: Histogram
   371  stabilityLevel: STABLE
   372  labels:
   373  - action
   374  - completion_mode
   375  - result
   376  buckets:
   377  - 0.004
   378  - 0.008
   379  - 0.016
   380  - 0.032
   381  - 0.064
   382  - 0.128
   383  - 0.256
   384  - 0.512
   385  - 1.024
   386  - 2.048
   387  - 4.096
   388  - 8.192
   389  - 16.384
   390  - 32.768
   391  - 65.536
   392- name: job_syncs_total
   393  subsystem: job_controller
   394  help: The number of job syncs
   395  type: Counter
   396  stabilityLevel: STABLE
   397  labels:
   398  - action
   399  - completion_mode
   400  - result
   401- name: jobs_finished_total
   402  subsystem: job_controller
   403  help: The number of finished jobs
   404  type: Counter
   405  stabilityLevel: STABLE
   406  labels:
   407  - completion_mode
   408  - reason
   409  - result
   410- name: kube_pod_resource_limit
   411  help: Resources limit for workloads on the cluster, broken down by pod. This shows
   412    the resource usage the scheduler and kubelet expect per pod for resources along
   413    with the unit for the resource if any.
   414  type: Custom
   415  stabilityLevel: STABLE
   416  labels:
   417  - namespace
   418  - pod
   419  - node
   420  - scheduler
   421  - priority
   422  - resource
   423  - unit
   424- name: kube_pod_resource_request
   425  help: Resources requested by workloads on the cluster, broken down by pod. This
   426    shows the resource usage the scheduler and kubelet expect per pod for resources
   427    along with the unit for the resource if any.
   428  type: Custom
   429  stabilityLevel: STABLE
   430  labels:
   431  - namespace
   432  - pod
   433  - node
   434  - scheduler
   435  - priority
   436  - resource
   437  - unit
   438- name: healthcheck
   439  namespace: kubernetes
   440  help: This metric records the result of a single healthcheck.
   441  type: Gauge
   442  stabilityLevel: STABLE
   443  labels:
   444  - name
   445  - type
   446- name: healthchecks_total
   447  namespace: kubernetes
   448  help: This metric records the results of all healthcheck.
   449  type: Counter
   450  stabilityLevel: STABLE
   451  labels:
   452  - name
   453  - status
   454  - type
   455- name: evictions_total
   456  subsystem: node_collector
   457  help: Number of Node evictions that happened since current instance of NodeController
   458    started.
   459  type: Counter
   460  stabilityLevel: STABLE
   461  labels:
   462  - zone
   463- name: node_cpu_usage_seconds_total
   464  help: Cumulative cpu time consumed by the node in core-seconds
   465  type: Custom
   466  stabilityLevel: STABLE
   467- name: node_memory_working_set_bytes
   468  help: Current working set of the node in bytes
   469  type: Custom
   470  stabilityLevel: STABLE
   471- name: pod_cpu_usage_seconds_total
   472  help: Cumulative cpu time consumed by the pod in core-seconds
   473  type: Custom
   474  stabilityLevel: STABLE
   475  labels:
   476  - pod
   477  - namespace
   478- name: pod_memory_working_set_bytes
   479  help: Current working set of the pod in bytes
   480  type: Custom
   481  stabilityLevel: STABLE
   482  labels:
   483  - pod
   484  - namespace
   485- name: resource_scrape_error
   486  help: 1 if there was an error while getting container metrics, 0 otherwise
   487  type: Custom
   488  stabilityLevel: STABLE
   489- name: framework_extension_point_duration_seconds
   490  subsystem: scheduler
   491  help: Latency for running all plugins of a specific extension point.
   492  type: Histogram
   493  stabilityLevel: STABLE
   494  labels:
   495  - extension_point
   496  - profile
   497  - status
   498  buckets:
   499  - 0.0001
   500  - 0.0002
   501  - 0.0004
   502  - 0.0008
   503  - 0.0016
   504  - 0.0032
   505  - 0.0064
   506  - 0.0128
   507  - 0.0256
   508  - 0.0512
   509  - 0.1024
   510  - 0.2048
   511- name: pending_pods
   512  subsystem: scheduler
   513  help: Number of pending pods, by the queue type. 'active' means number of pods in
   514    activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
   515    of pods in unschedulablePods that the scheduler attempted to schedule and failed;
   516    'gated' is the number of unschedulable pods that the scheduler never attempted
   517    to schedule because they are gated.
   518  type: Gauge
   519  stabilityLevel: STABLE
   520  labels:
   521  - queue
   522- name: pod_scheduling_attempts
   523  subsystem: scheduler
   524  help: Number of attempts to successfully schedule a pod.
   525  type: Histogram
   526  stabilityLevel: STABLE
   527  buckets:
   528  - 1
   529  - 2
   530  - 4
   531  - 8
   532  - 16
   533- name: pod_scheduling_duration_seconds
   534  subsystem: scheduler
   535  help: E2e latency for a pod being scheduled which may include multiple scheduling
   536    attempts.
   537  type: Histogram
   538  deprecatedVersion: 1.29.0
   539  stabilityLevel: STABLE
   540  labels:
   541  - attempts
   542  buckets:
   543  - 0.01
   544  - 0.02
   545  - 0.04
   546  - 0.08
   547  - 0.16
   548  - 0.32
   549  - 0.64
   550  - 1.28
   551  - 2.56
   552  - 5.12
   553  - 10.24
   554  - 20.48
   555  - 40.96
   556  - 81.92
   557  - 163.84
   558  - 327.68
   559  - 655.36
   560  - 1310.72
   561  - 2621.44
   562  - 5242.88
   563- name: preemption_attempts_total
   564  subsystem: scheduler
   565  help: Total preemption attempts in the cluster till now
   566  type: Counter
   567  stabilityLevel: STABLE
   568- name: preemption_victims
   569  subsystem: scheduler
   570  help: Number of selected preemption victims
   571  type: Histogram
   572  stabilityLevel: STABLE
   573  buckets:
   574  - 1
   575  - 2
   576  - 4
   577  - 8
   578  - 16
   579  - 32
   580  - 64
   581- name: queue_incoming_pods_total
   582  subsystem: scheduler
   583  help: Number of pods added to scheduling queues by event and queue type.
   584  type: Counter
   585  stabilityLevel: STABLE
   586  labels:
   587  - event
   588  - queue
   589- name: schedule_attempts_total
   590  subsystem: scheduler
   591  help: Number of attempts to schedule pods, by the result. 'unschedulable' means
   592    a pod could not be scheduled, while 'error' means an internal scheduler problem.
   593  type: Counter
   594  stabilityLevel: STABLE
   595  labels:
   596  - profile
   597  - result
   598- name: scheduling_attempt_duration_seconds
   599  subsystem: scheduler
   600  help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
   601  type: Histogram
   602  stabilityLevel: STABLE
   603  labels:
   604  - profile
   605  - result
   606  buckets:
   607  - 0.001
   608  - 0.002
   609  - 0.004
   610  - 0.008
   611  - 0.016
   612  - 0.032
   613  - 0.064
   614  - 0.128
   615  - 0.256
   616  - 0.512
   617  - 1.024
   618  - 2.048
   619  - 4.096
   620  - 8.192
   621  - 16.384
View as plain text