...

Source file src/k8s.io/kubernetes/pkg/controller/job/metrics/metrics.go

Documentation: k8s.io/kubernetes/pkg/controller/job/metrics

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"sync"
    21  
    22  	"k8s.io/component-base/metrics"
    23  	"k8s.io/component-base/metrics/legacyregistry"
    24  )
    25  
    26  // JobControllerSubsystem is the subsystem name shared by the Job controller metrics declared below.
    27  const JobControllerSubsystem = "job_controller"
    28  
    29  var (
    30  	// JobSyncDurationSeconds tracks the latency of Job syncs. Possible label
    31  	// values:
    32  	//   completion_mode: Indexed, NonIndexed
    33  	//   result:          success, error
    34  	//   action:          reconciling, tracking, pods_created, pods_deleted
    35  	JobSyncDurationSeconds = metrics.NewHistogramVec(
    36  		&metrics.HistogramOpts{
    37  			Subsystem:      JobControllerSubsystem,
    38  			Name:           "job_sync_duration_seconds",
    39  			Help:           "The time it took to sync a job",
    40  			StabilityLevel: metrics.STABLE,
    41  			Buckets:        metrics.ExponentialBuckets(0.004, 2, 15), // start 0.004s, factor 2, 15 buckets
    42  		},
    43  		[]string{"completion_mode", "result", "action"},
    44  	)
    45  	// JobSyncNum tracks the number of Job syncs. Possible label values:
    46  	//   completion_mode: Indexed, NonIndexed
    47  	//   result:          success, error
    48  	//   action:          reconciling, tracking, pods_created, pods_deleted
    49  	JobSyncNum = metrics.NewCounterVec(
    50  		&metrics.CounterOpts{
    51  			Subsystem:      JobControllerSubsystem,
    52  			Name:           "job_syncs_total",
    53  			Help:           "The number of job syncs",
    54  			StabilityLevel: metrics.STABLE,
    55  		},
    56  		[]string{"completion_mode", "result", "action"},
    57  	)
    58  	// JobFinishedNum tracks the number of finished Jobs. An empty reason label
    59  	// is used to count successful jobs.
    60  	// Possible label values:
    61  	//   completion_mode: Indexed, NonIndexed
    62  	//   result:          failed, succeeded
    63  	//   reason:          "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", ""
    64  	JobFinishedNum = metrics.NewCounterVec(
    65  		&metrics.CounterOpts{
    66  			Subsystem:      JobControllerSubsystem,
    67  			Name:           "jobs_finished_total",
    68  			Help:           "The number of finished jobs",
    69  			StabilityLevel: metrics.STABLE,
    70  		},
    71  		[]string{"completion_mode", "result", "reason"},
    72  	)
    73  
    74  	// JobByExternalControllerTotal tracks the number of Jobs that were created
    75  	// to be managed by an external controller.
    76  	// The value of the label controller_name corresponds to the value of the
    77  	// managedBy field.
    78  	JobByExternalControllerTotal = metrics.NewCounterVec(
    79  		&metrics.CounterOpts{
    80  			Subsystem:      JobControllerSubsystem,
    81  			Name:           "jobs_by_external_controller_total",
    82  			Help:           "The number of Jobs managed by an external controller",
    83  			StabilityLevel: metrics.ALPHA,
    84  		},
    85  		[]string{"controller_name"},
    86  	)
    87  
    88  	// JobPodsFinished records the number of finished Pods that the Job controller
    89  	// has fully tracked.
    90  	// It only applies to Jobs that were created while the feature gate
    91  	// JobTrackingWithFinalizers was enabled.
    92  	// Possible label values:
    93  	//   completion_mode: Indexed, NonIndexed
    94  	//   result:          failed, succeeded
    95  	JobPodsFinished = metrics.NewCounterVec(
    96  		&metrics.CounterOpts{
    97  			Subsystem:      JobControllerSubsystem,
    98  			Name:           "job_pods_finished_total",
    99  			Help:           "The number of finished Pods that are fully tracked",
   100  			StabilityLevel: metrics.STABLE,
   101  		},
   102  		[]string{"completion_mode", "result"})
   103  
   104  	// PodFailuresHandledByFailurePolicy records the number of failed Pods
   105  	// handled by the pod failure policy, keyed by the action of the matched rule.
   106  	// Possible label values:
   107  	//   action: FailJob, Ignore, Count
   108  	PodFailuresHandledByFailurePolicy = metrics.NewCounterVec(
   109  		&metrics.CounterOpts{
   110  			Subsystem: JobControllerSubsystem,
   111  			Name:      "pod_failures_handled_by_failure_policy_total",
   112  			Help: `The number of failed Pods handled by failure policy with
   113  			respect to the failure policy action applied based on the matched
   114  			rule. Possible values of the action label correspond to the
   115  			possible values for the failure policy rule action, which are:
   116  			"FailJob", "Ignore" and "Count".`,
   117  		},
   118  		[]string{"action"})
   119  
   120  	// TerminatedPodsTrackingFinalizerTotal records the addition and removal of
   121  	// terminated pods that have the finalizer batch.kubernetes.io/job-tracking,
   122  	// regardless of whether they are owned by a Job.
   123  	TerminatedPodsTrackingFinalizerTotal = metrics.NewCounterVec(
   124  		&metrics.CounterOpts{
   125  			Subsystem: JobControllerSubsystem,
   126  			Name:      "terminated_pods_tracking_finalizer_total",
   127  			Help: `The number of terminated pods (phase=Failed|Succeeded)
   128  that have the finalizer batch.kubernetes.io/job-tracking
   129  The event label can be "add" or "delete".`,
   130  		}, []string{"event"})
   131  
   132  	// JobFinishedIndexesTotal records the number of finished indexes, labelled by status (succeeded, failed) and backoffLimit (perIndex, global).
   133  	JobFinishedIndexesTotal = metrics.NewCounterVec(
   134  		&metrics.CounterOpts{
   135  			Subsystem: JobControllerSubsystem,
   136  			Name:      "job_finished_indexes_total",
   137  			Help: `The number of finished indexes. Possible values for the
   138  			status label are: "succeeded", "failed". Possible values for the
   139  			backoffLimit label are: "perIndex" and "global"`,
   140  		},
   141  		[]string{"status", "backoffLimit"})
   142  
   143  	// JobPodsCreationTotal records the number of pods created by the job controller
   144  	// based on the reason for their creation (i.e. if PodReplacementPolicy was specified)
   145  	// and the status of the creation (i.e. if the Pod creation succeeded or failed).
   146  	// Possible label values:
   147  	//   reason: new, recreate_terminating_or_failed, recreate_failed
   148  	//   status: succeeded, failed
   149  	JobPodsCreationTotal = metrics.NewCounterVec(
   150  		&metrics.CounterOpts{
   151  			Subsystem: JobControllerSubsystem,
   152  			Name:      "job_pods_creation_total",
   153  			Help: `The number of Pods created by the Job controller labelled with a reason for the Pod creation.
   154  This metric also distinguishes between Pods created using different PodReplacementPolicy settings.
   155  Possible values of the "reason" label are:
   156  "new", "recreate_terminating_or_failed", "recreate_failed".
   157  Possible values of the "status" label are:
   158  "succeeded", "failed".`,
   159  		}, []string{"reason", "status"})
   160  )
   161  
   162  const (
   163  	// Possible values for the "action" label in the above metrics.
   164  
   165  	// JobSyncActionReconciling is used when the Job's pod creation/deletion
   166  	// expectations are unsatisfied and the controller is waiting for issued Pod
   167  	// creation/deletions to complete.
   168  	JobSyncActionReconciling = "reconciling"
   169  	// JobSyncActionTracking is used when the Job's pod creation/deletion expectations
   170  	// are satisfied and the number of active Pods matches expectations (i.e. no
   171  	// pod creation/deletions issued in this sync). This is expected to be the
   172  	// action in most of the syncs.
   173  	JobSyncActionTracking = "tracking"
   174  	// JobSyncActionPodsCreated is used when the controller creates Pods. This can
   175  	// happen when the number of active Pods is less than the wanted Job parallelism.
   176  	JobSyncActionPodsCreated = "pods_created"
   177  	// JobSyncActionPodsDeleted is used when the controller deletes Pods. This can
   178  	// happen if a Job is suspended or if the number of active Pods is more than
   179  	// parallelism.
   180  	JobSyncActionPodsDeleted = "pods_deleted"
   181  
   182  	// Possible values for "result" and "status" (job_pods_creation_total) labels in the above metrics.
   183  
   184  	Succeeded = "succeeded"
   185  	Failed    = "failed"
   186  
   187  	// Possible values for the "event" label in the
   188  	// terminated_pods_tracking_finalizer_total metric.
   189  	Add    = "add"
   190  	Delete = "delete"
   191  
   192  	// Possible values for "reason" label in the job_pods_creation_total metric.
   193  
   194  	PodCreateNew                   = "new"
   195  	PodRecreateTerminatingOrFailed = "recreate_terminating_or_failed"
   196  	PodRecreateFailed              = "recreate_failed"
   197  )
   198  
   199  var registerMetrics sync.Once // guards the registrations performed by Register so they run only once
   200  
   201  // Register registers the Job controller metrics with the legacy registry. Repeated calls are no-ops.
   202  func Register() {
   203  	registerMetrics.Do(func() {
   204  		legacyregistry.MustRegister(JobSyncDurationSeconds)
   205  		legacyregistry.MustRegister(JobSyncNum)
   206  		legacyregistry.MustRegister(JobFinishedNum)
   207  		legacyregistry.MustRegister(JobPodsFinished)
   208  		legacyregistry.MustRegister(PodFailuresHandledByFailurePolicy)
   209  		legacyregistry.MustRegister(TerminatedPodsTrackingFinalizerTotal)
   210  		legacyregistry.MustRegister(JobFinishedIndexesTotal)
   211  		legacyregistry.MustRegister(JobPodsCreationTotal)
   212  		legacyregistry.MustRegister(JobByExternalControllerTotal)
   213  	})
   214  }
   215  

View as plain text