...

Source file src/k8s.io/kubernetes/pkg/volume/util/metrics.go

Documentation: k8s.io/kubernetes/pkg/volume/util

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package util
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"time"
    23  
    24  	"google.golang.org/grpc/codes"
    25  	"google.golang.org/grpc/status"
    26  	"k8s.io/component-base/metrics"
    27  	"k8s.io/component-base/metrics/legacyregistry"
    28  	"k8s.io/kubernetes/pkg/volume"
    29  	"k8s.io/kubernetes/pkg/volume/util/types"
    30  )
    31  
// Values used for the "status" label of StorageOperationMetric.
const (
	// statusSuccess is reported when the completed operation carried no error.
	statusSuccess     = "success"
	// statusFailUnknown is reported for any failed operation; failures are not
	// yet broken down into more specific causes.
	statusFailUnknown = "fail-unknown"
)
    36  
/*
 * By default, all the following metrics are defined as falling under the
 * ALPHA stability level (https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1209-metrics-stability/kubernetes-control-plane-metrics-stability.md#stability-classes).
 *
 * Promoting the stability level of a metric is the responsibility of the component owner, since it
 * involves explicitly acknowledging support for the metric across multiple releases, in accordance with
 * the metric stability policy.
 */
    45  
// StorageOperationMetric is a histogram of storage operation durations in
// seconds, labelled by volume plugin, operation name, completion status and
// whether the volume was migrated. It is fed by the hook returned from
// OperationCompleteHook.
var StorageOperationMetric = metrics.NewHistogramVec(
	&metrics.HistogramOpts{
		Name:           "storage_operation_duration_seconds",
		Help:           "Storage operation duration",
		Buckets:        []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
		StabilityLevel: metrics.ALPHA,
	},
	[]string{"volume_plugin", "operation_name", "status", "migrated"},
)
    55  
// storageOperationEndToEndLatencyMetric is a histogram of end to end storage
// operation durations in seconds, labelled by plugin name and operation name.
// It is fed by RecordOperationLatencyMetric.
var storageOperationEndToEndLatencyMetric = metrics.NewHistogramVec(
	&metrics.HistogramOpts{
		Name:           "volume_operation_total_seconds",
		Help:           "Storage operation end to end duration in seconds",
		Buckets:        []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
		StabilityLevel: metrics.ALPHA,
	},
	[]string{"plugin_name", "operation_name"},
)
    65  
// csiOperationsLatencyMetric is a histogram of CSI operation durations,
// declared under the "csi" subsystem and labelled by driver name, method name,
// gRPC status code and whether the volume was migrated. It is fed by
// RecordCSIOperationLatencyMetrics.
var csiOperationsLatencyMetric = metrics.NewHistogramVec(
	&metrics.HistogramOpts{
		Subsystem:      "csi",
		Name:           "operations_seconds",
		Help:           "Container Storage Interface operation duration with gRPC error code status total",
		Buckets:        []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 25, 50, 120, 300, 600},
		StabilityLevel: metrics.ALPHA,
	},
	[]string{"driver_name", "method_name", "grpc_status_code", "migrated"},
)
    76  
    77  func init() {
    78  	registerMetrics()
    79  }
    80  
    81  func registerMetrics() {
    82  	// legacyregistry is the internal k8s wrapper around the prometheus
    83  	// global registry, used specifically for metric stability enforcement
    84  	legacyregistry.MustRegister(StorageOperationMetric)
    85  	legacyregistry.MustRegister(storageOperationEndToEndLatencyMetric)
    86  	legacyregistry.MustRegister(csiOperationsLatencyMetric)
    87  }
    88  
    89  // OperationCompleteHook returns a hook to call when an operation is completed
    90  func OperationCompleteHook(plugin, operationName string) func(types.CompleteFuncParam) {
    91  	requestTime := time.Now()
    92  	opComplete := func(c types.CompleteFuncParam) {
    93  		timeTaken := time.Since(requestTime).Seconds()
    94  		// Create metric with operation name and plugin name
    95  		status := statusSuccess
    96  		if *c.Err != nil {
    97  			// TODO: Establish well-known error codes to be able to distinguish
    98  			// user configuration errors from system errors.
    99  			status = statusFailUnknown
   100  		}
   101  		migrated := false
   102  		if c.Migrated != nil {
   103  			migrated = *c.Migrated
   104  		}
   105  		StorageOperationMetric.WithLabelValues(plugin, operationName, status, strconv.FormatBool(migrated)).Observe(timeTaken)
   106  	}
   107  	return opComplete
   108  }
   109  
   110  // FSGroupCompleteHook returns a hook to call when volume recursive permission is changed
   111  func FSGroupCompleteHook(plugin volume.VolumePlugin, spec *volume.Spec) func(types.CompleteFuncParam) {
   112  	return OperationCompleteHook(GetFullQualifiedPluginNameForVolume(plugin.GetPluginName(), spec), "volume_apply_access_control")
   113  }
   114  
   115  // GetFullQualifiedPluginNameForVolume returns full qualified plugin name for
   116  // given volume. For CSI plugin, it appends plugin driver name at the end of
   117  // plugin name, e.g. kubernetes.io/csi:csi-hostpath. It helps to distinguish
   118  // between metrics emitted for CSI volumes which may be handled by different
   119  // CSI plugin drivers.
   120  func GetFullQualifiedPluginNameForVolume(pluginName string, spec *volume.Spec) string {
   121  	if spec != nil {
   122  		if spec.Volume != nil && spec.Volume.CSI != nil {
   123  			return fmt.Sprintf("%s:%s", pluginName, spec.Volume.CSI.Driver)
   124  		}
   125  		if spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil {
   126  			return fmt.Sprintf("%s:%s", pluginName, spec.PersistentVolume.Spec.CSI.Driver)
   127  		}
   128  	}
   129  	return pluginName
   130  }
   131  
   132  // RecordOperationLatencyMetric records the end to end latency for certain operation
   133  // into metric volume_operation_total_seconds
   134  func RecordOperationLatencyMetric(plugin, operationName string, secondsTaken float64) {
   135  	storageOperationEndToEndLatencyMetric.WithLabelValues(plugin, operationName).Observe(secondsTaken)
   136  }
   137  
   138  // RecordCSIOperationLatencyMetrics records the CSI operation latency and grpc status
   139  // into metric csi_kubelet_operations_seconds
   140  func RecordCSIOperationLatencyMetrics(driverName string,
   141  	operationName string,
   142  	operationErr error,
   143  	operationDuration time.Duration,
   144  	migrated string) {
   145  	csiOperationsLatencyMetric.WithLabelValues(driverName, operationName, getErrorCode(operationErr), migrated).Observe(operationDuration.Seconds())
   146  }
   147  
   148  func getErrorCode(err error) string {
   149  	if err == nil {
   150  		return codes.OK.String()
   151  	}
   152  
   153  	st, ok := status.FromError(err)
   154  	if !ok {
   155  		// This is not gRPC error. The operation must have failed before gRPC
   156  		// method was called, otherwise we would get gRPC error.
   157  		return "unknown-non-grpc"
   158  	}
   159  
   160  	return st.Code().String()
   161  }
   162  

View as plain text