...

Source file src/edge-infra.dev/pkg/k8s/runtime/controller/metrics/recorder.go

Documentation: edge-infra.dev/pkg/k8s/runtime/controller/metrics

     1  package metrics
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/prometheus/client_golang/prometheus"
     8  	corev1 "k8s.io/api/core/v1"
     9  
    10  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    11  )
    12  
// Parameters for the buckets of the reconcile duration histogram created in
// NewRecorder. They are handed, in this order, to
// prometheus.ExponentialBucketsRange; the histogram observes durations in
// seconds.
const (
	// reconcileDurationBucketsMin is the minimum passed to the bucket range.
	reconcileDurationBucketsMin   = 0.05
	// reconcileDurationBucketsMax is the maximum passed to the bucket range.
	reconcileDurationBucketsMax   = 600.0
	// reconcileDurationBucketsCount is the number of buckets requested.
	reconcileDurationBucketsCount = 10
)
    18  
    19  // Name returns a standard Edge controller metric name using the component name
    20  // and base metric name. It prepends a standard edge-wide metric prefix.
    21  //
    22  // Note: If https://github.com/kubernetes-sigs/controller-runtime/issues/1995
    23  // is resolved, this function can be deprecated in favor of using
    24  // prometheus.WrapRegistererWithPrefix().
    25  func Name(prefix, metricName string) string {
    26  	return fmt.Sprintf("edge_%s_%s", prefix, metricName)
    27  }
    28  
// Recorder is a standard metrics collector for K8s controllers. It contains
// pre-defined collectors that are instantiated via NewRecorder() by default.
// In general, the expectation is that the Recorder struct would only be used by
// controller authors via the Metrics struct.
type Recorder struct {
	// reconcileConditionGauge is created by NewRecorder only when the reason
	// option is off; RecordCondition returns an error while it is nil.
	reconcileConditionGauge           *prometheus.GaugeVec
	// reconcileConditionGaugeWithReason is created by NewRecorder only when the
	// reason option is on; it carries an additional `reason` label.
	reconcileConditionGaugeWithReason *prometheus.GaugeVec
	// suspendGauge is created by NewRecorder only when the suspend option is
	// on; RecordSuspend returns an error while it is nil.
	suspendGauge                      *prometheus.GaugeVec
	// durationHistogram is always created by NewRecorder and is written by
	// RecordDuration.
	durationHistogram                 *prometheus.HistogramVec

	// additional user-provided collectors, for specific controllers
	// (NewRecorder also appends every collector it creates to this slice, which
	// is what Collectors() returns)
	collectors []prometheus.Collector
}
    42  
    43  // Create a new Recorder for recording standard metrics and any additional metrics
    44  // added via WithCollectors(). The metrics prefix string is appended after a
    45  // standard edge prefix. e.g., a prefix of "cluster" would produce metric names
    46  // like "edge_cluster_reconcile_condition_status", etc. Because any additional
    47  // prometheus.Collectors don't make their metric name visible via the Collector
    48  // interface, we cannot add the same prefix for additional custom collectors
    49  // (see https://github.com/kubernetes-sigs/controller-runtime/issues/1995).
    50  // For custom collectors, its recommended to use the Name() function to create
    51  // consistent metric names.
    52  func NewRecorder(prefix string, options ...Option) *Recorder {
    53  	opts := makeOptions(options...)
    54  	rec := &Recorder{collectors: opts.customCollectors}
    55  
    56  	if opts.reason {
    57  		// If WithReason() is passed during setup, reconcile_condition_status will also include the `reason` label.
    58  		rec.reconcileConditionGaugeWithReason = prometheus.NewGaugeVec(
    59  			prometheus.GaugeOpts{
    60  				Name: Name(prefix, "reconcile_condition_status"),
    61  				Help: "The current condition status of a controller's resource reconciliation with reason included.",
    62  			},
    63  			[]string{"kind", "name", "namespace", "type", "reason", "status"},
    64  		)
    65  		rec.collectors = append(rec.collectors, rec.reconcileConditionGaugeWithReason)
    66  	} else {
    67  		rec.reconcileConditionGauge = prometheus.NewGaugeVec(
    68  			prometheus.GaugeOpts{
    69  				Name: Name(prefix, "reconcile_condition_status"),
    70  				Help: "The current condition status of a controller's resource reconciliation.",
    71  			},
    72  			[]string{"kind", "name", "namespace", "type", "status"},
    73  		)
    74  		rec.collectors = append(rec.collectors, rec.reconcileConditionGauge)
    75  	}
    76  
    77  	// set histogram buckets to have more useful sizes
    78  	rec.durationHistogram = prometheus.NewHistogramVec(
    79  		prometheus.HistogramOpts{
    80  			Name:    Name(prefix, "reconcile_duration_seconds"),
    81  			Help:    "The duration in seconds of a controller's resource reconciliation.",
    82  			Buckets: prometheus.ExponentialBucketsRange(reconcileDurationBucketsMin, reconcileDurationBucketsMax, reconcileDurationBucketsCount),
    83  		},
    84  		[]string{"kind", "name", "namespace"},
    85  	)
    86  
    87  	rec.collectors = append(rec.collectors, rec.durationHistogram)
    88  
    89  	// add optional recorders
    90  	if opts.suspend {
    91  		rec.suspendGauge = prometheus.NewGaugeVec(
    92  			prometheus.GaugeOpts{
    93  				Name: Name(prefix, "reconcile_suspend_status"),
    94  				Help: "The current suspend status of a controller's resource.",
    95  			},
    96  			[]string{"kind", "name", "namespace"},
    97  		)
    98  		rec.collectors = append(rec.collectors, rec.suspendGauge)
    99  	}
   100  
   101  	return rec
   102  }
   103  
   104  // Collectors returns a slice of Prometheus collectors, which can be used to
   105  // register them in a metrics registry.
   106  // TODO: Create collectors iteratively for each desired non-standard metric
   107  func (r *Recorder) Collectors() []prometheus.Collector {
   108  	return r.collectors
   109  }
   110  
   111  // RecordDuration records the duration since start for the given ref.
   112  func (r *Recorder) RecordDuration(ref corev1.ObjectReference, start time.Time) {
   113  	r.durationHistogram.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Observe(time.Since(start).Seconds())
   114  }
   115  
   116  // RecordSuspend records the suspend status as given for the ref.
   117  func (r *Recorder) RecordSuspend(ref corev1.ObjectReference, suspend bool) error {
   118  	// If suspendGauge is not set for the recorder, return
   119  	if r.suspendGauge == nil {
   120  		return fmt.Errorf("suspendGauge not set")
   121  	}
   122  	var value float64
   123  	if suspend {
   124  		value = 1
   125  	}
   126  	r.suspendGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Set(value)
   127  
   128  	return nil
   129  }
   130  
   131  // RecordCondition records the condition as given for the ref. If the object is
   132  // marked for deletion, the metrics for the referenced object are deleted.
   133  func (r *Recorder) RecordCondition(ref corev1.ObjectReference, condition metav1.Condition, deleted bool) error {
   134  	// If reconcileConditionGauge is not set for the recorder, return
   135  	if r.reconcileConditionGauge == nil {
   136  		return fmt.Errorf("reconcileConditionGauge not set")
   137  	}
   138  
   139  	labels := prometheus.Labels{
   140  		"kind":      ref.Kind,
   141  		"name":      ref.Name,
   142  		"namespace": ref.Namespace,
   143  		"type":      condition.Type,
   144  	}
   145  
   146  	for _, status := range []string{string(metav1.ConditionTrue), string(metav1.ConditionFalse), string(metav1.ConditionUnknown)} {
   147  		var value float64
   148  		labels["status"] = status
   149  		if deleted {
   150  			r.reconcileConditionGauge.DeletePartialMatch(labels)
   151  		} else {
   152  			if status == string(condition.Status) {
   153  				value = 1
   154  			}
   155  			r.reconcileConditionGauge.With(labels).Set(value)
   156  		}
   157  	}
   158  
   159  	return nil
   160  }
   161  
   162  // RecordConditionWithReason records the condition as given for the ref.
   163  // If the object is marked for deletion, the metrics for the referenced object are deleted.
   164  // TODO(dk185217): RecordCondition should be deprecated in favor of this approach
   165  func (r *Recorder) RecordConditionWithReason(ref corev1.ObjectReference, condition metav1.Condition, deleted bool) error {
   166  	if deleted {
   167  		return nil
   168  	}
   169  
   170  	labels := prometheus.Labels{
   171  		"kind":      ref.Kind,
   172  		"name":      ref.Name,
   173  		"namespace": ref.Namespace,
   174  		"type":      condition.Type,
   175  		"status":    string(condition.Status),
   176  		"reason":    condition.Reason,
   177  	}
   178  
   179  	r.reconcileConditionGaugeWithReason.With(labels).Set(1)
   180  	return nil
   181  }
   182  

View as plain text