package chariot import ( "errors" "fmt" "sync" "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/api/googleapi" ) const ( MetricNameDeadlineExceededTotal = "chariot2_deadline_exceeded_total" MetricNameErrorsTotal = "chariot2_errors_total" MetricNameGoogleAPIErrorsTotal = "chariot2_google_api_errors_total" MetricNameRequestsTotal = "chariot2_requests_total" MetricNameStorageOperationsTotal = "chariot2_storage_operations_total" ) var MetricsCounterVecHelp = map[string]string{ MetricNameDeadlineExceededTotal: "tracks how many times context.DeadlineExceeded errors result in a failed request", MetricNameErrorsTotal: "tracks the total amount of errors that caused a request to fail", MetricNameGoogleAPIErrorsTotal: "tracks every google api error", MetricNameRequestsTotal: "Total amount of requests, their operation, and whether they succeeded", MetricNameStorageOperationsTotal: "Total amount of storage operations chariot performs", } var MetricsLabels = map[string][]string{ MetricNameDeadlineExceededTotal: {"timeout"}, MetricNameErrorsTotal: {"operation"}, MetricNameGoogleAPIErrorsTotal: {"status_code", "reason"}, MetricNameRequestsTotal: {"operation", "successful"}, MetricNameStorageOperationsTotal: {"operation", "dir", "scope", "result"}, } type Metrics struct { deadlineExceededTotal *prometheus.CounterVec errorsTotal *prometheus.CounterVec googleAPIErrorsTotal *prometheus.CounterVec requestsTotal *prometheus.CounterVec storageOpsTotal *prometheus.CounterVec } // metricsOnce protects importers of Chariot from automatically registering chariot2 metrics. // metricsOnce also prevents a promauto panic. var metricsOnce struct { sync.Once m *Metrics } func NewMetrics() *Metrics { metricsOnce.Do(func() { var counters = make(map[string]*prometheus.CounterVec) for name, help := range MetricsCounterVecHelp { counters[name] = promauto.NewCounterVec(prometheus.CounterOpts{ Name: name, Help: help, }, MetricsLabels[name]) } metricsOnce.m = &Metrics{ deadlineExceededTotal: counters[MetricNameDeadlineExceededTotal], errorsTotal: counters[MetricNameErrorsTotal], googleAPIErrorsTotal: counters[MetricNameGoogleAPIErrorsTotal], requestsTotal: counters[MetricNameRequestsTotal], storageOpsTotal: counters[MetricNameStorageOperationsTotal], } }) return metricsOnce.m } func (m *Metrics) IncErrorsTotal(req Request) { var l = prometheus.Labels{"operation": req.Operation} m.errorsTotal.With(l).Inc() } func (m *Metrics) IncGoogleAPIErrorsTotal(err error) { var gerr *googleapi.Error if !errors.As(err, &gerr) { return // return without doing anything } var l = prometheus.Labels{ "status_code": fmt.Sprint(gerr.Code), } // The googleapi.Error might provide an Errors field which contains more detailed reasons for the failure. if len(gerr.Errors) == 0 { l["reason"] = "unknown" m.googleAPIErrorsTotal.With(l).Inc() } else { // Not sure if we'll ever see multiple errors, but loop through them anyway. Increase the count for each // ErrorItem in Errors. for _, ei := range gerr.Errors { l["reason"] = ei.Reason // Reason is a typed error code, and therefore has bound orthogonality. m.googleAPIErrorsTotal.With(l).Inc() } } } func (m *Metrics) IncDeadlineExceededTotal(timeout time.Duration) { var l = prometheus.Labels{ "timeout": fmt.Sprint(timeout), } m.deadlineExceededTotal.With(l).Inc() } func (m *Metrics) IncRequestsTotal(req Request, successful bool) { var op = req.Operation if op == "" { op = "unknown" // this will be set whenever requests can't be unmarshaled. } var l = prometheus.Labels{ "operation": op, "successful": fmt.Sprint(successful), } m.requestsTotal.With(l).Inc() } func (m *Metrics) IncStorageOperationsTotal(req Request, si StorageInfo) { var op = req.Operation var dir = req.Dir // orthogonality is not a concern since the dirs are (should be) limited to a small whitelisted set. Good to know when it is used. var scope = "banner" // Scope is whether it's a banner-wide or cluster-wide storage operation. if req.Cluster != "" { scope = "cluster" } var mets = map[string]int{ OperationCreate: len(si.ObjectsPut), OperationDelete: len(si.ObjectsDeleted), "DNE": len(si.ObjectsDoNotExist), "ERROR": len(si.Errors), } for result, count := range mets { if count != 0 { var l = prometheus.Labels{ "operation": op, "dir": dir, "scope": scope, "result": result, } m.storageOpsTotal.With(l).Add(float64(count)) } } }