...

Source file src/cloud.google.com/go/cloudsqlconn/internal/trace/metrics.go

Documentation: cloud.google.com/go/cloudsqlconn/internal/trace

     1  // Copyright 2022 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package trace
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"strings"
    22  	"sync"
    23  
    24  	"go.opencensus.io/stats"
    25  	"go.opencensus.io/stats/view"
    26  	"go.opencensus.io/tag"
    27  	"google.golang.org/api/googleapi"
    28  )
    29  
    30  var (
    31  	keyInstance, _  = tag.NewKey("cloudsql_instance")
    32  	keyDialerID, _  = tag.NewKey("cloudsql_dialer_id")
    33  	keyErrorCode, _ = tag.NewKey("cloudsql_error_code")
    34  
    35  	mLatencyMS = stats.Int64(
    36  		"cloudsqlconn/latency",
    37  		"The latency in milliseconds per Dial",
    38  		stats.UnitMilliseconds,
    39  	)
    40  	mConnections = stats.Int64(
    41  		"cloudsqlconn/connection",
    42  		"A connect or disconnect event to Cloud SQL",
    43  		stats.UnitDimensionless,
    44  	)
    45  	mDialError = stats.Int64(
    46  		"cloudsqlconn/dial_failure",
    47  		"A failure to dial a Cloud SQL instance",
    48  		stats.UnitDimensionless,
    49  	)
    50  	mSuccessfulRefresh = stats.Int64(
    51  		"cloudsqlconn/refresh_success",
    52  		"A successful certificate refresh operation",
    53  		stats.UnitDimensionless,
    54  	)
    55  	mFailedRefresh = stats.Int64(
    56  		"cloudsqlconn/refresh_failure",
    57  		"A failed certificate refresh operation",
    58  		stats.UnitDimensionless,
    59  	)
    60  
    61  	latencyView = &view.View{
    62  		Name:        "cloudsqlconn/dial_latency",
    63  		Measure:     mLatencyMS,
    64  		Description: "The distribution of dialer latencies (ms)",
    65  		// Latency in buckets, e.g., >=0ms, >=100ms, etc.
    66  		Aggregation: view.Distribution(0, 5, 25, 100, 250, 500, 1000, 2000, 5000, 30000),
    67  		TagKeys:     []tag.Key{keyInstance, keyDialerID},
    68  	}
    69  	connectionsView = &view.View{
    70  		Name:        "cloudsqlconn/open_connections",
    71  		Measure:     mConnections,
    72  		Description: "The current number of open Cloud SQL connections",
    73  		Aggregation: view.LastValue(),
    74  		TagKeys:     []tag.Key{keyInstance, keyDialerID},
    75  	}
    76  	dialFailureView = &view.View{
    77  		Name:        "cloudsqlconn/dial_failure_count",
    78  		Measure:     mDialError,
    79  		Description: "The number of failed dial attempts",
    80  		Aggregation: view.Count(),
    81  		TagKeys:     []tag.Key{keyInstance, keyDialerID},
    82  	}
    83  	refreshCountView = &view.View{
    84  		Name:        "cloudsqlconn/refresh_success_count",
    85  		Measure:     mSuccessfulRefresh,
    86  		Description: "The number of successful certificate refresh operations",
    87  		Aggregation: view.Count(),
    88  		TagKeys:     []tag.Key{keyInstance, keyDialerID},
    89  	}
    90  	failedRefreshCountView = &view.View{
    91  		Name:        "cloudsqlconn/refresh_failure_count",
    92  		Measure:     mFailedRefresh,
    93  		Description: "The number of failed certificate refresh operations",
    94  		Aggregation: view.Count(),
    95  		TagKeys:     []tag.Key{keyInstance, keyDialerID, keyErrorCode},
    96  	}
    97  
    98  	registerOnce sync.Once
    99  	registerErr  error
   100  )
   101  
   102  // InitMetrics registers all views once. Without registering views, metrics will
   103  // not be reported. If any names of the registered views conflict, this function
   104  // returns an error to indicate an internal configuration problem.
   105  func InitMetrics() error {
   106  	registerOnce.Do(func() {
   107  		if rErr := view.Register(
   108  			latencyView,
   109  			connectionsView,
   110  			dialFailureView,
   111  			refreshCountView,
   112  			failedRefreshCountView,
   113  		); rErr != nil {
   114  			registerErr = fmt.Errorf("failed to initialize metrics: %v", rErr)
   115  		}
   116  	})
   117  	return registerErr
   118  }
   119  
   120  // RecordDialLatency records a latency value for a call to dial.
   121  func RecordDialLatency(ctx context.Context, instance, dialerID string, latency int64) {
   122  	// tag.New creates a new context and errors only if the new tag already
   123  	// exists in the provided context. Since we're adding tags within this
   124  	// package only, we can be confident that there were be no duplicate tags
   125  	// and so can ignore the error.
   126  	ctx, _ = tag.New(ctx, tag.Upsert(keyInstance, instance), tag.Upsert(keyDialerID, dialerID))
   127  	stats.Record(ctx, mLatencyMS.M(latency))
   128  }
   129  
   130  // RecordOpenConnections records the number of open connections
   131  func RecordOpenConnections(ctx context.Context, num int64, dialerID, instance string) {
   132  	ctx, _ = tag.New(ctx, tag.Upsert(keyInstance, instance), tag.Upsert(keyDialerID, dialerID))
   133  	stats.Record(ctx, mConnections.M(num))
   134  }
   135  
   136  // RecordDialError reports a failed dial attempt. If err is nil, RecordDialError
   137  // is a no-op.
   138  func RecordDialError(ctx context.Context, instance, dialerID string, err error) {
   139  	if err == nil {
   140  		return
   141  	}
   142  	ctx, _ = tag.New(ctx, tag.Upsert(keyInstance, instance), tag.Upsert(keyDialerID, dialerID))
   143  	stats.Record(ctx, mDialError.M(1))
   144  }
   145  
   146  // RecordRefreshResult reports the result of a refresh operation, either
   147  // successfull or failed.
   148  func RecordRefreshResult(ctx context.Context, instance, dialerID string, err error) {
   149  	ctx, _ = tag.New(ctx, tag.Upsert(keyInstance, instance), tag.Upsert(keyDialerID, dialerID))
   150  	if err != nil {
   151  		if c := errorCode(err); c != "" {
   152  			ctx, _ = tag.New(ctx, tag.Upsert(keyErrorCode, c))
   153  		}
   154  		stats.Record(ctx, mFailedRefresh.M(1))
   155  		return
   156  	}
   157  	stats.Record(ctx, mSuccessfulRefresh.M(1))
   158  }
   159  
   160  // errorCode returns an error code as given from the SQL Admin API, provided the
   161  // error wraps a googleapi.Error type. If multiple error codes are returned from
   162  // the API, then a comma-separated string of all codes is returned.
   163  //
   164  // For possible error codes and their meaning see:
   165  // https://cloud.google.com/sql/docs/mysql/admin-api-error-messages
   166  func errorCode(err error) string {
   167  	var apiErr *googleapi.Error
   168  	ok := errors.As(err, &apiErr)
   169  	if !ok {
   170  		return ""
   171  	}
   172  	var codes []string
   173  	for _, e := range apiErr.Errors {
   174  		codes = append(codes, e.Reason)
   175  	}
   176  	return strings.Join(codes, ",")
   177  }
   178  

View as plain text