trace.go

Documentation: k8s.io/utils/trace

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package trace
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"fmt"
    23  	"math/rand"
    24  	"sync"
    25  	"time"
    26  
    27  	"k8s.io/klog/v2"
    28  )
    29  
    30  var klogV = func(lvl klog.Level) bool {
    31  	return klog.V(lvl).Enabled()
    32  }
    33  
    34  // Field is a key value pair that provides additional details about the trace.
    35  type Field struct {
    36  	Key   string
    37  	Value interface{}
    38  }
    39  
    40  func (f Field) format() string {
    41  	return fmt.Sprintf("%s:%v", f.Key, f.Value)
    42  }
    43  
    44  func writeFields(b *bytes.Buffer, l []Field) {
    45  	for i, f := range l {
    46  		b.WriteString(f.format())
    47  		if i < len(l)-1 {
    48  			b.WriteString(",")
    49  		}
    50  	}
    51  }
    52  
    53  func writeTraceItemSummary(b *bytes.Buffer, msg string, totalTime time.Duration, startTime time.Time, fields []Field) {
    54  	b.WriteString(fmt.Sprintf("%q ", msg))
    55  	if len(fields) > 0 {
    56  		writeFields(b, fields)
    57  		b.WriteString(" ")
    58  	}
    59  
    60  	b.WriteString(fmt.Sprintf("%vms (%v)", durationToMilliseconds(totalTime), startTime.Format("15:04:05.000")))
    61  }
    62  
    63  func durationToMilliseconds(timeDuration time.Duration) int64 {
    64  	return timeDuration.Nanoseconds() / 1e6
    65  }
    66  
    67  type traceItem interface {
    68  	// rLock must be called before invoking time or writeItem.
    69  	rLock()
    70  	// rUnlock must be called after processing the item is complete.
    71  	rUnlock()
    72  
    73  	// time returns when the trace was recorded as completed.
    74  	time() time.Time
    75  	// writeItem outputs the traceItem to the buffer. If stepThreshold is non-nil, only output the
    76  	// traceItem if its the duration exceeds the stepThreshold.
    77  	// Each line of output is prefixed by formatter to visually indent nested items.
    78  	writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration)
    79  }
    80  
    81  type traceStep struct {
    82  	stepTime time.Time
    83  	msg      string
    84  	fields   []Field
    85  }
    86  
    87  // rLock doesn't need to do anything because traceStep instances are immutable.
    88  func (s traceStep) rLock()   {}
    89  func (s traceStep) rUnlock() {}
    90  
    91  func (s traceStep) time() time.Time {
    92  	return s.stepTime
    93  }
    94  
    95  func (s traceStep) writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration) {
    96  	stepDuration := s.stepTime.Sub(startTime)
    97  	if stepThreshold == nil || *stepThreshold == 0 || stepDuration >= *stepThreshold || klogV(4) {
    98  		b.WriteString(fmt.Sprintf("%s---", formatter))
    99  		writeTraceItemSummary(b, s.msg, stepDuration, s.stepTime, s.fields)
   100  	}
   101  }
   102  
   103  // Trace keeps track of a set of "steps" and allows us to log a specific
   104  // step if it took longer than its share of the total allowed time
   105  type Trace struct {
   106  	// constant fields
   107  	name        string
   108  	fields      []Field
   109  	startTime   time.Time
   110  	parentTrace *Trace
   111  	// fields guarded by a lock
   112  	lock       sync.RWMutex
   113  	threshold  *time.Duration
   114  	endTime    *time.Time
   115  	traceItems []traceItem
   116  }
   117  
   118  func (t *Trace) rLock() {
   119  	t.lock.RLock()
   120  }
   121  
   122  func (t *Trace) rUnlock() {
   123  	t.lock.RUnlock()
   124  }
   125  
   126  func (t *Trace) time() time.Time {
   127  	if t.endTime != nil {
   128  		return *t.endTime
   129  	}
   130  	return t.startTime // if the trace is incomplete, don't assume an end time
   131  }
   132  
   133  func (t *Trace) writeItem(b *bytes.Buffer, formatter string, startTime time.Time, stepThreshold *time.Duration) {
   134  	if t.durationIsWithinThreshold() || klogV(4) {
   135  		b.WriteString(fmt.Sprintf("%v[", formatter))
   136  		writeTraceItemSummary(b, t.name, t.TotalTime(), t.startTime, t.fields)
   137  		if st := t.calculateStepThreshold(); st != nil {
   138  			stepThreshold = st
   139  		}
   140  		t.writeTraceSteps(b, formatter+" ", stepThreshold)
   141  		b.WriteString("]")
   142  		return
   143  	}
   144  	// If the trace should not be written, still check for nested traces that should be written
   145  	for _, s := range t.traceItems {
   146  		if nestedTrace, ok := s.(*Trace); ok {
   147  			nestedTrace.writeItem(b, formatter, startTime, stepThreshold)
   148  		}
   149  	}
   150  }
   151  
   152  // New creates a Trace with the specified name. The name identifies the operation to be traced. The
   153  // Fields add key value pairs to provide additional details about the trace, such as operation inputs.
   154  func New(name string, fields ...Field) *Trace {
   155  	return &Trace{name: name, startTime: time.Now(), fields: fields}
   156  }
   157  
   158  // Step adds a new step with a specific message. Call this at the end of an execution step to record
   159  // how long it took. The Fields add key value pairs to provide additional details about the trace
   160  // step.
   161  func (t *Trace) Step(msg string, fields ...Field) {
   162  	t.lock.Lock()
   163  	defer t.lock.Unlock()
   164  	if t.traceItems == nil {
   165  		// traces almost always have less than 6 steps, do this to avoid more than a single allocation
   166  		t.traceItems = make([]traceItem, 0, 6)
   167  	}
   168  	t.traceItems = append(t.traceItems, traceStep{stepTime: time.Now(), msg: msg, fields: fields})
   169  }
   170  
   171  // Nest adds a nested trace with the given message and fields and returns it.
   172  // As a convenience, if the receiver is nil, returns a top level trace. This allows
   173  // one to call FromContext(ctx).Nest without having to check if the trace
   174  // in the context is nil.
   175  func (t *Trace) Nest(msg string, fields ...Field) *Trace {
   176  	newTrace := New(msg, fields...)
   177  	if t != nil {
   178  		newTrace.parentTrace = t
   179  		t.lock.Lock()
   180  		t.traceItems = append(t.traceItems, newTrace)
   181  		t.lock.Unlock()
   182  	}
   183  	return newTrace
   184  }
   185  
   186  // Log is used to dump all the steps in the Trace. It also logs the nested trace messages using indentation.
   187  // If the Trace is nested it is not immediately logged. Instead, it is logged when the trace it is nested within
   188  // is logged.
   189  func (t *Trace) Log() {
   190  	endTime := time.Now()
   191  	t.lock.Lock()
   192  	t.endTime = &endTime
   193  	t.lock.Unlock()
   194  	// an explicit logging request should dump all the steps out at the higher level
   195  	if t.parentTrace == nil && klogV(2) { // We don't start logging until Log or LogIfLong is called on the root trace
   196  		t.logTrace()
   197  	}
   198  }
   199  
   200  // LogIfLong only logs the trace if the duration of the trace exceeds the threshold.
   201  // Only steps that took longer than their share or the given threshold are logged.
   202  // If klog is at verbosity level 4 or higher and the trace took longer than the threshold,
   203  // all substeps and subtraces are logged. Otherwise, only those which took longer than
   204  // their own threshold.
   205  // If the Trace is nested it is not immediately logged. Instead, it is logged when the trace it
   206  // is nested within is logged.
   207  func (t *Trace) LogIfLong(threshold time.Duration) {
   208  	t.lock.Lock()
   209  	t.threshold = &threshold
   210  	t.lock.Unlock()
   211  	t.Log()
   212  }
   213  
   214  // logTopLevelTraces finds all traces in a hierarchy of nested traces that should be logged but do not have any
   215  // parents that will be logged, due to threshold limits, and logs them as top level traces.
   216  func (t *Trace) logTrace() {
   217  	t.lock.RLock()
   218  	defer t.lock.RUnlock()
   219  	if t.durationIsWithinThreshold() {
   220  		var buffer bytes.Buffer
   221  		traceNum := rand.Int31()
   222  
   223  		totalTime := t.endTime.Sub(t.startTime)
   224  		buffer.WriteString(fmt.Sprintf("Trace[%d]: %q ", traceNum, t.name))
   225  		if len(t.fields) > 0 {
   226  			writeFields(&buffer, t.fields)
   227  			buffer.WriteString(" ")
   228  		}
   229  
   230  		// if any step took more than it's share of the total allowed time, it deserves a higher log level
   231  		buffer.WriteString(fmt.Sprintf("(%v) (total time: %vms):", t.startTime.Format("02-Jan-2006 15:04:05.000"), totalTime.Milliseconds()))
   232  		stepThreshold := t.calculateStepThreshold()
   233  		t.writeTraceSteps(&buffer, fmt.Sprintf("\nTrace[%d]: ", traceNum), stepThreshold)
   234  		buffer.WriteString(fmt.Sprintf("\nTrace[%d]: [%v] [%v] END\n", traceNum, t.endTime.Sub(t.startTime), totalTime))
   235  
   236  		klog.Info(buffer.String())
   237  		return
   238  	}
   239  
   240  	// If the trace should not be logged, still check if nested traces should be logged
   241  	for _, s := range t.traceItems {
   242  		if nestedTrace, ok := s.(*Trace); ok {
   243  			nestedTrace.logTrace()
   244  		}
   245  	}
   246  }
   247  
   248  func (t *Trace) writeTraceSteps(b *bytes.Buffer, formatter string, stepThreshold *time.Duration) {
   249  	lastStepTime := t.startTime
   250  	for _, stepOrTrace := range t.traceItems {
   251  		stepOrTrace.rLock()
   252  		stepOrTrace.writeItem(b, formatter, lastStepTime, stepThreshold)
   253  		lastStepTime = stepOrTrace.time()
   254  		stepOrTrace.rUnlock()
   255  	}
   256  }
   257  
   258  func (t *Trace) durationIsWithinThreshold() bool {
   259  	if t.endTime == nil { // we don't assume incomplete traces meet the threshold
   260  		return false
   261  	}
   262  	return t.threshold == nil || *t.threshold == 0 || t.endTime.Sub(t.startTime) >= *t.threshold
   263  }
   264  
   265  // TotalTime can be used to figure out how long it took since the Trace was created
   266  func (t *Trace) TotalTime() time.Duration {
   267  	return time.Since(t.startTime)
   268  }
   269  
   270  // calculateStepThreshold returns a threshold for the individual steps of a trace, or nil if there is no threshold and
   271  // all steps should be written.
   272  func (t *Trace) calculateStepThreshold() *time.Duration {
   273  	if t.threshold == nil {
   274  		return nil
   275  	}
   276  	lenTrace := len(t.traceItems) + 1
   277  	traceThreshold := *t.threshold
   278  	for _, s := range t.traceItems {
   279  		nestedTrace, ok := s.(*Trace)
   280  		if ok {
   281  			nestedTrace.lock.RLock()
   282  			if nestedTrace.threshold != nil {
   283  				traceThreshold = traceThreshold - *nestedTrace.threshold
   284  				lenTrace--
   285  			}
   286  			nestedTrace.lock.RUnlock()
   287  		}
   288  	}
   289  
   290  	// the limit threshold is used when the threshold(
   291  	//remaining after subtracting that of the child trace) is getting very close to zero to prevent unnecessary logging
   292  	limitThreshold := *t.threshold / 4
   293  	if traceThreshold < limitThreshold {
   294  		traceThreshold = limitThreshold
   295  		lenTrace = len(t.traceItems) + 1
   296  	}
   297  
   298  	stepThreshold := traceThreshold / time.Duration(lenTrace)
   299  	return &stepThreshold
   300  }
   301  
   302  // ContextTraceKey provides a common key for traces in context.Context values.
   303  type ContextTraceKey struct{}
   304  
   305  // FromContext returns the trace keyed by ContextTraceKey in the context values, if one
   306  // is present, or nil If there is no trace in the Context.
   307  // It is safe to call Nest() on the returned value even if it is nil because ((*Trace)nil).Nest returns a top level
   308  // trace.
   309  func FromContext(ctx context.Context) *Trace {
   310  	if v, ok := ctx.Value(ContextTraceKey{}).(*Trace); ok {
   311  		return v
   312  	}
   313  	return nil
   314  }
   315  
   316  // ContextWithTrace returns a context with trace included in the context values, keyed by ContextTraceKey.
   317  func ContextWithTrace(ctx context.Context, trace *Trace) context.Context {
   318  	return context.WithValue(ctx, ContextTraceKey{}, trace)
   319  }
   320
View as plain text