...

Source file src/k8s.io/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker_test.go

Documentation: k8s.io/kubernetes/pkg/kubelet/util

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package util
    18  
    19  import (
    20  	"strings"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  	corev1 "k8s.io/api/core/v1"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/types"
    28  
    29  	"k8s.io/component-base/metrics/testutil"
    30  	"k8s.io/kubernetes/pkg/kubelet/metrics"
    31  	testingclock "k8s.io/utils/clock/testing"
    32  )
    33  
    34  var frozenTime = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
    35  
    36  const (
    37  	uid         = "3c1df8a9-11a8-4791-aeae-184c18cca686"
    38  	metricsName = "kubelet_pod_start_sli_duration_seconds"
    39  )
    40  
    41  func TestNoEvents(t *testing.T) {
    42  	t.Run("metrics registered; no incoming events", func(t *testing.T) {
    43  
    44  		// expects no metrics in the output
    45  		wants := ""
    46  
    47  		metrics.Register()
    48  
    49  		tracker := &basicPodStartupLatencyTracker{
    50  			pods: map[types.UID]*perPodState{},
    51  		}
    52  
    53  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
    54  			t.Fatal(err)
    55  		}
    56  
    57  		assert.Empty(t, tracker.pods)
    58  		metrics.PodStartSLIDuration.Reset()
    59  	})
    60  }
    61  
    62  func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
    63  	t.Run("pod was running for 10m before kubelet started", func(t *testing.T) {
    64  
    65  		// expects no metrics in the output
    66  		wants := ""
    67  
    68  		metrics.Register()
    69  
    70  		tracker := &basicPodStartupLatencyTracker{
    71  			pods: map[types.UID]*perPodState{},
    72  		}
    73  
    74  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
    75  			t.Fatal(err)
    76  		}
    77  
    78  		podStarted := &corev1.Pod{
    79  			Status: corev1.PodStatus{
    80  				StartTime: &metav1.Time{Time: frozenTime.Add(-10 * time.Minute)},
    81  			},
    82  		}
    83  		tracker.ObservedPodOnWatch(podStarted, frozenTime)
    84  
    85  		assert.Empty(t, tracker.pods)
    86  		metrics.PodStartSLIDuration.Reset()
    87  	})
    88  }
    89  
    90  func TestSinglePodOneImageDownloadRecorded(t *testing.T) {
    91  
    92  	t.Run("single pod; started in 3s, image pulling 100ms", func(t *testing.T) {
    93  
    94  		wants := `
    95  # HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
    96  # TYPE kubelet_pod_start_sli_duration_seconds histogram
    97  kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
    98  kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
    99  kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
   100  kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 1
   101  kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 1
   102  kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
   103  kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
   104  kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
   105  kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
   106  kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
   107  kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
   108  kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
   109  kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
   110  kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
   111  kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
   112  kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
   113  kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
   114  kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
   115  kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
   116  kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
   117  kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
   118  kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
   119  kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
   120  kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
   121  kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
   122  kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
   123  kubelet_pod_start_sli_duration_seconds_sum 2.9
   124  kubelet_pod_start_sli_duration_seconds_count 1
   125  		`
   126  
   127  		fakeClock := testingclock.NewFakeClock(frozenTime)
   128  
   129  		metrics.Register()
   130  
   131  		tracker := &basicPodStartupLatencyTracker{
   132  			pods:  map[types.UID]*perPodState{},
   133  			clock: fakeClock,
   134  		}
   135  
   136  		podInit := buildInitializingPod()
   137  		tracker.ObservedPodOnWatch(podInit, frozenTime)
   138  
   139  		// image pulling took 100ms
   140  		tracker.RecordImageStartedPulling(podInit.UID)
   141  		fakeClock.Step(time.Millisecond * 100)
   142  		tracker.RecordImageFinishedPulling(podInit.UID)
   143  
   144  		podState, ok := tracker.pods[podInit.UID]
   145  		if !ok {
   146  			t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
   147  		}
   148  		if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
   149  			t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
   150  				podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
   151  		}
   152  
   153  		podStarted := buildRunningPod()
   154  		tracker.RecordStatusUpdated(podStarted)
   155  
   156  		// 3s later, observe the same pod on watch
   157  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*3))
   158  
   159  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
   160  			t.Fatal(err)
   161  		}
   162  
   163  		// cleanup
   164  		tracker.DeletePodStartupState(podStarted.UID)
   165  
   166  		assert.Empty(t, tracker.pods)
   167  		metrics.PodStartSLIDuration.Reset()
   168  	})
   169  }
   170  
   171  func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {
   172  
   173  	t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
   174  
   175  		wants := `
   176  # HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
   177  # TYPE kubelet_pod_start_sli_duration_seconds histogram
   178  kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
   179  kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
   180  kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
   181  kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
   182  kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
   183  kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 0
   184  kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 0
   185  kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 0
   186  kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 0
   187  kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
   188  kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
   189  kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
   190  kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
   191  kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
   192  kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
   193  kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
   194  kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
   195  kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
   196  kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
   197  kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
   198  kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
   199  kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
   200  kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
   201  kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
   202  kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
   203  kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
   204  kubelet_pod_start_sli_duration_seconds_sum 20
   205  kubelet_pod_start_sli_duration_seconds_count 1
   206  `
   207  
   208  		fakeClock := testingclock.NewFakeClock(frozenTime)
   209  
   210  		metrics.Register()
   211  
   212  		tracker := &basicPodStartupLatencyTracker{
   213  			pods:  map[types.UID]*perPodState{},
   214  			clock: fakeClock,
   215  		}
   216  
   217  		podInitializing := buildInitializingPod()
   218  		tracker.ObservedPodOnWatch(podInitializing, frozenTime)
   219  
   220  		// image pulling started at 10s and the last one finished at 30s
   221  		// first image starts pulling at 10s
   222  		fakeClock.SetTime(frozenTime.Add(time.Second * 10))
   223  		tracker.RecordImageStartedPulling(podInitializing.UID)
   224  		// second image starts pulling at 11s
   225  		fakeClock.SetTime(frozenTime.Add(time.Second * 11))
   226  		tracker.RecordImageStartedPulling(podInitializing.UID)
   227  		// third image starts pulling at 14s
   228  		fakeClock.SetTime(frozenTime.Add(time.Second * 14))
   229  		tracker.RecordImageStartedPulling(podInitializing.UID)
   230  		// first image finished pulling at 18s
   231  		fakeClock.SetTime(frozenTime.Add(time.Second * 18))
   232  		tracker.RecordImageFinishedPulling(podInitializing.UID)
   233  		// second and third finished pulling at 20s
   234  		fakeClock.SetTime(frozenTime.Add(time.Second * 20))
   235  		tracker.RecordImageFinishedPulling(podInitializing.UID)
   236  
   237  		podState, ok := tracker.pods[podInitializing.UID]
   238  		if !ok {
   239  			t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
   240  		}
   241  		if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling
   242  			t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
   243  				podState.firstStartedPulling.Add(time.Second*10), podState.firstStartedPulling)
   244  		}
   245  		if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image finished pulling
   246  			t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
   247  				podState.lastFinishedPulling.Add(time.Second*20), podState.lastFinishedPulling)
   248  		}
   249  
   250  		// pod started
   251  		podStarted := buildRunningPod()
   252  		tracker.RecordStatusUpdated(podStarted)
   253  
   254  		// at 30s observe the same pod on watch
   255  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
   256  
   257  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
   258  			t.Fatal(err)
   259  		}
   260  
   261  		// any new pod observations should not impact the metrics, as the pod should be recorder only once
   262  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
   263  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
   264  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
   265  
   266  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
   267  			t.Fatal(err)
   268  		}
   269  
   270  		// cleanup
   271  		tracker.DeletePodStartupState(podStarted.UID)
   272  
   273  		assert.Empty(t, tracker.pods)
   274  		metrics.PodStartSLIDuration.Reset()
   275  	})
   276  }
   277  
   278  func TestFirstNetworkPodMetrics(t *testing.T) {
   279  
   280  	t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
   281  
   282  		wants := `
   283  # HELP kubelet_first_network_pod_start_sli_duration_seconds [INTERNAL] Duration in seconds to start the first network pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
   284  # TYPE kubelet_first_network_pod_start_sli_duration_seconds gauge
   285  kubelet_first_network_pod_start_sli_duration_seconds 30
   286  `
   287  
   288  		fakeClock := testingclock.NewFakeClock(frozenTime)
   289  
   290  		metrics.Register()
   291  
   292  		tracker := &basicPodStartupLatencyTracker{
   293  			pods:  map[types.UID]*perPodState{},
   294  			clock: fakeClock,
   295  		}
   296  
   297  		// hostNetwork pods should not be tracked
   298  		hostNetworkPodInitializing := buildInitializingPod()
   299  		hostNetworkPodInitializing.UID = "11111-22222"
   300  		hostNetworkPodInitializing.Spec.HostNetwork = true
   301  		tracker.ObservedPodOnWatch(hostNetworkPodInitializing, frozenTime)
   302  
   303  		hostNetworkPodStarted := buildRunningPod()
   304  		hostNetworkPodStarted.UID = "11111-22222"
   305  		hostNetworkPodStarted.Spec.HostNetwork = true
   306  		tracker.RecordStatusUpdated(hostNetworkPodStarted)
   307  
   308  		// track only the first pod with network
   309  		podInitializing := buildInitializingPod()
   310  		tracker.ObservedPodOnWatch(podInitializing, frozenTime)
   311  
   312  		// pod started
   313  		podStarted := buildRunningPod()
   314  		tracker.RecordStatusUpdated(podStarted)
   315  
   316  		// at 30s observe the same pod on watch
   317  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
   318  
   319  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
   320  			t.Fatal(err)
   321  		}
   322  
   323  		// any new pod observations should not impact the metrics, as the pod should be recorder only once
   324  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
   325  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
   326  		tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
   327  
   328  		if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
   329  			t.Fatal(err)
   330  		}
   331  
   332  		// cleanup
   333  		tracker.DeletePodStartupState(hostNetworkPodStarted.UID)
   334  		tracker.DeletePodStartupState(podStarted.UID)
   335  
   336  		assert.Empty(t, tracker.pods)
   337  		metrics.PodStartSLIDuration.Reset()
   338  	})
   339  }
   340  
   341  func buildInitializingPod() *corev1.Pod {
   342  	return buildPodWithStatus([]corev1.ContainerStatus{
   343  		{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
   344  	})
   345  }
   346  
   347  func buildRunningPod() *corev1.Pod {
   348  	return buildPodWithStatus([]corev1.ContainerStatus{
   349  		{State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
   350  	})
   351  }
   352  
   353  func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
   354  	return &corev1.Pod{
   355  		ObjectMeta: metav1.ObjectMeta{
   356  			UID:               types.UID(uid),
   357  			CreationTimestamp: metav1.NewTime(frozenTime),
   358  		},
   359  		Status: corev1.PodStatus{
   360  			ContainerStatuses: cs,
   361  		},
   362  	}
   363  }
   364  

View as plain text