1
16
17 package util
18
19 import (
20 "strings"
21 "testing"
22 "time"
23
24 "github.com/stretchr/testify/assert"
25 corev1 "k8s.io/api/core/v1"
26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27 "k8s.io/apimachinery/pkg/types"
28
29 "k8s.io/component-base/metrics/testutil"
30 "k8s.io/kubernetes/pkg/kubelet/metrics"
31 testingclock "k8s.io/utils/clock/testing"
32 )
33
34 var frozenTime = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
35
36 const (
37 uid = "3c1df8a9-11a8-4791-aeae-184c18cca686"
38 metricsName = "kubelet_pod_start_sli_duration_seconds"
39 )
40
41 func TestNoEvents(t *testing.T) {
42 t.Run("metrics registered; no incoming events", func(t *testing.T) {
43
44
45 wants := ""
46
47 metrics.Register()
48
49 tracker := &basicPodStartupLatencyTracker{
50 pods: map[types.UID]*perPodState{},
51 }
52
53 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
54 t.Fatal(err)
55 }
56
57 assert.Empty(t, tracker.pods)
58 metrics.PodStartSLIDuration.Reset()
59 })
60 }
61
62 func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
63 t.Run("pod was running for 10m before kubelet started", func(t *testing.T) {
64
65
66 wants := ""
67
68 metrics.Register()
69
70 tracker := &basicPodStartupLatencyTracker{
71 pods: map[types.UID]*perPodState{},
72 }
73
74 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
75 t.Fatal(err)
76 }
77
78 podStarted := &corev1.Pod{
79 Status: corev1.PodStatus{
80 StartTime: &metav1.Time{Time: frozenTime.Add(-10 * time.Minute)},
81 },
82 }
83 tracker.ObservedPodOnWatch(podStarted, frozenTime)
84
85 assert.Empty(t, tracker.pods)
86 metrics.PodStartSLIDuration.Reset()
87 })
88 }
89
90 func TestSinglePodOneImageDownloadRecorded(t *testing.T) {
91
92 t.Run("single pod; started in 3s, image pulling 100ms", func(t *testing.T) {
93
94 wants := `
95 # HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
96 # TYPE kubelet_pod_start_sli_duration_seconds histogram
97 kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
98 kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
99 kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
100 kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 1
101 kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 1
102 kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
103 kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
104 kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
105 kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
106 kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
107 kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
108 kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
109 kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
110 kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
111 kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
112 kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
113 kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
114 kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
115 kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
116 kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
117 kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
118 kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
119 kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
120 kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
121 kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
122 kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
123 kubelet_pod_start_sli_duration_seconds_sum 2.9
124 kubelet_pod_start_sli_duration_seconds_count 1
125 `
126
127 fakeClock := testingclock.NewFakeClock(frozenTime)
128
129 metrics.Register()
130
131 tracker := &basicPodStartupLatencyTracker{
132 pods: map[types.UID]*perPodState{},
133 clock: fakeClock,
134 }
135
136 podInit := buildInitializingPod()
137 tracker.ObservedPodOnWatch(podInit, frozenTime)
138
139
140 tracker.RecordImageStartedPulling(podInit.UID)
141 fakeClock.Step(time.Millisecond * 100)
142 tracker.RecordImageFinishedPulling(podInit.UID)
143
144 podState, ok := tracker.pods[podInit.UID]
145 if !ok {
146 t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
147 }
148 if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
149 t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
150 podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
151 }
152
153 podStarted := buildRunningPod()
154 tracker.RecordStatusUpdated(podStarted)
155
156
157 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*3))
158
159 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
160 t.Fatal(err)
161 }
162
163
164 tracker.DeletePodStartupState(podStarted.UID)
165
166 assert.Empty(t, tracker.pods)
167 metrics.PodStartSLIDuration.Reset()
168 })
169 }
170
171 func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {
172
173 t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
174
175 wants := `
176 # HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
177 # TYPE kubelet_pod_start_sli_duration_seconds histogram
178 kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
179 kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
180 kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
181 kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
182 kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
183 kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 0
184 kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 0
185 kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 0
186 kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 0
187 kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
188 kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
189 kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
190 kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
191 kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
192 kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
193 kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
194 kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
195 kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
196 kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
197 kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
198 kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
199 kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
200 kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
201 kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
202 kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
203 kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
204 kubelet_pod_start_sli_duration_seconds_sum 20
205 kubelet_pod_start_sli_duration_seconds_count 1
206 `
207
208 fakeClock := testingclock.NewFakeClock(frozenTime)
209
210 metrics.Register()
211
212 tracker := &basicPodStartupLatencyTracker{
213 pods: map[types.UID]*perPodState{},
214 clock: fakeClock,
215 }
216
217 podInitializing := buildInitializingPod()
218 tracker.ObservedPodOnWatch(podInitializing, frozenTime)
219
220
221
222 fakeClock.SetTime(frozenTime.Add(time.Second * 10))
223 tracker.RecordImageStartedPulling(podInitializing.UID)
224
225 fakeClock.SetTime(frozenTime.Add(time.Second * 11))
226 tracker.RecordImageStartedPulling(podInitializing.UID)
227
228 fakeClock.SetTime(frozenTime.Add(time.Second * 14))
229 tracker.RecordImageStartedPulling(podInitializing.UID)
230
231 fakeClock.SetTime(frozenTime.Add(time.Second * 18))
232 tracker.RecordImageFinishedPulling(podInitializing.UID)
233
234 fakeClock.SetTime(frozenTime.Add(time.Second * 20))
235 tracker.RecordImageFinishedPulling(podInitializing.UID)
236
237 podState, ok := tracker.pods[podInitializing.UID]
238 if !ok {
239 t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
240 }
241 if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) {
242 t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
243 podState.firstStartedPulling.Add(time.Second*10), podState.firstStartedPulling)
244 }
245 if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) {
246 t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
247 podState.lastFinishedPulling.Add(time.Second*20), podState.lastFinishedPulling)
248 }
249
250
251 podStarted := buildRunningPod()
252 tracker.RecordStatusUpdated(podStarted)
253
254
255 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
256
257 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
258 t.Fatal(err)
259 }
260
261
262 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
263 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
264 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
265
266 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
267 t.Fatal(err)
268 }
269
270
271 tracker.DeletePodStartupState(podStarted.UID)
272
273 assert.Empty(t, tracker.pods)
274 metrics.PodStartSLIDuration.Reset()
275 })
276 }
277
278 func TestFirstNetworkPodMetrics(t *testing.T) {
279
280 t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
281
282 wants := `
283 # HELP kubelet_first_network_pod_start_sli_duration_seconds [INTERNAL] Duration in seconds to start the first network pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
284 # TYPE kubelet_first_network_pod_start_sli_duration_seconds gauge
285 kubelet_first_network_pod_start_sli_duration_seconds 30
286 `
287
288 fakeClock := testingclock.NewFakeClock(frozenTime)
289
290 metrics.Register()
291
292 tracker := &basicPodStartupLatencyTracker{
293 pods: map[types.UID]*perPodState{},
294 clock: fakeClock,
295 }
296
297
298 hostNetworkPodInitializing := buildInitializingPod()
299 hostNetworkPodInitializing.UID = "11111-22222"
300 hostNetworkPodInitializing.Spec.HostNetwork = true
301 tracker.ObservedPodOnWatch(hostNetworkPodInitializing, frozenTime)
302
303 hostNetworkPodStarted := buildRunningPod()
304 hostNetworkPodStarted.UID = "11111-22222"
305 hostNetworkPodStarted.Spec.HostNetwork = true
306 tracker.RecordStatusUpdated(hostNetworkPodStarted)
307
308
309 podInitializing := buildInitializingPod()
310 tracker.ObservedPodOnWatch(podInitializing, frozenTime)
311
312
313 podStarted := buildRunningPod()
314 tracker.RecordStatusUpdated(podStarted)
315
316
317 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
318
319 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
320 t.Fatal(err)
321 }
322
323
324 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
325 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
326 tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
327
328 if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), "kubelet_first_network_pod_start_sli_duration_seconds", "kubelet_first_network_pod_start_total_duration_seconds"); err != nil {
329 t.Fatal(err)
330 }
331
332
333 tracker.DeletePodStartupState(hostNetworkPodStarted.UID)
334 tracker.DeletePodStartupState(podStarted.UID)
335
336 assert.Empty(t, tracker.pods)
337 metrics.PodStartSLIDuration.Reset()
338 })
339 }
340
341 func buildInitializingPod() *corev1.Pod {
342 return buildPodWithStatus([]corev1.ContainerStatus{
343 {State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
344 })
345 }
346
347 func buildRunningPod() *corev1.Pod {
348 return buildPodWithStatus([]corev1.ContainerStatus{
349 {State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
350 })
351 }
352
353 func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
354 return &corev1.Pod{
355 ObjectMeta: metav1.ObjectMeta{
356 UID: types.UID(uid),
357 CreationTimestamp: metav1.NewTime(frozenTime),
358 },
359 Status: corev1.PodStatus{
360 ContainerStatuses: cs,
361 },
362 }
363 }
364
View as plain text