package eviction

import (
	"context"
	"fmt"
	"testing"
	"time"

	gomock "github.com/golang/mock/gomock"
	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/types"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/tools/record"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	kubeapi "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/apis/scheduling"
	"k8s.io/kubernetes/pkg/features"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
	testingclock "k8s.io/utils/clock/testing"
	"k8s.io/utils/ptr"
)

const (
	lowPriority     = -1
	defaultPriority = 0
	highPriority    = 1
)

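// mockPodKiller records the last kill request so tests can assert on which pod was evicted and how.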
type mockPodKiller struct {
	pod                 *v1.Pod
	evict               bool
	statusFn            func(*v1.PodStatus)
	gracePeriodOverride *int64
}

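// killPodNow records the pod, eviction flag, grace period override, and status update function it was invoked with.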
func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
	m.pod = pod
	m.statusFn = statusFn
	m.evict = evict
	m.gracePeriodOverride = gracePeriodOverride
	return nil
}

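// mockDiskInfoProvider is used to simulate whether the node has a dedicated image filesystem.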
type mockDiskInfoProvider struct {
	dedicatedImageFs *bool
}

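// HasDedicatedImageFs returns the configured value, defaulting to false when unset.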
func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, error) {
	return ptr.Deref(m.dedicatedImageFs, false), nil
}

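// mockDiskGC simulates image and container garbage collection; it records invocations and can install a post-GC stats summary.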
type mockDiskGC struct {
	err                  error
	imageGCInvoked       bool
	containerGCInvoked   bool
	readAndWriteSeparate bool
	fakeSummaryProvider  *fakeSummaryProvider
	summaryAfterGC       *statsapi.Summary
}

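// DeleteUnusedImages records the invocation and installs the post-GC summary, if one was configured.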
func (m *mockDiskGC) DeleteUnusedImages(_ context.Context) error {
	m.imageGCInvoked = true
	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
		m.fakeSummaryProvider.result = m.summaryAfterGC
	}
	return m.err
}

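// DeleteAllUnusedContainers records the invocation and installs the post-GC summary, if one was configured.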
func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error {
	m.containerGCInvoked = true
	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
		m.fakeSummaryProvider.result = m.summaryAfterGC
	}
	return m.err
}

func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool {
	return m.readAndWriteSeparate
}

func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, nil)
	podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
	return pod, podStats
}

func makePodWithPIDStats(name string, priority int32, processCount uint64) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, nil, nil),
	}, nil)
	podStats := newPodProcessStats(pod, processCount)
	return pod, podStats
}

func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) {
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, nil)
	podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
	return pod, podStats
}

func makePodWithLocalStorageCapacityIsolationOpen(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
	vol := newVolume("local-volume", v1.VolumeSource{
		EmptyDir: &v1.EmptyDirVolumeSource{
			SizeLimit: resource.NewQuantity(requests.Memory().Value(), resource.BinarySI),
		},
	})
	var vols []v1.Volume
	vols = append(vols, vol)
	pod := newPod(name, priority, []v1.Container{
		newContainer(name, requests, limits),
	}, vols)

	var podStats statsapi.PodStats
	switch name {
	case "empty-dir":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(requests.Memory().Value()*2, resource.BinarySI))
	case "container-ephemeral-storage-limit":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value(), resource.BinarySI))
	case "pod-ephemeral-storage-limit":
		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value()*2, resource.BinarySI))
	default:
		podStats = newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
	}
	return pod, podStats
}

func makePIDStats(nodeAvailablePIDs string, numberOfRunningProcesses string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
	val := resource.MustParse(nodeAvailablePIDs)
	availablePIDs := val.Value()

	parsed := resource.MustParse(numberOfRunningProcesses)
	numOfRunningProcesses := parsed.Value()
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Rlimit: &statsapi.RlimitStats{
				MaxPID:                &availablePIDs,
				NumOfRunningProcesses: &numOfRunningProcesses,
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}

func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
	val := resource.MustParse(nodeAvailableBytes)
	availableBytes := uint64(val.Value())
	workingSetBytes := uint64(val.Value())
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Memory: &statsapi.MemoryStats{
				AvailableBytes:  &availableBytes,
				WorkingSetBytes: &workingSetBytes,
			},
			SystemContainers: []statsapi.ContainerStats{
				{
					Name: statsapi.SystemContainerPods,
					Memory: &statsapi.MemoryStats{
						AvailableBytes:  &availableBytes,
						WorkingSetBytes: &workingSetBytes,
					},
				},
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}

type diskStats struct {
	rootFsAvailableBytes  string
	imageFsAvailableBytes string
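	// containerFsAvailableBytes is optional; when left empty, makeDiskStats assumes the container fs matches the image fs.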
	containerFsAvailableBytes string
	podStats                  map[*v1.Pod]statsapi.PodStats
}

func makeDiskStats(diskStats diskStats) *statsapi.Summary {
	rootFsVal := resource.MustParse(diskStats.rootFsAvailableBytes)
	rootFsBytes := uint64(rootFsVal.Value())
	rootFsCapacityBytes := uint64(rootFsVal.Value() * 2)
	imageFsVal := resource.MustParse(diskStats.imageFsAvailableBytes)
	imageFsBytes := uint64(imageFsVal.Value())
	imageFsCapacityBytes := uint64(imageFsVal.Value() * 2)
	if diskStats.containerFsAvailableBytes == "" {
		diskStats.containerFsAvailableBytes = diskStats.imageFsAvailableBytes
	}
	containerFsVal := resource.MustParse(diskStats.containerFsAvailableBytes)
	containerFsBytes := uint64(containerFsVal.Value())
	containerFsCapacityBytes := uint64(containerFsVal.Value() * 2)
	result := &statsapi.Summary{
		Node: statsapi.NodeStats{
			Fs: &statsapi.FsStats{
				AvailableBytes: &rootFsBytes,
				CapacityBytes:  &rootFsCapacityBytes,
			},
			Runtime: &statsapi.RuntimeStats{
				ImageFs: &statsapi.FsStats{
					AvailableBytes: &imageFsBytes,
					CapacityBytes:  &imageFsCapacityBytes,
				},
				ContainerFs: &statsapi.FsStats{
					AvailableBytes: &containerFsBytes,
					CapacityBytes:  &containerFsCapacityBytes,
				},
			},
		},
		Pods: []statsapi.PodStats{},
	}
	for _, podStat := range diskStats.podStats {
		result.Pods = append(result.Pods, podStat)
	}
	return result
}

type podToMake struct {
	name                     string
	priority                 int32
	requests                 v1.ResourceList
	limits                   v1.ResourceList
	memoryWorkingSet         string
	pidUsage                 uint64
	rootFsUsed               string
	logsFsUsed               string
	logsFsInodesUsed         string
	rootFsInodesUsed         string
	perLocalVolumeUsed       string
	perLocalVolumeInodesUsed string
}

func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		wantPodStatus v1.PodStatus
	}{
		"eviction due to memory pressure; no image fs": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
			},
		},
		"eviction due to memory pressure; image fs": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{false, true} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithMemoryStats
				summaryStatsMaker := makeMemoryStats
				podsToMake := []podToMake{
					{name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
					{name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
				diskGC := &mockDiskGC{err: nil}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds: []evictionapi.Threshold{
						{
							Signal:   evictionapi.SignalMemoryAvailable,
							Operator: evictionapi.OpLessThan,
							Value: evictionapi.ThresholdValue{
								Quantity: quantityMustParse("2Gi"),
							},
						},
					},
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

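				// synchronize to detect the memory pressure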
				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

				if err != nil {
					t.Fatalf("Manager expected no error but got %v", err)
				}

				if !manager.IsUnderMemoryPressure() {
					t.Fatalf("Manager should have detected memory pressure")
				}

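				// verify that a pod was selected for eviction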
				if podKiller.pod == nil {
					t.Fatalf("Manager should have selected a pod for eviction")
				}

				wantPodStatus := tc.wantPodStatus.DeepCopy()
				if enablePodDisruptionConditions {
					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
						Type:    "DisruptionTarget",
						Status:  "True",
						Reason:  "TerminationByKubelet",
						Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
					})
				}

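				// apply the status update the manager computed for the evicted pod and compare it to the expectation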
				podKiller.statusFn(&podKiller.pod.Status)
				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
				}
			})
		}
	}
}

func TestPIDPressure_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		wantPodStatus v1.PodStatus
	}{
		"eviction due to pid pressure": {
			wantPodStatus: v1.PodStatus{
				Phase:   v1.PodFailed,
				Reason:  "Evicted",
				Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{true, false} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithPIDStats
				summaryStatsMaker := makePIDStats
				podsToMake := []podToMake{
					{name: "pod1", priority: lowPriority, pidUsage: 500},
					{name: "pod2", priority: defaultPriority, pidUsage: 500},
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					pod, podStat := podMaker(podToMake.name, podToMake.priority, 2)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
				diskGC := &mockDiskGC{err: nil}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds: []evictionapi.Threshold{
						{
							Signal:   evictionapi.SignalPIDAvailable,
							Operator: evictionapi.OpLessThan,
							Value: evictionapi.ThresholdValue{
								Quantity: quantityMustParse("1200"),
							},
						},
					},
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500", "1000", podStats)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

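				// synchronize to detect the PID pressure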
				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

				if err != nil {
					t.Fatalf("Manager expected no error but got %v", err)
				}

				if !manager.IsUnderPIDPressure() {
					t.Fatalf("Manager should have detected PID pressure")
				}

				if podKiller.pod == nil {
					t.Fatalf("Manager should have selected a pod for eviction")
				}

				wantPodStatus := tc.wantPodStatus.DeepCopy()
				if enablePodDisruptionConditions {
					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
						Type:    "DisruptionTarget",
						Status:  "True",
						Reason:  "TerminationByKubelet",
						Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
					})
				}

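				// apply the status update the manager computed for the evicted pod and compare it to the expectation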
				podKiller.statusFn(&podKiller.pod.Status)
				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
				}
			})
		}
	}
}

func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
	testCases := map[string]struct {
		nodeFsStats                   string
		imageFsStats                  string
		containerFsStats              string
		evictionMessage               string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		thresholdToMonitor            evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs: ptr.To(false),
			nodeFsStats:      "1.5Gi",
			imageFsStats:     "10Gi",
			containerFsStats: "10Gi",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
			},
			evictionMessage: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. ",
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs: ptr.To(true),
			nodeFsStats:      "1Gi",
			imageFsStats:     "10Gi",
			containerFsStats: "10Gi",
			evictionMessage:  "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
		"eviction due to container disk pressure; feature off; error; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    false,
			writeableSeparateFromReadOnly: true,
			expectErr:                     "KubeletSeparateDiskGC is turned off but we still have a split filesystem",
			nodeFsStats:                   "1Gi",
			imageFsStats:                  "100Gi",
			containerFsStats:              "10Gi",
			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalContainerFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "10Gi",
			imageFsStats:                  "100Gi",
			containerFsStats:              "10Gi",
			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. ",
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("50Gi"),
				},
			},
			podToMakes: []podToMake{
				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
			},
		},
	}
	for name, tc := range testCases {
		for _, enablePodDisruptionConditions := range []bool{false, true} {
			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()
				defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)()

				podMaker := makePodWithDiskStats
				summaryStatsMaker := makeDiskStats
				podsToMake := tc.podToMakes
				wantPodStatus := v1.PodStatus{
					Phase:   v1.PodFailed,
					Reason:  "Evicted",
					Message: tc.evictionMessage,
				}
				pods := []*v1.Pod{}
				podStats := map[*v1.Pod]statsapi.PodStats{}
				for _, podToMake := range podsToMake {
					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
					pods = append(pods, pod)
					podStats[pod] = podStat
				}
				activePodsFunc := func() []*v1.Pod {
					return pods
				}

				fakeClock := testingclock.NewFakeClock(time.Now())
				podKiller := &mockPodKiller{}
				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
				diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

				config := Config{
					PressureTransitionPeriod: time.Minute * 5,
					Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
				}
				diskStat := diskStats{
					rootFsAvailableBytes:      tc.nodeFsStats,
					imageFsAvailableBytes:     tc.imageFsStats,
					containerFsAvailableBytes: tc.containerFsStats,
					podStats:                  podStats,
				}
				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}
				manager := &managerImpl{
					clock:                        fakeClock,
					killPodFunc:                  podKiller.killPodNow,
					imageGC:                      diskGC,
					containerGC:                  diskGC,
					config:                       config,
					recorder:                     &record.FakeRecorder{},
					summaryProvider:              summaryProvider,
					nodeRef:                      nodeRef,
					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
					thresholdsFirstObservedAt:    thresholdsObservedAt{},
				}

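				// synchronize to detect the disk pressure; some cases expect an error instead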
				_, synchErr := manager.synchronize(diskInfoProvider, activePodsFunc)

				if synchErr == nil && tc.expectErr != "" {
					t.Fatalf("Manager should report error but did not")
				} else if tc.expectErr != "" && synchErr != nil {
					if diff := cmp.Diff(tc.expectErr, synchErr.Error()); diff != "" {
						t.Errorf("Unexpected error (-want,+got):\n%s", diff)
					}
				} else {
					if !manager.IsUnderDiskPressure() {
						t.Fatalf("Manager should report disk pressure")
					}

					if podKiller.pod == nil {
						t.Fatalf("Manager should have selected a pod for eviction")
					}

					if enablePodDisruptionConditions {
						wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
							Type:    "DisruptionTarget",
							Status:  "True",
							Reason:  "TerminationByKubelet",
							Message: tc.evictionMessage,
						})
					}

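					// apply the status update the manager computed for the evicted pod and compare it to the expectation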
					podKiller.statusFn(&podKiller.pod.Status)
					if diff := cmp.Diff(wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
						t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
					}
				}
			})
		}
	}
}

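// TestMemoryPressure walks the manager through the full memory-eviction lifecycle: no pressure, a soft threshold with grace period, a hard threshold, and the pressure transition period.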
func TestMemoryPressure(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
				GracePeriod: time.Minute * 2,
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

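	// create two pods to test admission decisions against at each stage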
	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")

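	// synchronize with no memory pressure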
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

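	// both candidate pods should be admitted while there is no pressure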
	expected := []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

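	// induce soft threshold: available memory drops below the 2Gi soft eviction threshold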
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

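	// no pod should have been killed yet; the soft threshold grace period has not elapsed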
	if podKiller.pod != nil {
		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
	}

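	// step forward in time past the soft threshold grace period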
	fakeClock.Step(3 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

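	// verify the right pod was killed with the configured max grace period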
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	if podKiller.gracePeriodOverride == nil {
		t.Errorf("Manager chose to kill pod but should have had a grace period override.")
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
	}

	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

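	// remove memory pressure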
	fakeClock.Step(20 * time.Minute)
	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

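	// induce memory pressure below the hard threshold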
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

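	// verify the right pod was killed; a hard threshold eviction uses a zero-second grace period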
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod = *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}

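	// while under memory pressure, the best-effort pod should be rejected and the burstable pod admitted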
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

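	// reduce memory pressure; pressure should persist because the transition period has not elapsed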
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

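	// move the clock past the transition period; pressure should clear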
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

	if err != nil {
		t.Fatalf("Manager expected no error but got %v", err)
	}

	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	expected = []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}

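// makeContainersByQOS returns a single-container spec whose requests and limits yield the requested QoS class.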
func makeContainersByQOS(class v1.PodQOSClass) []v1.Container {
	resources := newResourceList("100m", "1Gi", "")
	switch class {
	case v1.PodQOSGuaranteed:
		return []v1.Container{newContainer("guaranteed-container", resources, resources)}
	case v1.PodQOSBurstable:
		return []v1.Container{newContainer("burstable-container", resources, nil)}
	case v1.PodQOSBestEffort:
		fallthrough
	default:
		return []v1.Container{newContainer("best-effort-container", nil, nil)}
	}
}

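// TestPIDPressure walks the manager through soft and hard PID-pressure thresholds, including the grace and transition periods.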
func TestPIDPressure(t *testing.T) {
	testCases := []struct {
		name                               string
		podsToMake                         []podToMake
		evictPodIndex                      int
		noPressurePIDUsage                 string
		pressurePIDUsageWithGracePeriod    string
		pressurePIDUsageWithoutGracePeriod string
		totalPID                           string
	}{
		{
			name: "eviction due to pid pressure",
			podsToMake: []podToMake{
				{name: "high-priority-high-usage", priority: highPriority, pidUsage: 900},
				{name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100},
				{name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400},
				{name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600},
				{name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50},
			},
			evictPodIndex:                      3,
			noPressurePIDUsage:                 "300",
			pressurePIDUsageWithGracePeriod:    "700",
			pressurePIDUsageWithoutGracePeriod: "1200",
			totalPID:                           "2000",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			podMaker := makePodWithPIDStats
			summaryStatsMaker := makePIDStats
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range tc.podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[tc.evictPodIndex]
			activePodsFunc := func() []*v1.Pod { return pods }

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
			diskGC := &mockDiskGC{err: nil}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds: []evictionapi.Threshold{
					{
						Signal:   evictionapi.SignalPIDAvailable,
						Operator: evictionapi.OpLessThan,
						Value: evictionapi.ThresholdValue{
							Quantity: quantityMustParse("1200"),
						},
					},
					{
						Signal:   evictionapi.SignalPIDAvailable,
						Operator: evictionapi.OpLessThan,
						Value: evictionapi.ThresholdValue{
							Quantity: quantityMustParse("1500"),
						},
						GracePeriod: time.Minute * 2,
					},
				},
			}

			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

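			// pod we will attempt to admit at each stage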
			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50)

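			// synchronize with no PID pressure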
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderPIDPressure() {
				t.Fatalf("Manager should not report PID pressure")
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}

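			// induce soft PID pressure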
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithGracePeriod, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure since soft threshold was met")
			}

			if podKiller.pod != nil {
				t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
			}

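			// step forward in time past the soft threshold grace period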
			fakeClock.Step(3 * time.Minute)

			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should still report PID pressure")
			}

			if podKiller.pod != podToEvict {
				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
				t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
			}

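			// reset the pod killer state for the next stage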
			podKiller.pod = nil
			podKiller.gracePeriodOverride = nil

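			// remove PID pressure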
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderPIDPressure() {
				t.Errorf("Manager should not report PID pressure")
			}

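			// induce hard PID pressure; eviction should use a zero-second grace period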
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithoutGracePeriod, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure")
			}

			if podKiller.pod != podToEvict {
				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

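			// reduce PID pressure; pressure should persist because the transition period has not elapsed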
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
			podKiller.pod = nil
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderPIDPressure() {
				t.Errorf("Manager should report PID pressure")
			}

			if podKiller.pod != nil {
				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

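			// move the clock past the transition period; pressure should clear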
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderPIDPressure() {
				t.Errorf("Manager should not report PID pressure")
			}

			if podKiller.pod != nil {
				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}

func TestAdmitUnderNodeConditions(t *testing.T) {
	manager := &managerImpl{}
	pods := []*v1.Pod{
		newPod("guaranteed-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSGuaranteed), nil),
		newPod("burstable-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBurstable), nil),
		newPod("best-effort-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBestEffort), nil),
	}

	expected := []bool{true, true, true}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure}
	expected = []bool{true, true, false}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure, v1.NodeDiskPressure}
	expected = []bool{false, false, false}
	for i, pod := range pods {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}

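// parseQuantity parses the given value, treating the empty string as zero.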
func parseQuantity(value string) resource.Quantity {
	if len(value) == 0 {
		return resource.MustParse("0")
	}
	return resource.MustParse(value)
}

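// TestDiskPressureNodeFs walks the manager through soft and hard disk-pressure thresholds on the nodefs, imagefs, and containerfs signals.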
func TestDiskPressureNodeFs(t *testing.T) {
	testCases := map[string]struct {
		nodeFsStats                   string
		imageFsStats                  string
		containerFsStats              string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		thresholdToMonitor            []evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
		inducePressureOnWhichFs       string
		softDiskPressure              string
		hardDiskPressure              string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs:        ptr.To(false),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			inducePressureOnWhichFs: "nodefs",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Gi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Gi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs:        ptr.To(true),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			inducePressureOnWhichFs: "imagefs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalImageFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Gi"),
					},
				},
				{
					Signal:   evictionapi.SignalImageFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Gi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "16Gi",
			imageFsStats:                  "16Gi",
			containerFsStats:              "16Gi",
			softDiskPressure:              "1.5Gi",
			hardDiskPressure:              "750Mi",
			inducePressureOnWhichFs:       "containerfs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Gi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsAvailable,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Gi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()

			podMaker := makePodWithDiskStats
			summaryStatsMaker := makeDiskStats
			podsToMake := tc.podToMakes
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[0]
			activePodsFunc := func() []*v1.Pod {
				return pods
			}

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
			diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds:               tc.thresholdToMonitor,
			}

			diskStatStart := diskStats{
				rootFsAvailableBytes:      tc.nodeFsStats,
				imageFsAvailableBytes:     tc.imageFsStats,
				containerFsAvailableBytes: tc.containerFsStats,
				podStats:                  podStats,
			}
			diskStatConst := diskStatStart
			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

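			// pod we will attempt to admit at each stage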
			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")

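			// synchronize with no disk pressure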
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}

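			// induce soft disk pressure on the filesystem under test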
			fakeClock.Step(1 * time.Minute)

			switch tc.inducePressureOnWhichFs {
			case "nodefs":
				diskStatStart.rootFsAvailableBytes = tc.softDiskPressure
			case "imagefs":
				diskStatStart.imageFsAvailableBytes = tc.softDiskPressure
			case "containerfs":
				diskStatStart.containerFsAvailableBytes = tc.softDiskPressure
			}
			summaryProvider.result = summaryStatsMaker(diskStatStart)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure since soft threshold was met")
			}

			if podKiller.pod != nil {
				t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
			}

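			// step forward in time past the soft threshold grace period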
			fakeClock.Step(3 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatStart)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure since soft threshold was met")
			}

			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
			}

			podKiller.pod = nil
			podKiller.gracePeriodOverride = nil

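			// remove disk pressure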
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

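			// induce hard disk pressure on the filesystem under test; eviction should use a zero-second grace period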
			fakeClock.Step(1 * time.Minute)
			switch tc.inducePressureOnWhichFs {
			case "nodefs":
				diskStatStart.rootFsAvailableBytes = tc.hardDiskPressure
			case "imagefs":
				diskStatStart.imageFsAvailableBytes = tc.hardDiskPressure
			case "containerfs":
				diskStatStart.containerFsAvailableBytes = tc.hardDiskPressure
			}
			summaryProvider.result = summaryStatsMaker(diskStatStart)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

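			// reduce disk pressure; pressure should persist because the transition period has not elapsed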
			fakeClock.Step(1 * time.Minute)

			summaryProvider.result = summaryStatsMaker(diskStatConst)
			podKiller.pod = nil
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

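			// move the clock past the transition period; pressure should clear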
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			podKiller.pod = nil
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)

			if err != nil {
				t.Fatalf("Manager expected no error but got %v", err)
			}

			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}

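// TestMinReclaim verifies that the manager keeps evicting until availability clears the threshold plus its min-reclaim amount.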
func TestMinReclaim(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

1619 _, err := manager.synchronize(diskInfoProvider, activePodsFunc)
1620 if err != nil {
1621 t.Errorf("Manager should not report any errors")
1622 }
1623
1624 if manager.IsUnderMemoryPressure() {
1625 t.Errorf("Manager should not report memory pressure")
1626 }
1627
1628
1629 fakeClock.Step(1 * time.Minute)
1630 summaryProvider.result = summaryStatsMaker("500Mi", podStats)
1631 _, err = manager.synchronize(diskInfoProvider, activePodsFunc)
1632
1633 if err != nil {
1634 t.Fatalf("Manager should not have an error %v", err)
1635 }
1636
1637
1638 if !manager.IsUnderMemoryPressure() {
1639 t.Errorf("Manager should report memory pressure")
1640 }
1641
1642
1643 if podKiller.pod != podToEvict {
1644 t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
1645 }
1646 observedGracePeriod := *podKiller.gracePeriodOverride
1647 if observedGracePeriod != int64(0) {
1648 t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
1649 }
1650
1651
1652 fakeClock.Step(1 * time.Minute)
1653 summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
1654 podKiller.pod = nil
1655 _, err = manager.synchronize(diskInfoProvider, activePodsFunc)
1656
1657 if err != nil {
1658 t.Fatalf("Manager should not have an error %v", err)
1659 }
1660
1661
1662 if !manager.IsUnderMemoryPressure() {
1663 t.Errorf("Manager should report memory pressure")
1664 }
1665
1666
1667 if podKiller.pod != podToEvict {
1668 t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
1669 }
1670 observedGracePeriod = *podKiller.gracePeriodOverride
1671 if observedGracePeriod != int64(0) {
1672 t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
1673 }
1674
1675
1676 fakeClock.Step(1 * time.Minute)
1677 summaryProvider.result = summaryStatsMaker("2Gi", podStats)
1678 podKiller.pod = nil
1679 _, err = manager.synchronize(diskInfoProvider, activePodsFunc)
1680
1681 if err != nil {
1682 t.Fatalf("Manager should not have an error %v", err)
1683 }
1684
1685
1686 if !manager.IsUnderMemoryPressure() {
1687 t.Errorf("Manager should report memory pressure")
1688 }
1689
1690
1691 if podKiller.pod != nil {
1692 t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
1693 }
1694
1695
1696 fakeClock.Step(5 * time.Minute)
1697 summaryProvider.result = summaryStatsMaker("2Gi", podStats)
1698 podKiller.pod = nil
1699 _, err = manager.synchronize(diskInfoProvider, activePodsFunc)
1700
1701 if err != nil {
1702 t.Fatalf("Manager should not have an error %v", err)
1703 }
1704
1705
1706 if manager.IsUnderMemoryPressure() {
1707 t.Errorf("Manager should not report memory pressure")
1708 }
1709
1710
1711 if podKiller.pod != nil {
1712 t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
1713 }
1714 }
1715
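// TestNodeReclaimFuncs verifies that, under disk pressure, the manager first
// runs node-level reclaim (image and/or container garbage collection) and only
// falls back to evicting pods when GC fails to raise availability above the
// threshold plus MinReclaim. Which GC runs depends on whether a dedicated
// image filesystem exists and, with the KubeletSeparateDiskGC feature, whether
// the writeable container filesystem is separate from the read-only image one.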
func TestNodeReclaimFuncs(t *testing.T) {
	testCases := map[string]struct {
		nodeFsStats                   string
		imageFsStats                  string
		containerFsStats              string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		expectContainerGcCall         bool
		expectImageGcCall             bool
		thresholdToMonitor            evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
		inducePressureOnWhichFs       string
		softDiskPressure              string
		hardDiskPressure              string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs:        ptr.To(false),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			inducePressureOnWhichFs: "nodefs",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			expectContainerGcCall:   true,
			expectImageGcCall:       true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs:        ptr.To(true),
			nodeFsStats:             "16Gi",
			imageFsStats:            "16Gi",
			containerFsStats:        "16Gi",
			softDiskPressure:        "1.5Gi",
			hardDiskPressure:        "750Mi",
			inducePressureOnWhichFs: "imagefs",
			expectContainerGcCall:   true,
			expectImageGcCall:       true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "16Gi",
			imageFsStats:                  "16Gi",
			containerFsStats:              "16Gi",
			softDiskPressure:              "1.5Gi",
			hardDiskPressure:              "750Mi",
			inducePressureOnWhichFs:       "nodefs",
			expectContainerGcCall:         true,
			expectImageGcCall:             false,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalNodeFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsStats:                   "16Gi",
			imageFsStats:                  "16Gi",
			containerFsStats:              "16Gi",
			softDiskPressure:              "1.5Gi",
			hardDiskPressure:              "750Mi",
			inducePressureOnWhichFs:       "imagefs",
			expectContainerGcCall:         false,
			expectImageGcCall:             true,
			thresholdToMonitor: evictionapi.Threshold{
				Signal:   evictionapi.SignalImageFsAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
				MinReclaim: &evictionapi.ThresholdValue{
					Quantity: quantityMustParse("500Mi"),
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
			},
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()

			podMaker := makePodWithDiskStats
			summaryStatsMaker := makeDiskStats
			podsToMake := tc.podToMakes
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[0]
			activePodsFunc := func() []*v1.Pod {
				return pods
			}

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
			}
			diskStatStart := diskStats{
				rootFsAvailableBytes:      tc.nodeFsStats,
				imageFsAvailableBytes:     tc.imageFsStats,
				containerFsAvailableBytes: tc.containerFsStats,
				podStats:                  podStats,
			}
			// diskStatConst preserves the healthy baseline so it can be restored later
			diskStatConst := diskStatStart
			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
			diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

			// synchronize with no pressure
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Errorf("Manager should not report disk pressure")
			}

			// induce hard threshold
			fakeClock.Step(1 * time.Minute)

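			// setDiskStatsBasedOnFs returns a copy of diskStat with the available
			// bytes of the filesystem named by whichFs ("nodefs", "imagefs", or
			// "containerfs") replaced by diskPressure.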
			setDiskStatsBasedOnFs := func(whichFs string, diskPressure string, diskStat diskStats) diskStats {
				switch whichFs {
				case "nodefs":
					diskStat.rootFsAvailableBytes = diskPressure
				case "imagefs":
					diskStat.imageFsAvailableBytes = diskPressure
				case "containerfs":
					diskStat.containerFsAvailableBytes = diskPressure
				}
				return diskStat
			}
			newDiskAfterHardEviction := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
			// make GC successfully return disk usage to the healthy baseline
			diskGC.summaryAfterGC = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// verify image, container, or both gc were called as expected:
			// a split filesystem can have container gc called without image gc,
			// while a shared filesystem should have both
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("GC was not invoked as expected: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// no pod should have been killed because gc reclaimed enough
			if podKiller.pod != nil {
				t.Fatalf("Manager should not have killed a pod, but killed: %v", podKiller.pod.Name)
			}

			// reset state
			diskGC.imageGCInvoked = false
			diskGC.containerGCInvoked = false

			// remove disk pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// synchronize again with no change
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// induce hard threshold again
			fakeClock.Step(1 * time.Minute)
			newDiskAfterHardEviction = setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
			// make GC raise availability above the 1Gi threshold (1.1G) but short
			// of threshold + minReclaim (1.5Gi), so a pod must still be evicted
			gcBelowThreshold := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, "1.1G", newDiskAfterHardEviction)
			diskGC.summaryAfterGC = summaryStatsMaker(gcBelowThreshold)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// verify image, container, or both gc were called as expected
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("GC was not invoked as expected: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// because gc did not satisfy min-reclaim, a pod should be killed
			if podKiller.pod == nil {
				t.Fatalf("Manager should have killed a pod, but didn't")
			}

			// reset state
			diskGC.imageGCInvoked = false
			diskGC.containerGCInvoked = false
			podKiller.pod = nil

			// remove disk pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// induce hard disk pressure that GC cannot relieve at all
			fakeClock.Step(1 * time.Minute)
			unrelievedDiskPressure := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
			summaryProvider.result = summaryStatsMaker(unrelievedDiskPressure)
			// make GC leave the stats unchanged, so eviction must kill a pod
			diskGC.summaryAfterGC = summaryStatsMaker(unrelievedDiskPressure)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// verify image, container, or both gc were called as expected
			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
				t.Fatalf("GC was not invoked as expected: image gc invoked: %v (expected %v), container gc invoked: %v (expected %v)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
			}

			// check the right pod was killed with a zero grace period (hard eviction)
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// reduce disk pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			diskGC.imageGCInvoked = false     // reset state
			diskGC.containerGCInvoked = false // reset state
			podKiller.pod = nil               // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have disk pressure (because transition period not yet met)
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report disk pressure")
			}

			// no gc should have occurred
			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
				t.Errorf("Manager chose to perform image gc when it was not needed")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// move the clock past transition period to ensure that we no longer report pressure
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = summaryStatsMaker(diskStatConst)
			diskGC.imageGCInvoked = false     // reset state
			diskGC.containerGCInvoked = false // reset state
			podKiller.pod = nil               // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have disk pressure (because transition period met)
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report disk pressure")
			}

			// no gc should have occurred
			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
				t.Errorf("Manager chose to perform image gc when it was not needed")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}
		})
	}
}

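// TestInodePressureFsInodes mirrors the disk-bytes tests above for the inode
// signals (nodefs.inodesFree / imagefs.inodesFree): the soft threshold (2Mi
// free, 2 minute grace period) must not evict until the grace period expires
// and then uses MaxPodGracePeriodSeconds as the override, while the hard
// threshold (1Mi free) evicts immediately with a zero grace period.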
func TestInodePressureFsInodes(t *testing.T) {
	podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) {
		pod := newPod(name, priority, []v1.Container{
			newContainer(name, requests, limits),
		}, nil)
		podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes))
		return pod, podStats
	}
	summaryStatsMaker := func(rootFsInodesFree, rootFsInodes, imageFsInodesFree, imageFsInodes, containerFsInodesFree, containerFsInodes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
		rootFsInodesFreeVal := resource.MustParse(rootFsInodesFree)
		internalRootFsInodesFree := uint64(rootFsInodesFreeVal.Value())
		rootFsInodesVal := resource.MustParse(rootFsInodes)
		internalRootFsInodes := uint64(rootFsInodesVal.Value())

		imageFsInodesFreeVal := resource.MustParse(imageFsInodesFree)
		internalImageFsInodesFree := uint64(imageFsInodesFreeVal.Value())
		imageFsInodesVal := resource.MustParse(imageFsInodes)
		internalImageFsInodes := uint64(imageFsInodesVal.Value())

		containerFsInodesFreeVal := resource.MustParse(containerFsInodesFree)
		internalContainerFsInodesFree := uint64(containerFsInodesFreeVal.Value())
		containerFsInodesVal := resource.MustParse(containerFsInodes)
		internalContainerFsInodes := uint64(containerFsInodesVal.Value())

		result := &statsapi.Summary{
			Node: statsapi.NodeStats{
				Fs: &statsapi.FsStats{
					InodesFree: &internalRootFsInodesFree,
					Inodes:     &internalRootFsInodes,
				},
				Runtime: &statsapi.RuntimeStats{
					ImageFs: &statsapi.FsStats{
						InodesFree: &internalImageFsInodesFree,
						Inodes:     &internalImageFsInodes,
					},
					ContainerFs: &statsapi.FsStats{
						InodesFree: &internalContainerFsInodesFree,
						Inodes:     &internalContainerFsInodes,
					},
				},
			},
			Pods: []statsapi.PodStats{},
		}
		for _, podStat := range podStats {
			result.Pods = append(result.Pods, podStat)
		}
		return result
	}

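	// setINodesFreeBasedOnFs mutates diskStat in place, replacing the free-inode
	// count of the filesystem named by whichFs ("nodefs", "imagefs", or
	// "containerfs"), and returns the same summary.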
	setINodesFreeBasedOnFs := func(whichFs string, inodesFree string, diskStat *statsapi.Summary) *statsapi.Summary {
		inodesFreeVal := resource.MustParse(inodesFree)
		internalFsInodesFree := uint64(inodesFreeVal.Value())

		switch whichFs {
		case "nodefs":
			diskStat.Node.Fs.InodesFree = &internalFsInodesFree
		case "imagefs":
			diskStat.Node.Runtime.ImageFs.InodesFree = &internalFsInodesFree
		case "containerfs":
			diskStat.Node.Runtime.ContainerFs.InodesFree = &internalFsInodesFree
		}
		return diskStat
	}

	testCases := map[string]struct {
		nodeFsInodesFree              string
		nodeFsInodes                  string
		imageFsInodesFree             string
		imageFsInodes                 string
		containerFsInodesFree         string
		containerFsInodes             string
		kubeletSeparateDiskFeature    bool
		writeableSeparateFromReadOnly bool
		thresholdToMonitor            []evictionapi.Threshold
		podToMakes                    []podToMake
		dedicatedImageFs              *bool
		expectErr                     string
		inducePressureOnWhichFs       string
		softINodePressure             string
		hardINodePressure             string
	}{
		"eviction due to disk pressure; no image fs": {
			dedicatedImageFs:        ptr.To(false),
			nodeFsInodesFree:        "3Mi",
			nodeFsInodes:            "4Mi",
			imageFsInodesFree:       "3Mi",
			imageFsInodes:           "4Mi",
			containerFsInodesFree:   "3Mi",
			containerFsInodes:       "4Mi",
			inducePressureOnWhichFs: "nodefs",
			softINodePressure:       "1.5Mi",
			hardINodePressure:       "0.5Mi",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
		"eviction due to image disk pressure; image fs": {
			dedicatedImageFs:        ptr.To(true),
			nodeFsInodesFree:        "3Mi",
			nodeFsInodes:            "4Mi",
			imageFsInodesFree:       "3Mi",
			imageFsInodes:           "4Mi",
			containerFsInodesFree:   "3Mi",
			containerFsInodes:       "4Mi",
			softINodePressure:       "1.5Mi",
			hardINodePressure:       "0.5Mi",
			inducePressureOnWhichFs: "imagefs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalImageFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalImageFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
		"eviction due to container disk pressure; container fs": {
			dedicatedImageFs:              ptr.To(true),
			kubeletSeparateDiskFeature:    true,
			writeableSeparateFromReadOnly: true,
			nodeFsInodesFree:              "3Mi",
			nodeFsInodes:                  "4Mi",
			imageFsInodesFree:             "3Mi",
			imageFsInodes:                 "4Mi",
			containerFsInodesFree:         "3Mi",
			containerFsInodes:             "4Mi",
			softINodePressure:             "1.5Mi",
			hardINodePressure:             "0.5Mi",
			inducePressureOnWhichFs:       "nodefs",
			thresholdToMonitor: []evictionapi.Threshold{
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("1Mi"),
					},
				},
				{
					Signal:   evictionapi.SignalNodeFsInodesFree,
					Operator: evictionapi.OpLessThan,
					Value: evictionapi.ThresholdValue{
						Quantity: quantityMustParse("2Mi"),
					},
					GracePeriod: time.Minute * 2,
				},
			},
			podToMakes: []podToMake{
				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
			},
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)()

			podsToMake := tc.podToMakes
			pods := []*v1.Pod{}
			podStats := map[*v1.Pod]statsapi.PodStats{}
			for _, podToMake := range podsToMake {
				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed)
				pods = append(pods, pod)
				podStats[pod] = podStat
			}
			podToEvict := pods[0]
			activePodsFunc := func() []*v1.Pod {
				return pods
			}

			fakeClock := testingclock.NewFakeClock(time.Now())
			podKiller := &mockPodKiller{}
			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
			diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

			config := Config{
				MaxPodGracePeriodSeconds: 5,
				PressureTransitionPeriod: time.Minute * 5,
				Thresholds:               tc.thresholdToMonitor,
			}
			// startingStatsConst stays pristine; startingStatsModified is mutated
			// in place by setINodesFreeBasedOnFs to induce pressure
			startingStatsConst := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
			startingStatsModified := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
			summaryProvider := &fakeSummaryProvider{result: startingStatsModified}
			manager := &managerImpl{
				clock:                        fakeClock,
				killPodFunc:                  podKiller.killPodNow,
				imageGC:                      diskGC,
				containerGC:                  diskGC,
				config:                       config,
				recorder:                     &record.FakeRecorder{},
				summaryProvider:              summaryProvider,
				nodeRef:                      nodeRef,
				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
				thresholdsFirstObservedAt:    thresholdsObservedAt{},
			}

			// create a pod to test admission
			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")

			// synchronize with no pressure
			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have inode pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}

			// induce soft threshold
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have inode pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure since soft threshold was met")
			}

			// no pod should have been killed yet because the soft grace period has not expired
			if podKiller.pod != nil {
				t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
			}

			// step forward in time past the grace period
			fakeClock.Step(3 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have inode pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure since soft threshold was met")
			}

			// verify the right pod was killed with the max grace period override (soft eviction)
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			if podKiller.gracePeriodOverride == nil {
				t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
			}
			observedGracePeriod := *podKiller.gracePeriodOverride
			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
			}
			// reset state
			podKiller.pod = nil
			podKiller.gracePeriodOverride = nil

			// remove inode pressure
			fakeClock.Step(20 * time.Minute)
			summaryProvider.result = startingStatsConst
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have inode pressure
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// induce hard threshold
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.hardINodePressure, startingStatsModified)
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have inode pressure
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure")
			}

			// check the right pod was killed with a zero grace period (hard eviction)
			if podKiller.pod != podToEvict {
				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
			}
			observedGracePeriod = *podKiller.gracePeriodOverride
			if observedGracePeriod != int64(0) {
				t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// reduce inode pressure
			fakeClock.Step(1 * time.Minute)
			summaryProvider.result = startingStatsConst
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should have inode pressure (because transition period not yet met)
			if !manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should report inode pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should fail)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
			}

			// move the clock past transition period to ensure that we no longer report pressure
			fakeClock.Step(5 * time.Minute)
			summaryProvider.result = startingStatsConst
			podKiller.pod = nil // reset state
			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
			if err != nil {
				t.Fatalf("Manager should not have an error %v", err)
			}

			// we should not have inode pressure (because transition period met)
			if manager.IsUnderDiskPressure() {
				t.Fatalf("Manager should not report inode pressure")
			}

			// no pod should have been killed
			if podKiller.pod != nil {
				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
			}

			// try to admit our pod (should succeed)
			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
			}
		})
	}
}

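// TestStaticCriticalPodsAreNotEvicted verifies that a static pod running at
// system critical priority is never selected for eviction, even when it is the
// only candidate under memory pressure.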
func TestStaticCriticalPodsAreNotEvicted(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "critical", priority: scheduling.SystemCriticalPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "800Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}

	// mark the pod as a critical static pod in the kube-system namespace
	pods[0].Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	podPriority := scheduling.SystemCriticalPriority
	pods[0].Spec.Priority = &podPriority
	pods[0].Namespace = kubeapi.NamespaceSystem

	criticalPod := pods[0]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{
		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
	}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("2Gi"),
				},
				GracePeriod: time.Minute * 2,
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	// induce soft threshold
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// no pod should have been killed yet because the grace period has not expired
	if podKiller.pod != nil {
		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
	}

	// step forward in time past the grace period
	fakeClock.Step(3 * time.Minute)
	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure since soft threshold was met")
	}

	// the critical static pod must not be the eviction victim
	if podKiller.pod == criticalPod {
		t.Errorf("Manager chose to kill critical pod: %v, but should have ignored it", podKiller.pod.Name)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

	// remove memory pressure
	fakeClock.Step(20 * time.Minute)
	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// turn the pod into a static pod without critical priority
	pods[0].Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	pods[0].Spec.Priority = nil
	pods[0].Namespace = kubeapi.NamespaceSystem

	// induce memory pressure!
	fakeClock.Step(1 * time.Minute)
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}
}

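// TestAllocatableMemoryPressure verifies eviction and admission behavior for
// the allocatableMemory.available signal: under pressure, best-effort pods are
// denied admission while burstable pods are still admitted, and both admit
// again once pressure clears after the transition period.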
func TestAllocatableMemoryPressure(t *testing.T) {
	podMaker := makePodWithMemoryStats
	summaryStatsMaker := makeMemoryStats
	podsToMake := []podToMake{
		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
	}
	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}
	podToEvict := pods[4]
	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
	}

	// create a best-effort and a burstable pod to test admission
	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")

	// synchronize with no pressure
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// try to admit our pods (they should succeed)
	expected := []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// induce memory pressure by simulating a new pod consuming 1Gi
	fakeClock.Step(1 * time.Minute)
	pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
	podStats[pod] = podStat
	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// check the right pod was killed
	if podKiller.pod != podToEvict {
		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
	}
	observedGracePeriod := *podKiller.gracePeriodOverride
	if observedGracePeriod != int64(0) {
		t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
	}
	// reset state
	podKiller.pod = nil
	podKiller.gracePeriodOverride = nil

	// the best-effort pod should not admit; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// reduce memory pressure by dropping the simulated pod's stats
	fakeClock.Step(1 * time.Minute)
	for pod := range podStats {
		if pod.Name == "guaranteed-high-2" {
			delete(podStats, pod)
		}
	}
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should have memory pressure (because transition period not yet met)
	if !manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// the best-effort pod should still not admit; the burstable pod should
	expected = []bool{false, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}

	// move the clock past transition period to ensure that we no longer report pressure
	fakeClock.Step(5 * time.Minute)
	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
	podKiller.pod = nil // reset state
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// we should not have memory pressure (because transition period met)
	if manager.IsUnderMemoryPressure() {
		t.Errorf("Manager should not report memory pressure")
	}

	// no pod should have been killed
	if podKiller.pod != nil {
		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
	}

	// all pods should admit now
	expected = []bool{true, true}
	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
		}
	}
}

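// TestUpdateMemcgThreshold verifies that the manager refreshes its memcg
// threshold notifiers at most once per notifierRefreshInterval, and that an
// error from UpdateThreshold is reported (via Description) without failing
// the synchronize call.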
func TestUpdateMemcgThreshold(t *testing.T) {
	activePodsFunc := func() []*v1.Pod {
		return []*v1.Pod{}
	}

	fakeClock := testingclock.NewFakeClock(time.Now())
	podKiller := &mockPodKiller{}
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
		PodCgroupRoot: "kubepods",
	}
	summaryProvider := &fakeSummaryProvider{result: makeMemoryStats("2Gi", map[*v1.Pod]statsapi.PodStats{})}

	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	// UpdateThreshold is expected exactly twice: on the first synchronize, and
	// again once notifierRefreshInterval has elapsed
	thresholdNotifier := NewMockThresholdNotifier(mockCtrl)
	thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(nil).Times(2)

	manager := &managerImpl{
		clock:                        fakeClock,
		killPodFunc:                  podKiller.killPodNow,
		imageGC:                      diskGC,
		containerGC:                  diskGC,
		config:                       config,
		recorder:                     &record.FakeRecorder{},
		summaryProvider:              summaryProvider,
		nodeRef:                      nodeRef,
		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
		thresholdsFirstObservedAt:    thresholdsObservedAt{},
		thresholdNotifiers:           []ThresholdNotifier{thresholdNotifier},
	}

	// UpdateThreshold should be called once, since this is the first run
	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// UpdateThreshold should not be called again, since not enough time has passed
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// UpdateThreshold should be called again, since enough time has passed
	fakeClock.Step(2 * notifierRefreshInterval)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}

	// new memory threshold notifier that returns an error
	thresholdNotifier = NewMockThresholdNotifier(mockCtrl)
	thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(fmt.Errorf("error updating threshold")).Times(1)
	thresholdNotifier.EXPECT().Description().Return("mock thresholdNotifier").Times(1)
	manager.thresholdNotifiers = []ThresholdNotifier{thresholdNotifier}

	// UpdateThreshold should be called because enough time has passed;
	// Description should be called because UpdateThreshold returned an error
	fakeClock.Step(2 * notifierRefreshInterval)
	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have an error %v", err)
	}
}

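// TestManagerWithLocalStorageCapacityIsolationOpen exercises eviction driven
// by local storage capacity isolation: each pod below exceeds an
// ephemeral-storage constraint (emptyDir sizeLimit, container limit, or
// pod-level limit), so synchronize should return all of them as evicted,
// independent of the configured memory threshold.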
func TestManagerWithLocalStorageCapacityIsolationOpen(t *testing.T) {
	podMaker := makePodWithLocalStorageCapacityIsolationOpen
	summaryStatsMaker := makeDiskStats
	podsToMake := []podToMake{
		{name: "empty-dir", requests: newResourceList("", "900Mi", ""), limits: newResourceList("", "1Gi", "")},
		{name: "container-ephemeral-storage-limit", requests: newResourceList("", "", "900Mi"), limits: newResourceList("", "", "800Mi")},
		{name: "pod-ephemeral-storage-limit", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "800Mi")},
	}

	pods := []*v1.Pod{}
	podStats := map[*v1.Pod]statsapi.PodStats{}
	for _, podToMake := range podsToMake {
		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
		pods = append(pods, pod)
		podStats[pod] = podStat
	}

	diskStat := diskStats{
		rootFsAvailableBytes:  "1Gi",
		imageFsAvailableBytes: "200Mi",
		podStats:              podStats,
	}
	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}

	config := Config{
		MaxPodGracePeriodSeconds: 5,
		PressureTransitionPeriod: time.Minute * 5,
		Thresholds: []evictionapi.Threshold{
			{
				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
				Operator: evictionapi.OpLessThan,
				Value: evictionapi.ThresholdValue{
					Quantity: quantityMustParse("1Gi"),
				},
			},
		},
	}

	podKiller := &mockPodKiller{}
	diskGC := &mockDiskGC{err: nil}
	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
	fakeClock := testingclock.NewFakeClock(time.Now())
	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}

	mgr := &managerImpl{
		clock:                         fakeClock,
		killPodFunc:                   podKiller.killPodNow,
		imageGC:                       diskGC,
		containerGC:                   diskGC,
		config:                        config,
		recorder:                      &record.FakeRecorder{},
		summaryProvider:               summaryProvider,
		nodeRef:                       nodeRef,
		localStorageCapacityIsolation: true,
		dedicatedImageFs:              diskInfoProvider.dedicatedImageFs,
	}

	activePodsFunc := func() []*v1.Pod {
		return pods
	}

	evictedPods, err := mgr.synchronize(diskInfoProvider, activePodsFunc)
	if err != nil {
		t.Fatalf("Manager should not have error but got %v", err)
	}
	if podKiller.pod == nil {
		t.Fatalf("Manager should have selected a pod for eviction")
	}

	// every pod should have been evicted for exceeding its ephemeral-storage constraint
	if diff := cmp.Diff(pods, evictedPods); diff != "" {
		t.Fatalf("Unexpected evicted pods (-want,+got):\n%s", diff)
	}
}