package podgc

import (
	"context"
	"encoding/json"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
	"k8s.io/client-go/util/workqueue"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	metricstestutil "k8s.io/component-base/metrics/testutil"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/podgc/metrics"
	"k8s.io/kubernetes/pkg/controller/testutil"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	testingclock "k8s.io/utils/clock/testing"
	"k8s.io/utils/pointer"
)

func alwaysReady() bool { return true }

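// NewFromClient builds a PodGCController backed by the given client and fresh shared informers,
// with its pod lister marked as already synced.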
func NewFromClient(ctx context.Context, kubeClient clientset.Interface, terminatedPodThreshold int) (*PodGCController, coreinformers.PodInformer, coreinformers.NodeInformer) {
	informerFactory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
	podInformer := informerFactory.Core().V1().Pods()
	nodeInformer := informerFactory.Core().V1().Nodes()
	controller := NewPodGC(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold)
	controller.podListerSynced = alwaysReady
	return controller, podInformer, nodeInformer
}

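// TestGCTerminated verifies that the oldest terminated (Failed or Succeeded) pods are deleted
// whenever their number exceeds the configured threshold.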
func TestGCTerminated(t *testing.T) {
	type nameToPhase struct {
		name   string
		phase  v1.PodPhase
		reason string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPhase
		threshold                     int
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "delete pod a which is PodFailed and pod b which is PodSucceeded; PodDisruptionConditions enabled",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:                     1,
			patchedPodNames:               sets.NewString(),
			deletedPodNames:               sets.NewString("a", "b"),
			enablePodDisruptionConditions: true,
		},
		{
			name: "threshold = 0, disables terminated pod deletion",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       0,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "threshold = 1, delete pod a which is PodFailed and pod b which is PodSucceeded",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			name: "threshold = 1, delete pod b which is PodSucceeded",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodRunning},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("b"),
		},
		{
			name: "threshold = 1, delete pod a which is PodFailed",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name: "threshold = 5, don't delete pod",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       5,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "threshold = 1, delete pod a which is PodFailed and pod c which was evicted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("c", "a"),
		},
		{
			name: "threshold = 1, delete pod c which was evicted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodRunning},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("c"),
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			creationTime := time.Unix(0, 0)
			nodes := []*v1.Node{testutil.NewNode("node")}

			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime}},
					Status:     v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
					Spec:       v1.PodSpec{NodeName: "node"},
				})
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, test.threshold)
			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}

			gcc.gc(ctx)

			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

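	// The counter for this reason accumulates across the subtests above: 9 deletions in total are expected.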
	testDeletingPodsMetrics(t, 9, metrics.PodGCReasonTerminated)
}

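// makePod returns a minimal pod in the default namespace, bound to nodeName and in the given phase.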
func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: metav1.NamespaceDefault,
		},
		Spec:   v1.PodSpec{NodeName: nodeName},
		Status: v1.PodStatus{Phase: phase},
	}
}

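// waitForAdded polls until the delaying queue reports the expected depth, or times out after 10 seconds.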
func waitForAdded(q workqueue.DelayingInterface, depth int) error {
	return wait.Poll(1*time.Millisecond, 10*time.Second, func() (done bool, err error) {
		if q.Len() == depth {
			return true, nil
		}
		return false, nil
	})
}

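// TestGCOrphaned verifies that pods bound to nodes which no longer exist are deleted once the
// quarantine period has elapsed and the node's absence has been confirmed against the API server.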
func TestGCOrphaned(t *testing.T) {
	testCases := []struct {
		name                          string
		initialClientNodes            []*v1.Node
		initialInformerNodes          []*v1.Node
		delay                         time.Duration
		addedClientNodes              []*v1.Node
		deletedClientNodes            []*v1.Node
		addedInformerNodes            []*v1.Node
		deletedInformerNodes          []*v1.Node
		pods                          []*v1.Pod
		itemsInQueue                  int
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "nodes present in lister",
			initialInformerNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "nodes present in client",
			initialClientNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			itemsInQueue:    2,
			deletedPodNames: sets.NewString(),
		},
		{
			name:  "no nodes",
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "deleted", v1.PodSucceeded),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			name:  "no nodes with PodDisruptionConditions enabled",
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "deleted", v1.PodSucceeded),
				makePod("c", "deleted", v1.PodRunning),
			},
			itemsInQueue:                  1,
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("c"),
			enablePodDisruptionConditions: true,
		},
		{
			name:  "quarantine not finished",
			delay: quarantineTime / 2,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name:                 "wrong nodes",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
			patchedPodNames: sets.NewString("a"),
		},
		{
			name:                 "some nodes missing",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "existing", v1.PodFailed),
				makePod("c", "deleted", v1.PodSucceeded),
				makePod("d", "deleted", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "c", "d"),
			patchedPodNames: sets.NewString("d"),
		},
		{
			name:             "node added to client after quarantine",
			delay:            2 * quarantineTime,
			addedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			name:               "node added to informer after quarantine",
			delay:              2 * quarantineTime,
			addedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			name:               "node deleted from client after quarantine",
			initialClientNodes: []*v1.Node{testutil.NewNode("node")},
			delay:              2 * quarantineTime,
			deletedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name:                 "node deleted from informer after quarantine",
			initialInformerNodes: []*v1.Node{testutil.NewNode("node")},
			delay:                2 * quarantineTime,
			deletedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodSucceeded),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			nodes := make([]*v1.Node, 0, len(test.initialClientNodes))
			for _, node := range test.initialClientNodes {
				nodes = append(nodes, node)
			}
			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				pods = append(pods, pod)
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)
			for _, node := range test.initialInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, pod := range test.pods {
				podInformer.Informer().GetStore().Add(pod)
			}

			// Replace the controller's delaying queue with one driven by a fake clock
			// so the quarantine period can be stepped manually.
			fakeClock := testingclock.NewFakeClock(time.Now())
			gcc.nodeQueue.ShutDown()
			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")

			// The first GC pass runs before the quarantine elapses, so no pod may be deleted yet.
			gcc.gc(ctx)
			deletedPodNames := getDeletedPodNames(client)

			if len(deletedPodNames) > 0 {
				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
			}

			// Advance the fake clock by the configured delay.
			fakeClock.Step(test.delay)

			if test.itemsInQueue > 0 {
				err := waitForAdded(gcc.nodeQueue, test.itemsInQueue)
				if err != nil {
					t.Errorf("wrong number of items in the node queue.\n\texpected: %v\n\tactual: %v",
						test.itemsInQueue, gcc.nodeQueue.Len())
				}
			}

			// Simulate node churn after the quarantine delay has elapsed.
			for _, node := range test.addedClientNodes {
				client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
			}
			for _, node := range test.deletedClientNodes {
				client.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{})
			}
			for _, node := range test.addedInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, node := range test.deletedInformerNodes {
				nodeInformer.Informer().GetStore().Delete(node)
			}

			// The second GC pass evaluates orphaned pods against the updated node set.
			gcc.gc(context.TODO())
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

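	// The counter for this reason accumulates across the subtests above: 10 deletions in total are expected.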
	testDeletingPodsMetrics(t, 10, metrics.PodGCReasonOrphaned)
}

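// TestGCUnscheduledTerminating verifies that terminating pods which were never scheduled to a node are deleted.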
func TestGCUnscheduledTerminating(t *testing.T) {
	type nameToPhase struct {
		name              string
		phase             v1.PodPhase
		deletionTimeStamp *metav1.Time
		nodeName          string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPhase
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "Unscheduled pod in any phase must be deleted, the phase of the running pod is changed to Failed; PodDisruptionConditions enabled",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
			},
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("c"),
			enablePodDisruptionConditions: true,
		},
		{
			name: "Unscheduled pod in any phase must be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
			},
			deletedPodNames: sets.NewString("a", "b", "c"),
			patchedPodNames: sets.NewString("c"),
		},
		{
			name: "Scheduled pod in any phase must not be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: nil, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: nil, nodeName: "node"},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "node"},
			},
			deletedPodNames: sets.NewString(),
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			creationTime := time.Unix(0, 0)

			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
						DeletionTimestamp: pod.deletionTimeStamp},
					Status: v1.PodStatus{Phase: pod.phase},
					Spec:   v1.PodSpec{NodeName: pod.nodeName},
				})
			}
			nodes := []*v1.Node{}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, -1)

			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}

			pods, err := podInformer.Lister().List(labels.Everything())
			if err != nil {
				t.Errorf("Error while listing all Pods: %v", err)
				return
			}
			gcc.gcUnscheduledTerminating(ctx, pods)
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

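	// The counter for this reason accumulates across the subtests above: 6 deletions in total are expected.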
	testDeletingPodsMetrics(t, 6, metrics.PodGCReasonTerminatingUnscheduled)
}

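// TestGCTerminating verifies that terminating pods are deleted only when their node is not ready
// and carries the out-of-service taint.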
func TestGCTerminating(t *testing.T) {
	type node struct {
		name           string
		readyCondition v1.ConditionStatus
		taints         []v1.Taint
	}

	type nameToPodConfig struct {
		name              string
		phase             v1.PodPhase
		deletionTimeStamp *metav1.Time
		nodeName          string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPodConfig
		nodes                         []node
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "pods have deletion timestamp set and the corresponding nodes are not ready",
			nodes: []node{
				{name: "worker-0", readyCondition: v1.ConditionFalse},
				{name: "worker-1", readyCondition: v1.ConditionFalse},
			},
			pods: []nameToPodConfig{
				{name: "a", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
				{name: "b", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
			},
			deletedPodNames: sets.NewString(),
		},
		{
			name: "some pods have deletion timestamp and/or phase set and some of the corresponding nodes have an " +
				"out-of-service taint and are not ready",
			nodes: []node{
				// Not ready, with the out-of-service:NoExecute taint.
				{name: "worker-0", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoExecute}}},
				// Not ready, without the out-of-service taint.
				{name: "worker-1", readyCondition: v1.ConditionFalse},
				// Ready node.
				{name: "worker-2", readyCondition: v1.ConditionTrue},
				// Not ready, with the out-of-service:NoSchedule taint.
				{name: "worker-3", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoSchedule}}},
				// Not ready, with the out-of-service:PreferNoSchedule taint.
				{name: "worker-4", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectPreferNoSchedule}}},
				// Not ready, with an out-of-service:NoExecute taint that carries a value.
				{name: "worker-5", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Value: "any-value", Effect: v1.TaintEffectNoExecute}}},
			},
			pods: []nameToPodConfig{
				// worker-0 pods: only the terminating pod b1 is deleted.
				{name: "a1", nodeName: "worker-0"},
				{name: "b1", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
				{name: "c1", phase: v1.PodPending, nodeName: "worker-0"},
				{name: "d1", phase: v1.PodRunning, nodeName: "worker-0"},
				{name: "e1", phase: v1.PodUnknown, nodeName: "worker-0"},

				// worker-1 pods: the node has no out-of-service taint, so none are deleted.
				{name: "a2", nodeName: "worker-1"},
				{name: "b2", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
				{name: "c2", phase: v1.PodPending, nodeName: "worker-1"},
				{name: "d2", phase: v1.PodRunning, nodeName: "worker-1"},
				{name: "e2", phase: v1.PodUnknown, nodeName: "worker-1"},

				// worker-2 pods: the node is ready, so none are deleted.
				{name: "a3", nodeName: "worker-2"},
				{name: "b3", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-2"},
				{name: "c3", phase: v1.PodPending, nodeName: "worker-2"},
				{name: "d3", phase: v1.PodRunning, nodeName: "worker-2"},
				{name: "e3", phase: v1.PodUnknown, nodeName: "worker-2"},

				// worker-3 pods: only the terminating pod b4 is deleted.
				{name: "a4", nodeName: "worker-3"},
				{name: "b4", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-3"},
				{name: "c4", phase: v1.PodPending, nodeName: "worker-3"},
				{name: "d4", phase: v1.PodRunning, nodeName: "worker-3"},
				{name: "e4", phase: v1.PodUnknown, nodeName: "worker-3"},

				// worker-4 pods: only the terminating pod b5 is deleted.
				{name: "a5", nodeName: "worker-3"},
				{name: "b5", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-4"},
				{name: "c5", phase: v1.PodPending, nodeName: "worker-4"},
				{name: "d5", phase: v1.PodRunning, nodeName: "worker-4"},
				{name: "e5", phase: v1.PodUnknown, nodeName: "worker-4"},

				// worker-5 pods: only the terminating pod b6 is deleted.
				{name: "a6", nodeName: "worker-5"},
				{name: "b6", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-5"},
				{name: "c6", phase: v1.PodPending, nodeName: "worker-5"},
				{name: "d6", phase: v1.PodRunning, nodeName: "worker-5"},
				{name: "e6", phase: v1.PodUnknown, nodeName: "worker-5"},
			},
			deletedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
			patchedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
		},
		{
			name: "pods deleted from a node tainted out-of-service; PodDisruptionConditions enabled",
			nodes: []node{
				{name: "worker", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoExecute}}},
			},
			pods: []nameToPodConfig{
				{name: "a", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
				{name: "b", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
				{name: "c", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
			},
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("a"),
			enablePodDisruptionConditions: true,
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()

			creationTime := time.Unix(0, 0)
			nodes := make([]*v1.Node, 0, len(test.nodes))
			for _, node := range test.nodes {
				creationTime = creationTime.Add(2 * time.Hour)
				nodes = append(nodes, &v1.Node{
					ObjectMeta: metav1.ObjectMeta{Name: node.name, CreationTimestamp: metav1.Time{Time: creationTime}},
					Spec: v1.NodeSpec{
						Taints: node.taints,
					},
					Status: v1.NodeStatus{
						Conditions: []v1.NodeCondition{
							{
								Type:   v1.NodeReady,
								Status: node.readyCondition,
							},
						},
					},
				})
			}
			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
						DeletionTimestamp: pod.deletionTimeStamp},
					Status: v1.PodStatus{Phase: pod.phase},
					Spec:   v1.PodSpec{NodeName: pod.nodeName},
				})
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)

			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}
			for _, node := range nodes {
				nodeInformer.Informer().GetStore().Add(node)
			}

			gcc.gc(ctx)
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

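	// The counter for this reason accumulates across the subtests above: 7 deletions in total are expected.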
	testDeletingPodsMetrics(t, 7, metrics.PodGCReasonTerminatingOutOfService)
}

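// TestGCInspectingPatchedPodBeforeDeletion verifies that an orphaned pod is patched with the
// DisruptionTarget condition and a Failed phase before being deleted with a zero grace period.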
func TestGCInspectingPatchedPodBeforeDeletion(t *testing.T) {
	testCases := []struct {
		name                 string
		pod                  *v1.Pod
		expectedPatchedPod   *v1.Pod
		expectedDeleteAction *clienttesting.DeleteActionImpl
	}{
		{
			name: "orphaned pod should have DisruptionTarget condition added before deletion",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Namespace: "default",
					Name:      "testPod",
				},
				Spec: v1.PodSpec{
					NodeName: "deletedNode",
				},
				Status: v1.PodStatus{
					Phase: v1.PodRunning,
					Conditions: []v1.PodCondition{
						{
							Type:   v1.PodReady,
							Status: v1.ConditionTrue,
						},
					},
				},
			},
			expectedPatchedPod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Namespace: "default",
					Name:      "testPod",
				},
				Spec: v1.PodSpec{
					NodeName: "deletedNode",
				},
				Status: v1.PodStatus{
					Phase: v1.PodFailed,
					Conditions: []v1.PodCondition{
						{
							Type:   v1.PodReady,
							Status: v1.ConditionTrue,
						},
						{
							Type:    v1.DisruptionTarget,
							Status:  v1.ConditionTrue,
							Reason:  "DeletionByPodGC",
							Message: "PodGC: node no longer exists",
						},
					},
				},
			},
			expectedDeleteAction: &clienttesting.DeleteActionImpl{
				Name:          "testPod",
				DeleteOptions: metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64(0)},
			},
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()

			pods := []*v1.Pod{test.pod}

			client := setupNewSimpleClient(nil, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, -1)
			gcc.quarantineTime = time.Duration(-1)
			podInformer.Informer().GetStore().Add(test.pod)
			gcc.gc(ctx)

			actions := client.Actions()

			var patchAction clienttesting.PatchAction
			var deleteAction clienttesting.DeleteAction

			for _, action := range actions {
				if action.GetVerb() == "patch" {
					patchAction = action.(clienttesting.PatchAction)
				}

				if action.GetVerb() == "delete" {
					deleteAction = action.(clienttesting.DeleteAction)
				}
			}

			if patchAction != nil && test.expectedPatchedPod == nil {
				t.Fatalf("Pod was patched but expectedPatchedPod is nil")
			}
			if test.expectedPatchedPod != nil {
				patchedPodBytes := patchAction.GetPatch()
				originalPod, err := json.Marshal(test.pod)
				if err != nil {
					t.Fatalf("Failed to marshal original pod %#v: %v", test.pod, err)
				}
				updated, err := strategicpatch.StrategicMergePatch(originalPod, patchedPodBytes, v1.Pod{})
				if err != nil {
					t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patchedPodBytes, originalPod, err)
				}

				updatedPod := &v1.Pod{}
				if err := json.Unmarshal(updated, updatedPod); err != nil {
					t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
				}

				if diff := cmp.Diff(test.expectedPatchedPod, updatedPod, cmpopts.IgnoreFields(v1.Pod{}, "TypeMeta"), cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
					t.Fatalf("Unexpected diff on pod (-want,+got):\n%s", diff)
				}
			}

			if deleteAction != nil && test.expectedDeleteAction == nil {
				t.Fatalf("Pod was deleted but expectedDeleteAction is nil")
			}
			if test.expectedDeleteAction != nil {
				if diff := cmp.Diff(*test.expectedDeleteAction, deleteAction, cmpopts.IgnoreFields(clienttesting.DeleteActionImpl{}, "ActionImpl")); diff != "" {
					t.Fatalf("Unexpected diff on deleteAction (-want,+got):\n%s", diff)
				}
			}
		})
	}
}

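// verifyDeletedAndPatchedPods compares the pod names deleted and patched through the fake client
// against the expected sets.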
func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
	t.Helper()
	deletedPodNames := getDeletedPodNames(client)
	if diff := cmp.Diff(wantDeletedPodNames, deletedPodNames); diff != "" {
		t.Errorf("Deleted pod names (-want,+got):\n%s", diff)
	}
	patchedPodNames := getPatchedPodNames(client)
	if diff := cmp.Diff(wantPatchedPodNames, patchedPodNames); diff != "" {
		t.Errorf("Patched pod names (-want,+got):\n%s", diff)
	}
}

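// testDeletingPodsMetrics checks the DeletingPodsTotal and DeletingPodsErrorTotal counters for the given
// reason. The counters are global, so the expected total accumulates over every deletion performed for
// that reason before the check.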
func testDeletingPodsMetrics(t *testing.T, total int, reason string) {
	t.Helper()

	actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsTotal.WithLabelValues(metav1.NamespaceDefault, reason))
	if err != nil {
		t.Errorf("Error getting DeletingPodsTotal: %v", err)
	}
	if actualDeletingPodsTotal != float64(total) {
		t.Errorf("Expected DeletingPodsTotal to be %d, got %v", total, actualDeletingPodsTotal)
	}

	actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsErrorTotal.WithLabelValues("", reason))
	if err != nil {
		t.Errorf("Error getting DeletingPodsErrorTotal: %v", err)
	}
	if actualDeletingPodsErrorTotal != float64(0) {
		t.Errorf("Expected DeletingPodsErrorTotal to be %d, got %v", 0, actualDeletingPodsErrorTotal)
	}
}

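// setupNewSimpleClient returns a fake clientset pre-populated with the given nodes and pods.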
func setupNewSimpleClient(nodes []*v1.Node, pods []*v1.Pod) *fake.Clientset {
	podList := &v1.PodList{}
	for _, podItem := range pods {
		podList.Items = append(podList.Items, *podItem)
	}
	nodeList := &v1.NodeList{}
	for _, nodeItem := range nodes {
		nodeList.Items = append(nodeList.Items, *nodeItem)
	}
	return fake.NewSimpleClientset(nodeList, podList)
}

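// getDeletedPodNames collects the names of all pods deleted through the fake client.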
func getDeletedPodNames(client *fake.Clientset) sets.String {
	deletedPodNames := sets.NewString()
	for _, action := range client.Actions() {
		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
			deleteAction := action.(clienttesting.DeleteAction)
			deletedPodNames.Insert(deleteAction.GetName())
		}
	}
	return deletedPodNames
}

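// getPatchedPodNames collects the names of all pods patched through the fake client.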
func getPatchedPodNames(client *fake.Clientset) sets.String {
	patchedPodNames := sets.NewString()
	for _, action := range client.Actions() {
		if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" {
			patchAction := action.(clienttesting.PatchAction)
			patchedPodNames.Insert(patchAction.GetName())
		}
	}
	return patchedPodNames
}