package preemption

import (
    "context"
    "fmt"
    "strings"
    "testing"
    "time"

    v1 "k8s.io/api/core/v1"
    policy "k8s.io/api/policy/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/intstr"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/client-go/informers"
    clientset "k8s.io/client-go/kubernetes"
    restclient "k8s.io/client-go/rest"
    featuregatetesting "k8s.io/component-base/featuregate/testing"
    "k8s.io/component-helpers/storage/volume"
    "k8s.io/klog/v2"
    configv1 "k8s.io/kube-scheduler/config/v1"
    podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    "k8s.io/kubernetes/pkg/apis/scheduling"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/pkg/scheduler"
    configtesting "k8s.io/kubernetes/pkg/scheduler/apis/config/testing"
    "k8s.io/kubernetes/pkg/scheduler/framework"
    "k8s.io/kubernetes/pkg/scheduler/framework/plugins/volumerestrictions"
    frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
    st "k8s.io/kubernetes/pkg/scheduler/testing"
    "k8s.io/kubernetes/plugin/pkg/admission/priority"
    testutils "k8s.io/kubernetes/test/integration/util"
    "k8s.io/utils/pointer"
)

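// Aliases for frequently used helpers from test/integration/util.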
var (
    initPausePod                    = testutils.InitPausePod
    createNode                      = testutils.CreateNode
    createPausePod                  = testutils.CreatePausePod
    runPausePod                     = testutils.RunPausePod
    deletePod                       = testutils.DeletePod
    initTest                        = testutils.InitTestSchedulerWithNS
    initTestDisablePreemption       = testutils.InitTestDisablePreemption
    initDisruptionController        = testutils.InitDisruptionController
    waitCachedPodsStable            = testutils.WaitCachedPodsStable
    podIsGettingEvicted             = testutils.PodIsGettingEvicted
    podUnschedulable                = testutils.PodUnschedulable
    waitForPDBsStable               = testutils.WaitForPDBsStable
    waitForPodToScheduleWithTimeout = testutils.WaitForPodToScheduleWithTimeout
    waitForPodUnschedulable         = testutils.WaitForPodUnschedulable
)

const filterPluginName = "filter-plugin"

var lowPriority, mediumPriority, highPriority = int32(100), int32(200), int32(300)

func waitForNominatedNodeNameWithTimeout(cs clientset.Interface, pod *v1.Pod, timeout time.Duration) error {
    if err := wait.PollUntilContextTimeout(context.TODO(), 100*time.Millisecond, timeout, false, func(ctx context.Context) (bool, error) {
        pod, err := cs.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
        if err != nil {
            return false, err
        }
        if len(pod.Status.NominatedNodeName) > 0 {
            return true, nil
        }
        return false, err
    }); err != nil {
        return fmt.Errorf(".status.nominatedNodeName of Pod %v/%v did not get set: %v", pod.Namespace, pod.Name, err)
    }
    return nil
}

func waitForNominatedNodeName(cs clientset.Interface, pod *v1.Pod) error {
    return waitForNominatedNodeNameWithTimeout(cs, pod, wait.ForeverTestTimeout)
}

const tokenFilterName = "token-filter"

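// tokenFilter is a test filter plugin that "fits" a pod only while it still
// has tokens, consuming one token per admitted pod; AddPod/RemovePod adjust
// the count during preemption's dry run. With Unresolvable set, rejections are
// UnschedulableAndUnresolvable, which tells the scheduler preemption can't help.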
type tokenFilter struct {
    Tokens          int
    Unresolvable    bool
    EnablePreFilter bool
}

// Name returns name of the plugin.
func (fp *tokenFilter) Name() string {
    return tokenFilterName
}

func (fp *tokenFilter) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod,
    nodeInfo *framework.NodeInfo) *framework.Status {
    if fp.Tokens > 0 {
        fp.Tokens--
        return nil
    }
    status := framework.Unschedulable
    if fp.Unresolvable {
        status = framework.UnschedulableAndUnresolvable
    }
    return framework.NewStatus(status, fmt.Sprintf("can't fit %v", pod.Name))
}

func (fp *tokenFilter) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
    if !fp.EnablePreFilter || fp.Tokens > 0 {
        return nil, nil
    }
    return nil, framework.NewStatus(framework.Unschedulable)
}

func (fp *tokenFilter) AddPod(ctx context.Context, state *framework.CycleState, podToSchedule *v1.Pod,
    podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
    fp.Tokens--
    return nil
}

func (fp *tokenFilter) RemovePod(ctx context.Context, state *framework.CycleState, podToSchedule *v1.Pod,
    podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
    fp.Tokens++
    return nil
}

func (fp *tokenFilter) PreFilterExtensions() framework.PreFilterExtensions {
    return fp
}

var _ framework.FilterPlugin = &tokenFilter{}

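// TestPreemption tests a few preemption scenarios.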
func TestPreemption(t *testing.T) {
    // Initialize scheduler with a filter plugin.
    var filter tokenFilter
    registry := make(frameworkruntime.Registry)
    err := registry.Register(filterPluginName, func(_ context.Context, _ runtime.Object, fh framework.Handle) (framework.Plugin, error) {
        return &filter, nil
    })
    if err != nil {
        t.Fatalf("Error registering a filter: %v", err)
    }
    cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
        Profiles: []configv1.KubeSchedulerProfile{{
            SchedulerName: pointer.String(v1.DefaultSchedulerName),
            Plugins: &configv1.Plugins{
                Filter: configv1.PluginSet{
                    Enabled: []configv1.Plugin{
                        {Name: filterPluginName},
                    },
                },
                PreFilter: configv1.PluginSet{
                    Enabled: []configv1.Plugin{
                        {Name: filterPluginName},
                    },
                },
            },
        }},
    })

    testCtx := testutils.InitTestSchedulerWithOptions(t,
        testutils.InitTestAPIServer(t, "preemption", nil),
        0,
        scheduler.WithProfiles(cfg.Profiles...),
        scheduler.WithFrameworkOutOfTreeRegistry(registry))
    testutils.SyncSchedulerInformerFactory(testCtx)
    go testCtx.Scheduler.Run(testCtx.Ctx)

    cs := testCtx.ClientSet

    defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
        v1.ResourceCPU:    *resource.NewMilliQuantity(100, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
    }

    maxTokens := 1000
    tests := []struct {
        name                          string
        existingPods                  []*v1.Pod
        pod                           *v1.Pod
        initTokens                    int
        enablePreFilter               bool
        unresolvable                  bool
        preemptedPodIndexes           map[int]struct{}
        enablePodDisruptionConditions bool
    }{
        {
            name:       "basic pod preemption with PodDisruptionConditions enabled",
            initTokens: maxTokens,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(400, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes:           map[int]struct{}{0: {}},
            enablePodDisruptionConditions: true,
        },
        {
            name:       "basic pod preemption",
            initTokens: maxTokens,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(400, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
        },
        {
            name:       "basic pod preemption with filter",
            initTokens: 1,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
        },
        {
            // Same as "basic pod preemption with filter", but the filter plugin
            // also runs at PreFilter: once the tokens are exhausted, the
            // preemptor is rejected in PreFilter and the preemption logic is
            // triggered from there instead of from Filter.
            name:            "basic pod preemption with preFilter",
            initTokens:      1,
            enablePreFilter: true,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
        },
        {
            // Same as the filter case above, but the rejection is
            // UnschedulableAndUnresolvable, so no preemption is attempted.
            name:         "basic pod preemption with unresolvable filter",
            initTokens:   1,
            unresolvable: true,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{},
        },
        {
            name:       "preemption is performed to satisfy anti-affinity",
            initTokens: maxTokens,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name: "pod-0", Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    Labels:    map[string]string{"pod": "p0"},
                    Resources: defaultPodRes,
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name: "pod-1", Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Labels:    map[string]string{"pod": "p1"},
                    Resources: defaultPodRes,
                    Affinity: &v1.Affinity{
                        PodAntiAffinity: &v1.PodAntiAffinity{
                            RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                                {
                                    LabelSelector: &metav1.LabelSelector{
                                        MatchExpressions: []metav1.LabelSelectorRequirement{
                                            {
                                                Key:      "pod",
                                                Operator: metav1.LabelSelectorOpIn,
                                                Values:   []string{"preemptor"},
                                            },
                                        },
                                    },
                                    TopologyKey: "node",
                                },
                            },
                        },
                    },
                }),
            },
            // A higher priority pod with anti-affinity to pod-0.
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Labels:    map[string]string{"pod": "preemptor"},
                Resources: defaultPodRes,
                Affinity: &v1.Affinity{
                    PodAntiAffinity: &v1.PodAntiAffinity{
                        RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                            {
                                LabelSelector: &metav1.LabelSelector{
                                    MatchExpressions: []metav1.LabelSelectorRequirement{
                                        {
                                            Key:      "pod",
                                            Operator: metav1.LabelSelectorOpIn,
                                            Values:   []string{"p0"},
                                        },
                                    },
                                },
                                TopologyKey: "node",
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
        },
        {
            // Similar to the previous case, except that the anti-affinity
            // victim (pod-1) has high priority, so preempting it cannot help.
            name:       "preemption is not performed when anti-affinity is not satisfied",
            initTokens: maxTokens,
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name: "pod-0", Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    Labels:    map[string]string{"pod": "p0"},
                    Resources: defaultPodRes,
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name: "pod-1", Namespace: testCtx.NS.Name,
                    Priority:  &highPriority,
                    Labels:    map[string]string{"pod": "p1"},
                    Resources: defaultPodRes,
                    Affinity: &v1.Affinity{
                        PodAntiAffinity: &v1.PodAntiAffinity{
                            RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                                {
                                    LabelSelector: &metav1.LabelSelector{
                                        MatchExpressions: []metav1.LabelSelectorRequirement{
                                            {
                                                Key:      "pod",
                                                Operator: metav1.LabelSelectorOpIn,
                                                Values:   []string{"preemptor"},
                                            },
                                        },
                                    },
                                    TopologyKey: "node",
                                },
                            },
                        },
                    },
                }),
            },
            // A higher priority pod with anti-affinity to pod-0.
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Labels:    map[string]string{"pod": "preemptor"},
                Resources: defaultPodRes,
                Affinity: &v1.Affinity{
                    PodAntiAffinity: &v1.PodAntiAffinity{
                        RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
                            {
                                LabelSelector: &metav1.LabelSelector{
                                    MatchExpressions: []metav1.LabelSelectorRequirement{
                                        {
                                            Key:      "pod",
                                            Operator: metav1.LabelSelectorOpIn,
                                            Values:   []string{"p0"},
                                        },
                                    },
                                },
                                TopologyKey: "node",
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{},
        },
    }

    // Create a node with some resources and a label.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    nodeObject := st.MakeNode().Name("node1").Capacity(nodeRes).Label("node", "node1").Obj()
    if _, err := createNode(testCtx.ClientSet, nodeObject); err != nil {
        t.Fatalf("Error creating node: %v", err)
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
            filter.Tokens = test.initTokens
            filter.EnablePreFilter = test.enablePreFilter
            filter.Unresolvable = test.unresolvable
            pods := make([]*v1.Pod, len(test.existingPods))
            // Create and run existingPods.
            for i, p := range test.existingPods {
                pods[i], err = runPausePod(cs, p)
                if err != nil {
                    t.Fatalf("Error running pause pod: %v", err)
                }
            }
            // Create the "pod" with higher priority that will try to preempt.
            preemptor, err := createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }
            // Wait for preemption of pods and make sure the other ones are not preempted.
            for i, p := range pods {
                if _, found := test.preemptedPodIndexes[i]; found {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
                        podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
                        t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name)
                    }
                    pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{})
                    if err != nil {
                        t.Errorf("Error %v when getting the updated status for pod %v/%v ", err, p.Namespace, p.Name)
                    }
                    _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget)
                    if test.enablePodDisruptionConditions && cond == nil {
                        t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
                    } else if !test.enablePodDisruptionConditions && cond != nil {
                        t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(pod), v1.DisruptionTarget)
                    }
                } else {
                    if p.DeletionTimestamp != nil {
                        t.Errorf("Didn't expect pod %v to get preempted.", p.Name)
                    }
                }
            }
            // Also check that the preemptor pod gets the NominatedNodeName field set.
            if len(test.preemptedPodIndexes) > 0 {
                if err := waitForNominatedNodeName(cs, preemptor); err != nil {
                    t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err)
                }
            }

            // Cleanup
            pods = append(pods, preemptor)
            testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
        })
    }
}

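// TestNonPreemption verifies that a preemptor with PreemptionPolicy: Never is
// never nominated, while one with the default policy is.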
func TestNonPreemption(t *testing.T) {
    var preemptNever = v1.PreemptNever
    // Initialize scheduler.
    testCtx := initTest(t, "non-preemption")
    cs := testCtx.ClientSet
    tests := []struct {
        name             string
        PreemptionPolicy *v1.PreemptionPolicy
    }{
        {
            name:             "pod preemption will happen",
            PreemptionPolicy: nil,
        },
        {
            name:             "pod preemption will not happen",
            PreemptionPolicy: &preemptNever,
        },
    }
    victim := initPausePod(&testutils.PausePodConfig{
        Name:      "victim-pod",
        Namespace: testCtx.NS.Name,
        Priority:  &lowPriority,
        Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
            v1.ResourceCPU:    *resource.NewMilliQuantity(400, resource.DecimalSI),
            v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
        },
    })

    preemptor := initPausePod(&testutils.PausePodConfig{
        Name:      "preemptor-pod",
        Namespace: testCtx.NS.Name,
        Priority:  &highPriority,
        Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
            v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
            v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
        },
    })

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    _, err := createNode(testCtx.ClientSet, st.MakeNode().Name("node1").Capacity(nodeRes).Obj())
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }
    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            defer testutils.CleanupPods(testCtx.Ctx, cs, t, []*v1.Pod{preemptor, victim})
            preemptor.Spec.PreemptionPolicy = test.PreemptionPolicy
            victimPod, err := createPausePod(cs, victim)
            if err != nil {
                t.Fatalf("Error while creating victim: %v", err)
            }
            if err := waitForPodToScheduleWithTimeout(cs, victimPod, 5*time.Second); err != nil {
                t.Fatalf("victim %v should become scheduled", victimPod.Name)
            }

            preemptorPod, err := createPausePod(cs, preemptor)
            if err != nil {
                t.Fatalf("Error while creating preemptor: %v", err)
            }

            err = waitForNominatedNodeNameWithTimeout(cs, preemptorPod, 5*time.Second)
            // A nil PreemptionPolicy defaults to PreemptLowerPriority, so the
            // preemptor should get nominated; with PreemptNever it should not.
            expect := test.PreemptionPolicy == nil
            got := err == nil
            if got != expect {
                t.Errorf("Expect preemptor to be nominated=%v, but got=%v", expect, got)
            }
        })
    }
}

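// TestDisablePreemption verifies that a high-priority pod stays pending and is
// never nominated when the scheduler runs with preemption disabled.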
func TestDisablePreemption(t *testing.T) {
    // Initialize scheduler, and disable preemption.
    testCtx := initTestDisablePreemption(t, "disable-preemption")
    cs := testCtx.ClientSet

    tests := []struct {
        name         string
        existingPods []*v1.Pod
        pod          *v1.Pod
    }{
        {
            name: "pod preemption will not happen",
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                        v1.ResourceCPU:    *resource.NewMilliQuantity(400, resource.DecimalSI),
                        v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
        },
    }

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    _, err := createNode(testCtx.ClientSet, st.MakeNode().Name("node1").Capacity(nodeRes).Obj())
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            pods := make([]*v1.Pod, len(test.existingPods))
            // Create and run existingPods.
            for i, p := range test.existingPods {
                pods[i], err = runPausePod(cs, p)
                if err != nil {
                    t.Fatalf("Test [%v]: Error running pause pod: %v", test.name, err)
                }
            }
            // Create the "pod" with higher priority that would normally preempt.
            preemptor, err := createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }

            // Ensure the preemptor stays unschedulable.
            if err := waitForPodUnschedulable(cs, preemptor); err != nil {
                t.Errorf("Preemptor %v should not become scheduled", preemptor.Name)
            }

            // Ensure the preemptor is not nominated.
            if err := waitForNominatedNodeNameWithTimeout(cs, preemptor, 5*time.Second); err == nil {
                t.Errorf("Preemptor %v should not be nominated", preemptor.Name)
            }

            // Cleanup
            pods = append(pods, preemptor)
            testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
        })
    }
}

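// TestPodPriorityResolution verifies that pod priorities are resolved from
// PriorityClass names by the priority admission plugin, and that referencing a
// non-existent PriorityClass is rejected.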
func TestPodPriorityResolution(t *testing.T) {
    admission := priority.NewPlugin()
    testCtx := testutils.InitTestScheduler(t, testutils.InitTestAPIServer(t, "preemption", admission))
    cs := testCtx.ClientSet

    // Build clientset and informers for controllers.
    externalClientConfig := restclient.CopyConfig(testCtx.KubeConfig)
    externalClientConfig.QPS = -1
    externalClientset := clientset.NewForConfigOrDie(externalClientConfig)
    externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second)
    admission.SetExternalKubeClientSet(externalClientset)
    admission.SetExternalKubeInformerFactory(externalInformers)

    // Wait for informer caches to sync.
    testutils.SyncSchedulerInformerFactory(testCtx)
    externalInformers.Start(testCtx.Ctx.Done())
    externalInformers.WaitForCacheSync(testCtx.Ctx.Done())

    // Run the scheduler.
    go testCtx.Scheduler.Run(testCtx.Ctx)

    tests := []struct {
        Name             string
        PriorityClass    string
        Pod              *v1.Pod
        ExpectedPriority int32
        ExpectedError    error
    }{
        {
            Name:             "SystemNodeCritical priority class",
            PriorityClass:    scheduling.SystemNodeCritical,
            ExpectedPriority: scheduling.SystemCriticalPriority + 1000,
            Pod: initPausePod(&testutils.PausePodConfig{
                Name:              fmt.Sprintf("pod1-%v", scheduling.SystemNodeCritical),
                Namespace:         metav1.NamespaceSystem,
                PriorityClassName: scheduling.SystemNodeCritical,
            }),
        },
        {
            Name:             "SystemClusterCritical priority class",
            PriorityClass:    scheduling.SystemClusterCritical,
            ExpectedPriority: scheduling.SystemCriticalPriority,
            Pod: initPausePod(&testutils.PausePodConfig{
                Name:              fmt.Sprintf("pod2-%v", scheduling.SystemClusterCritical),
                Namespace:         metav1.NamespaceSystem,
                PriorityClassName: scheduling.SystemClusterCritical,
            }),
        },
        {
            Name:             "Invalid priority class should result in error",
            PriorityClass:    "foo",
            ExpectedPriority: scheduling.SystemCriticalPriority,
            Pod: initPausePod(&testutils.PausePodConfig{
                Name:              fmt.Sprintf("pod3-%v", scheduling.SystemClusterCritical),
                Namespace:         metav1.NamespaceSystem,
                PriorityClassName: "foo",
            }),
            ExpectedError: fmt.Errorf("failed to create pause pod: pods \"pod3-system-cluster-critical\" is forbidden: no PriorityClass with name foo was found"),
        },
    }

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    _, err := createNode(testCtx.ClientSet, st.MakeNode().Name("node1").Capacity(nodeRes).Obj())
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }

    pods := make([]*v1.Pod, 0, len(tests))
    for _, test := range tests {
        t.Run(test.Name, func(t *testing.T) {
            pod, err := runPausePod(cs, test.Pod)
            if err != nil {
                if test.ExpectedError == nil {
                    t.Fatalf("Test [PodPriority/%v]: Error running pause pod: %v", test.PriorityClass, err)
                }
                if err.Error() != test.ExpectedError.Error() {
                    t.Fatalf("Test [PodPriority/%v]: Expected error %v but got error %v", test.PriorityClass, test.ExpectedError, err)
                }
                return
            }
            pods = append(pods, pod)
            if pod.Spec.Priority != nil {
                if *pod.Spec.Priority != test.ExpectedPriority {
                    t.Errorf("Expected pod %v to have priority %v but was %v", pod.Name, test.ExpectedPriority, *pod.Spec.Priority)
                }
            } else {
                t.Errorf("Expected pod %v to have priority %v but was nil", pod.Name, test.PriorityClass)
            }
        })
    }
    testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
    testutils.CleanupNodes(cs, t)
}

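// mkPriorityPodWithGrace returns a pause pod with the given name, priority,
// termination grace period, and small default resource requests.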
func mkPriorityPodWithGrace(tc *testutils.TestContext, name string, priority int32, grace int64) *v1.Pod {
    defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
        v1.ResourceCPU:    *resource.NewMilliQuantity(100, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
    }
    pod := initPausePod(&testutils.PausePodConfig{
        Name:      name,
        Namespace: tc.NS.Name,
        Priority:  &priority,
        Labels:    map[string]string{"pod": name},
        Resources: defaultPodRes,
    })
    pod.Spec.TerminationGracePeriodSeconds = &grace
    return pod
}

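// TestPreemptionStarvation tests that a high-priority preemptor gets nominated
// and scheduled even when many pending lower-priority pods are queued.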
func TestPreemptionStarvation(t *testing.T) {
    // Initialize scheduler.
    testCtx := initTest(t, "preemption")
    cs := testCtx.ClientSet

    tests := []struct {
        name               string
        numExistingPod     int
        numExpectedPending int
        preemptor          *v1.Pod
    }{
        {
            // This test ensures that the preemptor's nomination and scheduling
            // are not starved by the queue of pending lower-priority pods that
            // exists while the victims terminate.
            name:               "starvation test: higher priority pod is scheduled before the lower priority ones",
            numExistingPod:     10,
            numExpectedPending: 5,
            preemptor: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
        },
    }

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    _, err := createNode(testCtx.ClientSet, st.MakeNode().Name("node1").Capacity(nodeRes).Obj())
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            pendingPods := make([]*v1.Pod, test.numExpectedPending)
            numRunningPods := test.numExistingPod - test.numExpectedPending
            runningPods := make([]*v1.Pod, numRunningPods)
            // Create and run existingPods.
            for i := 0; i < numRunningPods; i++ {
                runningPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0))
                if err != nil {
                    t.Fatalf("Error creating pause pod: %v", err)
                }
            }
            // Make sure that runningPods are all scheduled.
            for _, p := range runningPods {
                if err := testutils.WaitForPodToSchedule(cs, p); err != nil {
                    t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err)
                }
            }
            // Create pending pods.
            for i := 0; i < test.numExpectedPending; i++ {
                pendingPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0))
                if err != nil {
                    t.Fatalf("Error creating pending pod: %v", err)
                }
            }
            // Make sure that all pending pods are being marked unschedulable.
            for _, p := range pendingPods {
                if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false,
                    podUnschedulable(cs, p.Namespace, p.Name)); err != nil {
                    t.Errorf("Pod %v/%v didn't get marked unschedulable: %v", p.Namespace, p.Name, err)
                }
            }
            // Create the preemptor.
            preemptor, err := createPausePod(cs, test.preemptor)
            if err != nil {
                t.Errorf("Error while creating the preempting pod: %v", err)
            }
            // Check if .status.nominatedNodeName of the preemptor pod gets set.
            if err := waitForNominatedNodeName(cs, preemptor); err != nil {
                t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err)
            }
            // Make sure that the preemptor is scheduled after the preemptions.
            if err := testutils.WaitForPodToScheduleWithTimeout(cs, preemptor, 60*time.Second); err != nil {
                t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err)
            }
            // Cleanup
            klog.Info("Cleaning up all pods...")
            allPods := pendingPods
            allPods = append(allPods, runningPods...)
            allPods = append(allPods, preemptor)
            testutils.CleanupPods(testCtx.Ctx, cs, t, allPods)
        })
    }
}

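// TestPreemptionRaces tests that other scheduling events and operations do not
// mess up the pods' statuses during preemption (issue #72124).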
func TestPreemptionRaces(t *testing.T) {
    // Initialize scheduler.
    testCtx := initTest(t, "preemption-race")
    cs := testCtx.ClientSet

    tests := []struct {
        name              string
        numInitialPods    int
        numAdditionalPods int
        numRepetitions    int
        preemptor         *v1.Pod
    }{
        {
            // This test ensures that while the preemptor is waiting for its
            // victims to terminate, the freed-up room is not taken by the
            // additional pods created in the meantime.
            name:              "ensures that other pods are not scheduled while preemptor is being marked as nominated (issue #72124)",
            numInitialPods:    2,
            numAdditionalPods: 20,
            numRepetitions:    5,
            preemptor: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(4900, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(4900, resource.DecimalSI)},
                },
            }),
        },
    }

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "100",
        v1.ResourceCPU:    "5000m",
        v1.ResourceMemory: "5000",
    }
    _, err := createNode(testCtx.ClientSet, st.MakeNode().Name("node1").Capacity(nodeRes).Obj())
    if err != nil {
        t.Fatalf("Error creating nodes: %v", err)
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            if test.numRepetitions <= 0 {
                test.numRepetitions = 1
            }
            for n := 0; n < test.numRepetitions; n++ {
                initialPods := make([]*v1.Pod, test.numInitialPods)
                additionalPods := make([]*v1.Pod, test.numAdditionalPods)
                // Create and run initialPods.
                for i := 0; i < test.numInitialPods; i++ {
                    initialPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("rpod-%v", i), mediumPriority, 0))
                    if err != nil {
                        t.Fatalf("Error creating pause pod: %v", err)
                    }
                }
                // Make sure that initial Pods are all scheduled.
                for _, p := range initialPods {
                    if err := testutils.WaitForPodToSchedule(cs, p); err != nil {
                        t.Fatalf("Pod %v/%v didn't get scheduled: %v", p.Namespace, p.Name, err)
                    }
                }
                // Create the preemptor.
                klog.Info("Creating the preemptor pod...")
                preemptor, err := createPausePod(cs, test.preemptor)
                if err != nil {
                    t.Errorf("Error while creating the preempting pod: %v", err)
                }

                klog.Info("Creating additional pods...")
                for i := 0; i < test.numAdditionalPods; i++ {
                    additionalPods[i], err = createPausePod(cs, mkPriorityPodWithGrace(testCtx, fmt.Sprintf("ppod-%v", i), mediumPriority, 0))
                    if err != nil {
                        t.Fatalf("Error creating pending pod: %v", err)
                    }
                }
                // Check that the preemptor pod gets the nominated node name set.
                if err := waitForNominatedNodeName(cs, preemptor); err != nil {
                    t.Errorf(".status.nominatedNodeName was not set for pod %v/%v: %v", preemptor.Namespace, preemptor.Name, err)
                }
                // Make sure that the preemptor is scheduled after the preemptions.
                if err := testutils.WaitForPodToScheduleWithTimeout(cs, preemptor, 60*time.Second); err != nil {
                    t.Errorf("Preemptor pod %v didn't get scheduled: %v", preemptor.Name, err)
                }

                klog.Info("Check that unschedulable pods still exist and were never scheduled...")
                for _, p := range additionalPods {
                    pod, err := cs.CoreV1().Pods(p.Namespace).Get(testCtx.Ctx, p.Name, metav1.GetOptions{})
                    if err != nil {
                        t.Errorf("Error in getting Pod %v/%v info: %v", p.Namespace, p.Name, err)
                    }
                    if len(pod.Spec.NodeName) > 0 {
                        t.Errorf("Pod %v/%v is already scheduled", p.Namespace, p.Name)
                    }
                    _, cond := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
                    if cond != nil && cond.Status != v1.ConditionFalse {
                        t.Errorf("Pod %v/%v is no longer unschedulable: %v", p.Namespace, p.Name, err)
                    }
                }

                // Cleanup
                klog.Info("Cleaning up all pods...")
                allPods := additionalPods
                allPods = append(allPods, initialPods...)
                allPods = append(allPods, preemptor)
                testutils.CleanupPods(testCtx.Ctx, cs, t, allPods)
            }
        })
    }
}

const (
    alwaysFailPlugin = "alwaysFailPlugin"
    doNotFailMe      = "do-not-fail-me"
)

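// alwaysFail is a PreBind plugin that fails every pod except those whose name
// contains doNotFailMe, used to simulate unexpected scheduling failures.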
type alwaysFail struct{}

func (af *alwaysFail) Name() string {
    return alwaysFailPlugin
}

func (af *alwaysFail) PreBind(_ context.Context, _ *framework.CycleState, p *v1.Pod, _ string) *framework.Status {
    if strings.Contains(p.Name, doNotFailMe) {
        return nil
    }
    return framework.NewStatus(framework.Unschedulable)
}

func newAlwaysFail(_ context.Context, _ runtime.Object, _ framework.Handle) (framework.Plugin, error) {
    return &alwaysFail{}, nil
}

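// TestNominatedNodeCleanUp verifies that a preemptor's .status.nominatedNodeName
// is cleared when it becomes outdated: the preemptor schedules elsewhere, the
// nominated node is deleted, or binding fails unexpectedly.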
func TestNominatedNodeCleanUp(t *testing.T) {
    tests := []struct {
        name         string
        nodeCapacity map[v1.ResourceName]string
        // A slice of pods to be created in batch.
        podsToCreate [][]*v1.Pod
        // Each postCheck function is run after each batch of pods' creation.
        postChecks []func(cs clientset.Interface, pod *v1.Pod) error
        // Delete the fake node or not. Optional.
        deleteNode bool
        // Pods to be deleted. Optional.
        podNamesToDelete []string

        // Register a custom plugin to simulate particular scheduling failures. Optional.
        customPlugins     *configv1.Plugins
        outOfTreeRegistry frameworkruntime.Registry
    }{
        {
            name:         "mid-priority pod preempts low-priority pod, followed by a high-priority pod with another preemption",
            nodeCapacity: map[v1.ResourceName]string{v1.ResourceCPU: "5"},
            podsToCreate: [][]*v1.Pod{
                {
                    st.MakePod().Name("low-1").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                    st.MakePod().Name("low-2").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                    st.MakePod().Name("low-3").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                    st.MakePod().Name("low-4").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
                {
                    st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "4"}).Obj(),
                },
                {
                    st.MakePod().Name("high").Priority(highPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "3"}).Obj(),
                },
            },
            postChecks: []func(cs clientset.Interface, pod *v1.Pod) error{
                testutils.WaitForPodToSchedule,
                waitForNominatedNodeName,
                waitForNominatedNodeName,
            },
        },
        {
            name:         "mid-priority pod preempts low-priority pod, followed by a high-priority pod without additional preemption",
            nodeCapacity: map[v1.ResourceName]string{v1.ResourceCPU: "2"},
            podsToCreate: [][]*v1.Pod{
                {
                    st.MakePod().Name("low").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
                {
                    st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "2"}).Obj(),
                },
                {
                    st.MakePod().Name("high").Priority(highPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
            },
            postChecks: []func(cs clientset.Interface, pod *v1.Pod) error{
                testutils.WaitForPodToSchedule,
                waitForNominatedNodeName,
                testutils.WaitForPodToSchedule,
            },
            podNamesToDelete: []string{"low"},
        },
        {
            name:         "mid-priority pod preempts low-priority pod, followed by a node deletion",
            nodeCapacity: map[v1.ResourceName]string{v1.ResourceCPU: "1"},
            podsToCreate: [][]*v1.Pod{
                {
                    st.MakePod().Name("low").Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
                {
                    st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
            },
            postChecks: []func(cs clientset.Interface, pod *v1.Pod) error{
                testutils.WaitForPodToSchedule,
                waitForNominatedNodeName,
            },
            // Delete the node to make the pod's nomination outdated.
            deleteNode:       true,
            podNamesToDelete: []string{"low"},
        },
        {
            name:         "mid-priority pod preempts low-priority pod, but failed the scheduling unexpectedly",
            nodeCapacity: map[v1.ResourceName]string{v1.ResourceCPU: "1"},
            podsToCreate: [][]*v1.Pod{
                {
                    st.MakePod().Name(fmt.Sprintf("low-%v", doNotFailMe)).Priority(lowPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
                {
                    st.MakePod().Name("medium").Priority(mediumPriority).Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
                },
            },
            postChecks: []func(cs clientset.Interface, pod *v1.Pod) error{
                testutils.WaitForPodToSchedule,
                waitForNominatedNodeName,
            },
            podNamesToDelete: []string{fmt.Sprintf("low-%v", doNotFailMe)},
            customPlugins: &configv1.Plugins{
                PreBind: configv1.PluginSet{
                    Enabled: []configv1.Plugin{
                        {Name: alwaysFailPlugin},
                    },
                },
            },
            outOfTreeRegistry: frameworkruntime.Registry{alwaysFailPlugin: newAlwaysFail},
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
                Profiles: []configv1.KubeSchedulerProfile{{
                    SchedulerName: pointer.String(v1.DefaultSchedulerName),
                    Plugins:       tt.customPlugins,
                }},
            })
            testCtx := initTest(
                t,
                "preemption",
                scheduler.WithProfiles(cfg.Profiles...),
                scheduler.WithFrameworkOutOfTreeRegistry(tt.outOfTreeRegistry),
            )

            cs, ns := testCtx.ClientSet, testCtx.NS.Name

            nodeName := "fake-node"
            if _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(tt.nodeCapacity).Obj()); err != nil {
                t.Fatalf("Error creating node %v: %v", nodeName, err)
            }

            // Create pods batch by batch and run the post check (if any) after each batch.
            for i, pods := range tt.podsToCreate {
                for _, p := range pods {
                    p.Namespace = ns
                    if _, err := createPausePod(cs, p); err != nil {
                        t.Fatalf("Error creating pod %v: %v", p.Name, err)
                    }
                }
                // If necessary, run the post check function.
                if len(tt.postChecks) > i && tt.postChecks[i] != nil {
                    for _, p := range pods {
                        if err := tt.postChecks[i](cs, p); err != nil {
                            t.Fatalf("Pod %v didn't pass the postChecks[%v]: %v", p.Name, i, err)
                        }
                    }
                }
            }

            // Delete the node if necessary.
            if tt.deleteNode {
                if err := cs.CoreV1().Nodes().Delete(context.TODO(), nodeName, *metav1.NewDeleteOptions(0)); err != nil {
                    t.Fatalf("Node %v cannot be deleted: %v", nodeName, err)
                }
            }

            // Delete the specified pods if necessary.
            for _, podName := range tt.podNamesToDelete {
                if err := deletePod(cs, podName, ns); err != nil {
                    t.Fatalf("Pod %v cannot be deleted: %v", podName, err)
                }
            }

            // Verify that .status.nominatedNodeName of the medium pod is eventually cleared.
            if err := wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
                pod, err := cs.CoreV1().Pods(ns).Get(ctx, "medium", metav1.GetOptions{})
                if err != nil {
                    t.Errorf("Error getting the medium pod: %v", err)
                }
                if len(pod.Status.NominatedNodeName) == 0 {
                    return true, nil
                }
                return false, err
            }); err != nil {
                t.Errorf(".status.nominatedNodeName of the medium pod was not cleared: %v", err)
            }
        })
    }
}

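// mkMinAvailablePDB returns a PodDisruptionBudget with the given minAvailable
// and label selector. The uid argument is currently unused.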
func mkMinAvailablePDB(name, namespace string, uid types.UID, minAvailable int, matchLabels map[string]string) *policy.PodDisruptionBudget {
    intMinAvailable := intstr.FromInt32(int32(minAvailable))
    return &policy.PodDisruptionBudget{
        ObjectMeta: metav1.ObjectMeta{
            Name:      name,
            Namespace: namespace,
        },
        Spec: policy.PodDisruptionBudgetSpec{
            MinAvailable: &intMinAvailable,
            Selector:     &metav1.LabelSelector{MatchLabels: matchLabels},
        },
    }
}

// addPodConditionReady marks the pod Running and Ready so that the disruption
// controller counts it as healthy.
func addPodConditionReady(pod *v1.Pod) {
    pod.Status = v1.PodStatus{
        Phase: v1.PodRunning,
        Conditions: []v1.PodCondition{
            {
                Type:   v1.PodReady,
                Status: v1.ConditionTrue,
            },
        },
    }
}

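// TestPDBInPreemption tests PodDisruptionBudget support in preemption.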
func TestPDBInPreemption(t *testing.T) {
    // Initialize scheduler.
    testCtx := initTest(t, "preemption-pdb")
    cs := testCtx.ClientSet

    initDisruptionController(t, testCtx)

    defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
        v1.ResourceCPU:    *resource.NewMilliQuantity(100, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
    }
    defaultNodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }

    tests := []struct {
        name                string
        nodeCnt             int
        pdbs                []*policy.PodDisruptionBudget
        pdbPodNum           []int32
        existingPods        []*v1.Pod
        pod                 *v1.Pod
        preemptedPodIndexes map[int]struct{}
    }{
        {
            name:    "A non-PDB violating pod is preempted despite its higher priority",
            nodeCnt: 1,
            pdbs: []*policy.PodDisruptionBudget{
                mkMinAvailablePDB("pdb-1", testCtx.NS.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo": "bar"}),
            },
            pdbPodNum: []int32{2},
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod1",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    Labels:    map[string]string{"foo": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod2",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    Labels:    map[string]string{"foo": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "mid-pod3",
                    Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    Resources: defaultPodRes,
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(300, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{2: {}},
        },
        {
            name:    "A node without any PDB violating pods is preferred for preemption",
            nodeCnt: 2,
            pdbs: []*policy.PodDisruptionBudget{
                mkMinAvailablePDB("pdb-1", testCtx.NS.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo": "bar"}),
            },
            pdbPodNum: []int32{1},
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod1",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-1",
                    Labels:    map[string]string{"foo": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "mid-pod2",
                    Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    NodeName:  "node-2",
                    Resources: defaultPodRes,
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(500, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            preemptedPodIndexes: map[int]struct{}{1: {}},
        },
        {
            name:    "A node with fewer PDB violating pods is preferred for preemption",
            nodeCnt: 3,
            pdbs: []*policy.PodDisruptionBudget{
                mkMinAvailablePDB("pdb-1", testCtx.NS.Name, types.UID("pdb-1-uid"), 2, map[string]string{"foo1": "bar"}),
                mkMinAvailablePDB("pdb-2", testCtx.NS.Name, types.UID("pdb-2-uid"), 2, map[string]string{"foo2": "bar"}),
            },
            pdbPodNum: []int32{1, 5},
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod1",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-1",
                    Labels:    map[string]string{"foo1": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "mid-pod1",
                    Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-1",
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod2",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-2",
                    Labels:    map[string]string{"foo2": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "mid-pod2",
                    Namespace: testCtx.NS.Name,
                    Priority:  &mediumPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-2",
                    Labels:    map[string]string{"foo2": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod4",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-3",
                    Labels:    map[string]string{"foo2": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod5",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-3",
                    Labels:    map[string]string{"foo2": "bar"},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod6",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-3",
                    Labels:    map[string]string{"foo2": "bar"},
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(500, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(400, resource.DecimalSI)},
                },
            }),
            // node-3 is preferred: evicting its three low-priority pods still
            // leaves pdb-2 with 2 of its 5 matching pods, while evicting
            // matching pods on node-1 or node-2 would violate a PDB.
            preemptedPodIndexes: map[int]struct{}{4: {}, 5: {}, 6: {}},
        },
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            for i := 1; i <= test.nodeCnt; i++ {
                nodeName := fmt.Sprintf("node-%v", i)
                _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(defaultNodeRes).Obj())
                if err != nil {
                    t.Fatalf("Error creating node %v: %v", nodeName, err)
                }
            }

            pods := make([]*v1.Pod, len(test.existingPods))
            var err error
            // Create and run existingPods.
            for i, p := range test.existingPods {
                if pods[i], err = runPausePod(cs, p); err != nil {
                    t.Fatalf("Test [%v]: Error running pause pod: %v", test.name, err)
                }
                // Add pod condition ready so that the PDB status is updated.
                addPodConditionReady(p)
                if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(context.TODO(), p, metav1.UpdateOptions{}); err != nil {
                    t.Fatal(err)
                }
            }
            // Wait for Pods to be stable in the scheduler cache.
            if err := waitCachedPodsStable(testCtx, test.existingPods); err != nil {
                t.Fatalf("Not all pods are stable in the cache: %v", err)
            }

            // Create PDBs.
            for _, pdb := range test.pdbs {
                _, err := testCtx.ClientSet.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).Create(context.TODO(), pdb, metav1.CreateOptions{})
                if err != nil {
                    t.Fatalf("Failed to create PDB: %v", err)
                }
            }
            // Wait for PDBs to become stable.
            if err := waitForPDBsStable(testCtx, test.pdbs, test.pdbPodNum); err != nil {
                t.Fatalf("Not all pdbs are stable in the cache: %v", err)
            }

            // Create the "pod" with higher priority that will try to preempt.
            preemptor, err := createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }

            // Wait for preemption of pods and make sure the other ones are not preempted.
            for i, p := range pods {
                if _, found := test.preemptedPodIndexes[i]; found {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
                        podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
                        t.Errorf("Test [%v]: Pod %v/%v is not getting evicted.", test.name, p.Namespace, p.Name)
                    }
                } else {
                    if p.DeletionTimestamp != nil {
                        t.Errorf("Test [%v]: Didn't expect pod %v/%v to get preempted.", test.name, p.Namespace, p.Name)
                    }
                }
            }
            // Also check if .status.nominatedNodeName of the preemptor pod gets set.
            if len(test.preemptedPodIndexes) > 0 {
                if err := waitForNominatedNodeName(cs, preemptor); err != nil {
                    t.Errorf("Test [%v]: .status.nominatedNodeName was not set for pod %v/%v: %v", test.name, preemptor.Namespace, preemptor.Name, err)
                }
            }

            // Cleanup
            pods = append(pods, preemptor)
            testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
            if err := cs.PolicyV1().PodDisruptionBudgets(testCtx.NS.Name).DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil {
                t.Errorf("error while deleting PDBs, error: %v", err)
            }
            if err := cs.CoreV1().Nodes().DeleteCollection(testCtx.Ctx, metav1.DeleteOptions{}, metav1.ListOptions{}); err != nil {
                t.Errorf("error while deleting nodes, error: %v", err)
            }
        })
    }
}

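// initTestPreferNominatedNode starts a scheduler whose NextPod() is wrapped so
// that every dequeued pod already has .status.nominatedNodeName set to "node-1".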
func initTestPreferNominatedNode(t *testing.T, nsPrefix string, opts ...scheduler.Option) *testutils.TestContext {
    testCtx := testutils.InitTestSchedulerWithOptions(t, testutils.InitTestAPIServer(t, nsPrefix, nil), 0, opts...)
    testutils.SyncSchedulerInformerFactory(testCtx)
    // Wrap NextPod() to make it appear that preemption has already happened
    // and the nominated node has been set.
    f := testCtx.Scheduler.NextPod
    testCtx.Scheduler.NextPod = func(logger klog.Logger) (*framework.QueuedPodInfo, error) {
        podInfo, _ := f(klog.FromContext(testCtx.Ctx))
        if podInfo != nil {
            podInfo.Pod.Status.NominatedNodeName = "node-1"
        }
        return podInfo, nil
    }
    go testCtx.Scheduler.Run(testCtx.Ctx)
    return testCtx
}

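// TestPreferNominatedNode verifies that the scheduler places a pod on its
// nominated node when that node still passes all filters, and falls back to
// another node otherwise.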
func TestPreferNominatedNode(t *testing.T) {
    defaultNodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    defaultPodRes := &v1.ResourceRequirements{Requests: v1.ResourceList{
        v1.ResourceCPU:    *resource.NewMilliQuantity(100, resource.DecimalSI),
        v1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI)},
    }
    tests := []struct {
        name         string
        nodeNames    []string
        existingPods []*v1.Pod
        pod          *v1.Pod
        runningNode  string
    }{
        {
            name:      "nominated node released all resource, preemptor is scheduled to the nominated node",
            nodeNames: []string{"node-1", "node-2"},
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod1",
                    Priority:  &lowPriority,
                    NodeName:  "node-2",
                    Resources: defaultPodRes,
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:     "preemptor-pod",
                Priority: &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(500, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            runningNode: "node-1",
        },
        {
            name:      "nominated node cannot pass all the filters, preemptor should find a different node",
            nodeNames: []string{"node-1", "node-2"},
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "low-pod",
                    Priority:  &lowPriority,
                    Resources: defaultPodRes,
                    NodeName:  "node-1",
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:     "preemptor-pod1",
                Priority: &highPriority,
                Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
                    v1.ResourceCPU:    *resource.NewMilliQuantity(500, resource.DecimalSI),
                    v1.ResourceMemory: *resource.NewQuantity(200, resource.DecimalSI)},
                },
            }),
            runningNode: "node-2",
        },
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            testCtx := initTestPreferNominatedNode(t, "prefer-nominated-node")
            cs := testCtx.ClientSet
            nsName := testCtx.NS.Name
            var err error
            var preemptor *v1.Pod
            for _, nodeName := range test.nodeNames {
                _, err := createNode(cs, st.MakeNode().Name(nodeName).Capacity(defaultNodeRes).Obj())
                if err != nil {
                    t.Fatalf("Error creating node %v: %v", nodeName, err)
                }
            }

            pods := make([]*v1.Pod, len(test.existingPods))
            // Create and run existingPods.
            for i, p := range test.existingPods {
                p.Namespace = nsName
                pods[i], err = runPausePod(cs, p)
                if err != nil {
                    t.Fatalf("Error running pause pod: %v", err)
                }
            }
            test.pod.Namespace = nsName
            preemptor, err = createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }
            err = wait.PollUntilContextTimeout(testCtx.Ctx, 100*time.Millisecond, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
                preemptor, err = cs.CoreV1().Pods(test.pod.Namespace).Get(ctx, test.pod.Name, metav1.GetOptions{})
                if err != nil {
                    t.Errorf("Error getting the preemptor pod info: %v", err)
                }
                if len(preemptor.Spec.NodeName) == 0 {
                    return false, err
                }
                return true, nil
            })
            if err != nil {
                t.Errorf("Cannot schedule Pod %v/%v, error: %v", test.pod.Namespace, test.pod.Name, err)
            }
            // Make sure the pod has been scheduled to the expected node.
            if preemptor.Spec.NodeName != test.runningNode {
                t.Errorf("Expect pod running on %v, got %v.", test.runningNode, preemptor.Spec.NodeName)
            }
        })
    }
}

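// TestReadWriteOncePodPreemption tests preemption scenarios for pods using
// ReadWriteOncePod PVCs: a pod holding such a PVC must be preempted before the
// preemptor can mount it.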
func TestReadWriteOncePodPreemption(t *testing.T) {
    cfg := configtesting.V1ToInternalWithDefaults(t, configv1.KubeSchedulerConfiguration{
        Profiles: []configv1.KubeSchedulerProfile{{
            SchedulerName: pointer.String(v1.DefaultSchedulerName),
            Plugins: &configv1.Plugins{
                Filter: configv1.PluginSet{
                    Enabled: []configv1.Plugin{
                        {Name: volumerestrictions.Name},
                    },
                },
                PreFilter: configv1.PluginSet{
                    Enabled: []configv1.Plugin{
                        {Name: volumerestrictions.Name},
                    },
                },
            },
        }},
    })

    testCtx := testutils.InitTestSchedulerWithOptions(t,
        testutils.InitTestAPIServer(t, "preemption", nil),
        0,
        scheduler.WithProfiles(cfg.Profiles...))
    testutils.SyncSchedulerInformerFactory(testCtx)
    go testCtx.Scheduler.Run(testCtx.Ctx)

    cs := testCtx.ClientSet

    storage := v1.VolumeResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse("1Mi")}}
    volType := v1.HostPathDirectoryOrCreate
    pv1 := st.MakePersistentVolume().
        Name("pv-with-read-write-once-pod-1").
        AccessModes([]v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}).
        Capacity(storage.Requests).
        HostPathVolumeSource(&v1.HostPathVolumeSource{Path: "/mnt1", Type: &volType}).
        Obj()
    pvc1 := st.MakePersistentVolumeClaim().
        Name("pvc-with-read-write-once-pod-1").
        Namespace(testCtx.NS.Name).
        // Annotation and VolumeName required for the PVC to be considered bound.
        Annotation(volume.AnnBindCompleted, "true").
        VolumeName(pv1.Name).
        AccessModes([]v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}).
        Resources(storage).
        Obj()
    pv2 := st.MakePersistentVolume().
        Name("pv-with-read-write-once-pod-2").
        AccessModes([]v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}).
        Capacity(storage.Requests).
        HostPathVolumeSource(&v1.HostPathVolumeSource{Path: "/mnt2", Type: &volType}).
        Obj()
    pvc2 := st.MakePersistentVolumeClaim().
        Name("pvc-with-read-write-once-pod-2").
        Namespace(testCtx.NS.Name).
        // Annotation and VolumeName required for the PVC to be considered bound.
        Annotation(volume.AnnBindCompleted, "true").
        VolumeName(pv2.Name).
        AccessModes([]v1.PersistentVolumeAccessMode{v1.ReadWriteOncePod}).
        Resources(storage).
        Obj()

    tests := []struct {
        name                string
        init                func() error
        existingPods        []*v1.Pod
        pod                 *v1.Pod
        unresolvable        bool
        preemptedPodIndexes map[int]struct{}
        cleanup             func() error
    }{
        {
            name: "preempt single pod",
            init: func() error {
                _, err := testutils.CreatePV(cs, pv1)
                if err != nil {
                    return fmt.Errorf("cannot create pv: %v", err)
                }
                _, err = testutils.CreatePVC(cs, pvc1)
                if err != nil {
                    return fmt.Errorf("cannot create pvc: %v", err)
                }
                return nil
            },
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Volumes: []v1.Volume{{
                        Name: "volume",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc1.Name,
                            },
                        },
                    }},
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Volumes: []v1.Volume{{
                    Name: "volume",
                    VolumeSource: v1.VolumeSource{
                        PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                            ClaimName: pvc1.Name,
                        },
                    },
                }},
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
            cleanup: func() error {
                if err := testutils.DeletePVC(cs, pvc1.Name, pvc1.Namespace); err != nil {
                    return fmt.Errorf("cannot delete pvc: %v", err)
                }
                if err := testutils.DeletePV(cs, pv1.Name); err != nil {
                    return fmt.Errorf("cannot delete pv: %v", err)
                }
                return nil
            },
        },
        {
            name: "preempt two pods",
            init: func() error {
                for _, pv := range []*v1.PersistentVolume{pv1, pv2} {
                    _, err := testutils.CreatePV(cs, pv)
                    if err != nil {
                        return fmt.Errorf("cannot create pv: %v", err)
                    }
                }
                for _, pvc := range []*v1.PersistentVolumeClaim{pvc1, pvc2} {
                    _, err := testutils.CreatePVC(cs, pvc)
                    if err != nil {
                        return fmt.Errorf("cannot create pvc: %v", err)
                    }
                }
                return nil
            },
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod-1",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Volumes: []v1.Volume{{
                        Name: "volume",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc1.Name,
                            },
                        },
                    }},
                }),
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod-2",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Volumes: []v1.Volume{{
                        Name: "volume",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc2.Name,
                            },
                        },
                    }},
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Volumes: []v1.Volume{
                    {
                        Name: "volume-1",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc1.Name,
                            },
                        },
                    },
                    {
                        Name: "volume-2",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc2.Name,
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}, 1: {}},
            cleanup: func() error {
                for _, pvc := range []*v1.PersistentVolumeClaim{pvc1, pvc2} {
                    if err := testutils.DeletePVC(cs, pvc.Name, pvc.Namespace); err != nil {
                        return fmt.Errorf("cannot delete pvc: %v", err)
                    }
                }
                for _, pv := range []*v1.PersistentVolume{pv1, pv2} {
                    if err := testutils.DeletePV(cs, pv.Name); err != nil {
                        return fmt.Errorf("cannot delete pv: %v", err)
                    }
                }
                return nil
            },
        },
        {
            name: "preempt single pod with two volumes",
            init: func() error {
                for _, pv := range []*v1.PersistentVolume{pv1, pv2} {
                    _, err := testutils.CreatePV(cs, pv)
                    if err != nil {
                        return fmt.Errorf("cannot create pv: %v", err)
                    }
                }
                for _, pvc := range []*v1.PersistentVolumeClaim{pvc1, pvc2} {
                    _, err := testutils.CreatePVC(cs, pvc)
                    if err != nil {
                        return fmt.Errorf("cannot create pvc: %v", err)
                    }
                }
                return nil
            },
            existingPods: []*v1.Pod{
                initPausePod(&testutils.PausePodConfig{
                    Name:      "victim-pod",
                    Namespace: testCtx.NS.Name,
                    Priority:  &lowPriority,
                    Volumes: []v1.Volume{
                        {
                            Name: "volume-1",
                            VolumeSource: v1.VolumeSource{
                                PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                    ClaimName: pvc1.Name,
                                },
                            },
                        },
                        {
                            Name: "volume-2",
                            VolumeSource: v1.VolumeSource{
                                PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                    ClaimName: pvc2.Name,
                                },
                            },
                        },
                    },
                }),
            },
            pod: initPausePod(&testutils.PausePodConfig{
                Name:      "preemptor-pod",
                Namespace: testCtx.NS.Name,
                Priority:  &highPriority,
                Volumes: []v1.Volume{
                    {
                        Name: "volume-1",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc1.Name,
                            },
                        },
                    },
                    {
                        Name: "volume-2",
                        VolumeSource: v1.VolumeSource{
                            PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
                                ClaimName: pvc2.Name,
                            },
                        },
                    },
                },
            }),
            preemptedPodIndexes: map[int]struct{}{0: {}},
            cleanup: func() error {
                for _, pvc := range []*v1.PersistentVolumeClaim{pvc1, pvc2} {
                    if err := testutils.DeletePVC(cs, pvc.Name, pvc.Namespace); err != nil {
                        return fmt.Errorf("cannot delete pvc: %v", err)
                    }
                }
                for _, pv := range []*v1.PersistentVolume{pv1, pv2} {
                    if err := testutils.DeletePV(cs, pv.Name); err != nil {
                        return fmt.Errorf("cannot delete pv: %v", err)
                    }
                }
                return nil
            },
        },
    }

    // Create a node with some resources.
    nodeRes := map[v1.ResourceName]string{
        v1.ResourcePods:   "32",
        v1.ResourceCPU:    "500m",
        v1.ResourceMemory: "500",
    }
    nodeObject := st.MakeNode().Name("node1").Capacity(nodeRes).Label("node", "node1").Obj()
    if _, err := createNode(cs, nodeObject); err != nil {
        t.Fatalf("Error creating node: %v", err)
    }

    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            if err := test.init(); err != nil {
                t.Fatalf("Error while initializing test: %v", err)
            }

            pods := make([]*v1.Pod, len(test.existingPods))
            t.Cleanup(func() {
                testutils.CleanupPods(testCtx.Ctx, cs, t, pods)
                if err := test.cleanup(); err != nil {
                    t.Errorf("Error cleaning up test: %v", err)
                }
            })
            // Create and run existingPods.
            for i, p := range test.existingPods {
                var err error
                pods[i], err = runPausePod(cs, p)
                if err != nil {
                    t.Fatalf("Error running pause pod: %v", err)
                }
            }
            // Create the preemptor pod.
            preemptor, err := createPausePod(cs, test.pod)
            if err != nil {
                t.Errorf("Error while creating high priority pod: %v", err)
            }
            pods = append(pods, preemptor)

            // Wait for preemption of pods and make sure the other ones are not preempted.
            for i, p := range pods {
                if _, found := test.preemptedPodIndexes[i]; found {
                    if err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, wait.ForeverTestTimeout, false,
                        podIsGettingEvicted(cs, p.Namespace, p.Name)); err != nil {
                        t.Errorf("Pod %v/%v is not getting evicted.", p.Namespace, p.Name)
                    }
                } else {
                    if p.DeletionTimestamp != nil {
                        t.Errorf("Didn't expect pod %v to get preempted.", p.Name)
                    }
                }
            }

            // Also check that the preemptor pod gets the NominatedNodeName field set.
            if len(test.preemptedPodIndexes) > 0 {
                if err := waitForNominatedNodeName(cs, preemptor); err != nil {
                    t.Errorf("NominatedNodeName field was not set for pod %v: %v", preemptor.Name, err)
                }
            }
        })
    }
}