package e2enode

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/klog/v2"
	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
	"k8s.io/kubernetes/pkg/kubelet/util"
	admissionapi "k8s.io/pod-security-admission/api"

	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	testutils "k8s.io/kubernetes/test/utils"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gcustom"
	"github.com/onsi/gomega/types"
)

const (
	devicePluginDir = "/var/lib/kubelet/device-plugins"
	checkpointName  = "kubelet_internal_checkpoint"
)

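// The device manager tests are intentionally disruptive: they stop or kill
// the kubelet and rewrite its device-manager checkpoint file on disk, which
// is why the whole suite runs serially.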
var _ = SIGDescribe("Device Manager", framework.WithSerial(), feature.DeviceManager, nodefeature.DeviceManager, func() {
	checkpointFullPath := filepath.Join(devicePluginDir, checkpointName)
	f := framework.NewDefaultFramework("devicemanager-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("With SRIOV devices in the system", func() {

		ginkgo.It("should be able to recover V1 (aka pre-1.20) checkpoint data and reject pods before device re-registration", func(ctx context.Context) {
			if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
				e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from an SRIOV device")
			}

			configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
			sd := setupSRIOVConfigOrFail(ctx, f, configMap)

			waitForSRIOVResources(ctx, f, sd)

			cntName := "gu-container"

			ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
			var initCtnAttrs []tmCtnAttribute
			ctnAttrs := []tmCtnAttribute{
				{
					ctnName:       cntName,
					cpuRequest:    "1000m",
					cpuLimit:      "1000m",
					deviceName:    sd.resourceName,
					deviceRequest: "1",
					deviceLimit:   "1",
				},
			}

			podName := "gu-pod-rec-pre-1"
			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
			pod := makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

			ginkgo.By("deleting the pod")
			deletePodSyncByName(ctx, f, pod.Name)
			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)

			ginkgo.By("teardown the sriov device plugin")
			teardownSRIOVConfigOrFail(ctx, f, sd)

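			// Freeze the kubelet with SIGSTOP so the checkpoint file can be
			// rewritten while the device manager is guaranteed not to write
			// it; the frozen process is then SIGKILLed below to force a cold
			// restart that must recover state from the rewritten v1 checkpoint.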
			ginkgo.By("stopping the kubelet")
			killKubelet("SIGSTOP")

			ginkgo.By("rewriting the kubelet checkpoint file as v1")
			err := rewriteCheckpointAsV1(devicePluginDir, checkpointName)
			// Remove the rewritten checkpoint on exit so a failed rewrite
			// cannot leak a stale v1 checkpoint into subsequent tests.
			defer os.Remove(checkpointFullPath)
			framework.ExpectNoError(err)

			ginkgo.By("killing the kubelet")
			killKubelet("SIGKILL")

			ginkgo.By("waiting for the kubelet to be ready again")
			gomega.Eventually(ctx, func(ctx context.Context) bool {
				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				return nodes == 1
			}, time.Minute, time.Second).Should(gomega.BeTrue())

			// With the v1 checkpoint in place and the SRIOV device plugin not
			// yet re-registered, a new pod requesting the device must be
			// rejected at admission.
			podName = "gu-pod-rec-post-2"
			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
			pod = makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)

			pod = e2epod.NewPodClient(f).Create(ctx, pod)
			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
				if pod.Status.Phase != v1.PodPending {
					return true, nil
				}
				return false, nil
			})
			framework.ExpectNoError(err)
			pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)

			if pod.Status.Phase != v1.PodFailed {
				framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
			}

			framework.Logf("checking pod %s status reason (%s)", pod.Name, pod.Status.Reason)
			if !isUnexpectedAdmissionError(pod) {
				framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
			}

			deletePodSyncByName(ctx, f, pod.Name)
		})

		ginkgo.It("should be able to recover V1 (aka pre-1.20) checkpoint data and update topology info on device re-registration", func(ctx context.Context) {
			if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
				e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from an SRIOV device")
			}

			endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
			framework.ExpectNoError(err)

			configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)

			sd := setupSRIOVConfigOrFail(ctx, f, configMap)
			waitForSRIOVResources(ctx, f, sd)

			cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
			framework.ExpectNoError(err)

			resp, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
			conn.Close()
			framework.ExpectNoError(err)

			suitableDevs := 0
			for _, dev := range resp.GetDevices() {
				for _, node := range dev.GetTopology().GetNodes() {
					if node.GetID() != 0 {
						suitableDevs++
					}
				}
			}
			if suitableDevs == 0 {
				teardownSRIOVConfigOrFail(ctx, f, sd)
				e2eskipper.Skipf("no devices found on NUMA Cell other than 0")
			}

			cntName := "gu-container"

			ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
			var initCtnAttrs []tmCtnAttribute
			ctnAttrs := []tmCtnAttribute{
				{
					ctnName:       cntName,
					cpuRequest:    "1000m",
					cpuLimit:      "1000m",
					deviceName:    sd.resourceName,
					deviceRequest: "1",
					deviceLimit:   "1",
				},
			}

			podName := "gu-pod-rec-pre-1"
			framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
			pod := makeTopologyManagerTestPod(podName, ctnAttrs, initCtnAttrs)
			pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

			ginkgo.By("deleting the pod")
			deletePodSyncByName(ctx, f, pod.Name)
			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)

			ginkgo.By("teardown the sriov device plugin")
			deleteSRIOVPodOrFail(ctx, f, sd)

			ginkgo.By("stopping the kubelet")
			killKubelet("SIGSTOP")

			ginkgo.By("rewriting the kubelet checkpoint file as v1")
			err = rewriteCheckpointAsV1(devicePluginDir, checkpointName)
			// Remove the rewritten checkpoint on exit so a failed rewrite
			// cannot leak a stale v1 checkpoint into subsequent tests.
			defer os.Remove(checkpointFullPath)
			framework.ExpectNoError(err)

			ginkgo.By("killing the kubelet")
			killKubelet("SIGKILL")

			ginkgo.By("waiting for the kubelet to be ready again")
			gomega.Eventually(ctx, func(ctx context.Context) bool {
				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				return nodes == 1
			}, time.Minute, time.Second).Should(gomega.BeTrue())

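			// Re-deploy only the device plugin pod, reusing the existing
			// ConfigMap and ServiceAccount, so the plugin re-registers with
			// the restarted kubelet and the topology info recovered from the
			// v1 checkpoint is refreshed.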
			sd2 := &sriovData{
				configMap:      sd.configMap,
				serviceAccount: sd.serviceAccount,
			}
			sd2.pod = createSRIOVPodOrFail(ctx, f)
			ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd2)
			waitForSRIOVResources(ctx, f, sd2)

			compareSRIOVResources(sd, sd2)

			cli, conn, err = podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
			framework.ExpectNoError(err)
			defer conn.Close()

			resp2, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
			framework.ExpectNoError(err)

			cntDevs := stringifyContainerDevices(resp.GetDevices())
			cntDevs2 := stringifyContainerDevices(resp2.GetDevices())
			if cntDevs != cntDevs2 {
				framework.Failf("different allocatable resources expected %v got %v", cntDevs, cntDevs2)
			}
		})

	})
	f.Context("With sample device plugin", f.WithSerial(), f.WithDisruptive(), func() {
		deviceCount := 2
		var devicePluginPod *v1.Pod
		var triggerPathFile, triggerPathDir string

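		// The sample device plugin used here has autoregistration disabled:
		// it registers with the kubelet only once the control file created in
		// BeforeEach is deleted. This lets each test decide when (or whether)
		// the plugin re-registers, e.g. across a kubelet restart.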

		ginkgo.BeforeEach(func(ctx context.Context) {
			ginkgo.By("Wait for node to be ready")
			gomega.Eventually(ctx, e2enode.TotalReady).
				WithArguments(f.ClientSet).
				WithTimeout(time.Minute).
				Should(gomega.BeEquivalentTo(1))

			ginkgo.By("Setting up the directory and file for controlling registration")
			triggerPathDir = filepath.Join(devicePluginDir, "sample")
			if _, err := os.Stat(triggerPathDir); errors.Is(err, os.ErrNotExist) {
				err := os.Mkdir(triggerPathDir, os.ModePerm)
				if err != nil {
					klog.Errorf("Directory creation %s failed: %v", triggerPathDir, err)
					panic(err)
				}
				klog.InfoS("Directory created successfully")
			}

			// Compute the control file path outside the directory-creation
			// branch, so it is set (and the file exists) even when the
			// directory is left over from a previous run.
			triggerPathFile = filepath.Join(triggerPathDir, "registration")
			if _, err := os.Stat(triggerPathFile); errors.Is(err, os.ErrNotExist) {
				_, err = os.Create(triggerPathFile)
				if err != nil {
					klog.Errorf("File creation %s failed: %v", triggerPathFile, err)
					panic(err)
				}
			}

			ginkgo.By("Scheduling a sample device plugin pod")
			data, err := e2etestfiles.Read(SampleDevicePluginControlRegistrationDSYAML)
			if err != nil {
				framework.Fail(err.Error())
			}
			ds := readDaemonSetV1OrDie(data)

			dp := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: SampleDevicePluginName,
				},
				Spec: ds.Spec.Template.Spec,
			}

			devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)

			go func() {
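				// Autoregistration is disabled for the sample device plugin,
				// so registration must be triggered manually by deleting the
				// control file created in the setup above.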
				defer ginkgo.GinkgoRecover()
				framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
				err := os.Remove(triggerPathFile)
				framework.ExpectNoError(err)
			}()

			ginkgo.By("Waiting for devices to become available on the local node")

			gomega.Eventually(ctx, isNodeReadyWithSampleResources).
				WithArguments(f).
				WithTimeout(5 * time.Minute).
				Should(BeReady())

			framework.Logf("Successfully created device plugin pod")

			devsLen := int64(deviceCount)
			ginkgo.By("Waiting for the resource exported by the sample device plugin to become available on the local node")

			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
				WithArguments(f, devsLen).
				WithTimeout(5 * time.Minute).
				Should(HaveAllocatableDevices())
		})

		ginkgo.It("should deploy pod consuming devices first but fail with admission error after kubelet restart in case device plugin hasn't re-registered", func(ctx context.Context) {
			var err error
			podCMD := "while true; do sleep 1000; done;"

			ginkgo.By(fmt.Sprintf("creating a pod requiring %d %q", deviceCount, SampleDeviceResourceName))

			pod := makeBusyboxDeviceRequiringPod(SampleDeviceResourceName, podCMD)
			testPod := e2epod.NewPodClient(f).CreateSync(ctx, pod)

			ginkgo.By("making sure all the pods are ready")

			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, testPod.Namespace, testPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
			framework.ExpectNoError(err, "pod %s/%s did not become running", testPod.Namespace, testPod.Name)
			framework.Logf("pod %s/%s running", testPod.Namespace, testPod.Name)

			ginkgo.By("stopping the kubelet")
			// stopKubelet returns a function that restarts the kubelet service.
			startKubelet := stopKubelet()

			ginkgo.By("stopping all the local containers - using CRI")
			rs, _, err := getCRIClient()
			framework.ExpectNoError(err)
			sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{})
			framework.ExpectNoError(err)
			for _, sandbox := range sandboxes {
				gomega.Expect(sandbox.Metadata).ToNot(gomega.BeNil())
				ginkgo.By(fmt.Sprintf("deleting pod using CRI: %s/%s -> %s", sandbox.Metadata.Namespace, sandbox.Metadata.Name, sandbox.Id))

				err := rs.RemovePodSandbox(ctx, sandbox.Id)
				framework.ExpectNoError(err)
			}

			ginkgo.By("restarting the kubelet")
			startKubelet()

			ginkgo.By("waiting for the kubelet to be ready again")

			gomega.Eventually(ctx, e2enode.TotalReady).
				WithArguments(f.ClientSet).
				WithTimeout(2 * time.Minute).
				Should(gomega.BeEquivalentTo(1))

			ginkgo.By("making sure all the pods are ready after the recovery")

			var devicePluginPodAfterRestart *v1.Pod

			devicePluginPodAfterRestart, err = e2epod.NewPodClient(f).Get(ctx, devicePluginPod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)

			err = e2epod.WaitForPodCondition(ctx, f.ClientSet, devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
			framework.ExpectNoError(err, "pod %s/%s did not become running", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)
			framework.Logf("pod %s/%s running", devicePluginPodAfterRestart.Namespace, devicePluginPodAfterRestart.Name)

			ginkgo.By("Waiting for the resource capacity/allocatable exported by the sample device plugin to become zero")
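			// The device plugin pod is running again, but it has not
			// re-registered with the restarted kubelet (registration is gated
			// on the control file), so the kubelet should report zero
			// capacity and zero allocatable for the sample resource.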
			gomega.Eventually(ctx, isNodeReadyWithAllocatableSampleResources).
				WithArguments(f, int64(0)).
				WithTimeout(5 * time.Minute).
				Should(HaveAllocatableDevices())

			ginkgo.By("Checking that pod requesting devices failed to start because of admission error")
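			// The test pod still exists as an API object, but its containers
			// were removed via CRI while the kubelet was down, so the
			// restarted kubelet must re-admit it; with no healthy devices
			// registered, admission is expected to fail with an
			// UnexpectedAdmissionError.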
			gomega.Eventually(ctx, getPod).
				WithArguments(f, testPod.Name).
				WithTimeout(time.Minute).
				Should(HaveFailedWithAdmissionError(),
					"the pod started successfully, but it should have failed with an admission error")

			ginkgo.By("removing application pods")
			e2epod.NewPodClient(f).DeleteSync(ctx, testPod.Name, metav1.DeleteOptions{}, 2*time.Minute)
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			ginkgo.By("Deleting the device plugin pod")
			e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)

			ginkgo.By("Deleting the directory and file setup for controlling registration")
			err := os.RemoveAll(triggerPathDir)
			framework.ExpectNoError(err)

			ginkgo.By("Deleting any Pods created by the test")
			l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
			framework.ExpectNoError(err)
			for _, p := range l.Items {
				if p.Namespace != f.Namespace.Name {
					continue
				}

				framework.Logf("Deleting pod: %s", p.Name)
				e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
			}

			ginkgo.By("Waiting for devices to become unavailable on the local node")
			gomega.Eventually(ctx, isNodeReadyWithoutSampleResources).
				WithArguments(f).
				WithTimeout(5 * time.Minute).
				Should(BeReady())
		})

	})

})

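// compareSRIOVResources fails the test if the SRIOV resource name or amount
// advertised after the kubelet restart differ from the pre-restart values.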
func compareSRIOVResources(expected, got *sriovData) {
	if expected.resourceName != got.resourceName {
		framework.Failf("different SRIOV resource name: expected %q got %q", expected.resourceName, got.resourceName)
	}
	if expected.resourceAmount != got.resourceAmount {
		framework.Failf("different SRIOV resource amount: expected %d got %d", expected.resourceAmount, got.resourceAmount)
	}
}

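// isUnexpectedAdmissionError reports whether the pod was rejected with an
// UnexpectedAdmissionError status reason.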
func isUnexpectedAdmissionError(pod *v1.Pod) bool {
	re := regexp.MustCompile(`Unexpected.*Admission.*Error`)
	return re.MatchString(pod.Status.Reason)
}

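// rewriteCheckpointAsV1 reads the device-manager checkpoint in its current
// format and rewrites it in the v1 (pre-1.20) format, so the tests can
// exercise the kubelet recovery path for legacy checkpoints.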
func rewriteCheckpointAsV1(dir, name string) error {
	ginkgo.By(fmt.Sprintf("Creating temporary checkpoint manager (dir=%q)", dir))
	checkpointManager, err := checkpointmanager.NewCheckpointManager(dir)
	if err != nil {
		return err
	}
	cp := checkpoint.New(make([]checkpoint.PodDevicesEntry, 0), make(map[string][]string))
	err = checkpointManager.GetCheckpoint(name, cp)
	if err != nil {
		return err
	}

	ginkgo.By(fmt.Sprintf("Read checkpoint %q %#v", name, cp))

	podDevices, registeredDevs := cp.GetDataInLatestFormat()
	podDevicesV1 := convertPodDeviceEntriesToV1(podDevices)
	cpV1 := checkpoint.NewV1(podDevicesV1, registeredDevs)

	blob, err := cpV1.MarshalCheckpoint()
	if err != nil {
		return err
	}

	// Keep a best-effort debug copy under /tmp; only the write to the real
	// checkpoint path decides success or failure.
	ckPath := filepath.Join(dir, name)
	_ = os.WriteFile(filepath.Join("/tmp", name), blob, 0600)
	return os.WriteFile(ckPath, blob, 0600)
}

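// convertPodDeviceEntriesToV1 flattens the per-NUMA-node device ID lists of
// the current checkpoint format into the flat device ID list used by the v1
// format.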
func convertPodDeviceEntriesToV1(entries []checkpoint.PodDevicesEntry) []checkpoint.PodDevicesEntryV1 {
	entriesv1 := []checkpoint.PodDevicesEntryV1{}
	for _, entry := range entries {
		deviceIDs := []string{}
		for _, perNUMANodeDevIDs := range entry.DeviceIDs {
			deviceIDs = append(deviceIDs, perNUMANodeDevIDs...)
		}
		entriesv1 = append(entriesv1, checkpoint.PodDevicesEntryV1{
			PodUID:        entry.PodUID,
			ContainerName: entry.ContainerName,
			ResourceName:  entry.ResourceName,
			DeviceIDs:     deviceIDs,
			AllocResp:     entry.AllocResp,
		})
	}
	return entriesv1
}

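// stringifyContainerDevices renders a device list as a sorted, comma-separated
// string of resourceName[deviceID]@NUMA=node entries, so two lists can be
// compared independently of ordering.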
func stringifyContainerDevices(devs []*kubeletpodresourcesv1.ContainerDevices) string {
	entries := []string{}
	for _, dev := range devs {
		devIDs := dev.GetDeviceIds()
		if devIDs != nil {
			for _, devID := range devIDs {
				nodes := dev.GetTopology().GetNodes()
				if nodes != nil {
					for _, node := range nodes {
						entries = append(entries, fmt.Sprintf("%s[%s]@NUMA=%d", dev.ResourceName, devID, node.GetID()))
					}
				} else {
					entries = append(entries, fmt.Sprintf("%s[%s]@NUMA=none", dev.ResourceName, devID))
				}
			}
		} else {
			entries = append(entries, dev.ResourceName)
		}
	}
	sort.Strings(entries)
	return strings.Join(entries, ", ")
}

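// makeBusyboxDeviceRequiringPod returns a never-restarting busybox pod that
// requests and limits two instances of the given device resource and runs the
// given shell command.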
func makeBusyboxDeviceRequiringPod(resourceName, cmd string) *v1.Pod {
	podName := "device-manager-test-" + string(uuid.NewUUID())
	rl := v1.ResourceList{
		v1.ResourceName(resourceName): *resource.NewQuantity(2, resource.DecimalSI),
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{{
				Image:   busyboxImage,
				Name:    podName,
				Command: []string{"sh", "-c", cmd},
				Resources: v1.ResourceRequirements{
					Limits:   rl,
					Requests: rl,
				},
			}},
		},
	}
}

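// BeReady returns a matcher that succeeds when the polled readiness helper
// returns true, producing a descriptive failure message otherwise.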
func BeReady() types.GomegaMatcher {
	return gomega.And(
		gcustom.MakeMatcher(func(ready bool) (bool, error) {
			if !ready {
				return false, fmt.Errorf("expected node to be ready=%t", ready)
			}
			return true, nil
		}),
		BeInReadyPhase(true),
	)
}

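// BeInReadyPhase returns a matcher asserting that the polled readiness value
// equals isReady, with a templated failure message.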
func BeInReadyPhase(isReady bool) types.GomegaMatcher {
	return gcustom.MakeMatcher(func(ready bool) (bool, error) {
		return ready == isReady, nil
	}).WithTemplate("expected Node Ready {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(isReady)
}

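// isNodeReadyWithSampleResources reports whether the local test node is ready
// and advertises a nonzero capacity of the sample device resource.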
func isNodeReadyWithSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)
	if !ready {
		return false, fmt.Errorf("expected node to be ready=%t", ready)
	}

	if CountSampleDeviceCapacity(node) <= 0 {
		return false, fmt.Errorf("expected devices to be advertised")
	}
	return true, nil
}

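// HaveAllocatableDevices returns a matcher that succeeds when the polled
// helper reports that the node exposes allocatable sample devices.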
func HaveAllocatableDevices() types.GomegaMatcher {
	return gomega.And(
		gcustom.MakeMatcher(func(hasAllocatable bool) (bool, error) {
			if !hasAllocatable {
				return false, fmt.Errorf("expected node to have allocatable devices=%t", hasAllocatable)
			}
			return true, nil
		}),
		hasAllocatable(true),
	)
}

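// hasAllocatable returns a matcher asserting that the polled allocatable-state
// boolean equals the expected value, with a templated failure message.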
func hasAllocatable(hasAllocatable bool) types.GomegaMatcher {
	return gcustom.MakeMatcher(func(hasAllocatableDevices bool) (bool, error) {
		return hasAllocatableDevices == hasAllocatable, nil
	}).WithTemplate("expected Node with allocatable {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasAllocatable)
}

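// isNodeReadyWithAllocatableSampleResources reports whether the local test
// node is ready and both capacity and allocatable for the sample device
// resource match devCount.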
func isNodeReadyWithAllocatableSampleResources(ctx context.Context, f *framework.Framework, devCount int64) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)
	if !ready {
		return false, fmt.Errorf("expected node to be ready=%t", ready)
	}

	if CountSampleDeviceCapacity(node) != devCount {
		return false, fmt.Errorf("expected devices capacity to be: %d", devCount)
	}

	if CountSampleDeviceAllocatable(node) != devCount {
		return false, fmt.Errorf("expected devices allocatable to be: %d", devCount)
	}
	return true, nil
}

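// isNodeReadyWithoutSampleResources reports whether the local test node is
// ready and no longer advertises any sample device capacity.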
func isNodeReadyWithoutSampleResources(ctx context.Context, f *framework.Framework) (bool, error) {
	node, ready := getLocalTestNode(ctx, f)
	if !ready {
		return false, fmt.Errorf("expected node to be ready=%t", ready)
	}

	if CountSampleDeviceCapacity(node) > 0 {
		return false, fmt.Errorf("expected devices to be not present")
	}
	return true, nil
}

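// HaveFailedWithAdmissionError returns a matcher that succeeds when the polled
// helper reports that the pod failed with the expected admission error.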
func HaveFailedWithAdmissionError() types.GomegaMatcher {
	return gomega.And(
		gcustom.MakeMatcher(func(hasFailed bool) (bool, error) {
			if !hasFailed {
				return false, fmt.Errorf("expected pod to have failed=%t", hasFailed)
			}
			return true, nil
		}),
		hasFailed(true),
	)
}

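// hasFailed returns a matcher asserting that the polled pod-failed boolean
// equals the expected value, with a templated failure message.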
func hasFailed(hasFailed bool) types.GomegaMatcher {
	return gcustom.MakeMatcher(func(hasPodFailed bool) (bool, error) {
		return hasPodFailed == hasFailed, nil
	}).WithTemplate("expected Pod failed {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(hasFailed)
}

func getPodByName(ctx context.Context, f *framework.Framework, podName string) (*v1.Pod, error) {
	return e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
}

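// getPod reports whether the named pod has failed with the
// UnexpectedAdmissionError reason and the expected "no healthy devices"
// message, returning an error that describes the first mismatch.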
func getPod(ctx context.Context, f *framework.Framework, podName string) (bool, error) {
	pod, err := getPodByName(ctx, f, podName)
	if err != nil {
		return false, err
	}

	expectedStatusReason := "UnexpectedAdmissionError"
	expectedStatusMessage := "Allocate failed due to no healthy devices present; cannot allocate unhealthy devices"

	if pod.Status.Phase != v1.PodFailed {
		return false, fmt.Errorf("expected pod to reach phase %q, got final phase %q instead", v1.PodFailed, pod.Status.Phase)
	}
	if pod.Status.Reason != expectedStatusReason {
		return false, fmt.Errorf("expected pod status reason to be %q, got %q instead", expectedStatusReason, pod.Status.Reason)
	}
	if !strings.Contains(pod.Status.Message, expectedStatusMessage) {
		return false, fmt.Errorf("expected pod status message to contain %q, got %q instead", expectedStatusMessage, pod.Status.Message)
	}
	return true, nil
}