package e2enode

import (
    "context"
    "errors"
    "fmt"
    "os"
    "path/filepath"
    "regexp"
    "strings"
    "time"

    "github.com/onsi/ginkgo/v2"
    "github.com/onsi/gomega"
    "github.com/onsi/gomega/gcustom"
    "github.com/onsi/gomega/types"

    appsv1 "k8s.io/api/apps/v1"
    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/runtime/serializer"
    k8stypes "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/sets"
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
    admissionapi "k8s.io/pod-security-admission/api"

    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/uuid"
    "k8s.io/kubectl/pkg/util/podutils"
    kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
    kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
    "k8s.io/kubernetes/pkg/features"
    "k8s.io/kubernetes/test/e2e/feature"
    "k8s.io/kubernetes/test/e2e/framework"
    e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    "k8s.io/kubernetes/test/e2e/nodefeature"
)

var (
    appsScheme = runtime.NewScheme()
    appsCodecs = serializer.NewCodecFactory(appsScheme)
)

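// Serial because the tests restart the kubelet.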
var _ = SIGDescribe("Device Plugin", feature.DevicePluginProbe, nodefeature.DevicePluginProbe, framework.WithSerial(), func() {
    f := framework.NewDefaultFramework("device-plugin-errors")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    testDevicePlugin(f, kubeletdevicepluginv1beta1.DevicePluginPath)
    testDevicePluginNodeReboot(f, kubeletdevicepluginv1beta1.DevicePluginPath)
})
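// readDaemonSetV1OrDie decodes the given manifest bytes into an apps/v1
// DaemonSet, panicking on any error; it is intended for static test manifests
// that are known to be valid.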
func readDaemonSetV1OrDie(objBytes []byte) *appsv1.DaemonSet {
    if err := appsv1.AddToScheme(appsScheme); err != nil {
        panic(err)
    }
    requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(appsv1.SchemeGroupVersion), objBytes)
    if err != nil {
        panic(err)
    }
    return requiredObj.(*appsv1.DaemonSet)
}

const (
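    // expectedSampleDevsAmount is the number of fake devices advertised by the
    // sample device plugin.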
    expectedSampleDevsAmount int64 = 2
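    // sleepIntervalForever is a sleep interval long enough that the test
    // container never exits on its own while the test runs.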
    sleepIntervalForever string = "24h"
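    // sleepIntervalWithRestart is a sleep interval short enough that the
    // container exits, and gets restarted, while the test is still running.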
    sleepIntervalWithRestart string = "60s"
)

func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
    // Normalize the socket directory path and keep a trailing separator.
    pluginSockDir = filepath.Join(pluginSockDir) + "/"
    f.Context("DevicePlugin", f.WithSerial(), f.WithDisruptive(), func() {
        var devicePluginPod, dptemplate *v1.Pod
        var v1alphaPodResources *kubeletpodresourcesv1alpha1.ListPodResourcesResponse
        var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
        var err error

        ginkgo.BeforeEach(func(ctx context.Context) {
            ginkgo.By("Wait for node to be ready")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
                framework.ExpectNoError(err)
                return nodes == 1
            }, time.Minute, time.Second).Should(gomega.BeTrue())
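            // Before running the tests, make sure the node reports no pod
            // resources in use, so that leftovers from previous tests cannot
            // skew the checks below.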
            gomega.Eventually(ctx, func(ctx context.Context) error {
                v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
                if err != nil {
                    return fmt.Errorf("failed to get node local podresources by accessing the (v1alpha) podresources API endpoint: %v", err)
                }

                v1PodResources, err = getV1NodeDevices(ctx)
                if err != nil {
                    return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
                }

                if len(v1alphaPodResources.PodResources) > 0 {
                    return fmt.Errorf("expected v1alpha pod resources to be empty, but got non-empty resources: %+v", v1alphaPodResources.PodResources)
                }

                if len(v1PodResources.PodResources) > 0 {
                    return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
                }
                return nil
            }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed())

            ginkgo.By("Scheduling a sample device plugin pod")
            dp := getSampleDevicePluginPod(pluginSockDir)
            dptemplate = dp.DeepCopy()
            devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)

            ginkgo.By("Waiting for devices to become available on the local node")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready && CountSampleDeviceCapacity(node) > 0
            }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
            framework.Logf("Successfully created device plugin pod")

            ginkgo.By(fmt.Sprintf("Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)", expectedSampleDevsAmount))
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready &&
                    CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
                    CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
            }, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
        })

        ginkgo.AfterEach(func(ctx context.Context) {
            ginkgo.By("Deleting the device plugin pod")
            e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)

            ginkgo.By("Deleting any Pods created by the test")
            l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
            framework.ExpectNoError(err)
            for _, p := range l.Items {
                if p.Namespace != f.Namespace.Name {
                    continue
                }

                framework.Logf("Deleting pod: %s", p.Name)
                e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
            }

            restartKubelet(true)

            ginkgo.By("Waiting for devices to become unavailable on the local node")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready && CountSampleDeviceCapacity(node) <= 0
            }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())

            ginkgo.By("devices now unavailable on the local node")
        })

        ginkgo.It("Can schedule a pod that requires a device", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            v1alphaPodResources, err = getV1alpha1NodeDevices(ctx)
            framework.ExpectNoError(err)

            v1PodResources, err = getV1NodeDevices(ctx)
            framework.ExpectNoError(err)

            framework.Logf("v1alphaPodResources.PodResources: %+v", v1alphaPodResources.PodResources)
            framework.Logf("v1PodResources.PodResources: %+v", v1PodResources.PodResources)
            framework.Logf("len(v1alphaPodResources.PodResources): %d", len(v1alphaPodResources.PodResources))
            framework.Logf("len(v1PodResources.PodResources): %d", len(v1PodResources.PodResources))

            // Two pods are expected: the device plugin pod and the test pod just created.
            gomega.Expect(v1alphaPodResources.PodResources).To(gomega.HaveLen(2))
            gomega.Expect(v1PodResources.PodResources).To(gomega.HaveLen(2))

            var v1alphaResourcesForOurPod *kubeletpodresourcesv1alpha1.PodResources
            for _, res := range v1alphaPodResources.GetPodResources() {
                if res.Name == pod1.Name {
                    v1alphaResourcesForOurPod = res
                }
            }

            var v1ResourcesForOurPod *kubeletpodresourcesv1.PodResources
            for _, res := range v1PodResources.GetPodResources() {
                if res.Name == pod1.Name {
                    v1ResourcesForOurPod = res
                }
            }

            gomega.Expect(v1alphaResourcesForOurPod).NotTo(gomega.BeNil())
            gomega.Expect(v1ResourcesForOurPod).NotTo(gomega.BeNil())

            gomega.Expect(v1alphaResourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
            gomega.Expect(v1ResourcesForOurPod.Name).To(gomega.Equal(pod1.Name))

            gomega.Expect(v1alphaResourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))
            gomega.Expect(v1ResourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))

            gomega.Expect(v1alphaResourcesForOurPod.Containers).To(gomega.HaveLen(1))
            gomega.Expect(v1ResourcesForOurPod.Containers).To(gomega.HaveLen(1))

            gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))
            gomega.Expect(v1ResourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))

            gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))
            gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))

            gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
            gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))

            gomega.Expect(v1alphaResourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
            gomega.Expect(v1ResourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
        })

        ginkgo.It("[NodeSpecialFeature:CDI] can make a CDI device accessible in a container", func(ctx context.Context) {
            e2eskipper.SkipUnlessFeatureGateEnabled(features.DevicePluginCDIDevices)

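            // The container asserts that exactly one CDI device node
            // (/tmp/CDI-Dev-1 or /tmp/CDI-Dev-2) is visible and that
            // $CDI_DEVICE points at a block device.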
            podObj := makeBusyboxPod(SampleDeviceResourceName, "[ $(ls /tmp/CDI-Dev-[1,2] | wc -l) -eq 1 -a -b /tmp/$CDI_DEVICE ]")
            podObj.Spec.RestartPolicy = v1.RestartPolicyNever
            pod := e2epod.NewPodClient(f).Create(ctx, podObj)
            framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace))
        })
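        // Simulates a container restart (the container exits after a short
        // sleep) with neither the kubelet nor the device plugin restarting,
        // and verifies that the device assignment is preserved.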
        ginkgo.It("Keeps device plugin assignments across pod restarts (no kubelet restart, no device plugin restart)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("Waiting for container to restart")
            ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)

            ginkgo.By("Confirming that after a container restart, fake-device assignment is kept")
            devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))

            ginkgo.By("Verifying the device assignment after container restart using podresources API")
            v1PodResources, err = getV1NodeDevices(ctx)
            framework.ExpectNoError(err, "getting pod resources assignment after pod restart")
            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after pod restart")

            ginkgo.By("Creating another pod")
            pod2 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod2.Name, f.Namespace.Name, 1*time.Minute)
            framework.ExpectNoError(err)

            ginkgo.By("Checking that pod got a fake device")
            devID2, err := parseLog(ctx, f, pod2.Name, pod2.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod2.Name)

            gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod2 requested a device but started successfully without")

            ginkgo.By("Verifying the device assignment after extra container start using podresources API")
            v1PodResources, err = getV1NodeDevices(ctx)
            framework.ExpectNoError(err, "getting pod resources assignment after extra container start")
            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after extra container start - pod1")
            err, _ = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID2})
            framework.ExpectNoError(err, "inconsistent device assignment after extra container start - pod2")
        })
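        // Simulates a kubelet restart while the pod keeps running (its
        // container sleeps "forever") and the device plugin stays up, and
        // verifies that the device assignment is preserved across the restart.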
        ginkgo.It("Keeps device plugin assignments across kubelet restarts (no pod restart, no device plugin restart)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)
            framework.Logf("testing pod: pre-restart UID=%s namespace=%s name=%s ready=%v", pod1.UID, pod1.Namespace, pod1.Name, podutils.IsPodReady(pod1))

            ginkgo.By("Restarting Kubelet")
            restartKubelet(true)

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Waiting for resource to become available on the local node after restart")
            gomega.Eventually(ctx, func() bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready &&
                    CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
                    CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
            }, 30*time.Second, framework.Poll).Should(gomega.BeTrue())

            ginkgo.By("Checking the same instance of the pod is still running")
            gomega.Eventually(ctx, getPodByName).
                WithArguments(f, pod1.Name).
                WithTimeout(time.Minute).
                Should(BeTheSamePodStillRunning(pod1),
                    "the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")

            pod2, err := e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)
            framework.Logf("testing pod: post-restart UID=%s namespace=%s name=%s ready=%v", pod2.UID, pod2.Namespace, pod2.Name, podutils.IsPodReady(pod2))

            ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")

            err, _ = checkPodResourcesAssignment(v1PodResources, pod2.Namespace, pod2.Name, pod2.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after kubelet restart")
        })
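        // Simulates both a container restart and a kubelet restart (but no
        // device plugin restart) and verifies that the device assignment is
        // preserved through both.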
        ginkgo.It("Keeps device plugin assignments across pod and kubelet restarts (no device plugin restart)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)

            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Waiting for container to restart")
            ensurePodContainerRestart(ctx, f, pod1.Name, pod1.Name)

            ginkgo.By("Confirming that after a container restart, fake-device assignment is kept")
            devIDRestart1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devIDRestart1).To(gomega.Equal(devID1))

            ginkgo.By("Restarting Kubelet")
            restartKubelet(true)

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Checking an instance of the pod is running")
            gomega.Eventually(ctx, getPodByName).
                WithArguments(f, pod1.Name).
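                // Give the pod ample time to come back: both the kubelet and
                // the pod's container were restarted, on top of the node
                // readiness wait above.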
                WithTimeout(5*time.Minute + 10*time.Second).
                Should(gomega.And(
                    BeAPodInPhase(v1.PodRunning),
                    BeAPodReady(),
                ),
                    "the pod should still be running, the workload should not be perturbed by kubelet restarts")

            ginkgo.By("Verifying the device assignment after pod and kubelet restart using container logs")
            var devID1Restarted string
            gomega.Eventually(ctx, func() string {
                devID1Restarted, err = parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
                if err != nil {
                    framework.Logf("error getting logs for pod %q: %v", pod1.Name, err)
                    return ""
                }
                return devID1Restarted
            }, 30*time.Second, framework.Poll).Should(gomega.Equal(devID1), "pod %s reports a different device after restarts: %s (expected %s)", pod1.Name, devID1Restarted, devID1)

            ginkgo.By("Verifying the device assignment after pod and kubelet restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")

            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after pod and kubelet restart")
        })
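        // Simulates a device plugin restart: the plugin pod is deleted and
        // recreated, re-registering the resource, while the kubelet keeps
        // running; verifies that the device assignment is preserved.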
        ginkgo.It("Keeps device plugin assignments after the device plugin has restarted (no kubelet restart, pod restart)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Re-Register resources and delete the plugin pod")
            gp := int64(0)
            deleteOptions := metav1.DeleteOptions{
                GracePeriodSeconds: &gp,
            }
            e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, deleteOptions, time.Minute)
            waitForContainerRemoval(ctx, devicePluginPod.Spec.Containers[0].Name, devicePluginPod.Name, devicePluginPod.Namespace)

            ginkgo.By("Recreating the plugin pod")
            devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)
            err = e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, devicePluginPod.Name, devicePluginPod.Namespace, 1*time.Minute)
            framework.ExpectNoError(err)

            ginkgo.By("Waiting for resource to become available on the local node after re-registration")
            gomega.Eventually(ctx, func() bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready &&
                    CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
                    CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
            }, 30*time.Second, framework.Poll).Should(gomega.BeTrue())

            ginkgo.By("Verifying the device assignment after device plugin restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after device plugin restart")

            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after device plugin restart")
        })
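        // Simulates a kubelet restart followed by a device plugin restart,
        // with the pod running undisturbed throughout, and verifies that the
        // device assignment is preserved by both events.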
        ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin restart (no pod restart)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)

            gomega.Expect(devID1).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("Restarting Kubelet")
            restartKubelet(true)

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Checking the same instance of the pod is still running after kubelet restart")
            gomega.Eventually(ctx, getPodByName).
                WithArguments(f, pod1.Name).
                WithTimeout(time.Minute).
                Should(BeTheSamePodStillRunning(pod1),
                    "the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")

            ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")

            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after kubelet restart")

            ginkgo.By("Re-Register resources by deleting the plugin pod")
            gp := int64(0)
            deleteOptions := metav1.DeleteOptions{
                GracePeriodSeconds: &gp,
            }
            e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, deleteOptions, time.Minute)
            waitForContainerRemoval(ctx, devicePluginPod.Spec.Containers[0].Name, devicePluginPod.Name, devicePluginPod.Namespace)

            ginkgo.By("Recreating the plugin pod")
            devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)

            ginkgo.By("Waiting for resource to become available on the local node after restart")
            gomega.Eventually(ctx, func() bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready &&
                    CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
                    CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
            }, 30*time.Second, framework.Poll).Should(gomega.BeTrue())

            ginkgo.By("Checking the same instance of the pod is still running after the device plugin restart")
            gomega.Eventually(ctx, getPodByName).
                WithArguments(f, pod1.Name).
                WithTimeout(time.Minute).
                Should(BeTheSamePodStillRunning(pod1),
                    "the same pod instance not running across kubelet restarts, workload should not be perturbed by device plugin restarts")
        })

        ginkgo.It("[OrphanedPods] Ensures pods consuming devices deleted while kubelet is down are cleaned up correctly", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
            pod := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))

            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID, err := parseLog(ctx, f, pod.Name, pod.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod.Name)
            gomega.Expect(devID).To(gomega.Not(gomega.BeEmpty()), "pod requested a device but started successfully without")

            pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("stopping the kubelet")
            startKubelet := stopKubelet()
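            // Wait until the kubelet health check fails, confirming the
            // kubelet is actually down before deleting the pod.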
            gomega.Eventually(ctx, func() bool {
                ok := kubeletHealthCheck(kubeletHealthCheckURL)
                framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
                return ok
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

            framework.Logf("Delete the pod while the kubelet is not running")
            deletePodSyncByName(ctx, f, pod.Name)

            framework.Logf("Starting the kubelet")
            startKubelet()

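            // Wait until the kubelet health check passes again.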
            gomega.Eventually(ctx, func() bool {
                ok := kubeletHealthCheck(kubeletHealthCheckURL)
                framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
                return ok
            }, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

            framework.Logf("wait for the pod %v to disappear", pod.Name)
            gomega.Eventually(ctx, func(ctx context.Context) error {
                err := checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
                framework.Logf("pod %s/%s disappear check err=%v", pod.Namespace, pod.Name, err)
                return err
            }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())

            waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)

            ginkgo.By("Verifying the device assignment is gone after the kubelet restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
            err, allocated := checkPodResourcesAssignment(v1PodResources, pod.Namespace, pod.Name, pod.Spec.Containers[0].Name, SampleDeviceResourceName, []string{})
            if err == nil || allocated {
                framework.Fail(fmt.Sprintf("stale device assignment after pod deletion while kubelet was down: allocated=%v error=%v", allocated, err))
            }
        })

        f.It("Can schedule a pod with a restartable init container", nodefeature.SidecarContainers, func(ctx context.Context) {
            podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s"
            sleepOneSecond := "1s"
            rl := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}
            pod := &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{Name: "device-plugin-test-" + string(uuid.NewUUID())},
                Spec: v1.PodSpec{
                    RestartPolicy: v1.RestartPolicyAlways,
                    InitContainers: []v1.Container{
                        {
                            Image:   busyboxImage,
                            Name:    "init-1",
                            Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepOneSecond)},
                            Resources: v1.ResourceRequirements{
                                Limits:   rl,
                                Requests: rl,
                            },
                        },
                        {
                            Image:   busyboxImage,
                            Name:    "restartable-init-2",
                            Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
                            Resources: v1.ResourceRequirements{
                                Limits:   rl,
                                Requests: rl,
                            },
                            RestartPolicy: &containerRestartPolicyAlways,
                        },
                    },
                    Containers: []v1.Container{{
                        Image:   busyboxImage,
                        Name:    "regular-1",
                        Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
                        Resources: v1.ResourceRequirements{
                            Limits:   rl,
                            Requests: rl,
                        },
                    }},
                },
            }

            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, pod)
            deviceIDRE := "stub devices: (Dev-[0-9]+)"

            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[0].Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[0].Name)
            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1's init container requested a device but started successfully without")

            devID2, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[1].Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[1].Name)
            gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod1's restartable init container requested a device but started successfully without")

            gomega.Expect(devID2).To(gomega.Equal(devID1), "pod1's init container and restartable init container should share the same device")

            devID3, err := parseLog(ctx, f, pod1.Name, pod1.Spec.Containers[0].Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.Containers[0].Name)
            gomega.Expect(devID3).To(gomega.Not(gomega.Equal("")), "pod1's regular container requested a device but started successfully without")

            gomega.Expect(devID3).NotTo(gomega.Equal(devID2), "pod1's restartable init container and regular container should not share the same device")

            podResources, err := getV1NodeDevices(ctx)
            framework.ExpectNoError(err)

            framework.Logf("PodResources.PodResources: %+v", podResources.PodResources)
            framework.Logf("len(PodResources.PodResources): %d", len(podResources.PodResources))

            // Two pods are expected: the device plugin pod and the test pod.
            gomega.Expect(podResources.PodResources).To(gomega.HaveLen(2))

            var resourcesForOurPod *kubeletpodresourcesv1.PodResources
            for _, res := range podResources.GetPodResources() {
                if res.Name == pod1.Name {
                    resourcesForOurPod = res
                }
            }

            gomega.Expect(resourcesForOurPod).NotTo(gomega.BeNil())

            gomega.Expect(resourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
            gomega.Expect(resourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))

            // Only the restartable init container and the regular container are
            // expected to be reported: the ordinary init container has terminated.
            gomega.Expect(resourcesForOurPod.Containers).To(gomega.HaveLen(2))

            for _, container := range resourcesForOurPod.Containers {
                if container.Name == pod1.Spec.InitContainers[1].Name {
                    gomega.Expect(container.Devices).To(gomega.HaveLen(1))
                    gomega.Expect(container.Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
                    gomega.Expect(container.Devices[0].DeviceIds).To(gomega.HaveLen(1))
                } else if container.Name == pod1.Spec.Containers[0].Name {
                    gomega.Expect(container.Devices).To(gomega.HaveLen(1))
                    gomega.Expect(container.Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
                    gomega.Expect(container.Devices[0].DeviceIds).To(gomega.HaveLen(1))
                } else {
                    framework.Failf("unexpected container name: %s", container.Name)
                }
            }
        })
    })
}

func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
    f.Context("DevicePlugin", f.WithSerial(), f.WithDisruptive(), func() {
        var devicePluginPod *v1.Pod
        var v1PodResources *kubeletpodresourcesv1.ListPodResourcesResponse
        var triggerPathFile, triggerPathDir string
        var err error

        ginkgo.BeforeEach(func(ctx context.Context) {
            ginkgo.By("Wait for node to be ready")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
                framework.ExpectNoError(err)
                return nodes == 1
            }, time.Minute, time.Second).Should(gomega.BeTrue())

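            // Before the test runs, make sure the node reports no pod
            // resources in use, so that leftovers from previous tests cannot
            // skew the checks below.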
            gomega.Eventually(ctx, func(ctx context.Context) error {
                v1PodResources, err = getV1NodeDevices(ctx)
                if err != nil {
                    return fmt.Errorf("failed to get node local podresources by accessing the (v1) podresources API endpoint: %v", err)
                }

                if len(v1PodResources.PodResources) > 0 {
                    return fmt.Errorf("expected v1 pod resources to be empty, but got non-empty resources: %+v", v1PodResources.PodResources)
                }
                return nil
            }, f.Timeouts.SystemDaemonsetStartup, f.Timeouts.Poll).Should(gomega.Succeed())

            ginkgo.By("Setting up the directory for controlling registration")
            triggerPathDir = filepath.Join(devicePluginDir, "sample")
            if _, err := os.Stat(triggerPathDir); err != nil {
                if errors.Is(err, os.ErrNotExist) {
                    if err := os.Mkdir(triggerPathDir, os.ModePerm); err != nil {
                        framework.Fail(fmt.Sprintf("registration control directory %q creation failed: %v", triggerPathDir, err))
                    }
                    framework.Logf("registration control directory created successfully")
                } else {
                    framework.Fail(fmt.Sprintf("unexpected error checking %q: %v", triggerPathDir, err))
                }
            } else {
                framework.Logf("registration control directory %q already present", triggerPathDir)
            }

            ginkgo.By("Setting up the file trigger for controlling registration")
            triggerPathFile = filepath.Join(triggerPathDir, "registration")
            if _, err := os.Stat(triggerPathFile); err != nil {
                if errors.Is(err, os.ErrNotExist) {
                    if _, err = os.Create(triggerPathFile); err != nil {
                        framework.Fail(fmt.Sprintf("registration control file %q creation failed: %v", triggerPathFile, err))
                    }
                    framework.Logf("registration control file created successfully")
                } else {
                    framework.Fail(fmt.Sprintf("unexpected error checking %q: %v", triggerPathFile, err))
                }
            } else {
                framework.Logf("registration control file %q already present", triggerPathFile)
            }

            ginkgo.By("Scheduling a sample device plugin pod")
            data, err := e2etestfiles.Read(SampleDevicePluginControlRegistrationDSYAML)
            if err != nil {
                framework.Fail(fmt.Sprintf("error reading test data %q: %v", SampleDevicePluginControlRegistrationDSYAML, err))
            }
            ds := readDaemonSetV1OrDie(data)

            dp := &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: SampleDevicePluginName,
                },
                Spec: ds.Spec.Template.Spec,
            }

            devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dp)

            go func() {
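                // This sample device plugin variant waits for the control
                // file to be removed before registering itself with the
                // kubelet; delete the file asynchronously to trigger the
                // registration.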
                defer ginkgo.GinkgoRecover()
                framework.Logf("Deleting the control file: %q to trigger registration", triggerPathFile)
                err := os.Remove(triggerPathFile)
                framework.ExpectNoError(err)
            }()

            ginkgo.By("Waiting for devices to become available on the local node")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready && CountSampleDeviceCapacity(node) > 0
            }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
            framework.Logf("Successfully created device plugin pod")

            ginkgo.By(fmt.Sprintf("Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)", expectedSampleDevsAmount))
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready &&
                    CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
                    CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
            }, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
        })

        ginkgo.AfterEach(func(ctx context.Context) {
            ginkgo.By("Deleting the device plugin pod")
            e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, metav1.DeleteOptions{}, time.Minute)

            ginkgo.By("Deleting any Pods created by the test")
            l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
            framework.ExpectNoError(err)
            for _, p := range l.Items {
                if p.Namespace != f.Namespace.Name {
                    continue
                }

                framework.Logf("Deleting pod: %s", p.Name)
                e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
            }

            err = os.Remove(triggerPathDir)
            framework.ExpectNoError(err)

            ginkgo.By("Waiting for devices to become unavailable on the local node")
            gomega.Eventually(ctx, func(ctx context.Context) bool {
                node, ready := getLocalTestNode(ctx, f)
                return ready && CountSampleDeviceCapacity(node) <= 0
            }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())

            ginkgo.By("devices now unavailable on the local node")
        })
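        // Simulates a node reboot: the kubelet is stopped, all local
        // containers are removed through the CRI, and the kubelet is started
        // again. The device plugin does not re-register right away, so the pod
        // must fail admission, while the recorded device assignment survives.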
        ginkgo.It("Keeps device plugin assignments across node reboots (no pod restart, no device plugin re-registration)", func(ctx context.Context) {
            podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
            pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
            deviceIDRE := "stub devices: (Dev-[0-9]+)"
            devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
            framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)

            gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1 requested a device but started successfully without")

            pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
            framework.ExpectNoError(err)

            ginkgo.By("stopping the kubelet")
            startKubelet := stopKubelet()

            ginkgo.By("stopping all the local containers - using CRI")
            rs, _, err := getCRIClient()
            framework.ExpectNoError(err)
            sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{})
            framework.ExpectNoError(err)
            for _, sandbox := range sandboxes {
                gomega.Expect(sandbox.Metadata).ToNot(gomega.BeNil())
                ginkgo.By(fmt.Sprintf("deleting pod using CRI: %s/%s -> %s", sandbox.Metadata.Namespace, sandbox.Metadata.Name, sandbox.Id))

                err := rs.RemovePodSandbox(ctx, sandbox.Id)
                framework.ExpectNoError(err)
            }

            ginkgo.By("restarting the kubelet")
            startKubelet()

            ginkgo.By("Wait for node to be ready again")
            framework.ExpectNoError(e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute))

            ginkgo.By("Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet")
            gomega.Eventually(ctx, getPod).
                WithArguments(f, pod1.Name).
                WithTimeout(time.Minute).
                Should(HaveFailedWithAdmissionError(),
                    "the pod succeeded to start, when it should fail with the admission error")
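            // Even though the pod failed admission after the reboot, the
            // kubelet is expected to still report the device originally
            // assigned to it (recovered, presumably, from the device manager
            // checkpoint).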
            ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
            gomega.Eventually(ctx, func() error {
                v1PodResources, err = getV1NodeDevices(ctx)
                return err
            }, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")

            err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
            framework.ExpectNoError(err, "inconsistent device assignment after node reboot")
        })
    })
}
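// makeBusyboxPod returns a busybox pod that requests one instance of the given
// resource and runs the given shell command.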
func makeBusyboxPod(resourceName, cmd string) *v1.Pod {
    podName := "device-plugin-test-" + string(uuid.NewUUID())
    rl := v1.ResourceList{v1.ResourceName(resourceName): *resource.NewQuantity(1, resource.DecimalSI)}

    return &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: podName},
        Spec: v1.PodSpec{
            RestartPolicy: v1.RestartPolicyAlways,
            Containers: []v1.Container{{
                Image: busyboxImage,
                Name:  podName,
                // Run the given command in the test container.
                Command: []string{"sh", "-c", cmd},
                Resources: v1.ResourceRequirements{
                    Limits:   rl,
                    Requests: rl,
                },
            }},
        },
    }
}
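// ensurePodContainerRestart waits until the restart count of the first
// container of the given pod increases, failing the test if it does not
// within 5 minutes.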
func ensurePodContainerRestart(ctx context.Context, f *framework.Framework, podName string, contName string) {
    var initialCount int32
    var currentCount int32
    p, err := e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
    if err != nil || len(p.Status.ContainerStatuses) < 1 {
        framework.Failf("ensurePodContainerRestart failed for pod %q: %v", podName, err)
    }
    initialCount = p.Status.ContainerStatuses[0].RestartCount
    gomega.Eventually(ctx, func() bool {
        p, err = e2epod.NewPodClient(f).Get(ctx, podName, metav1.GetOptions{})
        if err != nil || len(p.Status.ContainerStatuses) < 1 {
            return false
        }
        currentCount = p.Status.ContainerStatuses[0].RestartCount
        framework.Logf("initial %v, current %v", initialCount, currentCount)
        return currentCount > initialCount
    }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue())
}
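// parseLog fetches the logs of the given container and returns the first
// capture group of the given regular expression.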
func parseLog(ctx context.Context, f *framework.Framework, podName string, contName string, re string) (string, error) {
    logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, podName, contName)
    if err != nil {
        return "", err
    }

    framework.Logf("got pod logs: %v", logs)
    regex := regexp.MustCompile(re)
    matches := regex.FindStringSubmatch(logs)
    if len(matches) < 2 {
        return "", fmt.Errorf("no match for %q found in logs: %q", re, logs)
    }

    return matches[1], nil
}
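// checkPodResourcesAssignment checks that the podresources API reports exactly
// the expected devices for the given container; the boolean return reports
// whether an entry for the container was found at all.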
func checkPodResourcesAssignment(v1PodRes *kubeletpodresourcesv1.ListPodResourcesResponse, podNamespace, podName, containerName, resourceName string, devs []string) (error, bool) {
    for _, podRes := range v1PodRes.PodResources {
        if podRes.Namespace != podNamespace || podRes.Name != podName {
            continue
        }
        for _, contRes := range podRes.Containers {
            if contRes.Name != containerName {
                continue
            }
            return matchContainerDevices(podNamespace+"/"+podName+"/"+containerName, contRes.Devices, resourceName, devs)
        }
    }
    err := fmt.Errorf("no resources found for %s/%s/%s", podNamespace, podName, containerName)
    framework.Logf("%v", err)
    return err, false
}
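// matchContainerDevices compares the device IDs assigned to a container for
// the given resource against the expected set.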
func matchContainerDevices(ident string, contDevs []*kubeletpodresourcesv1.ContainerDevices, resourceName string, devs []string) (error, bool) {
    expected := sets.New[string](devs...)
    assigned := sets.New[string]()
    for _, contDev := range contDevs {
        if contDev.ResourceName != resourceName {
            continue
        }
        assigned.Insert(contDev.DeviceIds...)
    }
    expectedStr := strings.Join(expected.UnsortedList(), ",")
    assignedStr := strings.Join(assigned.UnsortedList(), ",")
    framework.Logf("%s: devices expected %q assigned %q", ident, expectedStr, assignedStr)
    if !assigned.Equal(expected) {
        return fmt.Errorf("device allocation mismatch for %s: expected %s assigned %s", ident, expectedStr, assignedStr), true
    }
    return nil, true
}
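// getSampleDevicePluginPod builds the sample device plugin pod from the
// bundled DaemonSet manifest, pointing it at the given plugin socket
// directory.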
func getSampleDevicePluginPod(pluginSockDir string) *v1.Pod {
    data, err := e2etestfiles.Read(SampleDevicePluginDSYAML)
    if err != nil {
        framework.Fail(err.Error())
    }

    ds := readDaemonSetV1OrDie(data)
    dp := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: SampleDevicePluginName,
        },
        Spec: ds.Spec.Template.Spec,
    }
    for i := range dp.Spec.Containers[0].Env {
        if dp.Spec.Containers[0].Env[i].Name == SampleDeviceEnvVarNamePluginSockDir {
            dp.Spec.Containers[0].Env[i].Value = pluginSockDir
        }
    }

    if utilfeature.DefaultFeatureGate.Enabled(features.DevicePluginCDIDevices) {
        dp.Spec.Containers[0].Env = append(dp.Spec.Containers[0].Env, v1.EnvVar{Name: "CDI_ENABLED", Value: "1"})
    }

    return dp
}
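// BeTheSamePodStillRunning matches if the pod is the same instance as the
// expected one (same UID), is in the Running phase, and is Ready.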
func BeTheSamePodStillRunning(expected *v1.Pod) types.GomegaMatcher {
    return gomega.And(
        BeTheSamePodAs(expected.UID),
        BeAPodInPhase(v1.PodRunning),
        BeAPodReady(),
    )
}
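// BeAPodReady matches if the pod's Ready condition is true.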
func BeAPodReady() types.GomegaMatcher {
    return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
        return podutils.IsPodReady(actual), nil
    }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} not ready yet")
}
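// BeAPodInPhase matches if the pod is in the given phase.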
func BeAPodInPhase(phase v1.PodPhase) types.GomegaMatcher {
    return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
        return actual.Status.Phase == phase, nil
    }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} failed {{.To}} be in phase {{.Data}} instead is in phase {{.Actual.Status.Phase}}").WithTemplateData(phase)
}
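// BeTheSamePodAs matches if the pod has the given UID.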
func BeTheSamePodAs(podUID k8stypes.UID) types.GomegaMatcher {
    return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
        return actual.UID == podUID, nil
    }).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} expected UID {{.Data}} has UID instead {{.Actual.UID}}").WithTemplateData(podUID)
}