16
17 package e2enode
18
19 import (
20 "context"
21 "errors"
22 "fmt"
23 "os"
24 "strings"
25 "time"
26
27 v1 "k8s.io/api/core/v1"
28 "k8s.io/apimachinery/pkg/api/resource"
29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30 kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
31 kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
32 kubefeatures "k8s.io/kubernetes/pkg/features"
33 kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
34 apisgrpc "k8s.io/kubernetes/pkg/kubelet/apis/grpc"
35 "k8s.io/kubernetes/pkg/kubelet/apis/podresources"
36 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
37 "k8s.io/kubernetes/pkg/kubelet/util"
38 testutils "k8s.io/kubernetes/test/utils"
39 admissionapi "k8s.io/pod-security-admission/api"
40 "k8s.io/utils/cpuset"
41
42 "github.com/onsi/ginkgo/v2"
43 "github.com/onsi/gomega"
44 "github.com/onsi/gomega/gstruct"
45 "github.com/onsi/gomega/types"
46 "k8s.io/kubernetes/test/e2e/feature"
47 "k8s.io/kubernetes/test/e2e/framework"
48 e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
49 e2enode "k8s.io/kubernetes/test/e2e/framework/node"
50 e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
51 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
52 "k8s.io/kubernetes/test/e2e/nodefeature"
53 )
54
55 const (
56 defaultTopologyUnawareResourceName = "example.com/resource"
57 )
58
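// podDesc describes a test pod: the app container's expected device and CPU
// requests (CPU in millicores), plus any init containers it should carry.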
59 type podDesc struct {
60 podName string
61 cntName string
62 resourceName string
63 resourceAmount int
64 cpuRequest int
65 initContainers []initContainerDesc
66 }
67
68 func (desc podDesc) CpuRequestQty() resource.Quantity {
69 qty := resource.NewMilliQuantity(int64(desc.cpuRequest), resource.DecimalSI)
70 return *qty
71 }
72
73 func (desc podDesc) CpuRequestExclusive() int {
74 if (desc.cpuRequest % 1000) != 0 {
75 // exclusive CPUs are assigned only for integral CPU requests;
76 // a non-integral request means the container runs in the shared pool
77 return 0
78 }
79 return desc.cpuRequest / 1000
80 }
81
82 func (desc podDesc) RequiresCPU() bool {
83 return desc.cpuRequest > 0
84 }
85
86 func (desc podDesc) RequiresDevices() bool {
87 return desc.resourceName != "" && desc.resourceAmount > 0
88 }
89
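// initContainerDesc mirrors podDesc for init containers; restartPolicy
// distinguishes regular init containers from restartable (sidecar) ones.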
90 type initContainerDesc struct {
91 cntName string
92 resourceName string
93 resourceAmount int
94 cpuRequest int
95 restartPolicy *v1.ContainerRestartPolicy
96 }
97
98 func (desc initContainerDesc) CPURequestQty() resource.Quantity {
99 qty := resource.NewMilliQuantity(int64(desc.cpuRequest), resource.DecimalSI)
100 return *qty
101 }
102
103 func (desc initContainerDesc) CPURequestExclusive() int {
104 if (desc.cpuRequest % 1000) != 0 {
105 // exclusive CPUs are assigned only for integral CPU requests;
106 // a non-integral request means the container runs in the shared pool
107 return 0
108 }
109 return desc.cpuRequest / 1000
110 }
111
112 func (desc initContainerDesc) RequiresCPU() bool {
113 return desc.cpuRequest > 0
114 }
115
116 func (desc initContainerDesc) RequiresDevices() bool {
117 return desc.resourceName != "" && desc.resourceAmount > 0
118 }
119
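// makePodResourcesTestPod builds a pod matching the given podDesc: requests are
// set equal to limits when CPU is requested (so exclusive CPUs can be assigned
// under the static policy), and device requests are added when a resource name
// is set. Restartable init containers sleep indefinitely, regular ones exit
// after a few seconds.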
120 func makePodResourcesTestPod(desc podDesc) *v1.Pod {
121 cnt := v1.Container{
122 Name: desc.cntName,
123 Image: busyboxImage,
124 Resources: v1.ResourceRequirements{
125 Requests: v1.ResourceList{},
126 Limits: v1.ResourceList{},
127 },
128 Command: []string{"sh", "-c", "sleep 1d"},
129 }
130 if desc.RequiresCPU() {
131 cpuRequestQty := desc.CpuRequestQty()
132 cnt.Resources.Requests[v1.ResourceCPU] = cpuRequestQty
133 cnt.Resources.Limits[v1.ResourceCPU] = cpuRequestQty
134
135 cnt.Resources.Requests[v1.ResourceMemory] = resource.MustParse("100Mi")
136 cnt.Resources.Limits[v1.ResourceMemory] = resource.MustParse("100Mi")
137 }
138 if desc.RequiresDevices() {
139 cnt.Resources.Requests[v1.ResourceName(desc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", desc.resourceAmount))
140 cnt.Resources.Limits[v1.ResourceName(desc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", desc.resourceAmount))
141 }
142
143 var initCnts []v1.Container
144 for _, cntDesc := range desc.initContainers {
145 initCnt := v1.Container{
146 Name: cntDesc.cntName,
147 Image: busyboxImage,
148 Resources: v1.ResourceRequirements{
149 Requests: v1.ResourceList{},
150 Limits: v1.ResourceList{},
151 },
152 Command: []string{"sh", "-c", "sleep 5s"},
153 RestartPolicy: cntDesc.restartPolicy,
154 }
155 if cntDesc.restartPolicy != nil && *cntDesc.restartPolicy == v1.ContainerRestartPolicyAlways {
156 initCnt.Command = []string{"sh", "-c", "sleep 1d"}
157 }
158 if cntDesc.RequiresCPU() {
159 cpuRequestQty := cntDesc.CPURequestQty()
160 initCnt.Resources.Requests[v1.ResourceCPU] = cpuRequestQty
161 initCnt.Resources.Limits[v1.ResourceCPU] = cpuRequestQty
162
163 initCnt.Resources.Requests[v1.ResourceMemory] = resource.MustParse("100Mi")
164 initCnt.Resources.Limits[v1.ResourceMemory] = resource.MustParse("100Mi")
165 }
166 if cntDesc.RequiresDevices() {
167 initCnt.Resources.Requests[v1.ResourceName(cntDesc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", cntDesc.resourceAmount))
168 initCnt.Resources.Limits[v1.ResourceName(cntDesc.resourceName)] = resource.MustParse(fmt.Sprintf("%d", cntDesc.resourceAmount))
169 }
170 initCnts = append(initCnts, initCnt)
171 }
172
173 return &v1.Pod{
174 ObjectMeta: metav1.ObjectMeta{
175 Name: desc.podName,
176 },
177 Spec: v1.PodSpec{
178 RestartPolicy: v1.RestartPolicyNever,
179 InitContainers: initCnts,
180 Containers: []v1.Container{
181 cnt,
182 },
183 },
184 }
185 }
186
187 func logPodResources(podIdx int, pr *kubeletpodresourcesv1.PodResources) {
188 ns := pr.GetNamespace()
189 cnts := pr.GetContainers()
190 if len(cnts) == 0 {
191 framework.Logf("#%02d/%02d/%02d - %s/%s/%s No containers", podIdx, 0, 0, ns, pr.GetName(), "_")
192 return
193 }
194
195 for cntIdx, cnt := range cnts {
196 if len(cnt.Devices) == 0 {
197 framework.Logf("#%02d/%02d/%02d - %s/%s/%s cpus -> %v resources -> none", podIdx, cntIdx, 0, ns, pr.GetName(), cnt.Name, cnt.CpuIds)
198 continue
199 }
200
201 for devIdx, dev := range cnt.Devices {
202 framework.Logf("#%02d/%02d/%02d - %s/%s/%s cpus -> %v %s -> %s", podIdx, cntIdx, devIdx, ns, pr.GetName(), cnt.Name, cnt.CpuIds, dev.ResourceName, strings.Join(dev.DeviceIds, ", "))
203 }
204 }
205 }
206
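// podResMap indexes the podresources response by pod name and then container
// name, which makes per-container assertions straightforward.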
207 type podResMap map[string]map[string]kubeletpodresourcesv1.ContainerResources
208
209 func convertToMap(podsResources []*kubeletpodresourcesv1.PodResources) podResMap {
210 res := make(map[string]map[string]kubeletpodresourcesv1.ContainerResources)
211 for idx, podResource := range podsResources {
212
213 logPodResources(idx, podResource)
214
215 cnts := make(map[string]kubeletpodresourcesv1.ContainerResources)
216 for _, cnt := range podResource.GetContainers() {
217 cnts[cnt.GetName()] = *cnt
218 }
219 res[podResource.GetName()] = cnts
220 }
221 return res
222 }
223
224 func getPodResourcesValues(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient) (podResMap, error) {
225 resp, err := cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
226 if err != nil {
227 return nil, err
228 }
229 return convertToMap(resp.GetPodResources()), nil
230 }
231
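// testPodData tracks the pods created by a test so they can be deleted
// individually or all at once during cleanup.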
232 type testPodData struct {
233 PodMap map[string]*v1.Pod
234 }
235
236 func newTestPodData() *testPodData {
237 return &testPodData{
238 PodMap: make(map[string]*v1.Pod),
239 }
240 }
241
242 func (tpd *testPodData) createPodsForTest(ctx context.Context, f *framework.Framework, podReqs []podDesc) {
243 for _, podReq := range podReqs {
244 pod := makePodResourcesTestPod(podReq)
245 pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
246
247 framework.Logf("created pod %s", podReq.podName)
248 tpd.PodMap[podReq.podName] = pod
249 }
250 }
251
252 // deletePodsForTest cleans up all the pods created for a test case.
253 func (tpd *testPodData) deletePodsForTest(ctx context.Context, f *framework.Framework) {
254 deletePodsAsync(ctx, f, tpd.PodMap)
255 }
256
257 // deletePod removes a single pod by name, if it was created by this test.
258 func (tpd *testPodData) deletePod(ctx context.Context, f *framework.Framework, podName string) {
259 _, ok := tpd.PodMap[podName]
260 if !ok {
261 return
262 }
263 deletePodSyncByName(ctx, f, podName)
264 delete(tpd.PodMap, podName)
265 }
266
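// findContainerDeviceByName returns the ContainerDevices entry for the given
// resource name, or nil if the container was not assigned that resource.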
267 func findContainerDeviceByName(devs []*kubeletpodresourcesv1.ContainerDevices, resourceName string) *kubeletpodresourcesv1.ContainerDevices {
268 for _, dev := range devs {
269 if dev.ResourceName == resourceName {
270 return dev
271 }
272 }
273 return nil
274 }
275
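// matchPodDescWithResources verifies that the podresources response in `found`
// matches every expectation in `expected`: exclusive CPU counts, device
// assignments, and (for sidecar-enabled tests) init container reporting.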
276 func matchPodDescWithResources(expected []podDesc, found podResMap) error {
277 for _, podReq := range expected {
278 framework.Logf("matching: %#v", podReq)
279
280 podInfo, ok := found[podReq.podName]
281 if !ok {
282 return fmt.Errorf("no pod resources for pod %q", podReq.podName)
283 }
284 cntInfo, ok := podInfo[podReq.cntName]
285 if !ok {
286 return fmt.Errorf("no container resources for pod %q container %q", podReq.podName, podReq.cntName)
287 }
288 if podReq.RequiresCPU() {
289 if exclusiveCpus := podReq.CpuRequestExclusive(); exclusiveCpus != len(cntInfo.CpuIds) {
290 if exclusiveCpus == 0 {
291 return fmt.Errorf("pod %q container %q requested %d expected to be allocated CPUs from shared pool %v", podReq.podName, podReq.cntName, podReq.cpuRequest, cntInfo.CpuIds)
292 }
293 return fmt.Errorf("pod %q container %q expected %d cpus got %v", podReq.podName, podReq.cntName, exclusiveCpus, cntInfo.CpuIds)
294 }
295 }
296 if podReq.RequiresDevices() {
297 dev := findContainerDeviceByName(cntInfo.GetDevices(), podReq.resourceName)
298 if dev == nil {
299 return fmt.Errorf("pod %q container %q expected data for resource %q not found", podReq.podName, podReq.cntName, podReq.resourceName)
300 }
301 if len(dev.DeviceIds) != podReq.resourceAmount {
302 return fmt.Errorf("pod %q container %q resource %q expected %d items got %v", podReq.podName, podReq.cntName, podReq.resourceName, podReq.resourceAmount, dev.DeviceIds)
303 }
304 } else {
305 devs := cntInfo.GetDevices()
306 if len(devs) > 0 {
307 return fmt.Errorf("pod %q container %q expected no resources, got %v", podReq.podName, podReq.cntName, devs)
308 }
309 }
310 if cnts, ok := found[defaultTopologyUnawareResourceName]; ok {
311 for _, cnt := range cnts {
312 for _, cd := range cnt.GetDevices() {
313 if cd.ResourceName != defaultTopologyUnawareResourceName {
314 continue
315 }
316 if cd.Topology != nil {
317 // topology-unaware resources are expected to report no topology info
318 return fmt.Errorf("expected nil topology for topology-unaware resource %q", cd.ResourceName)
319 }
320 }
321
322 }
323 }
324
325
326 for _, initCntDesc := range podReq.initContainers {
327 if initCntDesc.restartPolicy == nil || *initCntDesc.restartPolicy != v1.ContainerRestartPolicyAlways {
328 // regular (non-restartable) init containers run to completion before the
329 // app containers start, so they must not show up in the response
330 _, ok := podInfo[initCntDesc.cntName]
331 if ok {
332 return fmt.Errorf("pod %q regular init container %q should not be reported", podReq.podName, initCntDesc.cntName)
333 }
334 continue
335 }
336
337 cntInfo, ok := podInfo[initCntDesc.cntName]
338 if !ok {
339 return fmt.Errorf("no container resources for pod %q container %q", podReq.podName, initCntDesc.cntName)
340 }
341 if initCntDesc.RequiresCPU() {
342 if exclusiveCpus := initCntDesc.CPURequestExclusive(); exclusiveCpus != len(cntInfo.CpuIds) {
343 if exclusiveCpus == 0 {
344 return fmt.Errorf("pod %q container %q requested %d expected to be allocated CPUs from shared pool %v", podReq.podName, initCntDesc.cntName, initCntDesc.cpuRequest, cntInfo.CpuIds)
345 }
346 return fmt.Errorf("pod %q container %q expected %d cpus got %v", podReq.podName, initCntDesc.cntName, exclusiveCpus, cntInfo.CpuIds)
347 }
348 }
349 if initCntDesc.RequiresDevices() {
350 dev := findContainerDeviceByName(cntInfo.GetDevices(), initCntDesc.resourceName)
351 if dev == nil {
352 return fmt.Errorf("pod %q container %q expected data for resource %q not found", podReq.podName, initCntDesc.cntName, initCntDesc.resourceName)
353 }
354 if len(dev.DeviceIds) != initCntDesc.resourceAmount {
355 return fmt.Errorf("pod %q container %q resource %q expected %d items got %v", podReq.podName, initCntDesc.cntName, initCntDesc.resourceName, initCntDesc.resourceAmount, dev.DeviceIds)
356 }
357 } else {
358 devs := cntInfo.GetDevices()
359 if len(devs) > 0 {
360 return fmt.Errorf("pod %q container %q expected no resources, got %v", podReq.podName, initCntDesc.cntName, devs)
361 }
362 }
363 if cnts, ok := found[defaultTopologyUnawareResourceName]; ok {
364 for _, cnt := range cnts {
365 for _, cd := range cnt.GetDevices() {
366 if cd.ResourceName != defaultTopologyUnawareResourceName {
367 continue
368 }
369 if cd.Topology != nil {
370 // topology-unaware resources are expected to report no topology info
371 return fmt.Errorf("expected nil topology for topology-unaware resource %q", cd.ResourceName)
372 }
373 }
374 }
375 }
376 }
377 }
378 return nil
379 }
380
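// expectPodResources polls the List() endpoint every 10 seconds for up to a
// minute until the reported resources match the expected pod descriptions.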
381 func expectPodResources(ctx context.Context, offset int, cli kubeletpodresourcesv1.PodResourcesListerClient, expected []podDesc) {
382 gomega.EventuallyWithOffset(1+offset, ctx, func(ctx context.Context) error {
383 found, err := getPodResourcesValues(ctx, cli)
384 if err != nil {
385 return err
386 }
387 return matchPodDescWithResources(expected, found)
388 }, time.Minute, 10*time.Second).Should(gomega.Succeed())
389 }
390
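// filterOutDesc returns the descriptions with the named pod removed; used to
// build the expected state after a pod is deleted mid-test.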
391 func filterOutDesc(descs []podDesc, name string) []podDesc {
392 var ret []podDesc
393 for _, desc := range descs {
394 if desc.podName == name {
395 continue
396 }
397 ret = append(ret, desc)
398 }
399 return ret
400 }
401
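// podresourcesListTests exercises the List() endpoint through a series of pod
// creations and deletions; sd != nil adds SRIOV device expectations, and
// sidecarContainersEnabled adds restartable init container cases.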
402 func podresourcesListTests(ctx context.Context, f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, sidecarContainersEnabled bool) {
403 var tpd *testPodData
404
405 var found podResMap
406 var expected []podDesc
407 var extra podDesc
408
409 expectedBasePods := 0
410 if sd != nil {
411 expectedBasePods = 1
412 }
413
414 var err error
415 ginkgo.By("checking the output when no pods are present")
416 found, err = getPodResourcesValues(ctx, cli)
417 framework.ExpectNoError(err, "getPodResourcesValues() failed err: %v", err)
418 gomega.ExpectWithOffset(1, found).To(gomega.HaveLen(expectedBasePods), "base pod expectation mismatch")
419
420 tpd = newTestPodData()
421 ginkgo.By("checking the output when only pods which don't require resources are present")
422 expected = []podDesc{
423 {
424 podName: "pod-00",
425 cntName: "cnt-00",
426 },
427 {
428 podName: "pod-01",
429 cntName: "cnt-00",
430 },
431 }
432
433 tpd.createPodsForTest(ctx, f, expected)
434 expectPodResources(ctx, 1, cli, expected)
435 tpd.deletePodsForTest(ctx, f)
436
437 tpd = newTestPodData()
438 ginkgo.By("checking the output when only a subset of pods require resources")
439 if sd != nil {
440 expected = []podDesc{
441 {
442 podName: "pod-00",
443 cntName: "cnt-00",
444 },
445 {
446 podName: "pod-01",
447 cntName: "cnt-00",
448 resourceName: sd.resourceName,
449 resourceAmount: 1,
450 cpuRequest: 1000,
451 },
452 {
453 podName: "pod-02",
454 cntName: "cnt-00",
455 cpuRequest: 1000,
456 },
457 {
458 podName: "pod-03",
459 cntName: "cnt-00",
460 resourceName: sd.resourceName,
461 resourceAmount: 1,
462 cpuRequest: 1000,
463 },
464 }
465 } else {
466 expected = []podDesc{
467 {
468 podName: "pod-00",
469 cntName: "cnt-00",
470 },
471 {
472 podName: "pod-01",
473 cntName: "cnt-00",
474 cpuRequest: 1000,
475 },
476 {
477 podName: "pod-02",
478 cntName: "cnt-00",
479 cpuRequest: 1000,
480 },
481 {
482 podName: "pod-03",
483 cntName: "cnt-00",
484 cpuRequest: 1000,
485 },
486 }
487
488 }
489 tpd.createPodsForTest(ctx, f, expected)
490 expectPodResources(ctx, 1, cli, expected)
491 tpd.deletePodsForTest(ctx, f)
492
493 tpd = newTestPodData()
494 ginkgo.By("checking the output when creating pods which require resources between calls")
495 if sd != nil {
496 expected = []podDesc{
497 {
498 podName: "pod-00",
499 cntName: "cnt-00",
500 },
501 {
502 podName: "pod-01",
503 cntName: "cnt-00",
504 resourceName: sd.resourceName,
505 resourceAmount: 1,
506 cpuRequest: 1000,
507 },
508 {
509 podName: "pod-02",
510 cntName: "cnt-00",
511 cpuRequest: 1000,
512 },
513 }
514 } else {
515 expected = []podDesc{
516 {
517 podName: "pod-00",
518 cntName: "cnt-00",
519 },
520 {
521 podName: "pod-01",
522 cntName: "cnt-00",
523 cpuRequest: 1000,
524 },
525 {
526 podName: "pod-02",
527 cntName: "cnt-00",
528 cpuRequest: 1000,
529 },
530 }
531 }
532
533 tpd.createPodsForTest(ctx, f, expected)
534 expectPodResources(ctx, 1, cli, expected)
535
536 if sd != nil {
537 extra = podDesc{
538 podName: "pod-03",
539 cntName: "cnt-00",
540 resourceName: sd.resourceName,
541 resourceAmount: 1,
542 cpuRequest: 1000,
543 }
544 } else {
545 extra = podDesc{
546 podName: "pod-03",
547 cntName: "cnt-00",
548 cpuRequest: 1000,
549 }
550
551 }
552
553 tpd.createPodsForTest(ctx, f, []podDesc{
554 extra,
555 })
556
557 expected = append(expected, extra)
558 expectPodResources(ctx, 1, cli, expected)
559 tpd.deletePodsForTest(ctx, f)
560
561 tpd = newTestPodData()
562 ginkgo.By("checking the output when deleting pods which require resources between calls")
563
564 if sd != nil {
565 expected = []podDesc{
566 {
567 podName: "pod-00",
568 cntName: "cnt-00",
569 cpuRequest: 1000,
570 },
571 {
572 podName: "pod-01",
573 cntName: "cnt-00",
574 resourceName: sd.resourceName,
575 resourceAmount: 1,
576 cpuRequest: 2000,
577 },
578 {
579 podName: "pod-02",
580 cntName: "cnt-00",
581 },
582 {
583 podName: "pod-03",
584 cntName: "cnt-00",
585 resourceName: sd.resourceName,
586 resourceAmount: 1,
587 cpuRequest: 1000,
588 },
589 }
590 } else {
591 expected = []podDesc{
592 {
593 podName: "pod-00",
594 cntName: "cnt-00",
595 cpuRequest: 1000,
596 },
597 {
598 podName: "pod-01",
599 cntName: "cnt-00",
600 cpuRequest: 1000,
601 },
602 {
603 podName: "pod-02",
604 cntName: "cnt-00",
605 },
606 {
607 podName: "pod-03",
608 cntName: "cnt-00",
609 cpuRequest: 1000,
610 },
611 }
612 }
613 tpd.createPodsForTest(ctx, f, expected)
614 expectPodResources(ctx, 1, cli, expected)
615
616 tpd.deletePod(ctx, f, "pod-01")
617 expectedPostDelete := filterOutDesc(expected, "pod-01")
618 expectPodResources(ctx, 1, cli, expectedPostDelete)
619 tpd.deletePodsForTest(ctx, f)
620
621 tpd = newTestPodData()
622 ginkgo.By("checking the output when pods request non-integral CPUs")
623 if sd != nil {
624 expected = []podDesc{
625 {
626 podName: "pod-00",
627 cntName: "cnt-00",
628 cpuRequest: 1500,
629 },
630 {
631 podName: "pod-01",
632 cntName: "cnt-00",
633 resourceName: sd.resourceName,
634 resourceAmount: 1,
635 cpuRequest: 1500,
636 },
637 }
638 } else {
639 expected = []podDesc{
640 {
641 podName: "pod-00",
642 cntName: "cnt-00",
643 cpuRequest: 1500,
644 },
645 }
646
647 }
648 tpd.createPodsForTest(ctx, f, expected)
649 expectPodResources(ctx, 1, cli, expected)
650 tpd.deletePodsForTest(ctx, f)
651
652 if sidecarContainersEnabled {
653 containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways
654
655 tpd = newTestPodData()
656 ginkgo.By("checking the output when pods have init containers")
657 if sd != nil {
658 expected = []podDesc{
659 {
660 podName: "pod-00",
661 cntName: "regular-00",
662 cpuRequest: 1000,
663 initContainers: []initContainerDesc{
664 {
665 cntName: "init-00",
666 resourceName: sd.resourceName,
667 resourceAmount: 1,
668 cpuRequest: 1000,
669 },
670 },
671 },
672 {
673 podName: "pod-01",
674 cntName: "regular-00",
675 cpuRequest: 1000,
676 initContainers: []initContainerDesc{
677 {
678 cntName: "restartable-init-00",
679 resourceName: sd.resourceName,
680 resourceAmount: 1,
681 cpuRequest: 1000,
682 restartPolicy: &containerRestartPolicyAlways,
683 },
684 },
685 },
686 }
687 } else {
688 expected = []podDesc{
689 {
690 podName: "pod-00",
691 cntName: "regular-00",
692 cpuRequest: 1000,
693 initContainers: []initContainerDesc{
694 {
695 cntName: "init-00",
696 cpuRequest: 1000,
697 },
698 },
699 },
700 {
701 podName: "pod-01",
702 cntName: "regular-00",
703 cpuRequest: 1000,
704 initContainers: []initContainerDesc{
705 {
706 cntName: "restartable-init-00",
707 cpuRequest: 1000,
708 restartPolicy: &containerRestartPolicyAlways,
709 },
710 },
711 },
712 }
713 }
714
715 tpd.createPodsForTest(ctx, f, expected)
716 expectPodResources(ctx, 1, cli, expected)
717 tpd.deletePodsForTest(ctx, f)
718 }
719 }
720
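// podresourcesGetAllocatableResourcesTests checks GetAllocatableResources():
// allocatable CPUs must equal the online CPUs minus the reserved set, and
// devices are expected only when an SRIOV setup (sd) is provided.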
721 func podresourcesGetAllocatableResourcesTests(ctx context.Context, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) {
722 ginkgo.By("checking the devices known to the kubelet")
723 resp, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
724 framework.ExpectNoErrorWithOffset(1, err)
725 devs := resp.GetDevices()
726 var cpus []int
727 for _, cpuid := range resp.GetCpuIds() {
728 cpus = append(cpus, int(cpuid))
729 }
730 allocatableCPUs := cpuset.New(cpus...)
731
732 if onlineCPUs.Size() == 0 {
733 ginkgo.By("expecting no CPUs reported")
734 gomega.ExpectWithOffset(1, onlineCPUs.Size()).To(gomega.Equal(reservedSystemCPUs.Size()), "with no online CPUs, no CPUs should be reserved")
735 } else {
736 ginkgo.By(fmt.Sprintf("expecting online CPUs reported - online=%v (%d) reserved=%v (%d)", onlineCPUs, onlineCPUs.Size(), reservedSystemCPUs, reservedSystemCPUs.Size()))
737 if reservedSystemCPUs.Size() > onlineCPUs.Size() {
738 ginkgo.Fail("more reserved CPUs than online")
739 }
740 expectedCPUs := onlineCPUs.Difference(reservedSystemCPUs)
741
742 ginkgo.By(fmt.Sprintf("expecting CPUs '%v'='%v'", allocatableCPUs, expectedCPUs))
743 gomega.ExpectWithOffset(1, allocatableCPUs.Equals(expectedCPUs)).To(gomega.BeTrue(), "mismatch expecting CPUs")
744 }
745
746 if sd == nil {
747 ginkgo.By("expecting no devices reported")
748 gomega.ExpectWithOffset(1, devs).To(gomega.BeEmpty(), fmt.Sprintf("got unexpected devices %#v", devs))
749 return
750 }
751
752 ginkgo.By(fmt.Sprintf("expecting some %q devices reported", sd.resourceName))
753 gomega.ExpectWithOffset(1, devs).ToNot(gomega.BeEmpty())
754 for _, dev := range devs {
755 gomega.Expect(dev.ResourceName).To(gomega.Equal(sd.resourceName))
756 gomega.ExpectWithOffset(1, dev.DeviceIds).ToNot(gomega.BeEmpty())
757 }
758 }
759
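// podresourcesGetTests exercises the Get() endpoint: a missing pod must return
// an error, while existing pods (with and without CPU requests or init
// containers) must be reported consistently with their spec.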
760 func podresourcesGetTests(ctx context.Context, f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient, sidecarContainersEnabled bool) {
761
762 ginkgo.By("checking the output when no pods are present")
763 expected := []podDesc{}
764 resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
765 gomega.Expect(err).To(gomega.HaveOccurred(), "expected Get() to fail for a pod which does not exist")
766 podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
767 res := convertToMap(podResourceList)
768 err = matchPodDescWithResources(expected, res)
769 framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
770
771 tpd := newTestPodData()
772 ginkgo.By("checking the output when only pods which don't require resources are present")
773 expected = []podDesc{
774 {
775 podName: "pod-00",
776 cntName: "cnt-00",
777 },
778 }
779 tpd.createPodsForTest(ctx, f, expected)
780 resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-00", PodNamespace: f.Namespace.Name})
781 framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-00")
782 podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
783 res = convertToMap(podResourceList)
784 err = matchPodDescWithResources(expected, res)
785 framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
786 tpd.deletePodsForTest(ctx, f)
787
788 tpd = newTestPodData()
789 ginkgo.By("checking the output when a pod requiring CPU is present")
790 expected = []podDesc{
791 {
792 podName: "pod-01",
793 cntName: "cnt-00",
794 cpuRequest: 1000,
795 },
796 }
797 tpd.createPodsForTest(ctx, f, expected)
798 resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
799 framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
800 podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
801 res = convertToMap(podResourceList)
802 err = matchPodDescWithResources(expected, res)
803 framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
804 tpd.deletePodsForTest(ctx, f)
805
806 if sidecarContainersEnabled {
807 containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways
808
809 tpd = newTestPodData()
810 ginkgo.By("checking the output when a pod with init containers requires CPU")
811 expected = []podDesc{
812 {
813 podName: "pod-01",
814 cntName: "cnt-00",
815 cpuRequest: 1000,
816 initContainers: []initContainerDesc{
817 {
818 cntName: "init-00",
819 cpuRequest: 1000,
820 },
821 {
822 cntName: "restartable-init-01",
823 cpuRequest: 1000,
824 restartPolicy: &containerRestartPolicyAlways,
825 },
826 },
827 },
828 }
829 tpd.createPodsForTest(ctx, f, expected)
830 resp, err = cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
831 framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
832 podResourceList = []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
833 res = convertToMap(podResourceList)
834 err = matchPodDescWithResources(expected, res)
835 framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
836 tpd.deletePodsForTest(ctx, f)
837 }
838 }
839
840
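// Serial because the tests update the kubelet configuration.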
841 var _ = SIGDescribe("POD Resources", framework.WithSerial(), feature.PodResources, nodefeature.PodResources, func() {
842 f := framework.NewDefaultFramework("podresources-test")
843 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
844
845 reservedSystemCPUs := cpuset.New(1)
846
847 ginkgo.Context("with SRIOV devices in the system", func() {
848 ginkgo.BeforeEach(func() {
849 requireSRIOVDevices()
850 })
851
852 ginkgo.Context("with CPU manager Static policy", func() {
853 ginkgo.BeforeEach(func(ctx context.Context) {
854
855 _, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
856
857 if cpuAlloc < minCoreCount {
858 e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
859 }
860 })
861
862
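// the unnamed Context exists to scope the temporary kubelet configuration change below to these specs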
863 ginkgo.Context("", func() {
864 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
865
866 initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
867
868
869 initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
870
871 cpus := reservedSystemCPUs.String()
872 framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
873 initialConfig.ReservedSystemCPUs = cpus
874 })
875
876 ginkgo.It("should return the expected responses", func(ctx context.Context) {
877 onlineCPUs, err := getOnlineCPUs()
878 framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
879
880 configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
881 sd := setupSRIOVConfigOrFail(ctx, f, configMap)
882 ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd)
883
884 waitForSRIOVResources(ctx, f, sd)
885
886 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
887 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
888
889 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
890 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
891 defer conn.Close()
892
893 waitForSRIOVResources(ctx, f, sd)
894
895 ginkgo.By("checking List()")
896 podresourcesListTests(ctx, f, cli, sd, false)
897 ginkgo.By("checking GetAllocatableResources()")
898 podresourcesGetAllocatableResourcesTests(ctx, cli, sd, onlineCPUs, reservedSystemCPUs)
899 })
900
901 framework.It("should return the expected responses", nodefeature.SidecarContainers, func(ctx context.Context) {
902 onlineCPUs, err := getOnlineCPUs()
903 framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
904
905 configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
906 sd := setupSRIOVConfigOrFail(ctx, f, configMap)
907 ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd)
908
909 waitForSRIOVResources(ctx, f, sd)
910
911 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
912 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
913
914 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
915 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
916 defer func() { framework.ExpectNoError(conn.Close()) }()
917
918 waitForSRIOVResources(ctx, f, sd)
919
920 ginkgo.By("checking List()")
921 podresourcesListTests(ctx, f, cli, sd, true)
922 ginkgo.By("checking GetAllocatableResources()")
923 podresourcesGetAllocatableResourcesTests(ctx, cli, sd, onlineCPUs, reservedSystemCPUs)
924 })
925 })
926 })
927
928 ginkgo.Context("with CPU manager None policy", func() {
929 ginkgo.It("should return the expected responses", func(ctx context.Context) {
930 // the kubelet default CPU manager policy is "none", so no kubelet
931 // reconfiguration (and therefore no restart) is needed for this test
932 requireSRIOVDevices()
933
934 configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
935 sd := setupSRIOVConfigOrFail(ctx, f, configMap)
936 ginkgo.DeferCleanup(teardownSRIOVConfigOrFail, f, sd)
937
938 waitForSRIOVResources(ctx, f, sd)
939
940 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
941 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
942
943 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
944 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
945 defer conn.Close()
946
947 waitForSRIOVResources(ctx, f, sd)
948
949 // under the "none" policy no exclusive CPUs are allocatable; passing empty
950 // cpusets makes the helper skip the CPU checks and only verify the devices
951 podresourcesGetAllocatableResourcesTests(ctx, cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{})
952 })
953 })
954 })
955
956 ginkgo.Context("without SRIOV devices in the system", func() {
957 ginkgo.BeforeEach(func() {
958 requireLackOfSRIOVDevices()
959 })
960
961 ginkgo.Context("with CPU manager Static policy", func() {
962 ginkgo.BeforeEach(func(ctx context.Context) {
963
964 _, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
965
966 if cpuAlloc < minCoreCount {
967 e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
968 }
969 })
970
971
972 ginkgo.Context("", func() {
973 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
974
975 initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
976
977
978 initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
979
980 cpus := reservedSystemCPUs.String()
981 framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
982 initialConfig.ReservedSystemCPUs = cpus
983 if initialConfig.FeatureGates == nil {
984 initialConfig.FeatureGates = make(map[string]bool)
985 }
986 initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
987 })
988
989 ginkgo.It("should return the expected responses", func(ctx context.Context) {
990 onlineCPUs, err := getOnlineCPUs()
991 framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
992
993 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
994 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
995
996 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
997 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
998 defer conn.Close()
999
1000 podresourcesListTests(ctx, f, cli, nil, false)
1001 podresourcesGetAllocatableResourcesTests(ctx, cli, nil, onlineCPUs, reservedSystemCPUs)
1002 podresourcesGetTests(ctx, f, cli, false)
1003 })
1004
1005 framework.It("should return the expected responses", nodefeature.SidecarContainers, func(ctx context.Context) {
1006 onlineCPUs, err := getOnlineCPUs()
1007 framework.ExpectNoError(err, "getOnlineCPUs() failed err: %v", err)
1008
1009 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1010 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
1011
1012 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1013 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
1014 defer func() {
1015 framework.ExpectNoError(conn.Close())
1016 }()
1017
1018 podresourcesListTests(ctx, f, cli, nil, true)
1019 podresourcesGetAllocatableResourcesTests(ctx, cli, nil, onlineCPUs, reservedSystemCPUs)
1020 podresourcesGetTests(ctx, f, cli, true)
1021 })
1022 ginkgo.It("should account for resources of pods in terminal phase", func(ctx context.Context) {
1023 pd := podDesc{
1024 cntName: "e2e-test-cnt",
1025 podName: "e2e-test-pod",
1026 cpuRequest: 1000,
1027 }
1028 pod := makePodResourcesTestPod(pd)
1029 pod.Spec.Containers[0].Command = []string{"sh", "-c", "/bin/true"}
1030 pod = e2epod.NewPodClient(f).Create(ctx, pod)
1031 defer e2epod.NewPodClient(f).DeleteSync(ctx, pod.Name, metav1.DeleteOptions{}, time.Minute)
1032 err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "Pod Succeeded", time.Minute*2, testutils.PodSucceeded)
1033 framework.ExpectNoError(err)
1034 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1035 framework.ExpectNoError(err)
1036 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1037 framework.ExpectNoError(err)
1038 defer conn.Close()
1039
1040 expectPodResources(ctx, 1, cli, []podDesc{pd})
1041
1042 })
1043 })
1044 })
1045
1046 ginkgo.Context("with CPU manager None policy", func() {
1047 ginkgo.It("should return the expected responses", func(ctx context.Context) {
1048 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1049 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
1050
1051 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1052 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
1053 defer conn.Close()
1054
1055
1056
1057 podresourcesGetAllocatableResourcesTests(ctx, cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{})
1058 })
1059 })
1060
1061 ginkgo.Context("with disabled KubeletPodResourcesGet feature gate", func() {
1062
1063 ginkgo.It("should return the expected error with the feature gate disabled", func(ctx context.Context) {
1064 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1065 framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
1066
1067 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1068 framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
1069 defer conn.Close()
1070
1071 ginkgo.By("checking that Get() fails if the feature gate is not enabled")
1072 getRes, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "test", PodNamespace: f.Namespace.Name})
1073 framework.Logf("Get result: %v, err: %v", getRes, err)
1074 gomega.Expect(err).To(gomega.HaveOccurred(), "With feature gate disabled, the call must fail")
1075 })
1076 })
1077 })
1078
1079 ginkgo.Context("with a topology-unaware device plugin, which reports resources w/o hardware topology", func() {
1080 ginkgo.Context("with CPU manager Static policy", func() {
1081 ginkgo.BeforeEach(func(ctx context.Context) {
1082
1083 _, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
1084
1085 if cpuAlloc < minCoreCount {
1086 e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
1087 }
1088 })
1089
1090
1091 ginkgo.Context("", func() {
1092 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
1093
1094 initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic)
1095
1096
1097 initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
1098
1099 cpus := reservedSystemCPUs.String()
1100 framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus)
1101 initialConfig.ReservedSystemCPUs = cpus
1102 })
1103
1104 ginkgo.It("should return proper podresources the same as before the restart of kubelet", func(ctx context.Context) {
1105 dpPod := setupSampleDevicePluginOrFail(ctx, f)
1106 ginkgo.DeferCleanup(teardownSampleDevicePluginOrFail, f, dpPod)
1107
1108 waitForTopologyUnawareResources(ctx, f)
1109
1110 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1111 framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
1112
1113 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1114 framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
1115 defer conn.Close()
1116
1117 ginkgo.By("checking List and resources topology unaware resource should be without topology")
1118
1119 allocatableResponse, _ := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
1120 for _, dev := range allocatableResponse.GetDevices() {
1121 if dev.ResourceName != defaultTopologyUnawareResourceName {
1122 continue
1123 }
1124 gomega.Expect(dev.Topology).To(gomega.BeNil(), "Topology is expected to be empty for topology unaware resources")
1125 }
1126
1127 desc := podDesc{
1128 podName: "pod-01",
1129 cntName: "cnt-01",
1130 resourceName: defaultTopologyUnawareResourceName,
1131 resourceAmount: 1,
1132 cpuRequest: 1000,
1133 }
1134
1135 tpd := newTestPodData()
1136 tpd.createPodsForTest(ctx, f, []podDesc{
1137 desc,
1138 })
1139
1140 expectPodResources(ctx, 1, cli, []podDesc{desc})
1141
1142 ginkgo.By("Restarting Kubelet")
1143 restartKubelet(true)
1144
1145 // after the restart, the kubelet needs time to register the device plugin
1146 // again; wait for the resources to be advertised before querying podresources
1147 ginkgo.By("Waiting for the topology-unaware resources to be reported again")
1148 waitForTopologyUnawareResources(ctx, f)
1149
1150 expectPodResources(ctx, 1, cli, []podDesc{desc})
1151 tpd.deletePodsForTest(ctx, f)
1152 })
1153 })
1154 })
1155 })
1156
1157 f.Context("when querying /metrics", f.WithNodeConformance(), func() {
1158 tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
1159 if initialConfig.FeatureGates == nil {
1160 initialConfig.FeatureGates = make(map[string]bool)
1161 }
1162 initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGet)] = true
1163 })
1164 ginkgo.BeforeEach(func(ctx context.Context) {
1165 // exercise every podresources endpoint at least once so the request counters checked below are non-zero
1166 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1167 framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
1168
1169 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1170 framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
1171 defer conn.Close()
1172
1173 _, err = cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
1174 framework.ExpectNoError(err, "List() failed err %v", err)
1175
1176 _, err = cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
1177 framework.ExpectNoError(err, "GetAllocatableResources() failed err %v", err)
1178
1179 desc := podDesc{
1180 podName: "pod-01",
1181 cntName: "cnt-01",
1182 }
1183 tpd := newTestPodData()
1184 tpd.createPodsForTest(ctx, f, []podDesc{
1185 desc,
1186 })
1187 expectPodResources(ctx, 1, cli, []podDesc{desc})
1188
1189 expected := []podDesc{}
1190 resp, err := cli.Get(ctx, &kubeletpodresourcesv1.GetPodResourcesRequest{PodName: "pod-01", PodNamespace: f.Namespace.Name})
1191 framework.ExpectNoError(err, "Get() call failed for pod %s/%s", f.Namespace.Name, "pod-01")
1192 podResourceList := []*kubeletpodresourcesv1.PodResources{resp.GetPodResources()}
1193 res := convertToMap(podResourceList)
1194 err = matchPodDescWithResources(expected, res)
1195 framework.ExpectNoError(err, "matchPodDescWithResources() failed err %v", err)
1196 tpd.deletePodsForTest(ctx, f)
1197 })
1198
1199 ginkgo.It("should report the values for the podresources metrics", func(ctx context.Context) {
1200
1201
1202 ginkgo.By("Checking the value of the podresources metrics")
1203
1204 matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
1205 "kubelet_pod_resources_endpoint_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
1206 "": timelessSampleAtLeast(1),
1207 }),
1208 "kubelet_pod_resources_endpoint_requests_list": gstruct.MatchAllElements(nodeID, gstruct.Elements{
1209 "": timelessSampleAtLeast(1),
1210 }),
1211 "kubelet_pod_resources_endpoint_requests_get_allocatable": gstruct.MatchAllElements(nodeID, gstruct.Elements{
1212 "": timelessSampleAtLeast(1),
1213 }),
1214 "kubelet_pod_resources_endpoint_requests_get": gstruct.MatchAllElements(nodeID, gstruct.Elements{
1215 "": timelessSampleAtLeast(1),
1216 }),
1217
1218 })
1219
1220 ginkgo.By("Giving the Kubelet time to start up and produce metrics")
1221 gomega.Eventually(ctx, getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
1222 ginkgo.By("Ensuring the metrics match the expectations a few more times")
1223 gomega.Consistently(ctx, getPodResourcesMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
1224 })
1225 })
1226
1227 ginkgo.Context("with the builtin rate limit values", func() {
1228 ginkgo.It("should hit throttling when calling podresources List in a tight loop", func(ctx context.Context) {
1229
1230 endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
1231 framework.ExpectNoError(err, "LocalEndpoint() failed err %v", err)
1232
1233 ginkgo.By("Connecting to the kubelet endpoint")
1234 cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
1235 framework.ExpectNoError(err, "GetV1Client() failed err %v", err)
1236 defer conn.Close()
1237
1238 tries := podresources.DefaultQPS * 2
1239 errs := []error{}
1240
1241 ginkgo.By(fmt.Sprintf("Issuing %d List() calls in a tight loop", tries))
1242 startTime := time.Now()
1243 for try := 0; try < tries; try++ {
1244 _, err = cli.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
1245 errs = append(errs, err)
1246 }
1247 elapsed := time.Since(startTime)
1248
1249 ginkgo.By(fmt.Sprintf("Checking return codes for %d List() calls in %v", tries, elapsed))
1250
1251 framework.ExpectNoError(errs[0], "the first List() call unexpectedly failed with %v", errs[0])
1252 // we expect at least some of the remaining calls to be rejected with
1253 // ErrorLimitExceeded; we deliberately avoid asserting on the exact count to
1254 // stay independent from the rate limiter implementation
1255
1256 errLimitExceededCount := 0
1257 for _, err := range errs[1:] {
1258 if errors.Is(err, apisgrpc.ErrorLimitExceeded) {
1259 errLimitExceededCount++
1260 }
1261 }
1262 gomega.Expect(errLimitExceededCount).ToNot(gomega.BeZero(), "never hit the rate limit trying %d calls in %v", tries, elapsed)
1263
1264 framework.Logf("got %d/%d rate limit errors, at least one needed, the more the better", errLimitExceededCount, tries)
1265
1266
1267 // the podresources rate limit is shared across calls, so sleep long enough
1268 // for the limiter to replenish; otherwise the next test could start with a
1269 // depleted budget and fail spuriously
1270
1271 ginkgo.By("Cooling down to reset the podresources API rate limit")
1272 time.Sleep(5 * time.Second)
1273 })
1274 })
1275 })
1276
1277 func requireLackOfSRIOVDevices() {
1278 if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount > 0 {
1279 e2eskipper.Skipf("this test is meant to run on a system with no SRIOV VFs configured")
1280 }
1281 }
1282
1283 func getOnlineCPUs() (cpuset.CPUSet, error) {
1284 onlineCPUList, err := os.ReadFile("/sys/devices/system/cpu/online")
1285 if err != nil {
1286 return cpuset.CPUSet{}, err
1287 }
1288 return cpuset.Parse(strings.TrimSpace(string(onlineCPUList)))
1289 }
1290
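// setupSampleDevicePluginOrFail deploys the sample device plugin on the local
// node and waits for its pod to become ready, failing the test otherwise.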
1291 func setupSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework) *v1.Pod {
1292 e2enode.WaitForNodeToBeReady(ctx, f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)
1293
1294 dp := getSampleDevicePluginPod(kubeletdevicepluginv1beta1.DevicePluginPath)
1295 dp.Spec.NodeName = framework.TestContext.NodeName
1296
1297 ginkgo.By("Create the sample device plugin pod")
1298
1299 dpPod := e2epod.NewPodClient(f).CreateSync(ctx, dp)
1300
1301 err := e2epod.WaitForPodCondition(ctx, f.ClientSet, dpPod.Namespace, dpPod.Name, "Ready", 120*time.Second, testutils.PodRunningReady)
1302 if err != nil {
1303 framework.Logf("Sample Device Pod %v took too long to enter running/ready: %v", dp.Name, err)
1304 }
1305 framework.ExpectNoError(err, "WaitForPodCondition() failed err: %v", err)
1306
1307 return dpPod
1308 }
1309
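// teardownSampleDevicePluginOrFail deletes the sample device plugin pod with a
// zero grace period and waits until all of its containers are removed.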
1310 func teardownSampleDevicePluginOrFail(ctx context.Context, f *framework.Framework, pod *v1.Pod) {
1311 gp := int64(0)
1312 deleteOptions := metav1.DeleteOptions{
1313 GracePeriodSeconds: &gp,
1314 }
1315 ginkgo.By(fmt.Sprintf("Delete sample device plugin pod %s/%s", pod.Namespace, pod.Name))
1316 err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, deleteOptions)
1317
1318 framework.ExpectNoError(err, "Failed to delete Pod %v in Namespace %v", pod.Name, pod.Namespace)
1319 waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
1320 }
1321
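// waitForTopologyUnawareResources waits until the local node advertises a
// non-zero allocatable amount of the sample (topology-unaware) resource.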
1322 func waitForTopologyUnawareResources(ctx context.Context, f *framework.Framework) {
1323 ginkgo.By(fmt.Sprintf("Waiting for %q resources to become available on the local node", defaultTopologyUnawareResourceName))
1324
1325 gomega.Eventually(ctx, func(ctx context.Context) bool {
1326 node := getLocalNode(ctx, f)
1327 resourceAmount := CountSampleDeviceAllocatable(node)
1328 return resourceAmount > 0
1329 }, 2*time.Minute, framework.Poll).Should(gomega.BeTrue())
1330 }
1331
1332 func getPodResourcesMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
1333
1334 ginkgo.By("getting Pod Resources metrics from the metrics API")
1335 return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
1336 }
1337
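// timelessSampleAtLeast matches a metric sample whose value is >= lower,
// ignoring the metric labels, timestamp, and histogram fields.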
1338 func timelessSampleAtLeast(lower interface{}) types.GomegaMatcher {
1339 return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
1340
1341 "Metric": gstruct.Ignore(),
1342 "Value": gomega.BeNumerically(">=", lower),
1343 "Timestamp": gstruct.Ignore(),
1344 "Histogram": gstruct.Ignore(),
1345 }))
1346 }
1347