package e2enode

import (
	"context"
	"fmt"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubelet/pkg/types"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	admissionapi "k8s.io/pod-security-admission/api"
	"k8s.io/utils/cpuset"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/nodefeature"
)

// ctnAttribute encodes the per-container knobs the helpers below need to
// build test pods: name, CPU request/limit, and an optional container
// restart policy for restartable init containers.
type ctnAttribute struct {
	ctnName       string
	cpuRequest    string
	cpuLimit      string
	restartPolicy *v1.ContainerRestartPolicy
}
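
// Illustrative sketch (not used by the tests in this file): a Guaranteed-QoS
// attribute set, where request == limit and both are a whole number of CPUs,
// which is what the static CPU Manager policy pins exclusively.
//
//	_ = []ctnAttribute{{
//		ctnName:    "example-gu-container",
//		cpuRequest: "2000m",
//		cpuLimit:   "2000m",
//	}}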

// makeCPUManagerPod returns a pod whose containers print their allowed CPU
// list (Cpus_allowed_list from /proc/self/status) and then sleep, so the test
// can read the assigned cpuset back from the container logs.
func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
	var containers []v1.Container
	for _, ctnAttr := range ctnAttributes {
		cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
		ctn := v1.Container{
			Name:  ctnAttr.ctnName,
			Image: busyboxImage,
			Resources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuRequest),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuLimit),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
			},
			Command: []string{"sh", "-c", cpusetCmd},
		}
		containers = append(containers, ctn)
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers:    containers,
		},
	}
}
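
// The grep above prints a single line such as "2-3" or "1,5"; a minimal
// sketch of how the tests consume it (names here are illustrative only):
//
//	logs, _ := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, ctnName)
//	assigned, _ := cpuset.Parse(strings.TrimSpace(logs)) // e.g. cpuset.New(2, 3)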

// makeCPUManagerInitContainersPod is like makeCPUManagerPod, but places the
// attribute-driven containers in InitContainers (honoring any per-container
// restart policy) and adds one regular Guaranteed container.
func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
	var containers []v1.Container
	cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2"
	cpusetAndSleepCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
	for _, ctnAttr := range ctnAttributes {
		ctn := v1.Container{
			Name:  ctnAttr.ctnName,
			Image: busyboxImage,
			Resources: v1.ResourceRequirements{
				Requests: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuRequest),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
				Limits: v1.ResourceList{
					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuLimit),
					v1.ResourceMemory: resource.MustParse("100Mi"),
				},
			},
			Command:       []string{"sh", "-c", cpusetCmd},
			RestartPolicy: ctnAttr.restartPolicy,
		}
		if ctnAttr.restartPolicy != nil && *ctnAttr.restartPolicy == v1.ContainerRestartPolicyAlways {
			ctn.Command = []string{"sh", "-c", cpusetAndSleepCmd}
		}
		containers = append(containers, ctn)
	}

	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			RestartPolicy:  v1.RestartPolicyNever,
			InitContainers: containers,
			Containers: []v1.Container{
				{
					Name:  "regular",
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("1000m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						},
						Limits: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("1000m"),
							v1.ResourceMemory: resource.MustParse("100Mi"),
						},
					},
					Command: []string{"sh", "-c", cpusetAndSleepCmd},
				},
			},
		},
	}
}
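
// Sketch of attributes for a restartable (sidecar-style) init container, the
// case the function above special-cases; values are illustrative:
//
//	always := v1.ContainerRestartPolicyAlways
//	_ = []ctnAttribute{{
//		ctnName:       "example-sidecar",
//		cpuRequest:    "1000m",
//		cpuLimit:      "1000m",
//		restartPolicy: &always,
//	}}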

func deletePodSyncByName(ctx context.Context, f *framework.Framework, podName string) {
	gp := int64(0)
	delOpts := metav1.DeleteOptions{
		GracePeriodSeconds: &gp,
	}
	e2epod.NewPodClient(f).DeleteSync(ctx, podName, delOpts, e2epod.DefaultPodDeletionTimeout)
}

func deletePods(ctx context.Context, f *framework.Framework, podNames []string) {
	for _, podName := range podNames {
		deletePodSyncByName(ctx, f, podName)
	}
}

func getLocalNodeCPUDetails(ctx context.Context, f *framework.Framework) (cpuCapVal int64, cpuAllocVal int64, cpuResVal int64) {
	localNodeCap := getLocalNode(ctx, f).Status.Capacity
	cpuCap := localNodeCap[v1.ResourceCPU]
	localNodeAlloc := getLocalNode(ctx, f).Status.Allocatable
	cpuAlloc := localNodeAlloc[v1.ResourceCPU]
	cpuRes := cpuCap.DeepCopy()
	cpuRes.Sub(cpuAlloc)

	// Round up the reserved CPUs to a whole number of cores.
	cpuRes.RoundUp(0)

	return cpuCap.Value(), cpuCap.Value() - cpuRes.Value(), cpuRes.Value()
}
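
// Worked example (assumed numbers): with a capacity of 8 CPUs and 7800m
// allocatable, the reserved quantity is 200m, which rounds up to 1 full CPU,
// so the function returns (8, 7, 1). The "allocatable" it reports is thus an
// integer count of CPUs usable for exclusive assignment, not the raw node
// allocatable.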

func waitForContainerRemoval(ctx context.Context, containerName, podName, podNS string) {
	rs, _, err := getCRIClient()
	framework.ExpectNoError(err)
	gomega.Eventually(ctx, func(ctx context.Context) bool {
		containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
			LabelSelector: map[string]string{
				types.KubernetesPodNameLabel:       podName,
				types.KubernetesPodNamespaceLabel:  podNS,
				types.KubernetesContainerNameLabel: containerName,
			},
		})
		if err != nil {
			return false
		}
		return len(containers) == 0
	}, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
}

func isHTEnabled() bool {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Thread(s) per core:\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	threadsPerCore, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return threadsPerCore > 1
}

func isMultiNUMA() bool {
	outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"NUMA node(s):\" | cut -d \":\" -f 2").Output()
	framework.ExpectNoError(err)

	numaNodes, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	framework.ExpectNoError(err)

	return numaNodes > 1
}

func getSMTLevel() int {
	cpuID := 0 // any online CPU will do; its siblings list gives the SMT level
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuID)).Output()
	framework.ExpectNoError(err)

	// The siblings list is a cpuset-style string (e.g. "0,4" or "0-1");
	// its size is the number of hardware threads per physical core.
	cpus, err := cpuset.Parse(strings.TrimSpace(string(out)))
	framework.ExpectNoError(err)
	return cpus.Size()
}
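
// Example (hypothetical topology): on a 2-way SMT machine where cpu0's
// thread_siblings_list reads "0,4", cpuset.Parse yields {0,4} and the SMT
// level is 2; with SMT disabled the list is just "0" and the level is 1.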

func getCPUSiblingList(cpuRes int64) string {
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuRes)).Output()
	framework.ExpectNoError(err)
	return string(out)
}

func getCoreSiblingList(cpuRes int64) string {
	out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/core_siblings_list | tr -d \"\n\r\"", cpuRes)).Output()
	framework.ExpectNoError(err)
	return string(out)
}

type cpuManagerKubeletArguments struct {
	policyName              string
	enableCPUManagerOptions bool
	reservedSystemCPUs      cpuset.CPUSet
	options                 map[string]string
}

func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration {
	newCfg := oldCfg.DeepCopy()
	if newCfg.FeatureGates == nil {
		newCfg.FeatureGates = make(map[string]bool)
	}

	newCfg.FeatureGates["CPUManagerPolicyOptions"] = kubeletArguments.enableCPUManagerOptions
	newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions
	newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions

	newCfg.CPUManagerPolicy = kubeletArguments.policyName
	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}

	if kubeletArguments.options != nil {
		newCfg.CPUManagerPolicyOptions = kubeletArguments.options
	}

	if kubeletArguments.reservedSystemCPUs.Size() > 0 {
		cpus := kubeletArguments.reservedSystemCPUs.String()
		framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus)
		newCfg.ReservedSystemCPUs = cpus
	} else {
		// When no explicit reserved CPU set is requested, make sure the
		// kubelet still reserves some CPU via kube-reserved, so the static
		// policy has a non-empty reserved set to carve out.
		if newCfg.KubeReserved == nil {
			newCfg.KubeReserved = map[string]string{}
		}

		if _, ok := newCfg.KubeReserved["cpu"]; !ok {
			newCfg.KubeReserved["cpu"] = "200m"
		}
	}

	return newCfg
}
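
// Minimal usage sketch (mirrors what the tests below do): enable the static
// policy with the full-pcpus-only option and CPU 0 reserved, then apply it.
//
//	newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
//		policyName:              string(cpumanager.PolicyStatic),
//		reservedSystemCPUs:      cpuset.New(0),
//		enableCPUManagerOptions: true,
//		options:                 map[string]string{cpumanager.FullPCPUsOnlyOption: "true"},
//	})
//	updateKubeletConfig(ctx, f, newCfg, true)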

func runGuPodTest(ctx context.Context, f *framework.Framework, cpuCount int) {
	var pod *v1.Pod

	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: fmt.Sprintf("%dm", 1000*cpuCount),
			cpuLimit:   fmt.Sprintf("%dm", 1000*cpuCount),
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")

	for _, cnt := range pod.Spec.Containers {
		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)

		gomega.Expect(cpus.Size()).To(gomega.Equal(cpuCount), "expected cpu set size == %d, got %q", cpuCount, cpus.String())
	}

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

func runNonGuPodTest(ctx context.Context, f *framework.Framework, cpuCap int64) {
	var ctnAttrs []ctnAttribute
	var err error
	var pod *v1.Pod
	var expAllowedCPUsListRegex string

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "non-gu-container",
			cpuRequest: "100m",
			cpuLimit:   "200m",
		},
	}
	pod = makeCPUManagerPod("non-gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
	// on a single-CPU node the only possible value is "0", not a range
	if cpuCap == 1 {
		expAllowedCPUsListRegex = "^0\n$"
	}
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}

func mustParseCPUSet(s string) cpuset.CPUSet {
	res, err := cpuset.Parse(s)
	framework.ExpectNoError(err)
	return res
}

func runMultipleGuNonGuPods(ctx context.Context, f *framework.Framework, cpuCap int64, cpuAlloc int64) {
	var cpuListString, expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1 int
	var cset cpuset.CPUSet
	var err error
	var ctnAttrs []ctnAttribute
	var pod1, pod2 *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod1 = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "non-gu-container",
			cpuRequest: "200m",
			cpuLimit:   "300m",
		},
	}
	pod2 = makeCPUManagerPod("non-gu-pod", ctnAttrs)
	pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1 = 1
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		cpu1 = cpuList[1]
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 1 {
			cpu1 = cpuList[1]
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	cpuListString = "0"
	if cpuAlloc > 2 {
		cset = mustParseCPUSet(fmt.Sprintf("0-%d", cpuCap-1))
		cpuListString = fmt.Sprintf("%s", cset.Difference(cpuset.New(cpu1)))
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)
	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(ctx, pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(ctx, pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}
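
// Worked example (assumed 8-CPU, HT-enabled node where cpu0's siblings are
// "0,4"): the Gu container is expected to get exactly CPU 4, while the non-Gu
// container should see the remaining shared pool "0-3,5-7", which is what the
// Difference call above computes.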

func runMultipleCPUGuPod(ctx context.Context, f *framework.Framework) {
	var cpuListString, expAllowedCPUsListRegex string
	var cpuList []int
	var cset cpuset.CPUSet
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpuListString = "1-2"
	if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 1 {
			cset = mustParseCPUSet(getCPUSiblingList(int64(cpuList[1])))
			if !isHTEnabled() && len(cpuList) > 2 {
				cset = mustParseCPUSet(fmt.Sprintf("%d-%d", cpuList[1], cpuList[2]))
			}
			cpuListString = fmt.Sprintf("%s", cset)
		}
	} else if isHTEnabled() {
		cpuListString = "2-3"
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cset = mustParseCPUSet(getCPUSiblingList(1))
			cpuListString = fmt.Sprintf("%s", cset)
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}

func runMultipleCPUContainersGuPod(ctx context.Context, f *framework.Framework) {
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1, cpu2 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod
	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
		{
			ctnName:    "gu-container2",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod = makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
		if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu2 = cpuList[1]
			}
		}
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 2 {
			cpu1, cpu2 = cpuList[1], cpuList[2]
		}
	}
	// Group the alternation so the anchors apply to both alternatives:
	// each container must get exactly one of the two expected CPUs.
	expAllowedCPUsListRegex = fmt.Sprintf("^(%d|%d)\n$", cpu1, cpu2)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[0].Name, pod.Name)

	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[1].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod.Spec.Containers[1].Name, pod.Name)

	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod.Name})
	waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
	waitForContainerRemoval(ctx, pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
}

func runMultipleGuPods(ctx context.Context, f *framework.Framework) {
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1, cpu2 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod1, pod2 *v1.Pod

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs)
	pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)

	ctnAttrs = []ctnAttribute{
		{
			ctnName:    "gu-container2",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs)
	pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)

	ginkgo.By("checking if the expected cpuset was assigned")
	cpu1, cpu2 = 1, 2
	if isHTEnabled() {
		cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
		if cpuList[1] != 1 {
			cpu1, cpu2 = cpuList[1], 1
		}
		if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu2 = cpuList[1]
			}
		}
	} else if isMultiNUMA() {
		cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
		if len(cpuList) > 2 {
			cpu1, cpu2 = cpuList[1], cpuList[2]
		}
	}
	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod1.Spec.Containers[0].Name, pod1.Name)

	expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu2)
	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
		pod2.Spec.Containers[0].Name, pod2.Name)
	ginkgo.By("by deleting the pods and waiting for container removal")
	deletePods(ctx, f, []string{pod1.Name, pod2.Name})
	waitForContainerRemoval(ctx, pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
	waitForContainerRemoval(ctx, pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}

func runCPUManagerTests(f *framework.Framework) {
	var cpuCap, cpuAlloc int64
	var oldCfg *kubeletconfig.KubeletConfiguration
	var expAllowedCPUsListRegex string
	var cpuList []int
	var cpu1 int
	var err error
	var ctnAttrs []ctnAttribute
	var pod *v1.Pod

	ginkgo.BeforeEach(func(ctx context.Context) {
		var err error
		if oldCfg == nil {
			oldCfg, err = getCurrentKubeletConfig(ctx)
			framework.ExpectNoError(err)
		}
	})

	ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) {
		cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)

		// Skip the CPU Manager tests altogether if the CPU capacity < 2.
		if cpuCap < 2 {
			e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2")
		}

		// Enable the static CPU Manager policy in the kubelet.
		newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:         string(cpumanager.PolicyStatic),
			reservedSystemCPUs: cpuset.CPUSet{},
		})
		updateKubeletConfig(ctx, f, newCfg, true)

		ginkgo.By("running a non-Gu pod")
		runNonGuPodTest(ctx, f, cpuCap)

		ginkgo.By("running a Gu pod")
		runGuPodTest(ctx, f, 1)

		ginkgo.By("running multiple Gu and non-Gu pods")
		runMultipleGuNonGuPods(ctx, f, cpuCap, cpuAlloc)

		// The remaining scenarios need at least 3 CPUs.
		if cpuCap < 3 {
			e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3")
		}

		ginkgo.By("running a Gu pod requesting multiple CPUs")
		runMultipleCPUGuPod(ctx, f)

		ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
		runMultipleCPUContainersGuPod(ctx, f)

		ginkgo.By("running multiple Gu pods")
		runMultipleGuPods(ctx, f)

		ginkgo.By("test for automatic removal of inactive pods from the cpumanager state file")

		// Run a Gu pod, delete it, and verify its containers are fully gone,
		// so the CPU Manager can drop the now-inactive pod from its state file.
		ginkgo.By("running a Gu pod for test remove")
		ctnAttrs = []ctnAttribute{
			{
				ctnName:    "gu-container-testremove",
				cpuRequest: "1000m",
				cpuLimit:   "1000m",
			},
		}
		pod = makeCPUManagerPod("gu-pod-testremove", ctnAttrs)
		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

		ginkgo.By("checking if the expected cpuset was assigned")
		cpu1 = 1
		if isHTEnabled() {
			cpuList = mustParseCPUSet(getCPUSiblingList(0)).List()
			cpu1 = cpuList[1]
		} else if isMultiNUMA() {
			cpuList = mustParseCPUSet(getCoreSiblingList(0)).List()
			if len(cpuList) > 1 {
				cpu1 = cpuList[1]
			}
		}
		expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
		err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
			pod.Spec.Containers[0].Name, pod.Name)

		deletePodSyncByName(ctx, f, pod.Name)
		// Wait for all containers to be really gone, so the cpumanager
		// reconcile loop does not rewrite the state file with stale content.
		waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
	})

	ginkgo.It("should assign CPUs as expected with enhanced policy based on strict SMT alignment", func(ctx context.Context) {
		fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
		smtLevel := getSMTLevel()

		// Strict SMT alignment is only meaningful when SMT is enabled.
		if smtLevel < 2 {
			e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
		}

		// The positive test needs at least two full cores' worth of allocatable CPUs.
		if cpuAlloc < int64(smtLevel*2) {
			e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
		}

		framework.Logf("SMT level %d", smtLevel)

		cpuPolicyOptions := map[string]string{
			cpumanager.FullPCPUsOnlyOption: "true",
		}
		newCfg := configureCPUManagerInKubelet(oldCfg,
			&cpuManagerKubeletArguments{
				policyName:              string(cpumanager.PolicyStatic),
				reservedSystemCPUs:      cpuset.New(0),
				enableCPUManagerOptions: true,
				options:                 cpuPolicyOptions,
			},
		)
		updateKubeletConfig(ctx, f, newCfg, true)

		runSMTAlignmentNegativeTests(ctx, f)
		runSMTAlignmentPositiveTests(ctx, f, smtLevel)
	})
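
	// With full-pcpus-only enabled on an SMT-2 node (illustrative numbers):
	// a container asking for 1000m/1000m is rejected with an SMT alignment
	// error, because it would occupy only half a physical core, while a
	// 2000m/2000m container is admitted and receives both hyperthreads of
	// one core, e.g. {2,3}.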

	f.It("should not reuse CPUs of restartable init containers", nodefeature.SidecarContainers, func(ctx context.Context) {
		cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)

		// Skip the test if the CPU capacity < 3.
		if cpuCap < 3 {
			e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3, got %d", cpuCap)
		}

		// Enable the static CPU Manager policy in the kubelet.
		newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
			policyName:         string(cpumanager.PolicyStatic),
			reservedSystemCPUs: cpuset.CPUSet{},
		})
		updateKubeletConfig(ctx, f, newCfg, true)

		ginkgo.By("running a Gu pod with a regular init container and a restartable init container")
		ctrAttrs := []ctnAttribute{
			{
				ctnName:    "gu-init-container1",
				cpuRequest: "1000m",
				cpuLimit:   "1000m",
			},
			{
				ctnName:       "gu-restartable-init-container2",
				cpuRequest:    "1000m",
				cpuLimit:      "1000m",
				restartPolicy: &containerRestartPolicyAlways,
			},
		}
		pod := makeCPUManagerInitContainersPod("gu-pod", ctrAttrs)
		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

		ginkgo.By("checking if the expected cpuset was assigned")
		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[0].Name)
		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		reusableCPUs, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)

		gomega.Expect(reusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", reusableCPUs.String())

		logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.InitContainers[1].Name)
		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		nonReusableCPUs, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.InitContainers[1].Name, pod.Name)

		gomega.Expect(nonReusableCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", nonReusableCPUs.String())

		logs, err = e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", pod.Spec.Containers[0].Name, pod.Name)

		gomega.Expect(cpus.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", cpus.String())

		// The regular init container's CPU becomes reusable once it exits, so
		// the restartable init container is expected to land on that same CPU;
		// the long-running regular container, however, must get a CPU disjoint
		// from the restartable init container's.
		gomega.Expect(reusableCPUs.Equals(nonReusableCPUs)).To(gomega.BeTrue(), "expected reusable cpuset [%s] to be equal to non-reusable cpuset [%s]", reusableCPUs.String(), nonReusableCPUs.String())
		gomega.Expect(nonReusableCPUs.Intersection(cpus).IsEmpty()).To(gomega.BeTrue(), "expected non-reusable cpuset [%s] to be disjoint from cpuset [%s]", nonReusableCPUs.String(), cpus.String())

		ginkgo.By("by deleting the pods and waiting for container removal")
		deletePods(ctx, f, []string{pod.Name})
		waitForContainerRemoval(ctx, pod.Spec.InitContainers[0].Name, pod.Name, pod.Namespace)
		waitForContainerRemoval(ctx, pod.Spec.InitContainers[1].Name, pod.Name, pod.Namespace)
		waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
	})

	ginkgo.AfterEach(func(ctx context.Context) {
		updateKubeletConfig(ctx, f, oldCfg, true)
	})
}

func runSMTAlignmentNegativeTests(ctx context.Context, f *framework.Framework) {
	// Negative test: a Gu pod requesting a single CPU cannot be satisfied with
	// complete physical cores under full-pcpus-only, so it must fail admission.
	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container-neg",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}
	pod := makeCPUManagerPod("gu-pod", ctnAttrs)
	// CreateSync would wait for the pod to become Ready, which will never
	// happen here, so create asynchronously and wait for a terminal phase.
	pod = e2epod.NewPodClient(f).Create(ctx, pod)

	err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
		if pod.Status.Phase != v1.PodPending {
			return true, nil
		}
		return false, nil
	})
	framework.ExpectNoError(err)
	pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)

	if pod.Status.Phase != v1.PodFailed {
		framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
	}
	if !isSMTAlignmentError(pod) {
		framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
	}

	deletePodSyncByName(ctx, f, pod.Name)
	// Wait for all containers to be really gone, so the cpumanager reconcile
	// loop does not rewrite the state file with stale content.
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

func runSMTAlignmentPositiveTests(ctx context.Context, f *framework.Framework, smtLevel int) {
	// Positive test: a Gu pod whose CPU request can be satisfied with complete
	// physical cores must be admitted, and its cpuset must be SMT-aligned.
	ctnAttrs := []ctnAttribute{
		{
			ctnName:    "gu-container-pos",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}
	pod := makeCPUManagerPod("gu-pod", ctnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)

	for _, cnt := range pod.Spec.Containers {
		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)

		framework.Logf("got pod logs: %v", logs)
		cpus, err := cpuset.Parse(strings.TrimSpace(logs))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)

		validateSMTAlignment(cpus, smtLevel, pod, &cnt)
	}

	deletePodSyncByName(ctx, f, pod.Name)
	// Wait for all containers to be really gone, so the cpumanager reconcile
	// loop does not rewrite the state file with stale content.
	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
}

func validateSMTAlignment(cpus cpuset.CPUSet, smtLevel int, pod *v1.Pod, cnt *v1.Container) {
	framework.Logf("validating cpus: %v", cpus)

	if cpus.Size()%smtLevel != 0 {
		framework.Failf("pod %q cnt %q received non-smt-multiple cpuset %v (SMT level %d)", pod.Name, cnt.Name, cpus, smtLevel)
	}

	// Build the set of all thread siblings of the assigned CPUs; if the
	// assignment is made of whole physical cores, this union must be exactly
	// the assigned cpuset.
	siblingsCPUs := cpuset.New()
	for _, cpuID := range cpus.UnsortedList() {
		threadSiblings, err := cpuset.Parse(strings.TrimSpace(getCPUSiblingList(int64(cpuID))))
		framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
		siblingsCPUs = siblingsCPUs.Union(threadSiblings)
	}

	framework.Logf("siblings cpus: %v", siblingsCPUs)
	if !siblingsCPUs.Equals(cpus) {
		framework.Failf("pod %q cnt %q received non-smt-aligned cpuset %v (expected %v)", pod.Name, cnt.Name, cpus, siblingsCPUs)
	}
}
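
// Worked example (assumed SMT-2 topology where cpu2/cpu3 and cpu4/cpu5 are
// sibling pairs): cpus={2,3} passes, since the union of the sibling sets is
// again {2,3}; cpus={2,5} fails, because the union {2,3,4,5} is larger than
// the assignment, i.e. the container holds only half of two different cores.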

func isSMTAlignmentError(pod *v1.Pod) bool {
	re := regexp.MustCompile(`SMT.*Alignment.*Error`)
	return re.MatchString(pod.Status.Reason)
}
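
// The regexp deliberately stays loose: it matches a status reason such as
// "SMTAlignmentError" (the admission rejection reason used by the static
// policy) without depending on its exact wording.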

// Serial because the tests update the kubelet configuration.
var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() {
	f := framework.NewDefaultFramework("cpu-manager-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("With kubeconfig updated with static CPU Manager policy run the CPU Manager tests", func() {
		runCPUManagerTests(f)
	})
})