package e2enode

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"
)

const (
	hugepagesSize2M          = 2048
	hugepagesSize1G          = 1048576
	hugepagesDirPrefix       = "/sys/kernel/mm/hugepages/hugepages"
	hugepagesCapacityFile    = "nr_hugepages"
	hugepagesResourceName2Mi = "hugepages-2Mi"
	hugepagesResourceName1Gi = "hugepages-1Gi"
	hugepagesCgroup2MB       = "hugetlb.2MB"
	hugepagesCgroup1GB       = "hugetlb.1GB"
	mediumHugepages          = "HugePages"
	mediumHugepages2Mi       = "HugePages-2Mi"
	mediumHugepages1Gi       = "HugePages-1Gi"
)

var (
	resourceToSize = map[string]int{
		hugepagesResourceName2Mi: hugepagesSize2M,
		hugepagesResourceName1Gi: hugepagesSize1G,
	}
	resourceToCgroup = map[string]string{
		hugepagesResourceName2Mi: hugepagesCgroup2MB,
		hugepagesResourceName1Gi: hugepagesCgroup1GB,
	}
)

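// makePodToVerifyHugePages returns a pod that reads the hugetlb limit file of
// the given pod-level cgroup and exits non-zero if it does not match the
// expected limit.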
func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity, hugepagesCgroup string) *v1.Pod {
	// convert the cgroup name to its literal form for the configured cgroup driver
	cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName)
	cgroupFsName := ""
	if kubeletCfg.CgroupDriver == "systemd" {
		cgroupFsName = cgroupName.ToSystemd()
	} else {
		cgroupFsName = cgroupName.ToCgroupfs()
	}

	hugetlbLimitFile := ""
	// cgroup v2 exposes the limit as <cgroup>/hugetlb.<size>.max, while cgroup v1
	// exposes it as hugetlb/<cgroup>/hugetlb.<size>.limit_in_bytes (mounted under /tmp)
	if IsCgroup2UnifiedMode() {
		hugetlbLimitFile = fmt.Sprintf("/tmp/%s/%s.max", cgroupFsName, hugepagesCgroup)
	} else {
		hugetlbLimitFile = fmt.Sprintf("/tmp/hugetlb/%s/%s.limit_in_bytes", cgroupFsName, hugepagesCgroup)
	}

	// the container compares the expected limit against the value in the cgroup file and exits non-zero on mismatch
	command := fmt.Sprintf("expected=%v; actual=$(cat %v); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), hugetlbLimitFile)
	framework.Logf("Pod to run command: %v", command)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "pod" + string(uuid.NewUUID()),
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image:   busyboxImage,
					Name:    "container" + string(uuid.NewUUID()),
					Command: []string{"sh", "-c", command},
					VolumeMounts: []v1.VolumeMount{
						{
							Name:      "sysfscgroup",
							MountPath: "/tmp",
						},
					},
				},
			},
			Volumes: []v1.Volume{
				{
					Name: "sysfscgroup",
					VolumeSource: v1.VolumeSource{
						HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
					},
				},
			},
		},
	}
	return pod
}

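// configureHugePages reserves hugepagesCount hugepages of hugepagesSize (in kB)
// on the host, optionally on a specific NUMA node, and returns an error if the
// kernel did not accept the requested reservation.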
func configureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error {
	// compact memory first so that enough contiguous blocks are likely to be
	// available for the requested hugepages
	if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil {
		if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil {
			return err
		}
	}

	// e.g. hugepages/hugepages-2048kB/nr_hugepages
	hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile)

	// e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
	hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix)
	if numaNodeID != nil {
		// e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
		hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix)
	}

	// reserve the requested number of hugepages
	command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile)
	if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
		return err
	}

	// read the value back to verify that the kernel actually reserved the pages
	command = fmt.Sprintf("cat %s", hugepagesFile)
	outData, err := exec.Command("/bin/sh", "-c", command).Output()
	if err != nil {
		return err
	}

	numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	if err != nil {
		return err
	}

	framework.Logf("Hugepages total is set to %v", numHugePages)
	if numHugePages == hugepagesCount {
		return nil
	}

	return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages)
}

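// isHugePageAvailable returns true when the host exposes a sysfs entry for
// hugepages of the given size (in kB).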
func isHugePageAvailable(hugepagesSize int) bool {
	path := fmt.Sprintf("%s-%dkB/%s", hugepagesDirPrefix, hugepagesSize, hugepagesCapacityFile)
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

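// getHugepagesTestPod returns a long-running pod with the given hugepages
// limits, volume mounts, and volumes in the test namespace.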
func getHugepagesTestPod(f *framework.Framework, limits v1.ResourceList, mounts []v1.VolumeMount, volumes []v1.Volume) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "hugepages-",
			Namespace:    f.Namespace.Name,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "container" + string(uuid.NewUUID()),
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Limits: limits,
					},
					Command:      []string{"sleep", "3600"},
					VolumeMounts: mounts,
				},
			},
			Volumes: volumes,
		},
	}
}

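// The suite runs serially because it restarts the kubelet and changes the
// node-level hugepages reservation.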
var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, "[NodeSpecialFeature:HugePages]", func() {
	f := framework.NewDefaultFramework("hugepages-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.It("should remove resources for huge page sizes no longer supported", func(ctx context.Context) {
		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
		framework.ExpectNoError(err, "while getting node status")

		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
		value, ok := node.Status.Capacity["hugepages-3Mi"]
		if !ok {
			framework.Failf("capacity should contain resource hugepages-3Mi: %v", node.Status.Capacity)
		}
		gomega.Expect(value.String()).To(gomega.Equal("9Mi"), "huge pages with size 3Mi should be supported")

		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are not supported")
		restartKubelet(true)

		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
		gomega.Eventually(ctx, func() bool {
			node, err = f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.BeFalse())
	})

	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
		ginkgo.By("Stopping kubelet")
		startKubelet := stopKubelet()
		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		ginkgo.By("Starting kubelet again")
		startKubelet()

		ginkgo.By("verifying that the hugepages-2Mi resource is present")
		gomega.Eventually(ctx, func() bool {
			node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.BeTrue())
	})

	ginkgo.When("starting the pod", func() {
		var (
			testpod   *v1.Pod
			limits    v1.ResourceList
			mounts    []v1.VolumeMount
			volumes   []v1.Volume
			hugepages map[string]int
		)

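		// setHugepages reserves the requested number of hugepages on the host for
		// every resource in the hugepages map, skipping the test if a size is not
		// supported by the node.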
		setHugepages := func(ctx context.Context) {
			for hugepagesResource, count := range hugepages {
				size := resourceToSize[hugepagesResource]
				ginkgo.By(fmt.Sprintf("Verifying hugepages %d are supported", size))
				if !isHugePageAvailable(size) {
					e2eskipper.Skipf("skipping test because hugepages of size %d not supported", size)
					return
				}

				ginkgo.By(fmt.Sprintf("Configuring the host to reserve %d pre-allocated hugepages of size %d", count, size))
				gomega.Eventually(ctx, func() error {
					if err := configureHugePages(size, count, nil); err != nil {
						return err
					}
					return nil
				}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
			}
		}

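		// waitForHugepages blocks until the node capacity reports the expected
		// amount of memory for every hugepages resource in the hugepages map.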
		waitForHugepages := func(ctx context.Context) {
			ginkgo.By("Waiting for hugepages resource to become available on the local node")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
				if err != nil {
					return err
				}

				for hugepagesResource, count := range hugepages {
					capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)]
					if !ok {
						return fmt.Errorf("the node does not have the resource %s", hugepagesResource)
					}

					size, succeed := capacity.AsInt64()
					if !succeed {
						return fmt.Errorf("failed to convert quantity to int64")
					}

					expectedSize := count * resourceToSize[hugepagesResource] * 1024
					if size != int64(expectedSize) {
						return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize)
					}
				}
				return nil
			}, time.Minute, framework.Poll).Should(gomega.BeNil())
		}

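		// releaseHugepages sets the hugepages reservation for every configured size
		// back to zero.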
		releaseHugepages := func(ctx context.Context) {
			ginkgo.By("Releasing hugepages")
			gomega.Eventually(ctx, func() error {
				for hugepagesResource := range hugepages {
					command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile)
					if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
						return err
					}
				}
				return nil
			}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
		}

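		// runHugePagesTests registers the test that checks the hugetlb mounts inside
		// the container and the hugetlb limits applied to the pod cgroup.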
		runHugePagesTests := func() {
			ginkgo.It("should set correct hugetlb mount and limit under the container cgroup", func(ctx context.Context) {
				ginkgo.By("getting mounts for the test pod")
				command := []string{"mount"}
				out := e2epod.ExecCommandInContainer(f, testpod.Name, testpod.Spec.Containers[0].Name, command...)

				for _, mount := range mounts {
					ginkgo.By(fmt.Sprintf("checking that the hugetlb mount %s exists under the container", mount.MountPath))
					gomega.Expect(out).To(gomega.ContainSubstring(mount.MountPath))
				}

				for resourceName := range hugepages {
					verifyPod := makePodToVerifyHugePages(
						"pod"+string(testpod.UID),
						testpod.Spec.Containers[0].Resources.Limits[v1.ResourceName(resourceName)],
						resourceToCgroup[resourceName],
					)
					ginkgo.By("checking if the expected hugetlb settings were applied")
					e2epod.NewPodClient(f).Create(ctx, verifyPod)
					err := e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, verifyPod.Name, f.Namespace.Name)
					framework.ExpectNoError(err)
				}
			})
		}

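		// setup: reserve hugepages on the host, restart the kubelet to pick them up,
		// and run a test pod that requests them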
		ginkgo.JustBeforeEach(func(ctx context.Context) {
			setHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)

			pod := getHugepagesTestPod(f, limits, mounts, volumes)

			ginkgo.By("running a test pod that requests hugepages")
			testpod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
		})

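		// teardown: delete the test pod, release the hugepages, and restart the
		// kubelet to refresh the node capacity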
		ginkgo.JustAfterEach(func(ctx context.Context) {
			ginkgo.By(fmt.Sprintf("deleting test pod %s", testpod.Name))
			e2epod.NewPodClient(f).DeleteSync(ctx, testpod.Name, metav1.DeleteOptions{}, 2*time.Minute)

			releaseHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)
		})

		ginkgo.Context("with resource requests that contain only one hugepages resource", func() {
			ginkgo.Context("with the backward compatible API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages",
							MountPath: "/hugepages",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})

				runHugePagesTests()
			})

			ginkgo.Context("with the new API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages-2mi",
							MountPath: "/hugepages-2Mi",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages-2mi",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages2Mi,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})

				runHugePagesTests()
			})

			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{hugepagesResourceName2Mi: 0}
			})
		})

		ginkgo.Context("with resource requests that contain multiple hugepages resources", func() {
			ginkgo.BeforeEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 5,
					hugepagesResourceName1Gi: 1,
				}
				limits = v1.ResourceList{
					v1.ResourceCPU:           resource.MustParse("10m"),
					v1.ResourceMemory:        resource.MustParse("100Mi"),
					hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					hugepagesResourceName1Gi: resource.MustParse("1Gi"),
				}
				mounts = []v1.VolumeMount{
					{
						Name:      "hugepages-2mi",
						MountPath: "/hugepages-2Mi",
					},
					{
						Name:      "hugepages-1gi",
						MountPath: "/hugepages-1Gi",
					},
				}
				volumes = []v1.Volume{
					{
						Name: "hugepages-2mi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages2Mi,
							},
						},
					},
					{
						Name: "hugepages-1gi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages1Gi,
							},
						},
					},
				}
			})

			runHugePagesTests()

			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 0,
					hugepagesResourceName1Gi: 0,
				}
			})
		})
	})
})