package e2enode

import (
	"context"
	"fmt"
	"strconv"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubelet/pkg/types"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

const (
	// Default kubelet container GC policy: keep at most one dead container
	// per container, with no limit on the total number of dead containers
	// (the defaults of --maximum-dead-containers-per-container and
	// --maximum-dead-containers).
	maxPerPodContainer = 1
	maxTotalContainers = -1

	garbageCollectDuration = 3 * time.Minute
	setupDuration          = 10 * time.Minute
	runtimePollInterval    = 10 * time.Second
)

type testPodSpec struct {
	podName string
	// prefix for the container names in this pod; each container name is the
	// prefix followed by its index, e.g. prefix0, prefix1, ...
	containerPrefix string
	// number of times each container in the pod is expected to restart
	restartCount int32
	// number of containers in the pod
	numContainers int
	// returns the names of all containers (running or dead) that the runtime
	// currently reports for this pod
	getContainerNames func() ([]string, error)
}

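// getContainerName returns the name of the container with the given index in
// this pod, built from the pod's container prefix and the index.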
func (pod *testPodSpec) getContainerName(containerNumber int) string {
	return fmt.Sprintf("%s%d", pod.containerPrefix, containerNumber)
}

type testRun struct {
	// name of the test, used in the ginkgo.Context description
	testName string
	// pods created for this test
	testPods []*testPodSpec
}

// GarbageCollect tests that the kubelet garbage-collects dead containers
// according to its container GC policy (maxPerPodContainer / maxTotalContainers).
var _ = SIGDescribe("GarbageCollect", framework.WithSerial(), nodefeature.GarbageCollect, func() {
	f := framework.NewDefaultFramework("garbage-collect-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	containerNamePrefix := "gc-test-container-"
	podNamePrefix := "gc-test-pod-"

	// Each test run varies three things:
	//   1. the number of test pods
	//   2. the number of containers per pod
	//   3. the restart count of each container
	// The suffixes below are appended to the pod and container name prefixes.
	firstSuffix := "one-container-no-restarts"
	secondSuffix := "many-containers-many-restarts-one-pod"
	thirdSuffix := "many-containers-many-restarts-"
	tests := []testRun{
		{
			testName: "One Non-restarting Container",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + firstSuffix,
					containerPrefix: containerNamePrefix + firstSuffix,
					restartCount:    0,
					numContainers:   1,
				},
			},
		},
		{
			testName: "Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + secondSuffix,
					containerPrefix: containerNamePrefix + secondSuffix,
					restartCount:    4,
					numContainers:   4,
				},
			},
		},
		{
			testName: "Many Pods with Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + thirdSuffix + "one",
					containerPrefix: containerNamePrefix + thirdSuffix + "one",
					restartCount:    3,
					numContainers:   4,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "two",
					containerPrefix: containerNamePrefix + thirdSuffix + "two",
					restartCount:    2,
					numContainers:   6,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "three",
					containerPrefix: containerNamePrefix + thirdSuffix + "three",
					restartCount:    3,
					numContainers:   5,
				},
			},
		},
	}
	for _, test := range tests {
		containerGCTest(f, test)
	}
})

// containerGCTest creates the pods described by the testRun and verifies that:
//   - every container restarts the expected number of times,
//   - while the pods run, the runtime never reports more than
//     maxPerPodContainer dead copies of any container (plus the running copy),
//   - if maxTotalContainers is set, the total container count stays within it,
//   - if neither limit forces eviction, an extra copy of each restarted
//     container is kept around,
//   - once the pods are deleted, all of their containers are cleaned up.
func containerGCTest(f *framework.Framework, test testRun) {
	var runtime internalapi.RuntimeService
	ginkgo.BeforeEach(func() {
		var err error
		runtime, _, err = getCRIClient()
		framework.ExpectNoError(err)
	})
	for _, pod := range test.testPods {
		// Have each pod report its container names straight from the CRI
		// runtime, filtered by the pod's name and namespace labels, so that
		// dead (exited) containers are counted as well.
		pod.getContainerNames = func() ([]string, error) {
			relevantContainers := []string{}
			containers, err := runtime.ListContainers(context.Background(), &runtimeapi.ContainerFilter{
				LabelSelector: map[string]string{
					types.KubernetesPodNameLabel:      pod.podName,
					types.KubernetesPodNamespaceLabel: f.Namespace.Name,
				},
			})
			if err != nil {
				return relevantContainers, err
			}
			for _, container := range containers {
				relevantContainers = append(relevantContainers, container.Labels[types.KubernetesContainerNameLabel])
			}
			return relevantContainers, nil
		}
	}

	ginkgo.Context(fmt.Sprintf("Garbage Collection Test: %s", test.testName), func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			realPods := getPods(test.testPods)
			e2epod.NewPodClient(f).CreateBatch(ctx, realPods)
			ginkgo.By("Making sure all containers restart the specified number of times")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				for _, podSpec := range test.testPods {
					err := verifyPodRestartCount(ctx, f, podSpec.podName, podSpec.numContainers, podSpec.restartCount)
					if err != nil {
						return err
					}
				}
				return nil
			}, setupDuration, runtimePollInterval).Should(gomega.BeNil())
		})

		ginkgo.It("Should eventually garbage collect containers when we exceed the number of dead containers per container", func(ctx context.Context) {
			totalContainers := 0
			for _, pod := range test.testPods {
				totalContainers += pod.numContainers*2 + 1
			}
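			// Wait for the kubelet's GC loop to trim dead containers: no
			// container should keep more than maxPerPodContainer dead copies
			// (plus the one running copy), and the node-wide total should stay
			// within maxTotalContainers when that limit is configured.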
			gomega.Eventually(ctx, func() error {
				total := 0
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					total += len(containerNames)
					// Count the copies of each container in the pod; the
					// kubelet should keep at most maxPerPodContainer dead
					// copies in addition to the running one.
					for i := 0; i < pod.numContainers; i++ {
						containerCount := 0
						for _, containerName := range containerNames {
							if containerName == pod.getContainerName(i) {
								containerCount++
							}
						}
						if containerCount > maxPerPodContainer+1 {
							return fmt.Errorf("expected number of copies of container: %s, to be <= maxPerPodContainer+1: %d; list of containers: %v",
								pod.getContainerName(i), maxPerPodContainer+1, containerNames)
						}
					}
				}
				// Check maxTotalContainers. With the default of -1, this check
				// never runs.
				if maxTotalContainers > 0 && totalContainers <= maxTotalContainers && total > maxTotalContainers {
					return fmt.Errorf("expected total number of containers: %v, to be <= maxTotalContainers: %v", total, maxTotalContainers)
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())

			// The kubelet should only keep an extra dead copy of each
			// restarted container when neither limit forces it to be removed.
			// With maxPerPodContainer = 1 this branch is skipped.
			if maxPerPodContainer >= 2 && maxTotalContainers < 0 {
				ginkgo.By("Making sure the kubelet consistently keeps around an extra copy of each container.")
				gomega.Consistently(ctx, func() error {
					for _, pod := range test.testPods {
						containerNames, err := pod.getContainerNames()
						if err != nil {
							return err
						}
						for i := 0; i < pod.numContainers; i++ {
							containerCount := 0
							for _, containerName := range containerNames {
								if containerName == pod.getContainerName(i) {
									containerCount++
								}
							}
							if pod.restartCount > 0 && containerCount < maxPerPodContainer+1 {
								return fmt.Errorf("expected pod %v to have extra copies of old containers", pod.podName)
							}
						}
					}
					return nil
				}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())
			}
		})
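		// Clean up: delete the test pods, then make sure the runtime
		// eventually reports no containers left for them, so one test run
		// cannot leak containers into the next.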
		ginkgo.AfterEach(func(ctx context.Context) {
			for _, pod := range test.testPods {
				ginkgo.By(fmt.Sprintf("Deleting Pod %v", pod.podName))
				e2epod.NewPodClient(f).DeleteSync(ctx, pod.podName, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
			}

			ginkgo.By("Making sure all containers get cleaned up")
			gomega.Eventually(ctx, func() error {
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					if len(containerNames) > 0 {
						return fmt.Errorf("%v containers still remain", containerNames)
					}
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())

			if ginkgo.CurrentSpecReport().Failed() && framework.TestContext.DumpLogsOnFailure {
				logNodeEvents(ctx, f)
				logPodEvents(ctx, f)
			}
		})
	})
}

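// getPods builds the pod objects for the given specs. Each pod runs
// spec.numContainers busybox containers that share one emptyDir volume and
// restart spec.restartCount times before settling into an infinite sleep loop.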
func getPods(specs []*testPodSpec) (pods []*v1.Pod) {
	for _, spec := range specs {
		ginkgo.By(fmt.Sprintf("Creating %v containers with restartCount: %v", spec.numContainers, spec.restartCount))
		containers := []v1.Container{}
		for i := 0; i < spec.numContainers; i++ {
			containers = append(containers, v1.Container{
				Image:   busyboxImage,
				Name:    spec.getContainerName(i),
				Command: getRestartingContainerCommand("/test-empty-dir-mnt", i, spec.restartCount, ""),
				VolumeMounts: []v1.VolumeMount{
					{MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"},
				},
			})
		}
		pods = append(pods, &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: spec.podName},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyAlways,
				Containers:    containers,
				Volumes: []v1.Volume{
					{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
				},
			},
		})
	}
	return
}

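// getRestartingContainerCommand returns a shell command that appends a line to
// a per-container count file on the shared volume each time the container
// starts, exits (so the kubelet restarts it) until the file has restarts+1
// lines, and then loops forever. For example (illustrative expansion, not part
// of the test), path="/test-empty-dir-mnt", containerNum=0, restarts=3 and an
// empty loopingCommand produce roughly:
//
//	f=/test-empty-dir-mnt/countfile0
//	count=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})
//	if [ $count -lt 4 ]; then
//		exit 0
//	fi
//	while true; do  sleep 1; done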
func getRestartingContainerCommand(path string, containerNum int, restarts int32, loopingCommand string) []string {
	return []string{
		"sh",
		"-c",
		fmt.Sprintf(`
f=%s/countfile%s
count=$(echo 'hello' >> $f ; wc -l $f | awk {'print $1'})
if [ $count -lt %d ]; then
	exit 0
fi
while true; do %s sleep 1; done`,
			path, strconv.Itoa(containerNum), restarts+1, loopingCommand),
	}
}

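// verifyPodRestartCount returns an error unless the named pod reports exactly
// expectedNumContainers container statuses, each with exactly
// expectedRestartCount restarts.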
func verifyPodRestartCount(ctx context.Context, f *framework.Framework, podName string, expectedNumContainers int, expectedRestartCount int32) error {
	updatedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, podName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if len(updatedPod.Status.ContainerStatuses) != expectedNumContainers {
		return fmt.Errorf("expected pod %s to have %d containers, actual: %d",
			updatedPod.Name, expectedNumContainers, len(updatedPod.Status.ContainerStatuses))
	}
	for _, containerStatus := range updatedPod.Status.ContainerStatuses {
		if containerStatus.RestartCount != expectedRestartCount {
			return fmt.Errorf("pod %s had a container with restart count %d; expected exactly %d",
				updatedPod.Name, containerStatus.RestartCount, expectedRestartCount)
		}
	}
	return nil
}