package node

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"math/rand"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/util/retry"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/test/e2e/framework"
	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"
	utilpointer "k8s.io/utils/pointer"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/expfmt"
)

var _ = SIGDescribe("Pods Extended", func() {
	f := framework.NewDefaultFramework("pods")
	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline

	ginkgo.Describe("Delete Grace Period", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

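		// Create a pod, verify it can be listed by its label, then delete it with
		// a 30s grace period and confirm that the kubelet observes the termination
		// notice and the pod is removed from the API server.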
		ginkgo.It("should be submitted and removed", func(ctx context.Context) {
			ginkgo.By("creating the pod")
			name := "pod-submit-remove-" + string(uuid.NewUUID())
			value := strconv.Itoa(time.Now().Nanosecond())
			pod := e2epod.NewAgnhostPod(f.Namespace.Name, name, nil, nil, nil)
			pod.ObjectMeta.Labels = map[string]string{
				"name": "foo",
				"time": value,
			}

			ginkgo.By("setting up selector")
			selector := labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options := metav1.ListOptions{LabelSelector: selector.String()}
			pods, err := podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pods.Items).To(gomega.BeEmpty())

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			ginkgo.By("verifying the pod is in kubernetes")
			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options = metav1.ListOptions{LabelSelector: selector.String()}
			pods, err = podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pods.Items).To(gomega.HaveLen(1))

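			// Wait for the pod to be scheduled and running before issuing the graceful delete.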
			framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))

			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to GET scheduled pod")

			ginkgo.By("deleting the pod gracefully")
			var lastPod v1.Pod
			var statusCode int
			err = f.ClientSet.CoreV1().RESTClient().Delete().AbsPath("/api/v1/namespaces", pod.Namespace, "pods", pod.Name).Param("gracePeriodSeconds", "30").Do(ctx).StatusCode(&statusCode).Into(&lastPod)
			framework.ExpectNoError(err, "failed to use http client to send delete")
			gomega.Expect(statusCode).To(gomega.Equal(http.StatusOK), "failed to delete gracefully by client request")

			ginkgo.By("verifying the kubelet observed the termination notice")

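			// Poll the kubelet's pod list: once the termination notice has been
			// observed the pod reports a deletion timestamp, and it should
			// eventually disappear from the list entirely.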
			start := time.Now()
			err = wait.Poll(time.Second*5, time.Second*30*3, func() (bool, error) {
				podList, err := e2ekubelet.GetKubeletPods(ctx, f.ClientSet, pod.Spec.NodeName)
				if err != nil {
					framework.Logf("Unable to retrieve kubelet pods for node %v: %v", pod.Spec.NodeName, err)
					return false, nil
				}
				for _, kubeletPod := range podList.Items {
					if pod.Name != kubeletPod.Name || pod.Namespace != kubeletPod.Namespace {
						continue
					}
					if kubeletPod.ObjectMeta.DeletionTimestamp == nil {
						framework.Logf("deletion has not yet been observed")
						return false, nil
					}
					data, _ := json.Marshal(kubeletPod)
					framework.Logf("start=%s, now=%s, kubelet pod: %s", start, time.Now(), string(data))
					return false, nil
				}
				framework.Logf("no pod exists with the name we were looking for, assuming the termination request was observed and completed")
				return true, nil
			})
			framework.ExpectNoError(err, "kubelet never observed the termination notice")

			gomega.Expect(lastPod.DeletionTimestamp).ToNot(gomega.BeNil())
			gomega.Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(gomega.BeZero())

			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
			options = metav1.ListOptions{LabelSelector: selector.String()}
			pods, err = podClient.List(ctx, options)
			framework.ExpectNoError(err, "failed to query for pods")
			gomega.Expect(pods.Items).To(gomega.BeEmpty())
		})
	})

	ginkgo.Describe("Pods Set QOS Class", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

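		// A pod whose container requests equal its limits for both cpu and memory
		// should be assigned the Guaranteed QOS class.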
		framework.ConformanceIt("should be set on Pods with matching resource requests and limits for memory and cpu", func(ctx context.Context) {
			ginkgo.By("creating the pod")
			name := "pod-qos-class-" + string(uuid.NewUUID())
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
					Labels: map[string]string{
						"name": name,
					},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "agnhost",
							Image: imageutils.GetE2EImage(imageutils.Agnhost),
							Args:  []string{"pause"},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									v1.ResourceCPU:    resource.MustParse("100m"),
									v1.ResourceMemory: resource.MustParse("100Mi"),
								},
								Requests: v1.ResourceList{
									v1.ResourceCPU:    resource.MustParse("100m"),
									v1.ResourceMemory: resource.MustParse("100Mi"),
								},
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			ginkgo.By("verifying QOS class is set on the pod")
			pod, err := podClient.Get(ctx, name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")
			gomega.Expect(pod.Status.QOSClass).To(gomega.Equal(v1.PodQOSGuaranteed))
		})
	})

	ginkgo.Describe("Pod Container Status", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("should never report success for a pending container", func(ctx context.Context) {
			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
			createAndTestPodRepeatedly(ctx,
				3, 15,
				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000},
				podClient.PodInterface,
			)
		})
		ginkgo.It("should never report container start when an init container fails", func(ctx context.Context) {
			ginkgo.By("creating pods with an init container that always exits 1 and terminating the pod after a random delay")
			createAndTestPodRepeatedly(ctx,
				3, 15,
				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000, initContainer: true},
				podClient.PodInterface,
			)
		})
	})

	ginkgo.Describe("Pod Container lifecycle", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("should not create extra sandbox if all containers are done", func(ctx context.Context) {
			ginkgo.By("creating the pod that should always exit 0")

			name := "pod-always-succeed" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					InitContainers: []v1.Container{
						{
							Name:  "foo",
							Image: image,
							Command: []string{
								"/bin/true",
							},
						},
					},
					Containers: []v1.Container{
						{
							Name:  "bar",
							Image: image,
							Command: []string{
								"/bin/true",
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			createdPod := podClient.Create(ctx, pod)
			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})

			framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))

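			// Collect kubelet-sourced events for the pod; a SandboxChanged event
			// would indicate that an extra sandbox was created after all containers
			// had already completed.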
			var eventList *v1.EventList
			var err error
			ginkgo.By("Getting events about the pod")
			framework.ExpectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
				selector := fields.Set{
					"involvedObject.kind":      "Pod",
					"involvedObject.uid":       string(createdPod.UID),
					"involvedObject.namespace": f.Namespace.Name,
					"source":                   "kubelet",
				}.AsSelector().String()
				options := metav1.ListOptions{FieldSelector: selector}
				eventList, err = f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, options)
				if err != nil {
					return false, err
				}
				if len(eventList.Items) > 0 {
					return true, nil
				}
				return false, nil
			}))

			ginkgo.By("Checking events about the pod")
			for _, event := range eventList.Items {
				if event.Reason == events.SandboxChanged {
					framework.Fail("Unexpected SandboxChanged event")
				}
			}
		})

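		// The container writes well past its 5Mi ephemeral-storage limit, so the
		// kubelet should evict the pod and leave it in a terminal state.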
		ginkgo.It("evicted pods should be terminal", func(ctx context.Context) {
			ginkgo.By("creating the pod that should be evicted")

			name := "pod-should-be-evicted" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					Containers: []v1.Container{
						{
							Name:  "bar",
							Image: image,
							Command: []string{
								"/bin/sh", "-c", "sleep 10; dd if=/dev/zero of=file bs=1M count=10; sleep 10000",
							},
							Resources: v1.ResourceRequirements{
								Limits: v1.ResourceList{
									"ephemeral-storage": resource.MustParse("5Mi"),
								},
							},
						},
					},
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)
			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})

			err := e2epod.WaitForPodTerminatedInNamespace(ctx, f.ClientSet, pod.Name, "Evicted", f.Namespace.Name)
			if err != nil {
				framework.Failf("error waiting for pod to be evicted: %v", err)
			}
		})
	})

	ginkgo.Describe("Pod TerminationGracePeriodSeconds is negative", func() {
		var podClient *e2epod.PodClient
		ginkgo.BeforeEach(func() {
			podClient = e2epod.NewPodClient(f)
		})

		ginkgo.It("pod with negative grace period", func(ctx context.Context) {
			name := "pod-negative-grace-period" + string(uuid.NewUUID())
			image := imageutils.GetE2EImage(imageutils.BusyBox)
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: name,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyOnFailure,
					Containers: []v1.Container{
						{
							Name:  "foo",
							Image: image,
							Command: []string{
								"/bin/sh", "-c", "sleep 10000",
							},
						},
					},
					TerminationGracePeriodSeconds: utilpointer.Int64(-1),
				},
			}

			ginkgo.By("submitting the pod to kubernetes")
			podClient.Create(ctx, pod)

			pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")

			// The API server is expected to accept the negative value and
			// normalize it to a grace period of 1 second.
			if pod.Spec.TerminationGracePeriodSeconds == nil {
				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
			}

			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
			}

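			// Updating the pod with another negative grace period should likewise
			// be accepted and normalized to 1.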
			err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
				pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
				framework.ExpectNoError(err, "failed to query for pod")
				ginkgo.By("updating the pod to have a negative TerminationGracePeriodSeconds")
				pod.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(-1)
				_, err = podClient.PodInterface.Update(ctx, pod, metav1.UpdateOptions{})
				return err
			})
			framework.ExpectNoError(err, "failed to update pod")

			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err, "failed to query for pod")

			if pod.Spec.TerminationGracePeriodSeconds == nil {
				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
			}

			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
			}

			ginkgo.DeferCleanup(func(ctx context.Context) error {
				ginkgo.By("deleting the pod")
				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
			})
		})
	})

})

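// createAndTestPodRepeatedly runs `workers` goroutines that each create
// `iterations` pods from the scenario, watch their status transitions, apply
// the scenario's action (for example, deleting the pod after a random delay),
// and verify the observed events. Per-node pod lifecycle latencies are
// recorded in a Prometheus summary and logged at the end.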
func createAndTestPodRepeatedly(ctx context.Context, workers, iterations int, scenario podScenario, podClient v1core.PodInterface) {
	var (
		lock sync.Mutex
		errs []error

		wg sync.WaitGroup
	)

	// Track pod lifecycle latency quantiles, labeled by node.
	r := prometheus.NewRegistry()
	h := prometheus.NewSummaryVec(prometheus.SummaryOpts{
		Name: "latency",
		Objectives: map[float64]float64{
			0.5:  0.05,
			0.75: 0.025,
			0.9:  0.01,
			0.99: 0.001,
		},
	}, []string{"node"})
	r.MustRegister(h)

	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(i int) {
			defer ginkgo.GinkgoRecover()
			defer wg.Done()
			for retries := 0; retries < iterations; retries++ {
				pod := scenario.Pod(i, retries)

				start := time.Now()
				created, err := podClient.Create(ctx, pod, metav1.CreateOptions{})
				framework.ExpectNoError(err, "failed to create pod")

				// Watch the pod from its creation resourceVersion and collect
				// events until the scenario reports its terminal event.
				ch := make(chan []watch.Event)
				waitForWatch := make(chan struct{})
				go func() {
					defer ginkgo.GinkgoRecover()
					defer close(ch)
					w, err := podClient.Watch(ctx, metav1.ListOptions{
						ResourceVersion: created.ResourceVersion,
						FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
					})
					if err != nil {
						framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
						return
					}
					defer w.Stop()
					close(waitForWatch)
					events := []watch.Event{
						{Type: watch.Added, Object: created},
					}
					for event := range w.ResultChan() {
						events = append(events, event)
						if event.Type == watch.Error {
							framework.Logf("watch error seen for %s: %#v", pod.Name, event.Object)
						}
						if scenario.IsLastEvent(event) {
							framework.Logf("watch last event seen for %s", pod.Name)
							break
						}
					}
					ch <- events
				}()

				// Wait for the watch to be established (or for the watcher to
				// give up) before taking the scenario action.
				select {
				case <-ch:
				case <-waitForWatch:
				}

				verifier, scenario, err := scenario.Action(ctx, pod)
				framework.ExpectNoError(err, "failed to take action")

				var (
					events []watch.Event
					ok     bool
				)
				select {
				case events, ok = <-ch:
					if !ok {
						continue
					}
					if len(events) < 2 {
						framework.Fail("only got a single event")
					}
				case <-time.After(5 * time.Minute):
					framework.Failf("timed out waiting for watch events for %s", pod.Name)
				}

				end := time.Now()

				// Skip the synthetic Added event injected above and verify each
				// observed transition.
				var eventErr error
				for _, event := range events[1:] {
					if err := verifier.Verify(event); err != nil {
						eventErr = err
						break
					}
				}

				total := end.Sub(start)

				var lastPod *v1.Pod = pod
				func() {
					lock.Lock()
					defer lock.Unlock()

					if eventErr != nil {
						errs = append(errs, eventErr)
						return
					}
					pod, verifyErrs := verifier.VerifyFinal(scenario, total)
					if pod != nil {
						lastPod = pod
					}
					errs = append(errs, verifyErrs...)
				}()

				h.WithLabelValues(lastPod.Spec.NodeName).Observe(total.Seconds())
			}
		}(i)
	}

	wg.Wait()

	if len(errs) > 0 {
		var messages []string
		for _, err := range errs {
			messages = append(messages, err.Error())
		}
		framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
	}
	values, _ := r.Gather()
	var buf bytes.Buffer
	for _, m := range values {
		expfmt.MetricFamilyToText(&buf, m)
	}
	framework.Logf("Summary of latencies:\n%s", buf.String())
}

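// podScenario produces the pods under test, performs an action against each
// created pod, and decides when the watch for a pod is complete.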
type podScenario interface {
	Pod(worker, attempt int) *v1.Pod
	Action(context.Context, *v1.Pod) (podScenarioVerifier, string, error)
	IsLastEvent(event watch.Event) bool
}

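// podScenarioVerifier checks each observed watch event and the final pod
// state for invariant violations.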
type podScenarioVerifier interface {
	Verify(event watch.Event) error
	VerifyFinal(scenario string, duration time.Duration) (*v1.Pod, []error)
}

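// podFastDeleteScenario creates pods that are expected to fail quickly and
// deletes each one after a random delay of up to delayMs milliseconds.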
type podFastDeleteScenario struct {
	client  v1core.PodInterface
	delayMs int

	initContainer bool
}

func (s podFastDeleteScenario) Verifier(pod *v1.Pod) podScenarioVerifier {
	return &podStartVerifier{}
}

// IsLastEvent treats the pod's deletion as the end of the watch.
func (s podFastDeleteScenario) IsLastEvent(event watch.Event) bool {
	if event.Type == watch.Deleted {
		return true
	}
	return false
}

// Action sleeps for a random fraction of delayMs and then deletes the pod.
func (s podFastDeleteScenario) Action(ctx context.Context, pod *v1.Pod) (podScenarioVerifier, string, error) {
	t := time.Duration(rand.Intn(s.delayMs)) * time.Millisecond
	scenario := fmt.Sprintf("t=%s", t)
	time.Sleep(t)
	return &podStartVerifier{pod: pod}, scenario, s.client.Delete(ctx, pod.Name, metav1.DeleteOptions{})
}

// Pod returns a pod with a container named "fail" that always exits 1. With
// initContainer set, the failing container runs as an init container and the
// app container, named "blocked", should never start.
func (s podFastDeleteScenario) Pod(worker, attempt int) *v1.Pod {
	name := fmt.Sprintf("pod-terminate-status-%d-%d", worker, attempt)
	value := strconv.Itoa(time.Now().Nanosecond())
	one := int64(1)
	if s.initContainer {
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: name,
				Labels: map[string]string{
					"name": "foo",
					"time": value,
				},
			},
			Spec: v1.PodSpec{
				RestartPolicy:                 v1.RestartPolicyNever,
				TerminationGracePeriodSeconds: &one,
				InitContainers: []v1.Container{
					{
						Name:  "fail",
						Image: imageutils.GetE2EImage(imageutils.BusyBox),
						Command: []string{
							"/bin/false",
						},
						Resources: v1.ResourceRequirements{
							Requests: v1.ResourceList{
								v1.ResourceCPU:    resource.MustParse("5m"),
								v1.ResourceMemory: resource.MustParse("10Mi"),
							},
						},
					},
				},
				Containers: []v1.Container{
					{
						Name:  "blocked",
						Image: imageutils.GetE2EImage(imageutils.BusyBox),
						Command: []string{
							"/bin/true",
						},
						Resources: v1.ResourceRequirements{
							Requests: v1.ResourceList{
								v1.ResourceCPU:    resource.MustParse("5m"),
								v1.ResourceMemory: resource.MustParse("10Mi"),
							},
						},
					},
				},
			},
		}
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
			Labels: map[string]string{
				"name": "foo",
				"time": value,
			},
		},
		Spec: v1.PodSpec{
			RestartPolicy:                 v1.RestartPolicyNever,
			TerminationGracePeriodSeconds: &one,
			Containers: []v1.Container{
				{
					Name:  "fail",
					Image: imageutils.GetE2EImage(imageutils.BusyBox),
					Command: []string{
						"/bin/false",
					},
					Resources: v1.ResourceRequirements{
						Requests: v1.ResourceList{
							v1.ResourceCPU:    resource.MustParse("5m"),
							v1.ResourceMemory: resource.MustParse("10Mi"),
						},
					},
				},
			},
		},
	}
}

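// podStartVerifier accumulates observations from the watch events of a single
// pod and flags container statuses that should never be reported, such as a
// terminated container changing state again or a terminal phase reverting.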
type podStartVerifier struct {
	pod                  *v1.Pod
	hasInitContainers    bool
	hasContainers        bool
	hasTerminated        bool
	hasRunningContainers bool
	hasTerminalPhase     bool
	duration             time.Duration
	completeDuration     time.Duration
}

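// reBug88766 matches container start errors carrying the symptoms of
// https://github.com/kubernetes/kubernetes/issues/88766, where a secret or
// projected volume mount is missing while the container rootfs is set up.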
var reBug88766 = regexp.MustCompile(`rootfs_linux.*kubernetes\.io~(secret|projected).*no such file or directory`)

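// Verify checks a single watch event against the accumulated state: container
// statuses must not regress, the "blocked" container must never start, a
// terminated "fail" container must stay terminated with an accepted exit code,
// and a terminal pod phase must not revert.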
func (v *podStartVerifier) Verify(event watch.Event) error {
	var ok bool
	pod, ok := event.Object.(*v1.Pod)
	if !ok {
		framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
		return nil
	}
	v.pod = pod

	if len(pod.Spec.InitContainers) > 0 {
		if len(pod.Status.InitContainerStatuses) == 0 {
			if v.hasInitContainers {
				return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
			}
			return nil
		}
		v.hasInitContainers = true
		if len(pod.Status.InitContainerStatuses) != 1 {
			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
		}

	} else {
		if len(pod.Status.InitContainerStatuses) != 0 {
			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
		}
	}

	if len(pod.Status.ContainerStatuses) == 0 {
		if v.hasContainers {
			return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
		}
		return nil
	}
	v.hasContainers = true
	if len(pod.Status.ContainerStatuses) != 1 {
		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
	}

	if status := e2epod.FindContainerStatusInPod(pod, "blocked"); status != nil {
		if (status.Started != nil && *status.Started) || status.LastTerminationState.Terminated != nil || status.State.Waiting == nil {
			return fmt.Errorf("pod %s on node %s should not have started the blocked container: %#v", pod.Name, pod.Spec.NodeName, status)
		}
	}

	status := e2epod.FindContainerStatusInPod(pod, "fail")
	if status == nil {
		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
	}

	t := status.State.Terminated
	if v.hasTerminated {
		if status.State.Waiting != nil || status.State.Running != nil {
			return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
		}
		if t == nil {
			return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
		}
	}
	var hasNoStartTime bool
	v.hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
	if t != nil {
		if !t.FinishedAt.Time.IsZero() {
			if t.StartedAt.IsZero() {
				hasNoStartTime = true
			} else {
				v.duration = t.FinishedAt.Sub(t.StartedAt.Time)
			}
			v.completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
		}

		defer func() { v.hasTerminated = true }()
		switch {
		case t.ExitCode == 1:
			// expected: the "fail" container runs /bin/false and exits 1
		case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"):
			// tolerated: the container may have been force-killed (SIGKILL) during pod teardown
		case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):
			// tolerated: known symptom of kubernetes#88766, logged below
			framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
		default:
			data, _ := json.MarshalIndent(pod.Status, "", " ")
			framework.Logf("pod %s on node %s had incorrect final status:\n%s", pod.Name, pod.Spec.NodeName, string(data))
			return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
		}
		switch {
		case v.duration > time.Hour:
			// a container that exits almost immediately should never report a duration this long
			return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, v.duration, t.StartedAt, t.FinishedAt)
		case hasNoStartTime:
			// a finish time without a start time indicates a bogus status
			return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt)
		}
	}
	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
		v.hasTerminalPhase = true
	} else {
		if v.hasTerminalPhase {
			return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
		}
	}
	return nil
}

func (v *podStartVerifier) VerifyFinal(scenario string, total time.Duration) (*v1.Pod, []error) {
	var errs []error
	pod := v.pod
	if !v.hasTerminalPhase {
		var names []string
		for _, status := range pod.Status.ContainerStatuses {
			if status.State.Running != nil {
				names = append(names, status.Name)
			}
		}
		switch {
		case len(names) > 0:
			errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
		case pod.Status.Phase != v1.PodPending:
			errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
		}
	}
	if v.hasRunningContainers {
		data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", " ")
		errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
	}

	framework.Logf("Pod %s on node %s %s total=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, scenario, total, v.completeDuration, v.duration)
	return pod, errs
}