...

Source file src/k8s.io/kubernetes/test/e2e/node/pods.go

Documentation: k8s.io/kubernetes/test/e2e/node

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package node
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  	"fmt"
    24  	"math/rand"
    25  	"net/http"
    26  	"regexp"
    27  	"strconv"
    28  	"strings"
    29  	"sync"
    30  	"time"
    31  
    32  	v1 "k8s.io/api/core/v1"
    33  	"k8s.io/apimachinery/pkg/api/resource"
    34  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    35  	"k8s.io/apimachinery/pkg/fields"
    36  	"k8s.io/apimachinery/pkg/labels"
    37  	"k8s.io/apimachinery/pkg/util/uuid"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  	"k8s.io/apimachinery/pkg/watch"
    40  	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
    41  	"k8s.io/client-go/util/retry"
    42  	"k8s.io/kubernetes/pkg/kubelet/events"
    43  	"k8s.io/kubernetes/test/e2e/framework"
    44  	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
    45  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    46  	imageutils "k8s.io/kubernetes/test/utils/image"
    47  	admissionapi "k8s.io/pod-security-admission/api"
    48  	utilpointer "k8s.io/utils/pointer"
    49  
    50  	"github.com/onsi/ginkgo/v2"
    51  	"github.com/onsi/gomega"
    52  	"github.com/prometheus/client_golang/prometheus"
    53  	"github.com/prometheus/common/expfmt"
    54  )
    55  
    56  var _ = SIGDescribe("Pods Extended", func() {
    57  	f := framework.NewDefaultFramework("pods")
    58  	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
    59  
    60  	ginkgo.Describe("Delete Grace Period", func() {
    61  		var podClient *e2epod.PodClient
    62  		ginkgo.BeforeEach(func() {
    63  			podClient = e2epod.NewPodClient(f)
    64  		})
    65  
    66  		/*
    67  			Release: v1.15
    68  			Testname: Pods, delete grace period
     69  			Description: Create a pod and make sure it is running. Using the HTTP client, send a 'delete' with gracePeriodSeconds=30. The Pod SHOULD get terminated within gracePeriodSeconds and removed from the API server within a bounded window.
    70  		*/
    71  		ginkgo.It("should be submitted and removed", func(ctx context.Context) {
    72  			ginkgo.By("creating the pod")
    73  			name := "pod-submit-remove-" + string(uuid.NewUUID())
    74  			value := strconv.Itoa(time.Now().Nanosecond())
    75  			pod := e2epod.NewAgnhostPod(f.Namespace.Name, name, nil, nil, nil)
    76  			pod.ObjectMeta.Labels = map[string]string{
    77  				"name": "foo",
    78  				"time": value,
    79  			}
    80  
    81  			ginkgo.By("setting up selector")
    82  			selector := labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
    83  			options := metav1.ListOptions{LabelSelector: selector.String()}
    84  			pods, err := podClient.List(ctx, options)
    85  			framework.ExpectNoError(err, "failed to query for pod")
    86  			gomega.Expect(pods.Items).To(gomega.BeEmpty())
    87  
    88  			ginkgo.By("submitting the pod to kubernetes")
    89  			podClient.Create(ctx, pod)
    90  
    91  			ginkgo.By("verifying the pod is in kubernetes")
    92  			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
    93  			options = metav1.ListOptions{LabelSelector: selector.String()}
    94  			pods, err = podClient.List(ctx, options)
    95  			framework.ExpectNoError(err, "failed to query for pod")
    96  			gomega.Expect(pods.Items).To(gomega.HaveLen(1))
    97  
    98  			// We need to wait for the pod to be running, otherwise the deletion
    99  			// may be carried out immediately rather than gracefully.
   100  			framework.ExpectNoError(e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))
   101  			// save the running pod
   102  			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
   103  			framework.ExpectNoError(err, "failed to GET scheduled pod")
   104  
   105  			ginkgo.By("deleting the pod gracefully")
   106  			var lastPod v1.Pod
   107  			var statusCode int
   108  			err = f.ClientSet.CoreV1().RESTClient().Delete().AbsPath("/api/v1/namespaces", pod.Namespace, "pods", pod.Name).Param("gracePeriodSeconds", "30").Do(ctx).StatusCode(&statusCode).Into(&lastPod)
   109  			framework.ExpectNoError(err, "failed to use http client to send delete")
   110  			gomega.Expect(statusCode).To(gomega.Equal(http.StatusOK), "failed to delete gracefully by client request")
   111  
   112  			ginkgo.By("verifying the kubelet observed the termination notice")
   113  
    114  			// Allow up to 3x the grace period (which allows for process termination)
    115  			// for the kubelet to remove the pod from the API. Need to follow up on whether this
    116  			// latency between termination and reporting can be isolated further.
   117  			start := time.Now()
   118  			err = wait.Poll(time.Second*5, time.Second*30*3, func() (bool, error) {
   119  				podList, err := e2ekubelet.GetKubeletPods(ctx, f.ClientSet, pod.Spec.NodeName)
   120  				if err != nil {
   121  					framework.Logf("Unable to retrieve kubelet pods for node %v: %v", pod.Spec.NodeName, err)
   122  					return false, nil
   123  				}
   124  				for _, kubeletPod := range podList.Items {
   125  					if pod.Name != kubeletPod.Name || pod.Namespace != kubeletPod.Namespace {
   126  						continue
   127  					}
   128  					if kubeletPod.ObjectMeta.DeletionTimestamp == nil {
   129  						framework.Logf("deletion has not yet been observed")
   130  						return false, nil
   131  					}
   132  					data, _ := json.Marshal(kubeletPod)
   133  					framework.Logf("start=%s, now=%s, kubelet pod: %s", start, time.Now(), string(data))
   134  					return false, nil
   135  				}
   136  				framework.Logf("no pod exists with the name we were looking for, assuming the termination request was observed and completed")
   137  				return true, nil
   138  			})
   139  			framework.ExpectNoError(err, "kubelet never observed the termination notice")
   140  
   141  			gomega.Expect(lastPod.DeletionTimestamp).ToNot(gomega.BeNil())
   142  			gomega.Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(gomega.BeZero())
   143  
   144  			selector = labels.SelectorFromSet(labels.Set(map[string]string{"time": value}))
   145  			options = metav1.ListOptions{LabelSelector: selector.String()}
   146  			pods, err = podClient.List(ctx, options)
   147  			framework.ExpectNoError(err, "failed to query for pods")
   148  			gomega.Expect(pods.Items).To(gomega.BeEmpty())
   149  		})
   150  	})
   151  
   152  	ginkgo.Describe("Pods Set QOS Class", func() {
   153  		var podClient *e2epod.PodClient
   154  		ginkgo.BeforeEach(func() {
   155  			podClient = e2epod.NewPodClient(f)
   156  		})
   157  
   158  		/*
   159  			Release: v1.9
   160  			Testname: Pods, QOS
    161  			Description: Create a Pod with equal CPU and Memory requests and limits. Pod status MUST have QOSClass set to PodQOSGuaranteed.
   162  		*/
   163  		framework.ConformanceIt("should be set on Pods with matching resource requests and limits for memory and cpu", func(ctx context.Context) {
   164  			ginkgo.By("creating the pod")
   165  			name := "pod-qos-class-" + string(uuid.NewUUID())
   166  			pod := &v1.Pod{
   167  				ObjectMeta: metav1.ObjectMeta{
   168  					Name: name,
   169  					Labels: map[string]string{
   170  						"name": name,
   171  					},
   172  				},
   173  				Spec: v1.PodSpec{
   174  					Containers: []v1.Container{
   175  						{
   176  							Name:  "agnhost",
   177  							Image: imageutils.GetE2EImage(imageutils.Agnhost),
   178  							Args:  []string{"pause"},
   179  							Resources: v1.ResourceRequirements{
   180  								Limits: v1.ResourceList{
   181  									v1.ResourceCPU:    resource.MustParse("100m"),
   182  									v1.ResourceMemory: resource.MustParse("100Mi"),
   183  								},
   184  								Requests: v1.ResourceList{
   185  									v1.ResourceCPU:    resource.MustParse("100m"),
   186  									v1.ResourceMemory: resource.MustParse("100Mi"),
   187  								},
   188  							},
   189  						},
   190  					},
   191  				},
   192  			}
   193  
   194  			ginkgo.By("submitting the pod to kubernetes")
   195  			podClient.Create(ctx, pod)
   196  
   197  			ginkgo.By("verifying QOS class is set on the pod")
   198  			pod, err := podClient.Get(ctx, name, metav1.GetOptions{})
   199  			framework.ExpectNoError(err, "failed to query for pod")
   200  			gomega.Expect(pod.Status.QOSClass).To(gomega.Equal(v1.PodQOSGuaranteed))
   201  		})
   202  	})
   203  
   204  	ginkgo.Describe("Pod Container Status", func() {
   205  		var podClient *e2epod.PodClient
   206  		ginkgo.BeforeEach(func() {
   207  			podClient = e2epod.NewPodClient(f)
   208  		})
   209  
   210  		ginkgo.It("should never report success for a pending container", func(ctx context.Context) {
   211  			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
   212  			createAndTestPodRepeatedly(ctx,
   213  				3, 15,
   214  				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000},
   215  				podClient.PodInterface,
   216  			)
   217  		})
   218  		ginkgo.It("should never report container start when an init container fails", func(ctx context.Context) {
   219  			ginkgo.By("creating pods with an init container that always exit 1 and terminating the pod after a random delay")
   220  			createAndTestPodRepeatedly(ctx,
   221  				3, 15,
   222  				podFastDeleteScenario{client: podClient.PodInterface, delayMs: 2000, initContainer: true},
   223  				podClient.PodInterface,
   224  			)
   225  		})
   226  	})
   227  
   228  	ginkgo.Describe("Pod Container lifecycle", func() {
   229  		var podClient *e2epod.PodClient
   230  		ginkgo.BeforeEach(func() {
   231  			podClient = e2epod.NewPodClient(f)
   232  		})
   233  
   234  		ginkgo.It("should not create extra sandbox if all containers are done", func(ctx context.Context) {
   235  			ginkgo.By("creating the pod that should always exit 0")
   236  
   237  			name := "pod-always-succeed" + string(uuid.NewUUID())
   238  			image := imageutils.GetE2EImage(imageutils.BusyBox)
   239  			pod := &v1.Pod{
   240  				ObjectMeta: metav1.ObjectMeta{
   241  					Name: name,
   242  				},
   243  				Spec: v1.PodSpec{
   244  					RestartPolicy: v1.RestartPolicyOnFailure,
   245  					InitContainers: []v1.Container{
   246  						{
   247  							Name:  "foo",
   248  							Image: image,
   249  							Command: []string{
   250  								"/bin/true",
   251  							},
   252  						},
   253  					},
   254  					Containers: []v1.Container{
   255  						{
   256  							Name:  "bar",
   257  							Image: image,
   258  							Command: []string{
   259  								"/bin/true",
   260  							},
   261  						},
   262  					},
   263  				},
   264  			}
   265  
   266  			ginkgo.By("submitting the pod to kubernetes")
   267  			createdPod := podClient.Create(ctx, pod)
   268  			ginkgo.DeferCleanup(func(ctx context.Context) error {
   269  				ginkgo.By("deleting the pod")
   270  				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
   271  			})
   272  
   273  			framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name))
   274  
   275  			var eventList *v1.EventList
   276  			var err error
   277  			ginkgo.By("Getting events about the pod")
   278  			framework.ExpectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
   279  				selector := fields.Set{
   280  					"involvedObject.kind":      "Pod",
   281  					"involvedObject.uid":       string(createdPod.UID),
   282  					"involvedObject.namespace": f.Namespace.Name,
   283  					"source":                   "kubelet",
   284  				}.AsSelector().String()
   285  				options := metav1.ListOptions{FieldSelector: selector}
   286  				eventList, err = f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, options)
   287  				if err != nil {
   288  					return false, err
   289  				}
   290  				if len(eventList.Items) > 0 {
   291  					return true, nil
   292  				}
   293  				return false, nil
   294  			}))
   295  
   296  			ginkgo.By("Checking events about the pod")
   297  			for _, event := range eventList.Items {
   298  				if event.Reason == events.SandboxChanged {
   299  					framework.Fail("Unexpected SandboxChanged event")
   300  				}
   301  			}
   302  		})
   303  
   304  		ginkgo.It("evicted pods should be terminal", func(ctx context.Context) {
   305  			ginkgo.By("creating the pod that should be evicted")
   306  
   307  			name := "pod-should-be-evicted" + string(uuid.NewUUID())
   308  			image := imageutils.GetE2EImage(imageutils.BusyBox)
   309  			pod := &v1.Pod{
   310  				ObjectMeta: metav1.ObjectMeta{
   311  					Name: name,
   312  				},
   313  				Spec: v1.PodSpec{
   314  					RestartPolicy: v1.RestartPolicyOnFailure,
   315  					Containers: []v1.Container{
   316  						{
   317  							Name:  "bar",
   318  							Image: image,
   319  							Command: []string{
   320  								"/bin/sh", "-c", "sleep 10; dd if=/dev/zero of=file bs=1M count=10; sleep 10000",
   321  							},
   322  							Resources: v1.ResourceRequirements{
   323  								Limits: v1.ResourceList{
   324  									"ephemeral-storage": resource.MustParse("5Mi"),
   325  								},
   326  							}},
   327  					},
   328  				},
   329  			}
   330  
   331  			ginkgo.By("submitting the pod to kubernetes")
   332  			podClient.Create(ctx, pod)
   333  			ginkgo.DeferCleanup(func(ctx context.Context) error {
   334  				ginkgo.By("deleting the pod")
   335  				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
   336  			})
   337  
   338  			err := e2epod.WaitForPodTerminatedInNamespace(ctx, f.ClientSet, pod.Name, "Evicted", f.Namespace.Name)
   339  			if err != nil {
   340  				framework.Failf("error waiting for pod to be evicted: %v", err)
   341  			}
   342  
   343  		})
   344  	})
   345  
   346  	ginkgo.Describe("Pod TerminationGracePeriodSeconds is negative", func() {
   347  		var podClient *e2epod.PodClient
   348  		ginkgo.BeforeEach(func() {
   349  			podClient = e2epod.NewPodClient(f)
   350  		})
   351  
   352  		ginkgo.It("pod with negative grace period", func(ctx context.Context) {
   353  			name := "pod-negative-grace-period" + string(uuid.NewUUID())
   354  			image := imageutils.GetE2EImage(imageutils.BusyBox)
   355  			pod := &v1.Pod{
   356  				ObjectMeta: metav1.ObjectMeta{
   357  					Name: name,
   358  				},
   359  				Spec: v1.PodSpec{
   360  					RestartPolicy: v1.RestartPolicyOnFailure,
   361  					Containers: []v1.Container{
   362  						{
   363  							Name:  "foo",
   364  							Image: image,
   365  							Command: []string{
   366  								"/bin/sh", "-c", "sleep 10000",
   367  							},
   368  						},
   369  					},
   370  					TerminationGracePeriodSeconds: utilpointer.Int64(-1),
   371  				},
   372  			}
   373  
   374  			ginkgo.By("submitting the pod to kubernetes")
   375  			podClient.Create(ctx, pod)
   376  
   377  			pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
   378  			framework.ExpectNoError(err, "failed to query for pod")
   379  
   380  			if pod.Spec.TerminationGracePeriodSeconds == nil {
   381  				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
   382  			}
   383  
   384  			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
   385  				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
   386  			}
   387  
    388  			// retry if the TerminationGracePeriodSeconds is overridden
   389  			// see more in https://github.com/kubernetes/kubernetes/pull/115606
   390  			err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
   391  				pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{})
   392  				framework.ExpectNoError(err, "failed to query for pod")
   393  				ginkgo.By("updating the pod to have a negative TerminationGracePeriodSeconds")
   394  				pod.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(-1)
   395  				_, err = podClient.PodInterface.Update(ctx, pod, metav1.UpdateOptions{})
   396  				return err
   397  			})
   398  			framework.ExpectNoError(err, "failed to update pod")
   399  
   400  			pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{})
   401  			framework.ExpectNoError(err, "failed to query for pod")
   402  
   403  			if pod.Spec.TerminationGracePeriodSeconds == nil {
   404  				framework.Failf("pod spec TerminationGracePeriodSeconds is nil")
   405  			}
   406  
   407  			if *pod.Spec.TerminationGracePeriodSeconds != 1 {
   408  				framework.Failf("pod spec TerminationGracePeriodSeconds is not 1: %d", *pod.Spec.TerminationGracePeriodSeconds)
   409  			}
   410  
   411  			ginkgo.DeferCleanup(func(ctx context.Context) error {
   412  				ginkgo.By("deleting the pod")
   413  				return podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
   414  			})
   415  		})
   416  	})
   417  
   418  })
   419  
   420  func createAndTestPodRepeatedly(ctx context.Context, workers, iterations int, scenario podScenario, podClient v1core.PodInterface) {
   421  	var (
   422  		lock sync.Mutex
   423  		errs []error
   424  
   425  		wg sync.WaitGroup
   426  	)
   427  
   428  	r := prometheus.NewRegistry()
   429  	h := prometheus.NewSummaryVec(prometheus.SummaryOpts{
   430  		Name: "latency",
   431  		Objectives: map[float64]float64{
   432  			0.5:  0.05,
   433  			0.75: 0.025,
   434  			0.9:  0.01,
   435  			0.99: 0.001,
   436  		},
   437  	}, []string{"node"})
   438  	r.MustRegister(h)
   439  
   440  	for i := 0; i < workers; i++ {
   441  		wg.Add(1)
   442  		go func(i int) {
   443  			defer ginkgo.GinkgoRecover()
   444  			defer wg.Done()
   445  			for retries := 0; retries < iterations; retries++ {
   446  				pod := scenario.Pod(i, retries)
   447  
   448  				// create the pod, capture the change events, then delete the pod
   449  				start := time.Now()
   450  				created, err := podClient.Create(ctx, pod, metav1.CreateOptions{})
   451  				framework.ExpectNoError(err, "failed to create pod")
   452  
   453  				ch := make(chan []watch.Event)
   454  				waitForWatch := make(chan struct{})
   455  				go func() {
   456  					defer ginkgo.GinkgoRecover()
   457  					defer close(ch)
   458  					w, err := podClient.Watch(ctx, metav1.ListOptions{
   459  						ResourceVersion: created.ResourceVersion,
   460  						FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
   461  					})
   462  					if err != nil {
   463  						framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
   464  						return
   465  					}
   466  					defer w.Stop()
   467  					close(waitForWatch)
   468  					events := []watch.Event{
   469  						{Type: watch.Added, Object: created},
   470  					}
   471  					for event := range w.ResultChan() {
   472  						events = append(events, event)
   473  						if event.Type == watch.Error {
   474  							framework.Logf("watch error seen for %s: %#v", pod.Name, event.Object)
   475  						}
   476  						if scenario.IsLastEvent(event) {
   477  							framework.Logf("watch last event seen for %s", pod.Name)
   478  							break
   479  						}
   480  					}
   481  					ch <- events
   482  				}()
   483  
   484  				select {
   485  				case <-ch: // in case the goroutine above exits before establishing the watch
   486  				case <-waitForWatch: // when the watch is established
   487  				}
   488  
   489  				verifier, scenario, err := scenario.Action(ctx, pod)
   490  				framework.ExpectNoError(err, "failed to take action")
   491  
   492  				var (
   493  					events []watch.Event
   494  					ok     bool
   495  				)
   496  				select {
   497  				case events, ok = <-ch:
   498  					if !ok {
   499  						continue
   500  					}
   501  					if len(events) < 2 {
   502  						framework.Fail("only got a single event")
   503  					}
   504  				case <-time.After(5 * time.Minute):
   505  					framework.Failf("timed out waiting for watch events for %s", pod.Name)
   506  				}
   507  
   508  				end := time.Now()
   509  
   510  				var eventErr error
   511  				for _, event := range events[1:] {
   512  					if err := verifier.Verify(event); err != nil {
   513  						eventErr = err
   514  						break
   515  					}
   516  				}
   517  
   518  				total := end.Sub(start)
   519  
   520  				var lastPod *v1.Pod = pod
   521  				func() {
   522  					lock.Lock()
   523  					defer lock.Unlock()
   524  
   525  					if eventErr != nil {
   526  						errs = append(errs, eventErr)
   527  						return
   528  					}
   529  					pod, verifyErrs := verifier.VerifyFinal(scenario, total)
   530  					if pod != nil {
   531  						lastPod = pod
   532  					}
   533  					errs = append(errs, verifyErrs...)
   534  				}()
   535  
   536  				h.WithLabelValues(lastPod.Spec.NodeName).Observe(total.Seconds())
   537  			}
   538  		}(i)
   539  	}
   540  
   541  	wg.Wait()
   542  
   543  	if len(errs) > 0 {
   544  		var messages []string
   545  		for _, err := range errs {
   546  			messages = append(messages, err.Error())
   547  		}
   548  		framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
   549  	}
   550  	values, _ := r.Gather()
   551  	var buf bytes.Buffer
   552  	for _, m := range values {
   553  		expfmt.MetricFamilyToText(&buf, m)
   554  	}
   555  	framework.Logf("Summary of latencies:\n%s", buf.String())
   556  }
   557  
   558  type podScenario interface {
   559  	Pod(worker, attempt int) *v1.Pod
   560  	Action(context.Context, *v1.Pod) (podScenarioVerifier, string, error)
   561  	IsLastEvent(event watch.Event) bool
   562  }
   563  
   564  type podScenarioVerifier interface {
   565  	Verify(event watch.Event) error
   566  	VerifyFinal(scenario string, duration time.Duration) (*v1.Pod, []error)
   567  }
   568  
   569  type podFastDeleteScenario struct {
   570  	client  v1core.PodInterface
   571  	delayMs int
   572  
   573  	initContainer bool
   574  }
   575  
   576  func (s podFastDeleteScenario) Verifier(pod *v1.Pod) podScenarioVerifier {
   577  	return &podStartVerifier{}
   578  }
   579  
   580  func (s podFastDeleteScenario) IsLastEvent(event watch.Event) bool {
   581  	if event.Type == watch.Deleted {
   582  		return true
   583  	}
   584  	return false
   585  }
   586  
   587  func (s podFastDeleteScenario) Action(ctx context.Context, pod *v1.Pod) (podScenarioVerifier, string, error) {
   588  	t := time.Duration(rand.Intn(s.delayMs)) * time.Millisecond
   589  	scenario := fmt.Sprintf("t=%s", t)
   590  	time.Sleep(t)
   591  	return &podStartVerifier{pod: pod}, scenario, s.client.Delete(ctx, pod.Name, metav1.DeleteOptions{})
   592  }
   593  
   594  func (s podFastDeleteScenario) Pod(worker, attempt int) *v1.Pod {
   595  	name := fmt.Sprintf("pod-terminate-status-%d-%d", worker, attempt)
   596  	value := strconv.Itoa(time.Now().Nanosecond())
   597  	one := int64(1)
   598  	if s.initContainer {
   599  		return &v1.Pod{
   600  			ObjectMeta: metav1.ObjectMeta{
   601  				Name: name,
   602  				Labels: map[string]string{
   603  					"name": "foo",
   604  					"time": value,
   605  				},
   606  			},
   607  			Spec: v1.PodSpec{
   608  				RestartPolicy:                 v1.RestartPolicyNever,
   609  				TerminationGracePeriodSeconds: &one,
   610  				InitContainers: []v1.Container{
   611  					{
   612  						Name:  "fail",
   613  						Image: imageutils.GetE2EImage(imageutils.BusyBox),
   614  						Command: []string{
   615  							"/bin/false",
   616  						},
   617  						Resources: v1.ResourceRequirements{
   618  							Requests: v1.ResourceList{
   619  								v1.ResourceCPU:    resource.MustParse("5m"),
   620  								v1.ResourceMemory: resource.MustParse("10Mi"),
   621  							},
   622  						},
   623  					},
   624  				},
   625  				Containers: []v1.Container{
   626  					{
   627  						Name:  "blocked",
   628  						Image: imageutils.GetE2EImage(imageutils.BusyBox),
   629  						Command: []string{
   630  							"/bin/true",
   631  						},
   632  						Resources: v1.ResourceRequirements{
   633  							Requests: v1.ResourceList{
   634  								v1.ResourceCPU:    resource.MustParse("5m"),
   635  								v1.ResourceMemory: resource.MustParse("10Mi"),
   636  							},
   637  						},
   638  					},
   639  				},
   640  			},
   641  		}
   642  	}
   643  	return &v1.Pod{
   644  		ObjectMeta: metav1.ObjectMeta{
   645  			Name: name,
   646  			Labels: map[string]string{
   647  				"name": "foo",
   648  				"time": value,
   649  			},
   650  		},
   651  		Spec: v1.PodSpec{
   652  			RestartPolicy:                 v1.RestartPolicyNever,
   653  			TerminationGracePeriodSeconds: &one,
   654  			Containers: []v1.Container{
   655  				{
   656  					Name:  "fail",
   657  					Image: imageutils.GetE2EImage(imageutils.BusyBox),
   658  					Command: []string{
   659  						"/bin/false",
   660  					},
   661  					Resources: v1.ResourceRequirements{
   662  						Requests: v1.ResourceList{
   663  							v1.ResourceCPU:    resource.MustParse("5m"),
   664  							v1.ResourceMemory: resource.MustParse("10Mi"),
   665  						},
   666  					},
   667  				},
   668  			},
   669  		},
   670  	}
   671  }
   672  
   673  // podStartVerifier checks events for a given pod and looks for unexpected
    674  // transitions. It assumes a single app container that runs to completion, optionally preceded by one init container.
   675  type podStartVerifier struct {
   676  	pod                  *v1.Pod
   677  	hasInitContainers    bool
   678  	hasContainers        bool
   679  	hasTerminated        bool
   680  	hasRunningContainers bool
   681  	hasTerminalPhase     bool
   682  	duration             time.Duration
   683  	completeDuration     time.Duration
   684  }
   685  
   686  var reBug88766 = regexp.MustCompile(`rootfs_linux.*kubernetes\.io~(secret|projected).*no such file or directory`)
   687  
   688  // Verify takes successive watch events for a given pod and returns an error if the status is unexpected.
    689  // This verifier works for any pod which has at most one init container and exactly one regular container.
   690  func (v *podStartVerifier) Verify(event watch.Event) error {
   691  	var ok bool
   692  	pod, ok := event.Object.(*v1.Pod)
   693  	if !ok {
   694  		framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
   695  		return nil
   696  	}
   697  	v.pod = pod
   698  
   699  	if len(pod.Spec.InitContainers) > 0 {
   700  		if len(pod.Status.InitContainerStatuses) == 0 {
   701  			if v.hasInitContainers {
   702  				return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
   703  			}
   704  			return nil
   705  		}
   706  		v.hasInitContainers = true
   707  		if len(pod.Status.InitContainerStatuses) != 1 {
   708  			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
   709  		}
   710  
   711  	} else {
   712  		if len(pod.Status.InitContainerStatuses) != 0 {
   713  			return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
   714  		}
   715  	}
   716  
   717  	if len(pod.Status.ContainerStatuses) == 0 {
   718  		if v.hasContainers {
   719  			return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
   720  		}
   721  		return nil
   722  	}
   723  	v.hasContainers = true
   724  	if len(pod.Status.ContainerStatuses) != 1 {
   725  		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
   726  	}
   727  
   728  	if status := e2epod.FindContainerStatusInPod(pod, "blocked"); status != nil {
   729  		if (status.Started != nil && *status.Started) || status.LastTerminationState.Terminated != nil || status.State.Waiting == nil {
   730  			return fmt.Errorf("pod %s on node %s should not have started the blocked container: %#v", pod.Name, pod.Spec.NodeName, status)
   731  		}
   732  	}
   733  
   734  	status := e2epod.FindContainerStatusInPod(pod, "fail")
   735  	if status == nil {
   736  		return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
   737  	}
   738  
   739  	t := status.State.Terminated
   740  	if v.hasTerminated {
   741  		if status.State.Waiting != nil || status.State.Running != nil {
   742  			return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
   743  		}
   744  		if t == nil {
   745  			return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
   746  		}
   747  	}
   748  	var hasNoStartTime bool
   749  	v.hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
   750  	if t != nil {
   751  		if !t.FinishedAt.Time.IsZero() {
   752  			if t.StartedAt.IsZero() {
   753  				hasNoStartTime = true
   754  			} else {
   755  				v.duration = t.FinishedAt.Sub(t.StartedAt.Time)
   756  			}
   757  			v.completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
   758  		}
   759  
   760  		defer func() { v.hasTerminated = true }()
   761  		switch {
   762  		case t.ExitCode == 1:
   763  			// expected
   764  		case t.ExitCode == 137 && (t.Reason == "ContainerStatusUnknown" || t.Reason == "Error"):
   765  			// expected, pod was force-killed after grace period
   766  		case t.ExitCode == 128 && (t.Reason == "StartError" || t.Reason == "ContainerCannotRun") && reBug88766.MatchString(t.Message):
   767  			// pod volume teardown races with container start in CRI, which reports a failure
   768  			framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
   769  		default:
   770  			data, _ := json.MarshalIndent(pod.Status, "", "  ")
   771  			framework.Logf("pod %s on node %s had incorrect final status:\n%s", pod.Name, pod.Spec.NodeName, string(data))
   772  			return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
   773  		}
   774  		switch {
   775  		case v.duration > time.Hour:
   776  			// problem with status reporting
   777  			return fmt.Errorf("pod %s container %s on node %s had very long duration %s: start=%s end=%s", pod.Name, status.Name, pod.Spec.NodeName, v.duration, t.StartedAt, t.FinishedAt)
   778  		case hasNoStartTime:
   779  			// should never happen
   780  			return fmt.Errorf("pod %s container %s on node %s had finish time but not start time: end=%s", pod.Name, status.Name, pod.Spec.NodeName, t.FinishedAt)
   781  		}
   782  	}
   783  	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
   784  		v.hasTerminalPhase = true
   785  	} else {
   786  		if v.hasTerminalPhase {
   787  			return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
   788  		}
   789  	}
   790  	return nil
   791  }
   792  
   793  func (v *podStartVerifier) VerifyFinal(scenario string, total time.Duration) (*v1.Pod, []error) {
   794  	var errs []error
   795  	pod := v.pod
   796  	if !v.hasTerminalPhase {
   797  		var names []string
   798  		for _, status := range pod.Status.ContainerStatuses {
   799  			if status.State.Running != nil {
   800  				names = append(names, status.Name)
   801  			}
   802  		}
   803  		switch {
   804  		case len(names) > 0:
   805  			errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
   806  		case pod.Status.Phase != v1.PodPending:
   807  			errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
   808  		}
   809  	}
   810  	if v.hasRunningContainers {
   811  		data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", "  ")
   812  		errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
   813  	}
   814  
   815  	framework.Logf("Pod %s on node %s %s total=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, scenario, total, v.completeDuration, v.duration)
   816  	return pod, errs
   817  }
   818  
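The podScenario and podScenarioVerifier interfaces above are exercised in this file only through podFastDeleteScenario. As a rough sketch of the contract that createAndTestPodRepeatedly expects from a scenario, the following hypothetical implementation (the podImmediateDeleteScenario name and its zero-grace-period behavior are illustrative assumptions, not part of this file) deletes the pod immediately and reuses the existing podStartVerifier:

// Sketch only (hypothetical, not in the upstream file): a scenario that deletes
// the pod with gracePeriodSeconds=0 instead of after a random delay. It reuses
// podStartVerifier and satisfies the podScenario interface defined above.
type podImmediateDeleteScenario struct {
	client v1core.PodInterface
}

func (s podImmediateDeleteScenario) Pod(worker, attempt int) *v1.Pod {
	one := int64(1)
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: fmt.Sprintf("pod-immediate-delete-%d-%d", worker, attempt),
		},
		Spec: v1.PodSpec{
			RestartPolicy:                 v1.RestartPolicyNever,
			TerminationGracePeriodSeconds: &one,
			Containers: []v1.Container{{
				Name:    "fail",
				Image:   imageutils.GetE2EImage(imageutils.BusyBox),
				Command: []string{"/bin/false"},
			}},
		},
	}
}

func (s podImmediateDeleteScenario) IsLastEvent(event watch.Event) bool {
	// The watch loop stops once the pod object is gone.
	return event.Type == watch.Deleted
}

func (s podImmediateDeleteScenario) Action(ctx context.Context, pod *v1.Pod) (podScenarioVerifier, string, error) {
	// Delete immediately; the returned scenario string only labels the run in logs.
	zero := int64(0)
	return &podStartVerifier{pod: pod}, "t=0s",
		s.client.Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &zero})
}

A spec body could then drive it the same way the existing tests do, for example createAndTestPodRepeatedly(ctx, 3, 15, podImmediateDeleteScenario{client: podClient.PodInterface}, podClient.PodInterface).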

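The verifier contract can also be read independently of the watch plumbing in createAndTestPodRepeatedly: Verify is called for every event after the synthetic Added event, and VerifyFinal is called once at the end to collect any remaining errors. A minimal, self-contained illustration, assuming a hand-built pod and event rather than a real watch stream (the exerciseVerifier helper is invented for this sketch):

// Sketch only: drive podStartVerifier by hand with a synthetic event to show
// how Verify and VerifyFinal are meant to be combined.
func exerciseVerifier() []error {
	v := &podStartVerifier{}
	p := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "example"},
		Status:     v1.PodStatus{Phase: v1.PodPending},
	}
	// Each watch event is checked as it arrives; a non-nil error marks an
	// unexpected transition (for example, a terminated container coming back).
	if err := v.Verify(watch.Event{Type: watch.Modified, Object: p}); err != nil {
		return []error{err}
	}
	// After the last event, VerifyFinal reports anything still wrong with the
	// final observed state (running containers, a non-terminal phase, ...).
	_, errs := v.VerifyFinal("t=0s", time.Second)
	return errs
}

In the real test the events come from the pod watch established in createAndTestPodRepeatedly, and the returned errors are aggregated across all workers before the final Failf.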