package e2enode

import (
	"context"
	"fmt"
	"os/exec"
	"time"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/tools/cache"
	watchtools "k8s.io/client-go/tools/watch"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"k8s.io/apimachinery/pkg/util/uuid"
)

type podCondition func(pod *v1.Pod) (bool, error)

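// waitForPodsCondition polls the node's pod list until at least podCount pods
// satisfy the given condition or the timeout expires, and returns the pods
// that matched on the last poll.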
func waitForPodsCondition(ctx context.Context, f *framework.Framework, podCount int, timeout time.Duration, condition podCondition) (runningPods []*v1.Pod) {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(10 * time.Second) {
		podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
		if err != nil {
			framework.Logf("Failed to list pods on node: %v", err)
			continue
		}

		runningPods = []*v1.Pod{}
		for i := range podList.Items {
			pod := podList.Items[i]
			if r, err := condition(&pod); err != nil || !r {
				continue
			}
			runningPods = append(runningPods, &pod)
		}
		framework.Logf("Running pod count %d", len(runningPods))
		if len(runningPods) >= podCount {
			break
		}
	}
	return runningPods
}

var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), framework.WithDisruptive(), func() {
	const (
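		// Saturate the node with pause pods so the container runtime has many
		// sandboxes (and pod IPs) to recover across repeated restarts.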
		podCount            = 100
		podCreationInterval = 100 * time.Millisecond
		recoverTimeout      = 5 * time.Minute
		startTimeout        = 3 * time.Minute

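		// Only minPods of the pods above need to come up for the restart test to
		// proceed; restartCount is how many times the container runtime is killed.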
		minPods      = 50
		restartCount = 6
	)

	f := framework.NewDefaultFramework("restart-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	ginkgo.Context("Container Runtime", func() {
		ginkgo.Context("Network", func() {
			ginkgo.It("should recover from ip leak", func(ctx context.Context) {
				pods := newTestPods(podCount, false, imageutils.GetPauseImageName(), "restart-container-runtime-test")
				ginkgo.By(fmt.Sprintf("Trying to create %d pods on node", len(pods)))
				createBatchPodWithRateControl(ctx, f, pods, podCreationInterval)
				ginkgo.DeferCleanup(deletePodsSync, f, pods)

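				// The node may not be able to run all podCount pods, so only require
				// minPods of them to reach Running/Ready (or Succeeded).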
				runningPods := waitForPodsCondition(ctx, f, podCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
				if len(runningPods) < minPods {
					framework.Failf("Failed to start %d pods, cannot test that restarting container runtime doesn't leak IPs", minPods)
				}

				for i := 0; i < restartCount; i++ {
					ginkgo.By(fmt.Sprintf("Killing container runtime iteration %d", i))

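					// Find the container runtime's single pid and confirm the process
					// is alive before killing it.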
					var pid int
					gomega.Eventually(ctx, func() error {
						runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
						if err != nil {
							return err
						}
						if len(runtimePids) != 1 {
							return fmt.Errorf("unexpected container runtime pid list: %+v", runtimePids)
						}
						pid = runtimePids[0]
						if _, err := exec.Command("sudo", "ps", "-p", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
							return err
						}
						return nil
					}, 1*time.Minute, 2*time.Second).Should(gomega.BeNil())
					if stdout, err := exec.Command("sudo", "kill", "-SIGKILL", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
						framework.Failf("Failed to kill container runtime (pid=%d): %v, stdout: %q", pid, err, string(stdout))
					}

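					// The runtime is expected to be brought back by its supervisor
					// (e.g. systemd); give it time to restart before the next kill.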
					time.Sleep(20 * time.Second)
				}

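				// After the restart loop, the previously running pods should still be
				// Running/Ready and none of their containers should have terminated;
				// terminated containers here would suggest sandbox/IP recovery failed.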
				ginkgo.By("Checking currently Running/Ready pods")
				postRestartRunningPods := waitForPodsCondition(ctx, f, len(runningPods), recoverTimeout, testutils.PodRunningReadyOrSucceeded)
				if len(postRestartRunningPods) == 0 {
					framework.Failf("Failed to start *any* pods after container runtime restart, this might indicate an IP leak")
				}
				ginkgo.By("Confirm no containers have terminated")
				for _, pod := range postRestartRunningPods {
					if c := testutils.TerminatedContainers(pod); len(c) != 0 {
						framework.Failf("Pod %q has failed containers %+v after container runtime restart, this might indicate an IP leak", pod.Name, c)
					}
				}
				ginkgo.By(fmt.Sprintf("Container runtime restart test passed with %d pods", len(postRestartRunningPods)))
			})
		})
	})
	ginkgo.Context("Kubelet", func() {
		ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) {
			node := getLocalNode(ctx, f)
			cpus := node.Status.Allocatable[v1.ResourceCPU]
			numCpus := int(cpus.Value())
			if numCpus < 1 {
				e2eskipper.Skipf("insufficient CPU available for kubelet restart test")
			}
			if numCpus > 18 {
				e2eskipper.Skipf("test will return false positives on a machine with >18 cores")
			}

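			// Fill the node with short-lived RestartNever pods that each request most
			// of one CPU and run to completion. The RestartAlways pods created below
			// only fit afterwards if the kubelet releases the CPU of terminated pods,
			// which is exactly the accounting that must survive a kubelet restart.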
			podCountRestartNever := numCpus
			ginkgo.By(fmt.Sprintf("creating %d RestartNever pods on node", podCountRestartNever))
			restartNeverPods := newTestPods(podCountRestartNever, false, imageutils.GetE2EImage(imageutils.BusyBox), "restart-kubelet-test")
			for _, pod := range restartNeverPods {
				pod.Spec.RestartPolicy = v1.RestartPolicyNever
				pod.Spec.Containers[0].Command = []string{"echo", "hi"}
				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
					v1.ResourceCPU: resource.MustParse("950m"),
				}
			}
			createBatchPodWithRateControl(ctx, f, restartNeverPods, podCreationInterval)
			ginkgo.DeferCleanup(deletePodsSync, f, restartNeverPods)
			completedPods := waitForPodsCondition(ctx, f, podCountRestartNever, startTimeout, testutils.PodSucceeded)

			if len(completedPods) < podCountRestartNever {
				framework.Failf("Failed to run sufficient restartNever pods, got %d but expected %d", len(completedPods), podCountRestartNever)
			}

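			// Now create enough always-running pods to claim roughly half the node's
			// CPU; they only fit if the completed RestartNever pods no longer count
			// against the node's allocatable CPU.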
			podCountRestartAlways := (numCpus / 2) + 1
			ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", podCountRestartAlways))
			restartAlwaysPods := newTestPods(podCountRestartAlways, false, imageutils.GetPauseImageName(), "restart-kubelet-test")
			for _, pod := range restartAlwaysPods {
				pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
					v1.ResourceCPU: resource.MustParse("1"),
				}
			}
			createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval)
			ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods)

			numAllPods := podCountRestartNever + podCountRestartAlways
			allPods := waitForPodsCondition(ctx, f, numAllPods, startTimeout, testutils.PodRunningReadyOrSucceeded)
			if len(allPods) < numAllPods {
				framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), numAllPods)
			}

			ginkgo.By("killing and restarting kubelet")
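			// stopKubelet returns a function that starts the kubelet again, so the
			// restart happens immediately after the stop.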
			startKubelet := stopKubelet()
			startKubelet()

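			// If terminated-pod accounting survives the restart, every pod stays
			// admissible; keep re-checking for the full window to catch pods that
			// are rejected or lost late.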
			ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running")
			for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) {
				postRestartRunningPods := waitForPodsCondition(ctx, f, numAllPods, recoverTimeout, testutils.PodRunningReadyOrSucceeded)
				if len(postRestartRunningPods) < numAllPods {
					framework.Failf("fewer pods are running after kubelet restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
				}
			}
		})

		ginkgo.It("should delete pods which are marked as terminal and have a deletion timestamp set after restart", func(ctx context.Context) {
			podName := "terminal-restart-pod" + string(uuid.NewUUID())
			gracePeriod := int64(30)
			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					TerminationGracePeriodSeconds: &gracePeriod,
					RestartPolicy:                 v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Name:    podName,
							Image:   imageutils.GetE2EImage(imageutils.BusyBox),
							Command: []string{"sh", "-c"},
							Args: []string{`
sleep 9999999 &
PID=$!

_term () {
	kill $PID
	echo "Caught SIGTERM!"
}

trap _term SIGTERM
wait $PID
trap - TERM

# Wait for the long running sleep to exit
wait $PID

exit 0
`,
							},
						},
					},
				},
			})
			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v) with restart policy: %v", f.Namespace.Name, podName, podSpec.Spec.RestartPolicy))
			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)

			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
			framework.ExpectNoError(err, "Failed waiting for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)

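			// Watch the namespace from the list's resource version so the pod's
			// transition to Succeeded is not missed while the delete is in flight.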
			w := &cache.ListWatch{
				WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
					return f.ClientSet.CoreV1().Pods(f.Namespace.Name).Watch(ctx, options)
				},
			}

			podsList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{})
			framework.ExpectNoError(err, "Failed to list pods in namespace: %s", f.Namespace.Name)

			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
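			// Brief pause before deleting; presumably this gives the container's
			// shell time to install its SIGTERM trap so the pod can exit cleanly.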
			time.Sleep(time.Second)
			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)

			ctxUntil, cancel := context.WithTimeout(ctx, f.Timeouts.PodStart)
			defer cancel()

			ginkgo.By(fmt.Sprintf("Started watch for pod (%v/%v) to enter succeeded phase", pod.Namespace, pod.Name))
			_, err = watchtools.Until(ctxUntil, podsList.ResourceVersion, w, func(event watch.Event) (bool, error) {
				if pod, ok := event.Object.(*v1.Pod); ok {
					found := pod.ObjectMeta.Name == podName &&
						pod.ObjectMeta.Namespace == f.Namespace.Name &&
						pod.Status.Phase == v1.PodSucceeded
					if !found {
						ginkgo.By(fmt.Sprintf("Observed Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
						return false, nil
					}
					ginkgo.By(fmt.Sprintf("Found Pod (%s/%s) in phase %v", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Status.Phase))
					return found, nil
				}
				ginkgo.By(fmt.Sprintf("Observed event: %+v", event.Object))
				return false, nil
			})
			ginkgo.By("Ended watch for pod entering succeeded phase")
			framework.ExpectNoError(err, "failed to see event that pod (%s/%s) entered the Succeeded phase: %v", pod.Namespace, pod.Name, err)

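			// The pod is now terminal (Succeeded) but still has a deletion timestamp.
			// Restart the kubelet and verify that it finishes deleting the pod.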
			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

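			// Wait until the kubelet health check starts failing, i.e. it is down.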
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

			ginkgo.By("Starting the kubelet")
			startKubelet()

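			// Wait until the kubelet health check passes again.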
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

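			// Wait for the (single) node to report Ready again.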
			gomega.Eventually(ctx, func(ctx context.Context) bool {
				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				return nodes == 1
			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())

			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%s/%s) is deleted by kubelet", pod.Namespace, pod.Name))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
		})

		ginkgo.It("should force-delete non-admissible pods created and deleted during kubelet restart", func(ctx context.Context) {
			podName := "rejected-deleted-pod" + string(uuid.NewUUID())
			gracePeriod := int64(30)
			nodeName := getNodeName(ctx, f)
			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					NodeName: nodeName,
					NodeSelector: map[string]string{
						"this-label": "does-not-exist-on-any-nodes",
					},
					TerminationGracePeriodSeconds: &gracePeriod,
					RestartPolicy:                 v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Name:  podName,
							Image: imageutils.GetPauseImageName(),
						},
					},
				},
			})

			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

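			// While the kubelet is down, create a pod that is bound to this node but
			// whose node selector can never be satisfied, then delete it. Once the
			// kubelet is back it must reject the pod and force-delete it.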
			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)

			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
			err := e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)

			ginkgo.By("Starting the kubelet")
			startKubelet()

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

			gomega.Eventually(ctx, func(ctx context.Context) bool {
				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				return nodes == 1
			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())

			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
		})

		ginkgo.It("should force-delete non-admissible pods that were admitted and running before kubelet restart", func(ctx context.Context) {
			nodeLabelKey := "custom-label-key-required"
			nodeLabelValueRequired := "custom-label-value-required-for-admission"
			podName := "rejected-deleted-run" + string(uuid.NewUUID())
			gracePeriod := int64(30)
			nodeName := getNodeName(ctx, f)
			pod := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: f.Namespace.Name,
				},
				Spec: v1.PodSpec{
					NodeSelector: map[string]string{
						nodeLabelKey: nodeLabelValueRequired,
					},
					NodeName:                      nodeName,
					TerminationGracePeriodSeconds: &gracePeriod,
					RestartPolicy:                 v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Name:  podName,
							Image: imageutils.GetPauseImageName(),
						},
					},
				},
			})

			ginkgo.By(fmt.Sprintf("Adding node label for node (%v) to allow admission of pod (%v/%v)", nodeName, f.Namespace.Name, podName))
			e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodeName, nodeLabelKey, nodeLabelValueRequired)
			ginkgo.DeferCleanup(func() { e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) })

			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
			pod = e2epod.NewPodClient(f).Create(ctx, pod)

			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
			framework.ExpectNoError(err, "Failed waiting for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)

			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())

			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)

			ginkgo.By(fmt.Sprintf("Removing node label for node (%v) to ensure the pod (%v/%v) is rejected after kubelet restart", nodeName, f.Namespace.Name, podName))
			e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)

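			// With the required label removed and a deletion timestamp already set,
			// the restarted kubelet must reject the pod at admission and then
			// force-delete it.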
			ginkgo.By("Starting the kubelet")
			startKubelet()

			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())

			gomega.Eventually(ctx, func(ctx context.Context) bool {
				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				return nodes == 1
			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())

			ginkgo.By(fmt.Sprintf("Once the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
		})
	})
})