wait.go

Documentation: k8s.io/kubernetes/test/e2e/framework/pod

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pod
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"reflect"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/onsi/ginkgo/v2"
    28  	"github.com/onsi/gomega"
    29  	"github.com/onsi/gomega/gcustom"
    30  	"github.com/onsi/gomega/types"
    31  
    32  	appsv1 "k8s.io/api/apps/v1"
    33  	v1 "k8s.io/api/core/v1"
    34  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	"k8s.io/apimachinery/pkg/labels"
    37  	apitypes "k8s.io/apimachinery/pkg/types"
    38  	clientset "k8s.io/client-go/kubernetes"
    39  	"k8s.io/kubectl/pkg/util/podutils"
    40  	"k8s.io/kubernetes/test/e2e/framework"
    41  	testutils "k8s.io/kubernetes/test/utils"
    42  	"k8s.io/kubernetes/test/utils/format"
    43  )
    44  
const (
	// defaultPodDeletionTimeout is the default timeout for deleting pod.
	defaultPodDeletionTimeout = 3 * time.Minute

	// podListTimeout is how long to wait for the pod to be listable.
	podListTimeout = time.Minute

	// podRespondingTimeout is the timeout WaitForPodsResponding falls back to
	// when it is called with a zero timeout.
	podRespondingTimeout = 15 * time.Minute

	// podScheduledBeforeTimeout is how long pods have to become scheduled onto nodes.
	podScheduledBeforeTimeout = podListTimeout + (20 * time.Second)

	// podStartTimeout is how long to wait for the pod to be started.
	podStartTimeout = 5 * time.Minute

	// singleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	singleCallTimeout = 5 * time.Minute

	// slowPodStartTimeout is the extended start timeout for pods that can take
	// much longer to get ready due to volume attach/detach latency.
	slowPodStartTimeout = 15 * time.Minute
)
    67  
// podCondition is the callback type accepted by WaitForPodCondition. It is
// invoked with the most recently fetched pod. Returning true with a nil error
// ends the wait successfully, returning false with a nil error keeps polling,
// and a non-nil error is handed back to the matcher as-is.
type podCondition func(pod *v1.Pod) (bool, error)
    69  
    70  // BeRunningNoRetries verifies that a pod starts running. It's a permanent
    71  // failure when the pod enters some other permanent phase.
    72  func BeRunningNoRetries() types.GomegaMatcher {
    73  	return gomega.And(
    74  		// This additional matcher checks for the final error condition.
    75  		gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    76  			switch pod.Status.Phase {
    77  			case v1.PodFailed, v1.PodSucceeded:
    78  				return false, gomega.StopTrying(fmt.Sprintf("Expected pod to reach phase %q, got final phase %q instead:\n%s", v1.PodRunning, pod.Status.Phase, format.Object(pod, 1)))
    79  			default:
    80  				return true, nil
    81  			}
    82  		}),
    83  		BeInPhase(v1.PodRunning),
    84  	)
    85  }
    86  
    87  // BeInPhase matches if pod.status.phase is the expected phase.
    88  func BeInPhase(phase v1.PodPhase) types.GomegaMatcher {
    89  	// A simple implementation of this would be:
    90  	// return gomega.HaveField("Status.Phase", phase)
    91  	//
    92  	// But that produces a fairly generic
    93  	//     Value for field 'Status.Phase' failed to satisfy matcher.
    94  	// failure message and doesn't show the pod. We can do better than
    95  	// that with a custom matcher.
    96  
    97  	return gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    98  		return pod.Status.Phase == phase, nil
    99  	}).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase)
   100  }
   101  
   102  // WaitForPodsRunningReady waits up to timeout to ensure that all pods in
   103  // namespace ns are either running and ready, or failed but controlled by a
   104  // controller. Also, it ensures that at least minPods are running and
   105  // ready. It has separate behavior from other 'wait for' pods functions in
   106  // that it requests the list of pods on every iteration. This is useful, for
   107  // example, in cluster startup, because the number of pods increases while
   108  // waiting. All pods that are in SUCCESS state are not counted.
   109  //
   110  // If minPods or allowedNotReadyPods are -1, this method returns immediately
   111  // without waiting.
   112  func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error {
   113  	if minPods == -1 || allowedNotReadyPods == -1 {
   114  		return nil
   115  	}
   116  
   117  	// We get the new list of pods, replication controllers, and replica
   118  	// sets in every iteration because more pods come online during startup
   119  	// and we want to ensure they are also checked.
   120  	//
   121  	// This struct gets populated while polling, then gets checked, and in
   122  	// case of a timeout is included in the failure message.
   123  	type state struct {
   124  		ReplicationControllers []v1.ReplicationController
   125  		ReplicaSets            []appsv1.ReplicaSet
   126  		Pods                   []v1.Pod
   127  	}
   128  
   129  	// notReady is -1 for any failure other than a timeout.
   130  	// Otherwise it is the number of pods that we were still
   131  	// waiting for.
   132  	notReady := int32(-1)
   133  
   134  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
   135  		// Reset notReady at the start of a poll attempt.
   136  		notReady = -1
   137  
   138  		rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
   139  		if err != nil {
   140  			return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err)
   141  		}
   142  		rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{})
   143  		if err != nil {
   144  			return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err)
   145  		}
   146  		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   147  		if err != nil {
   148  			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
   149  		}
   150  		return &state{
   151  			ReplicationControllers: rcList.Items,
   152  			ReplicaSets:            rsList.Items,
   153  			Pods:                   podList.Items,
   154  		}, nil
   155  	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) {
   156  		replicas, replicaOk := int32(0), int32(0)
   157  		for _, rc := range s.ReplicationControllers {
   158  			replicas += *rc.Spec.Replicas
   159  			replicaOk += rc.Status.ReadyReplicas
   160  		}
   161  		for _, rs := range s.ReplicaSets {
   162  			replicas += *rs.Spec.Replicas
   163  			replicaOk += rs.Status.ReadyReplicas
   164  		}
   165  
   166  		nOk := int32(0)
   167  		notReady = int32(0)
   168  		failedPods := []v1.Pod{}
   169  		otherPods := []v1.Pod{}
   170  		succeededPods := []string{}
   171  		for _, pod := range s.Pods {
   172  			res, err := testutils.PodRunningReady(&pod)
   173  			switch {
   174  			case res && err == nil:
   175  				nOk++
   176  			case pod.Status.Phase == v1.PodSucceeded:
   177  				// it doesn't make sense to wait for this pod
   178  				succeededPods = append(succeededPods, pod.Name)
   179  			case pod.Status.Phase == v1.PodFailed:
   180  				// ignore failed pods that are controlled by some controller
   181  				if metav1.GetControllerOf(&pod) == nil {
   182  					failedPods = append(failedPods, pod)
   183  				}
   184  			default:
   185  				notReady++
   186  				otherPods = append(otherPods, pod)
   187  			}
   188  		}
   189  		done := replicaOk == replicas && nOk >= minPods && (len(failedPods)+len(otherPods)) == 0
   190  		if done {
   191  			return nil, nil
   192  		}
   193  
   194  		// Delayed formatting of a failure message.
   195  		return func() string {
   196  			var buffer strings.Builder
   197  			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods))
   198  			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods)))
   199  			buffer.WriteString(fmt.Sprintf("Expected %d pod replicas, %d are Running and Ready.\n", replicas, replicaOk))
   200  			if len(succeededPods) > 0 {
   201  				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
   202  			}
   203  			if len(failedPods) > 0 {
   204  				buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(failedPods, 1)))
   205  			}
   206  			if len(otherPods) > 0 {
   207  				buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
   208  			}
   209  			return buffer.String()
   210  		}, nil
   211  	}))
   212  
   213  	// An error might not be fatal.
   214  	if err != nil && notReady >= 0 && notReady <= allowedNotReadyPods {
   215  		framework.Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
   216  		return nil
   217  	}
   218  	return err
   219  }
   220  
   221  // WaitForPodCondition waits a pods to be matched to the given condition.
   222  // The condition callback may use gomega.StopTrying to abort early.
   223  func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error {
   224  	return framework.Gomega().
   225  		Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(ns).Get, podName, metav1.GetOptions{}))).
   226  		WithTimeout(timeout).
   227  		Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) {
   228  			done, err := condition(pod)
   229  			if err != nil {
   230  				return nil, err
   231  			}
   232  			if done {
   233  				return nil, nil
   234  			}
   235  			return func() string {
   236  				return fmt.Sprintf("expected pod to be %s, got instead:\n%s", conditionDesc, format.Object(pod, 1))
   237  			}, nil
   238  		}))
   239  }
   240  
   241  // Range determines how many items must exist and how many must match a certain
   242  // condition. Values <= 0 are ignored.
   243  // TODO (?): move to test/e2e/framework/range
   244  type Range struct {
   245  	// MinMatching must be <= actual matching items or <= 0.
   246  	MinMatching int
   247  	// MaxMatching must be >= actual matching items or <= 0.
   248  	// To check for "no matching items", set NonMatching.
   249  	MaxMatching int
   250  	// NoneMatching indicates that no item must match.
   251  	NoneMatching bool
   252  	// AllMatching indicates that all items must match.
   253  	AllMatching bool
   254  	// MinFound must be <= existing items or <= 0.
   255  	MinFound int
   256  }
   257  
   258  // Min returns how many items must exist.
   259  func (r Range) Min() int {
   260  	min := r.MinMatching
   261  	if min < r.MinFound {
   262  		min = r.MinFound
   263  	}
   264  	return min
   265  }
   266  
   267  // WaitForPods waits for pods in the given namespace to match the given
   268  // condition. How many pods must exist and how many must match the condition
   269  // is determined by the range parameter. The condition callback may use
   270  // gomega.StopTrying(...).Now() to abort early. The condition description
   271  // will be used with "expected pods to <description>".
   272  func WaitForPods(ctx context.Context, c clientset.Interface, ns string, opts metav1.ListOptions, r Range, timeout time.Duration, conditionDesc string, condition func(*v1.Pod) bool) (*v1.PodList, error) {
   273  	var finalPods *v1.PodList
   274  	minPods := r.Min()
   275  	match := func(pods *v1.PodList) (func() string, error) {
   276  		finalPods = pods
   277  
   278  		if len(pods.Items) < minPods {
   279  			return func() string {
   280  				return fmt.Sprintf("expected at least %d pods, only got %d", minPods, len(pods.Items))
   281  			}, nil
   282  		}
   283  
   284  		var nonMatchingPods, matchingPods []v1.Pod
   285  		for _, pod := range pods.Items {
   286  			if condition(&pod) {
   287  				matchingPods = append(matchingPods, pod)
   288  			} else {
   289  				nonMatchingPods = append(nonMatchingPods, pod)
   290  			}
   291  		}
   292  		matching := len(pods.Items) - len(nonMatchingPods)
   293  		if matching < r.MinMatching && r.MinMatching > 0 {
   294  			return func() string {
   295  				return fmt.Sprintf("expected at least %d pods to %s, %d out of %d were not:\n%s",
   296  					r.MinMatching, conditionDesc, len(nonMatchingPods), len(pods.Items),
   297  					format.Object(nonMatchingPods, 1))
   298  			}, nil
   299  		}
   300  		if len(nonMatchingPods) > 0 && r.AllMatching {
   301  			return func() string {
   302  				return fmt.Sprintf("expected all pods to %s, %d out of %d were not:\n%s",
   303  					conditionDesc, len(nonMatchingPods), len(pods.Items),
   304  					format.Object(nonMatchingPods, 1))
   305  			}, nil
   306  		}
   307  		if matching > r.MaxMatching && r.MaxMatching > 0 {
   308  			return func() string {
   309  				return fmt.Sprintf("expected at most %d pods to %s, %d out of %d were:\n%s",
   310  					r.MinMatching, conditionDesc, len(matchingPods), len(pods.Items),
   311  					format.Object(matchingPods, 1))
   312  			}, nil
   313  		}
   314  		if matching > 0 && r.NoneMatching {
   315  			return func() string {
   316  				return fmt.Sprintf("expected no pods to %s, %d out of %d were:\n%s",
   317  					conditionDesc, len(matchingPods), len(pods.Items),
   318  					format.Object(matchingPods, 1))
   319  			}, nil
   320  		}
   321  		return nil, nil
   322  	}
   323  
   324  	err := framework.Gomega().
   325  		Eventually(ctx, framework.ListObjects(c.CoreV1().Pods(ns).List, opts)).
   326  		WithTimeout(timeout).
   327  		Should(framework.MakeMatcher(match))
   328  	return finalPods, err
   329  }
   330  
   331  // RunningReady checks whether pod p's phase is running and it has a ready
   332  // condition of status true.
   333  func RunningReady(p *v1.Pod) bool {
   334  	return p.Status.Phase == v1.PodRunning && podutils.IsPodReady(p)
   335  }
   336  
   337  // WaitForPodsRunning waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` are running.
   338  func WaitForPodsRunning(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error {
   339  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   340  		"be running and ready", func(pod *v1.Pod) bool {
   341  			ready, _ := testutils.PodRunningReady(pod)
   342  			return ready
   343  		})
   344  	return err
   345  }
   346  
   347  // WaitForPodsSchedulingGated waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` stay in scheduling gated state.
   348  func WaitForPodsSchedulingGated(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error {
   349  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   350  		"be in scheduling gated state", func(pod *v1.Pod) bool {
   351  			for _, condition := range pod.Status.Conditions {
   352  				if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonSchedulingGated {
   353  					return true
   354  				}
   355  			}
   356  			return false
   357  		})
   358  	return err
   359  }
   360  
   361  // WaitForPodsWithSchedulingGates waits for a given `timeout` to evaluate if a certain amount of pods in given `ns`
   362  // match the given `schedulingGates`stay in scheduling gated state.
   363  func WaitForPodsWithSchedulingGates(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration, schedulingGates []v1.PodSchedulingGate) error {
   364  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   365  		"have certain scheduling gates", func(pod *v1.Pod) bool {
   366  			return reflect.DeepEqual(pod.Spec.SchedulingGates, schedulingGates)
   367  		})
   368  	return err
   369  }
   370  
   371  // WaitForPodTerminatedInNamespace returns an error if it takes too long for the pod to terminate,
   372  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed (and thus did not
   373  // terminate) with an unexpected reason. Typically called to test that the passed-in pod is fully
   374  // terminated (reason==""), but may be called to detect if a pod did *not* terminate according to
   375  // the supplied reason.
   376  func WaitForPodTerminatedInNamespace(ctx context.Context, c clientset.Interface, podName, reason, namespace string) error {
   377  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("terminated with reason %s", reason), podStartTimeout, func(pod *v1.Pod) (bool, error) {
   378  		// Only consider Failed pods. Successful pods will be deleted and detected in
   379  		// waitForPodCondition's Get call returning `IsNotFound`
   380  		if pod.Status.Phase == v1.PodFailed {
   381  			if pod.Status.Reason == reason { // short-circuit waitForPodCondition's loop
   382  				return true, nil
   383  			}
   384  			return true, fmt.Errorf("Expected pod %q in namespace %q to be terminated with reason %q, got reason: %q", podName, namespace, reason, pod.Status.Reason)
   385  		}
   386  		return false, nil
   387  	})
   388  }
   389  
   390  // WaitForPodTerminatingInNamespaceTimeout returns if the pod is terminating, or an error if it is not after the timeout.
   391  func WaitForPodTerminatingInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   392  	return WaitForPodCondition(ctx, c, namespace, podName, "is terminating", timeout, func(pod *v1.Pod) (bool, error) {
   393  		if pod.DeletionTimestamp != nil {
   394  			return true, nil
   395  		}
   396  		return false, nil
   397  	})
   398  }
   399  
   400  // WaitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long.
   401  func WaitForPodSuccessInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   402  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("%s or %s", v1.PodSucceeded, v1.PodFailed), timeout, func(pod *v1.Pod) (bool, error) {
   403  		if pod.DeletionTimestamp == nil && pod.Spec.RestartPolicy == v1.RestartPolicyAlways {
   404  			return true, gomega.StopTrying(fmt.Sprintf("pod %q will never terminate with a succeeded state since its restart policy is Always", podName))
   405  		}
   406  		switch pod.Status.Phase {
   407  		case v1.PodSucceeded:
   408  			ginkgo.By("Saw pod success")
   409  			return true, nil
   410  		case v1.PodFailed:
   411  			return true, gomega.StopTrying(fmt.Sprintf("pod %q failed with status: %+v", podName, pod.Status))
   412  		default:
   413  			return false, nil
   414  		}
   415  	})
   416  }
   417  
   418  // WaitForPodNameUnschedulableInNamespace returns an error if it takes too long for the pod to become Pending
   419  // and have condition Status equal to Unschedulable,
   420  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed with an unexpected reason.
   421  // Typically called to test that the passed-in pod is Pending and Unschedulable.
   422  func WaitForPodNameUnschedulableInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   423  	return WaitForPodCondition(ctx, c, namespace, podName, v1.PodReasonUnschedulable, podStartTimeout, func(pod *v1.Pod) (bool, error) {
   424  		// Only consider Failed pods. Successful pods will be deleted and detected in
   425  		// waitForPodCondition's Get call returning `IsNotFound`
   426  		if pod.Status.Phase == v1.PodPending {
   427  			for _, cond := range pod.Status.Conditions {
   428  				if cond.Type == v1.PodScheduled && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable {
   429  					return true, nil
   430  				}
   431  			}
   432  		}
   433  		if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
   434  			return true, fmt.Errorf("Expected pod %q in namespace %q to be in phase Pending, but got phase: %v", podName, namespace, pod.Status.Phase)
   435  		}
   436  		return false, nil
   437  	})
   438  }
   439  
// WaitForPodNameRunningInNamespace waits the default amount of time (podStartTimeout) for the specified pod to become running.
// It delegates to WaitTimeoutForPodRunningInNamespace and returns an error if the timeout occurs first
// or the pod reaches a final phase (Failed or Succeeded) instead.
func WaitForPodNameRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, podStartTimeout)
}
   445  
// WaitForPodRunningInNamespaceSlow waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running.
// It delegates to WaitTimeoutForPodRunningInNamespace and returns an error if the timeout occurs first
// or the pod reaches a final phase (Failed or Succeeded) instead.
func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface, podName, namespace string) error {
	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, slowPodStartTimeout)
}
   452  
   453  // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running.
   454  // It does not need to exist yet when this function gets called and the pod is not expected to be recreated
   455  // when it succeeds or fails.
   456  func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   457  	return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))).
   458  		WithTimeout(timeout).
   459  		Should(BeRunningNoRetries())
   460  }
   461  
   462  // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running.
   463  // Returns an error if timeout occurs first, or pod goes in to failed state.
   464  func WaitForPodRunningInNamespace(ctx context.Context, c clientset.Interface, pod *v1.Pod) error {
   465  	if pod.Status.Phase == v1.PodRunning {
   466  		return nil
   467  	}
   468  	return WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, podStartTimeout)
   469  }
   470  
   471  // WaitTimeoutForPodNoLongerRunningInNamespace waits the given timeout duration for the specified pod to stop.
   472  func WaitTimeoutForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   473  	return WaitForPodCondition(ctx, c, namespace, podName, "completed", timeout, func(pod *v1.Pod) (bool, error) {
   474  		switch pod.Status.Phase {
   475  		case v1.PodFailed, v1.PodSucceeded:
   476  			return true, nil
   477  		}
   478  		return false, nil
   479  	})
   480  }
   481  
// WaitForPodNoLongerRunningInNamespace waits the default amount of time (defaultPodDeletionTimeout) for the specified pod to stop running.
// It delegates to WaitTimeoutForPodNoLongerRunningInNamespace and returns an error if the timeout occurs first.
func WaitForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
	return WaitTimeoutForPodNoLongerRunningInNamespace(ctx, c, podName, namespace, defaultPodDeletionTimeout)
}
   487  
   488  // WaitTimeoutForPodReadyInNamespace waits the given timeout duration for the
   489  // specified pod to be ready and running.
   490  func WaitTimeoutForPodReadyInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   491  	return WaitForPodCondition(ctx, c, namespace, podName, "running and ready", timeout, func(pod *v1.Pod) (bool, error) {
   492  		switch pod.Status.Phase {
   493  		case v1.PodFailed, v1.PodSucceeded:
   494  			return false, gomega.StopTrying(fmt.Sprintf("The phase of Pod %s is %s which is unexpected.", pod.Name, pod.Status.Phase))
   495  		case v1.PodRunning:
   496  			return podutils.IsPodReady(pod), nil
   497  		}
   498  		return false, nil
   499  	})
   500  }
   501  
   502  // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state.
   503  // The resourceVersion is used when Watching object changes, it tells since when we care
   504  // about changes to the pod.
   505  func WaitForPodNotPending(ctx context.Context, c clientset.Interface, ns, podName string) error {
   506  	return WaitForPodCondition(ctx, c, ns, podName, "not pending", podStartTimeout, func(pod *v1.Pod) (bool, error) {
   507  		switch pod.Status.Phase {
   508  		case v1.PodPending:
   509  			return false, nil
   510  		default:
   511  			return true, nil
   512  		}
   513  	})
   514  }
   515  
// WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure
// or did not succeed within podStartTimeout. It delegates to WaitForPodSuccessInNamespaceTimeout.
func WaitForPodSuccessInNamespace(ctx context.Context, c clientset.Interface, podName string, namespace string) error {
	return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, podStartTimeout)
}
   520  
   521  // WaitForPodNotFoundInNamespace returns an error if it takes too long for the pod to fully terminate.
   522  // Unlike `waitForPodTerminatedInNamespace`, the pod's Phase and Reason are ignored. If the pod Get
   523  // api returns IsNotFound then the wait stops and nil is returned. If the Get api returns an error other
   524  // than "not found" and that error is final, that error is returned and the wait stops.
   525  func WaitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, podName, ns string, timeout time.Duration) error {
   526  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*v1.Pod, error) {
   527  		pod, err := c.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
   528  		if apierrors.IsNotFound(err) {
   529  			return nil, nil
   530  		}
   531  		return pod, err
   532  	})).WithTimeout(timeout).Should(gomega.BeNil())
   533  	if err != nil {
   534  		return fmt.Errorf("expected pod to not be found: %w", err)
   535  	}
   536  	return nil
   537  }
   538  
   539  // WaitForPodsResponding waits for the pods to response.
   540  func WaitForPodsResponding(ctx context.Context, c clientset.Interface, ns string, controllerName string, wantName bool, timeout time.Duration, pods *v1.PodList) error {
   541  	if timeout == 0 {
   542  		timeout = podRespondingTimeout
   543  	}
   544  	ginkgo.By("trying to dial each unique pod")
   545  	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": controllerName}))
   546  	options := metav1.ListOptions{LabelSelector: label.String()}
   547  
   548  	type response struct {
   549  		podName  string
   550  		response string
   551  	}
   552  
   553  	get := func(ctx context.Context) ([]response, error) {
   554  		currentPods, err := c.CoreV1().Pods(ns).List(ctx, options)
   555  		if err != nil {
   556  			return nil, fmt.Errorf("list pods: %w", err)
   557  		}
   558  
   559  		var responses []response
   560  		for _, pod := range pods.Items {
   561  			// Check that the replica list remains unchanged, otherwise we have problems.
   562  			if !isElementOf(pod.UID, currentPods) {
   563  				return nil, gomega.StopTrying(fmt.Sprintf("Pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason.\nCurrent replica set:\n%s", pod.UID, format.Object(currentPods, 1)))
   564  			}
   565  
   566  			ctxUntil, cancel := context.WithTimeout(ctx, singleCallTimeout)
   567  			defer cancel()
   568  
   569  			body, err := c.CoreV1().RESTClient().Get().
   570  				Namespace(ns).
   571  				Resource("pods").
   572  				SubResource("proxy").
   573  				Name(string(pod.Name)).
   574  				Do(ctxUntil).
   575  				Raw()
   576  
   577  			if err != nil {
   578  				// We may encounter errors here because of a race between the pod readiness and apiserver
   579  				// proxy or because of temporary failures. The error gets wrapped for framework.HandleRetry.
   580  				// Gomega+Ginkgo will handle logging.
   581  				return nil, fmt.Errorf("controller %s: failed to Get from replica pod %s:\n%w\nPod status:\n%s",
   582  					controllerName, pod.Name,
   583  					err, format.Object(pod.Status, 1))
   584  			}
   585  			responses = append(responses, response{podName: pod.Name, response: string(body)})
   586  		}
   587  		return responses, nil
   588  	}
   589  
   590  	match := func(responses []response) (func() string, error) {
   591  		// The response checker expects the pod's name unless !respondName, in
   592  		// which case it just checks for a non-empty response.
   593  		var unexpected []response
   594  		for _, response := range responses {
   595  			if wantName {
   596  				if response.response != response.podName {
   597  					unexpected = append(unexpected, response)
   598  				}
   599  			} else {
   600  				if len(response.response) == 0 {
   601  					unexpected = append(unexpected, response)
   602  				}
   603  			}
   604  		}
   605  		if len(unexpected) > 0 {
   606  			return func() string {
   607  				what := "some response"
   608  				if wantName {
   609  					what = "the pod's own name as response"
   610  				}
   611  				return fmt.Sprintf("Wanted %s, but the following pods replied with something else:\n%s", what, format.Object(unexpected, 1))
   612  			}, nil
   613  		}
   614  		return nil, nil
   615  	}
   616  
   617  	err := framework.Gomega().
   618  		Eventually(ctx, framework.HandleRetry(get)).
   619  		WithTimeout(timeout).
   620  		Should(framework.MakeMatcher(match))
   621  	if err != nil {
   622  		return fmt.Errorf("checking pod responses: %w", err)
   623  	}
   624  	return nil
   625  }
   626  
   627  func isElementOf(podUID apitypes.UID, pods *v1.PodList) bool {
   628  	for _, pod := range pods.Items {
   629  		if pod.UID == podUID {
   630  			return true
   631  		}
   632  	}
   633  	return false
   634  }
   635  
   636  // WaitForNumberOfPods waits up to timeout to ensure there are exact
   637  // `num` pods in namespace `ns`.
   638  // It returns the matching Pods or a timeout error.
   639  func WaitForNumberOfPods(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   640  	return WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, podScheduledBeforeTimeout, "exist", func(pod *v1.Pod) bool {
   641  		return true
   642  	})
   643  }
   644  
   645  // WaitForPodsWithLabelScheduled waits for all matching pods to become scheduled and at least one
   646  // matching pod exists.  Return the list of matching pods.
   647  func WaitForPodsWithLabelScheduled(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (pods *v1.PodList, err error) {
   648  	opts := metav1.ListOptions{LabelSelector: label.String()}
   649  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1, AllMatching: true}, podScheduledBeforeTimeout, "be scheduled", func(pod *v1.Pod) bool {
   650  		return pod.Spec.NodeName != ""
   651  	})
   652  }
   653  
   654  // WaitForPodsWithLabel waits up to podListTimeout for getting pods with certain label
   655  func WaitForPodsWithLabel(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (*v1.PodList, error) {
   656  	opts := metav1.ListOptions{LabelSelector: label.String()}
   657  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1}, podListTimeout, "exist", func(pod *v1.Pod) bool {
   658  		return true
   659  	})
   660  }
   661  
   662  // WaitForPodsWithLabelRunningReady waits for exact amount of matching pods to become running and ready.
   663  // Return the list of matching pods.
   664  func WaitForPodsWithLabelRunningReady(ctx context.Context, c clientset.Interface, ns string, label labels.Selector, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   665  	opts := metav1.ListOptions{LabelSelector: label.String()}
   666  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: num, AllMatching: true}, timeout, "be running and ready", RunningReady)
   667  }
   668  
   669  // WaitForNRestartablePods tries to list restarting pods using ps until it finds expect of them,
   670  // returning their names if it can do so before timeout.
   671  func WaitForNRestartablePods(ctx context.Context, ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) {
   672  	var pods []*v1.Pod
   673  
   674  	get := func(ctx context.Context) ([]*v1.Pod, error) {
   675  		return ps.List(), nil
   676  	}
   677  
   678  	match := func(allPods []*v1.Pod) (func() string, error) {
   679  		pods = FilterNonRestartablePods(allPods)
   680  		if len(pods) != expect {
   681  			return func() string {
   682  				return fmt.Sprintf("expected to find non-restartable %d pods, but found %d:\n%s", expect, len(pods), format.Object(pods, 1))
   683  			}, nil
   684  		}
   685  		return nil, nil
   686  	}
   687  
   688  	err := framework.Gomega().
   689  		Eventually(ctx, framework.HandleRetry(get)).
   690  		WithTimeout(timeout).
   691  		Should(framework.MakeMatcher(match))
   692  	if err != nil {
   693  		return nil, err
   694  	}
   695  
   696  	podNames := make([]string, len(pods))
   697  	for i, p := range pods {
   698  		podNames[i] = p.Name
   699  	}
   700  	return podNames, nil
   701  }
   702  
   703  // WaitForPodContainerToFail waits for the given Pod container to fail with the given reason, specifically due to
   704  // invalid container configuration. In this case, the container will remain in a waiting state with a specific
   705  // reason set, which should match the given reason.
   706  func WaitForPodContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error {
   707  	conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason)
   708  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   709  		switch pod.Status.Phase {
   710  		case v1.PodPending:
   711  			if len(pod.Status.ContainerStatuses) == 0 {
   712  				return false, nil
   713  			}
   714  			containerStatus := pod.Status.ContainerStatuses[containerIndex]
   715  			if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason {
   716  				return true, nil
   717  			}
   718  			return false, nil
   719  		case v1.PodFailed, v1.PodRunning, v1.PodSucceeded:
   720  			return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase)
   721  		}
   722  		return false, nil
   723  	})
   724  }
   725  
   726  // WaitForPodScheduled waits for the pod to be schedule, ie. the .spec.nodeName is set
   727  func WaitForPodScheduled(ctx context.Context, c clientset.Interface, namespace, podName string) error {
   728  	return WaitForPodCondition(ctx, c, namespace, podName, "pod is scheduled", podScheduledBeforeTimeout, func(pod *v1.Pod) (bool, error) {
   729  		return pod.Spec.NodeName != "", nil
   730  	})
   731  }
   732  
   733  // WaitForPodContainerStarted waits for the given Pod container to start, after a successful run of the startupProbe.
   734  func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, timeout time.Duration) error {
   735  	conditionDesc := fmt.Sprintf("container %d started", containerIndex)
   736  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   737  		if containerIndex > len(pod.Status.ContainerStatuses)-1 {
   738  			return false, nil
   739  		}
   740  		containerStatus := pod.Status.ContainerStatuses[containerIndex]
   741  		return *containerStatus.Started, nil
   742  	})
   743  }
   744  
   745  // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden".
   746  func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error {
   747  	conditionDesc := fmt.Sprintf("failed with reason %s", reason)
   748  	return WaitForPodCondition(ctx, c, pod.Namespace, pod.Name, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   749  		switch pod.Status.Phase {
   750  		case v1.PodSucceeded:
   751  			return true, errors.New("pod succeeded unexpectedly")
   752  		case v1.PodFailed:
   753  			if pod.Status.Reason == reason {
   754  				return true, nil
   755  			} else {
   756  				return true, fmt.Errorf("pod failed with reason %s", pod.Status.Reason)
   757  			}
   758  		}
   759  		return false, nil
   760  	})
   761  }
   762  
   763  // WaitForContainerRunning waits for the given Pod container to have a state of running
   764  func WaitForContainerRunning(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error {
   765  	conditionDesc := fmt.Sprintf("container %s running", containerName)
   766  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   767  		for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
   768  			for _, cs := range statuses {
   769  				if cs.Name == containerName {
   770  					return cs.State.Running != nil, nil
   771  				}
   772  			}
   773  		}
   774  		return false, nil
   775  	})
   776  }
   777  
   778  // WaitForContainerTerminated waits for the given Pod container to have a state of terminated
   779  func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error {
   780  	conditionDesc := fmt.Sprintf("container %s terminated", containerName)
   781  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   782  		for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
   783  			for _, cs := range statuses {
   784  				if cs.Name == containerName {
   785  					return cs.State.Terminated != nil, nil
   786  				}
   787  			}
   788  		}
   789  		return false, nil
   790  	})
   791  }
   792
View as plain text