
Source file src/k8s.io/kubernetes/test/e2e/scheduling/predicates.go

Documentation: k8s.io/kubernetes/test/e2e/scheduling

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scheduling
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"time"
    24  
    25  	v1 "k8s.io/api/core/v1"
    26  	nodev1 "k8s.io/api/node/v1"
    27  	"k8s.io/apimachinery/pkg/api/resource"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/apimachinery/pkg/util/intstr"
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  	"k8s.io/apimachinery/pkg/util/strategicpatch"
    33  	"k8s.io/apimachinery/pkg/util/uuid"
    34  	utilversion "k8s.io/apimachinery/pkg/util/version"
    35  	clientset "k8s.io/client-go/kubernetes"
    36  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    37  	"k8s.io/kubernetes/test/e2e/feature"
    38  	"k8s.io/kubernetes/test/e2e/framework"
    39  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    40  	e2eruntimeclass "k8s.io/kubernetes/test/e2e/framework/node/runtimeclass"
    41  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    42  	e2erc "k8s.io/kubernetes/test/e2e/framework/rc"
    43  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    44  	testutils "k8s.io/kubernetes/test/utils"
    45  	imageutils "k8s.io/kubernetes/test/utils/image"
    46  	admissionapi "k8s.io/pod-security-admission/api"
    47  
    48  	"github.com/onsi/ginkgo/v2"
    49  	"github.com/onsi/gomega"
    50  
    51  	// ensure libs have a chance to initialize
    52  	_ "github.com/stretchr/testify/assert"
    53  )
    54  
    55  const (
    56  	maxNumberOfPods int64 = 10
    57  	defaultTimeout        = 3 * time.Minute
    58  )
    59  
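        // localStorageVersion gates the local ephemeral storage test below; the test is
        // skipped on servers older than this version.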
    60  var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
    61  
    62  // workerNodes is populated in BeforeEach and never modified afterwards.
    63  var workerNodes = sets.Set[string]{}
    64  
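        // pausePodConfig describes the pause pod created by initPausePod; zero-value
        // fields are simply left out of the resulting Pod spec.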
    65  type pausePodConfig struct {
    66  	Name                              string
    67  	Namespace                         string
    68  	Finalizers                        []string
    69  	Affinity                          *v1.Affinity
    70  	Annotations, Labels, NodeSelector map[string]string
    71  	Resources                         *v1.ResourceRequirements
    72  	RuntimeClassHandler               *string
    73  	Tolerations                       []v1.Toleration
    74  	NodeName                          string
    75  	Ports                             []v1.ContainerPort
    76  	OwnerReferences                   []metav1.OwnerReference
    77  	PriorityClassName                 string
    78  	DeletionGracePeriodSeconds        *int64
    79  	TopologySpreadConstraints         []v1.TopologySpreadConstraint
    80  	SchedulingGates                   []v1.PodSchedulingGate
    81  }
    82  
    83  var _ = SIGDescribe("SchedulerPredicates", framework.WithSerial(), func() {
    84  	var cs clientset.Interface
    85  	var nodeList *v1.NodeList
    86  	var RCName string
    87  	var ns string
    88  	f := framework.NewDefaultFramework("sched-pred")
    89  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    90  
    91  	ginkgo.AfterEach(func(ctx context.Context) {
    92  		rc, err := cs.CoreV1().ReplicationControllers(ns).Get(ctx, RCName, metav1.GetOptions{})
    93  		if err == nil && *(rc.Spec.Replicas) != 0 {
    94  			ginkgo.By("Cleaning up the replication controller")
    95  			err := e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, ns, RCName)
    96  			framework.ExpectNoError(err)
    97  		}
    98  	})
    99  
   100  	ginkgo.BeforeEach(func(ctx context.Context) {
   101  		cs = f.ClientSet
   102  		ns = f.Namespace.Name
   103  		nodeList = &v1.NodeList{}
   104  		var err error
   105  
   106  		e2enode.AllNodesReady(ctx, cs, time.Minute)
   107  
   108  		nodeList, err = e2enode.GetReadySchedulableNodes(ctx, cs)
   109  		if err != nil {
   110  			framework.Logf("Unexpected error occurred: %v", err)
   111  		}
   112  		framework.ExpectNoErrorWithOffset(0, err)
   113  		for _, n := range nodeList.Items {
   114  			workerNodes.Insert(n.Name)
   115  		}
   116  
   117  		err = framework.CheckTestingNSDeletedExcept(ctx, cs, ns)
   118  		framework.ExpectNoError(err)
   119  
   120  		for _, node := range nodeList.Items {
   121  			framework.Logf("\nLogging pods the apiserver thinks are on node %v before test", node.Name)
   122  			printAllPodsOnNode(ctx, cs, node.Name)
   123  		}
   124  
   125  	})
   126  
   127  	// This test verifies we don't allow scheduling of pods in a way that the sum of local ephemeral storage resource requests of pods is greater than the machine's capacity.
   128  	// It assumes that cluster add-on pods stay stable, and it cannot be run in parallel with any other test that touches Nodes or Pods,
   129  	// because we need precise control over what's running in the cluster.
   130  	f.It("validates local ephemeral storage resource limits of pods that are allowed to run", feature.LocalStorageCapacityIsolation, func(ctx context.Context) {
   131  
   132  		e2eskipper.SkipUnlessServerVersionGTE(localStorageVersion, f.ClientSet.Discovery())
   133  
   134  		nodeMaxAllocatable := int64(0)
   135  
   136  		nodeToAllocatableMap := make(map[string]int64)
   137  		for _, node := range nodeList.Items {
   138  			allocatable, found := node.Status.Allocatable[v1.ResourceEphemeralStorage]
   139  			if !found {
   140  				framework.Failf("node.Status.Allocatable %v does not contain entry %v", node.Status.Allocatable, v1.ResourceEphemeralStorage)
   141  			}
   142  			nodeToAllocatableMap[node.Name] = allocatable.Value()
   143  			if nodeMaxAllocatable < allocatable.Value() {
   144  				nodeMaxAllocatable = allocatable.Value()
   145  			}
   146  		}
   147  		WaitForStableCluster(cs, workerNodes)
   148  
   149  		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
   150  		framework.ExpectNoError(err)
   151  		for _, pod := range pods.Items {
   152  			_, found := nodeToAllocatableMap[pod.Spec.NodeName]
   153  			if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
   154  				framework.Logf("Pod %v requesting local ephemeral resource=%v on Node %v", pod.Name, getRequestedStorageEphemeralStorage(pod), pod.Spec.NodeName)
   155  				nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedStorageEphemeralStorage(pod)
   156  			}
   157  		}
   158  
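        		// Each filler pod requests 1/10 of the largest node's allocatable ephemeral
        		// storage, so podsNeededForSaturation is the number of such pods that still fit
        		// into the remaining capacity across all nodes.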
   159  		var podsNeededForSaturation int
   160  		ephemeralStoragePerPod := nodeMaxAllocatable / maxNumberOfPods
   161  
   162  		framework.Logf("Using pod capacity: %v", ephemeralStoragePerPod)
   163  		for name, leftAllocatable := range nodeToAllocatableMap {
   164  			framework.Logf("Node: %v has local ephemeral resource allocatable: %v", name, leftAllocatable)
   165  			podsNeededForSaturation += (int)(leftAllocatable / ephemeralStoragePerPod)
   166  		}
   167  
   168  		ginkgo.By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster local ephemeral resource and trying to start another one", podsNeededForSaturation))
   169  
   170  		// As the pods are distributed randomly among nodes,
   171  		// it can easily happen that all nodes are saturated
   172  		// and there is no need to create additional pods.
   173  		// StartPods requires at least one pod to replicate.
   174  		if podsNeededForSaturation > 0 {
   175  			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
   176  				*initPausePod(f, pausePodConfig{
   177  					Name:   "",
   178  					Labels: map[string]string{"name": ""},
   179  					Resources: &v1.ResourceRequirements{
   180  						Limits: v1.ResourceList{
   181  							v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   182  						},
   183  						Requests: v1.ResourceList{
   184  							v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   185  						},
   186  					},
   187  				}), true, framework.Logf))
   188  		}
   189  		podName := "additional-pod"
   190  		conf := pausePodConfig{
   191  			Name:   podName,
   192  			Labels: map[string]string{"name": "additional"},
   193  			Resources: &v1.ResourceRequirements{
   194  				Limits: v1.ResourceList{
   195  					v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   196  				},
   197  				Requests: v1.ResourceList{
   198  					v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   199  				},
   200  			},
   201  		}
   202  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   203  		verifyResult(ctx, cs, podsNeededForSaturation, 1, ns)
   204  	})
   205  
   206  	// This test verifies we don't allow scheduling of pods in a way that the sum of limits +
   207  	// associated overhead is greater than the machine's capacity.
   208  	// It assumes that cluster add-on pods stay stable, and it cannot be run in parallel
   209  	// with any other test that touches Nodes or Pods,
   210  	// because we need precise control over what's running in the cluster.
   211  	// Test scenario:
   212  	// 1. Find the first ready node on the system, and add a fake resource for the test.
   213  	// 2. Create one pod with affinity to that node which uses 70% of the fake resource.
   214  	// 3. Wait for the pod to be scheduled.
   215  	// 4. Create another pod with affinity to the particular node that needs 20% of the fake resource and
   216  	//    an overhead set as 25% of the fake resource.
   217  	// 5. Make sure this additional pod is not scheduled.
   218  
   219  	ginkgo.Context("validates pod overhead is considered along with resource limits of pods that are allowed to run", func() {
   220  		var testNodeName string
   221  		var handler string
   222  		var beardsecond v1.ResourceName = "example.com/beardsecond"
   223  
   224  		ginkgo.BeforeEach(func(ctx context.Context) {
   225  			WaitForStableCluster(cs, workerNodes)
   226  			ginkgo.By("Add RuntimeClass and fake resource")
   227  
   228  			// find a node which can run a pod:
   229  			testNodeName = GetNodeThatCanRunPod(ctx, f)
   230  
   231  			// Get node object:
   232  			node, err := cs.CoreV1().Nodes().Get(ctx, testNodeName, metav1.GetOptions{})
   233  			framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName)
   234  
   235  			// update Node API object with a fake resource
   236  			nodeCopy := node.DeepCopy()
   237  			nodeCopy.ResourceVersion = "0"
   238  
   239  			nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000")
   240  			_, err = cs.CoreV1().Nodes().UpdateStatus(ctx, nodeCopy, metav1.UpdateOptions{})
   241  			framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName)
   242  
   243  			// Register a runtimeClass with overhead set as 25% of the available beard-seconds
   244  			handler = e2eruntimeclass.PreconfiguredRuntimeClassHandler
   245  
   246  			rc := &nodev1.RuntimeClass{
   247  				ObjectMeta: metav1.ObjectMeta{Name: handler},
   248  				Handler:    handler,
   249  				Overhead: &nodev1.Overhead{
   250  					PodFixed: v1.ResourceList{
   251  						beardsecond: resource.MustParse("250"),
   252  					},
   253  				},
   254  			}
   255  			_, err = cs.NodeV1().RuntimeClasses().Create(ctx, rc, metav1.CreateOptions{})
   256  			framework.ExpectNoError(err, "failed to create RuntimeClass resource")
   257  		})
   258  
   259  		ginkgo.AfterEach(func(ctx context.Context) {
   260  			ginkgo.By("Remove fake resource and RuntimeClass")
   261  			// remove fake resource:
   262  			if testNodeName != "" {
   263  				// Get node object:
   264  				node, err := cs.CoreV1().Nodes().Get(ctx, testNodeName, metav1.GetOptions{})
   265  				framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName)
   266  
   267  				nodeCopy := node.DeepCopy()
   268  				// force it to update
   269  				nodeCopy.ResourceVersion = "0"
   270  				delete(nodeCopy.Status.Capacity, beardsecond)
   271  				_, err = cs.CoreV1().Nodes().UpdateStatus(ctx, nodeCopy, metav1.UpdateOptions{})
   272  				framework.ExpectNoError(err, "unable to update node %v", testNodeName)
   273  			}
   274  
   275  			// remove RuntimeClass
   276  			_ = cs.NodeV1().RuntimeClasses().Delete(ctx, e2eruntimeclass.PreconfiguredRuntimeClassHandler, metav1.DeleteOptions{})
   277  		})
   278  
   279  		ginkgo.It("verify pod overhead is accounted for", func(ctx context.Context) {
   280  			if testNodeName == "" {
   281  				framework.Fail("unable to find a node which can run a pod")
   282  			}
   283  
   284  			ginkgo.By("Starting Pod to consume most of the node's resource.")
   285  
   286  			// Create pod which requires 70% of the available beard-seconds.
   287  			fillerPod := createPausePod(ctx, f, pausePodConfig{
   288  				Name: "filler-pod-" + string(uuid.NewUUID()),
   289  				Resources: &v1.ResourceRequirements{
   290  					Requests: v1.ResourceList{beardsecond: resource.MustParse("700")},
   291  					Limits:   v1.ResourceList{beardsecond: resource.MustParse("700")},
   292  				},
   293  			})
   294  
   295  			// Wait for filler pod to schedule.
   296  			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, fillerPod))
   297  
   298  			ginkgo.By("Creating another pod that requires unavailable amount of resources.")
   299  			// Create another pod that requires 20% of available beard-seconds, but utilizes the RuntimeClass
   300  			// which defines a pod overhead that requires an additional 25%.
   301  			// This pod should remain pending as at least 70% of the beard-seconds on
   302  			// the node are already consumed.
   303  			podName := "additional-pod" + string(uuid.NewUUID())
   304  			conf := pausePodConfig{
   305  				RuntimeClassHandler: &handler,
   306  				Name:                podName,
   307  				Labels:              map[string]string{"name": "additional"},
   308  				Resources: &v1.ResourceRequirements{
   309  					Limits: v1.ResourceList{beardsecond: resource.MustParse("200")},
   310  				},
   311  			}
   312  
   313  			WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   314  			verifyResult(ctx, cs, 1, 1, ns)
   315  		})
   316  	})
   317  
   318  	// This test verifies we don't allow scheduling of pods in a way that the sum of
   319  	// resource requests of pods is greater than the machine's capacity.
   320  	// It assumes that cluster add-on pods stay stable, and it cannot be run in parallel
   321  	// with any other test that touches Nodes or Pods,
   322  	// because we need precise control over what's running in the cluster.
   323  	// Test scenario:
   324  	// 1. Find the amount of allocatable CPU resources on each node.
   325  	// 2. Create one pod with affinity to each node that uses 70% of the node's CPU.
   326  	// 3. Wait for the pods to be scheduled.
   327  	// 4. Create another pod with no node affinity that needs 50% of the largest node's CPU.
   328  	// 5. Make sure this additional pod is not scheduled.
   329  	/*
   330  		Release: v1.9
   331  		Testname: Scheduler, resource limits
   332  		Description: Scheduling Pods MUST fail if the resource requests exceed Machine capacity.
   333  	*/
   334  	framework.ConformanceIt("validates resource limits of pods that are allowed to run", func(ctx context.Context) {
   335  		WaitForStableCluster(cs, workerNodes)
   336  		nodeMaxAllocatable := int64(0)
   337  		nodeToAllocatableMap := make(map[string]int64)
   338  		for _, node := range nodeList.Items {
   339  			nodeReady := false
   340  			for _, condition := range node.Status.Conditions {
   341  				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
   342  					nodeReady = true
   343  					break
   344  				}
   345  			}
   346  			if !nodeReady {
   347  				continue
   348  			}
   349  			// Apply node label to each node
   350  			e2enode.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
   351  			e2enode.ExpectNodeHasLabel(ctx, cs, node.Name, "node", node.Name)
   352  			// Find allocatable amount of CPU.
   353  			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
   354  			if !found {
   355  				framework.Failf("node.Status.Allocatable %v does not contain entry %v", node.Status.Allocatable, v1.ResourceCPU)
   356  			}
   357  			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
   358  			if nodeMaxAllocatable < allocatable.MilliValue() {
   359  				nodeMaxAllocatable = allocatable.MilliValue()
   360  			}
   361  		}
   362  		// Clean up added labels after this test.
   363  		defer func() {
   364  			for nodeName := range nodeToAllocatableMap {
   365  				e2enode.RemoveLabelOffNode(cs, nodeName, "node")
   366  			}
   367  		}()
   368  
   369  		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
   370  		framework.ExpectNoError(err)
   371  		for _, pod := range pods.Items {
   372  			_, found := nodeToAllocatableMap[pod.Spec.NodeName]
   373  			if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
   374  				framework.Logf("Pod %v requesting resource cpu=%vm on Node %v", pod.Name, getRequestedCPU(pod), pod.Spec.NodeName)
   375  				nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedCPU(pod)
   376  			}
   377  		}
   378  
   379  		ginkgo.By("Starting Pods to consume most of the cluster CPU.")
   380  		// Create one pod per node that requires 70% of the node remaining CPU.
   381  		fillerPods := []*v1.Pod{}
   382  		for nodeName, cpu := range nodeToAllocatableMap {
   383  			requestedCPU := cpu * 7 / 10
   384  			framework.Logf("Creating a pod which consumes cpu=%vm on Node %v", requestedCPU, nodeName)
   385  			fillerPods = append(fillerPods, createPausePod(ctx, f, pausePodConfig{
   386  				Name: "filler-pod-" + string(uuid.NewUUID()),
   387  				Resources: &v1.ResourceRequirements{
   388  					Limits: v1.ResourceList{
   389  						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
   390  					},
   391  					Requests: v1.ResourceList{
   392  						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
   393  					},
   394  				},
   395  				Affinity: &v1.Affinity{
   396  					NodeAffinity: &v1.NodeAffinity{
   397  						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   398  							NodeSelectorTerms: []v1.NodeSelectorTerm{
   399  								{
   400  									MatchExpressions: []v1.NodeSelectorRequirement{
   401  										{
   402  											Key:      "node",
   403  											Operator: v1.NodeSelectorOpIn,
   404  											Values:   []string{nodeName},
   405  										},
   406  									},
   407  								},
   408  							},
   409  						},
   410  					},
   411  				},
   412  			}))
   413  		}
   414  		// Wait for filler pods to schedule.
   415  		for _, pod := range fillerPods {
   416  			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod))
   417  		}
   418  		ginkgo.By("Creating another pod that requires unavailable amount of CPU.")
   419  		// Create another pod that requires 50% of the largest node CPU resources.
   420  		// This pod should remain pending as at least 70% of the CPU of every node in
   421  		// the cluster is already consumed.
   422  		podName := "additional-pod"
   423  		conf := pausePodConfig{
   424  			Name:   podName,
   425  			Labels: map[string]string{"name": "additional"},
   426  			Resources: &v1.ResourceRequirements{
   427  				Limits: v1.ResourceList{
   428  					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
   429  				},
   430  				Requests: v1.ResourceList{
   431  					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
   432  				},
   433  			},
   434  		}
   435  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   436  		verifyResult(ctx, cs, len(fillerPods), 1, ns)
   437  	})
   438  
   439  	// Test nodes do not carry the label used below, hence it should be impossible to schedule a Pod with
   440  	// that nonempty NodeSelector.
   441  	/*
   442  		Release: v1.9
   443  		Testname: Scheduler, node selector not matching
   444  		Description: Create a Pod with a NodeSelector set to a value that does not match a node in the cluster. Since there are no nodes matching the criteria the Pod MUST not be scheduled.
   445  	*/
   446  	framework.ConformanceIt("validates that NodeSelector is respected if not matching", func(ctx context.Context) {
   447  		ginkgo.By("Trying to schedule Pod with nonempty NodeSelector.")
   448  		podName := "restricted-pod"
   449  
   450  		WaitForStableCluster(cs, workerNodes)
   451  
   452  		conf := pausePodConfig{
   453  			Name:   podName,
   454  			Labels: map[string]string{"name": "restricted"},
   455  			NodeSelector: map[string]string{
   456  				"label": "nonempty",
   457  			},
   458  		}
   459  
   460  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   461  		verifyResult(ctx, cs, 0, 1, ns)
   462  	})
   463  
   464  	/*
   465  		Release: v1.9
   466  		Testname: Scheduler, node selector matching
   467  		Description: Create a label on the node {k: v}. Then create a Pod with a NodeSelector set to {k: v}. Check to see if the Pod is scheduled. When the NodeSelector matches then Pod MUST be scheduled on that node.
   468  	*/
   469  	framework.ConformanceIt("validates that NodeSelector is respected if matching", func(ctx context.Context) {
   470  		nodeName := GetNodeThatCanRunPod(ctx, f)
   471  
   472  		ginkgo.By("Trying to apply a random label on the found node.")
   473  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   474  		v := "42"
   475  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   476  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   477  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   478  
   479  		ginkgo.By("Trying to relaunch the pod, now with labels.")
   480  		labelPodName := "with-labels"
   481  		createPausePod(ctx, f, pausePodConfig{
   482  			Name: labelPodName,
   483  			NodeSelector: map[string]string{
   484  				k: v,
   485  			},
   486  		})
   487  
   488  		// check that pod got scheduled. We intentionally DO NOT check that the
   489  		// pod is running because this will create a race condition with the
   490  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   491  		// already when the kubelet does not know about its new label yet. The
   492  		// kubelet will then refuse to launch the pod.
   493  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, labelPodName))
   494  		labelPod, err := cs.CoreV1().Pods(ns).Get(ctx, labelPodName, metav1.GetOptions{})
   495  		framework.ExpectNoError(err)
   496  		gomega.Expect(labelPod.Spec.NodeName).To(gomega.Equal(nodeName))
   497  	})
   498  
   499  	// Test nodes do not carry the labels used below, hence it should be impossible to schedule a Pod with
   500  	// this non-nil NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.
   501  	ginkgo.It("validates that NodeAffinity is respected if not matching", func(ctx context.Context) {
   502  		ginkgo.By("Trying to schedule Pod with nonempty NodeSelector.")
   503  		podName := "restricted-pod"
   504  
   505  		WaitForStableCluster(cs, workerNodes)
   506  
   507  		conf := pausePodConfig{
   508  			Name: podName,
   509  			Affinity: &v1.Affinity{
   510  				NodeAffinity: &v1.NodeAffinity{
   511  					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   512  						NodeSelectorTerms: []v1.NodeSelectorTerm{
   513  							{
   514  								MatchExpressions: []v1.NodeSelectorRequirement{
   515  									{
   516  										Key:      "foo",
   517  										Operator: v1.NodeSelectorOpIn,
   518  										Values:   []string{"bar", "value2"},
   519  									},
   520  								},
   521  							}, {
   522  								MatchExpressions: []v1.NodeSelectorRequirement{
   523  									{
   524  										Key:      "diffkey",
   525  										Operator: v1.NodeSelectorOpIn,
   526  										Values:   []string{"wrong", "value2"},
   527  									},
   528  								},
   529  							},
   530  						},
   531  					},
   532  				},
   533  			},
   534  			Labels: map[string]string{"name": "restricted"},
   535  		}
   536  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   537  		verifyResult(ctx, cs, 0, 1, ns)
   538  	})
   539  
   540  	// Keep the same steps with the test on NodeSelector,
   541  	// but specify Affinity in Pod.Spec.Affinity, instead of NodeSelector.
   542  	ginkgo.It("validates that required NodeAffinity setting is respected if matching", func(ctx context.Context) {
   543  		nodeName := GetNodeThatCanRunPod(ctx, f)
   544  
   545  		ginkgo.By("Trying to apply a random label on the found node.")
   546  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   547  		v := "42"
   548  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   549  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   550  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   551  
   552  		ginkgo.By("Trying to relaunch the pod, now with labels.")
   553  		labelPodName := "with-labels"
   554  		createPausePod(ctx, f, pausePodConfig{
   555  			Name: labelPodName,
   556  			Affinity: &v1.Affinity{
   557  				NodeAffinity: &v1.NodeAffinity{
   558  					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   559  						NodeSelectorTerms: []v1.NodeSelectorTerm{
   560  							{
   561  								MatchExpressions: []v1.NodeSelectorRequirement{
   562  									{
   563  										Key:      k,
   564  										Operator: v1.NodeSelectorOpIn,
   565  										Values:   []string{v},
   566  									},
   567  								},
   568  							},
   569  						},
   570  					},
   571  				},
   572  			},
   573  		})
   574  
   575  		// check that pod got scheduled. We intentionally DO NOT check that the
   576  		// pod is running because this will create a race condition with the
   577  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   578  		// already when the kubelet does not know about its new label yet. The
   579  		// kubelet will then refuse to launch the pod.
   580  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, labelPodName))
   581  		labelPod, err := cs.CoreV1().Pods(ns).Get(ctx, labelPodName, metav1.GetOptions{})
   582  		framework.ExpectNoError(err)
   583  		gomega.Expect(labelPod.Spec.NodeName).To(gomega.Equal(nodeName))
   584  	})
   585  
   586  	// 1. Run a pod to get an available node, then delete the pod
   587  	// 2. Taint the node with a random taint
   588  	// 3. Relaunch the pod with a toleration that tolerates the taint on the node,
   589  	// and a node selector that targets the node found in step 1
   590  	ginkgo.It("validates that taints-tolerations is respected if matching", func(ctx context.Context) {
   591  		nodeName := getNodeThatCanRunPodWithoutToleration(ctx, f)
   592  
   593  		ginkgo.By("Trying to apply a random taint on the found node.")
   594  		testTaint := v1.Taint{
   595  			Key:    fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
   596  			Value:  "testing-taint-value",
   597  			Effect: v1.TaintEffectNoSchedule,
   598  		}
   599  		e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint)
   600  		e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint)
   601  		ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint)
   602  
   603  		ginkgo.By("Trying to apply a random label on the found node.")
   604  		labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
   605  		labelValue := "testing-label-value"
   606  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
   607  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, labelKey, labelValue)
   608  		defer e2enode.RemoveLabelOffNode(cs, nodeName, labelKey)
   609  
   610  		ginkgo.By("Trying to relaunch the pod, now with tolerations.")
   611  		tolerationPodName := "with-tolerations"
   612  		createPausePod(ctx, f, pausePodConfig{
   613  			Name:         tolerationPodName,
   614  			Tolerations:  []v1.Toleration{{Key: testTaint.Key, Value: testTaint.Value, Effect: testTaint.Effect}},
   615  			NodeSelector: map[string]string{labelKey: labelValue},
   616  		})
   617  
   618  		// check that pod got scheduled. We intentionally DO NOT check that the
   619  		// pod is running because this will create a race condition with the
   620  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   621  		// already when the kubelet does not know about its new taint yet. The
   622  		// kubelet will then refuse to launch the pod.
   623  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, tolerationPodName))
   624  		deployedPod, err := cs.CoreV1().Pods(ns).Get(ctx, tolerationPodName, metav1.GetOptions{})
   625  		framework.ExpectNoError(err)
   626  		gomega.Expect(deployedPod.Spec.NodeName).To(gomega.Equal(nodeName))
   627  	})
   628  
   629  	// 1. Run a pod to get an available node, then delete the pod
   630  	// 2. Taint the node with a random taint
   631  	// 3. Relaunch the pod, still with no tolerations,
   632  	// and a node selector that targets the node found in step 1
   633  	ginkgo.It("validates that taints-tolerations is respected if not matching", func(ctx context.Context) {
   634  		nodeName := getNodeThatCanRunPodWithoutToleration(ctx, f)
   635  
   636  		ginkgo.By("Trying to apply a random taint on the found node.")
   637  		testTaint := v1.Taint{
   638  			Key:    fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
   639  			Value:  "testing-taint-value",
   640  			Effect: v1.TaintEffectNoSchedule,
   641  		}
   642  		e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint)
   643  		e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint)
   644  		ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint)
   645  
   646  		ginkgo.By("Trying to apply a random label on the found node.")
   647  		labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
   648  		labelValue := "testing-label-value"
   649  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
   650  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, labelKey, labelValue)
   651  		defer e2enode.RemoveLabelOffNode(cs, nodeName, labelKey)
   652  
   653  		ginkgo.By("Trying to relaunch the pod, still no tolerations.")
   654  		podNameNoTolerations := "still-no-tolerations"
   655  		conf := pausePodConfig{
   656  			Name:         podNameNoTolerations,
   657  			NodeSelector: map[string]string{labelKey: labelValue},
   658  		}
   659  
   660  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podNameNoTolerations, false)
   661  		verifyResult(ctx, cs, 0, 1, ns)
   662  
   663  		ginkgo.By("Removing taint off the node")
   664  		WaitForSchedulerAfterAction(ctx, f, removeTaintFromNodeAction(cs, nodeName, testTaint), ns, podNameNoTolerations, true)
   665  		verifyResult(ctx, cs, 1, 0, ns)
   666  	})
   667  
   668  	ginkgo.It("validates that there is no conflict between pods with same hostPort but different hostIP and protocol", func(ctx context.Context) {
   669  
   670  		nodeName := GetNodeThatCanRunPod(ctx, f)
   671  		localhost := "127.0.0.1"
   672  		if framework.TestContext.ClusterIsIPv6() {
   673  			localhost = "::1"
   674  		}
   675  		hostIP := getNodeHostIP(ctx, f, nodeName)
   676  
   677  		// use a nodeSelector to make sure the test pods get assigned to the same node, so we can explicitly verify whether a hostPort conflict exists
   678  		ginkgo.By("Trying to apply a random label on the found node.")
   679  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   680  		v := "90"
   681  
   682  		nodeSelector := make(map[string]string)
   683  		nodeSelector[k] = v
   684  
   685  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   686  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   687  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   688  
   689  		port := int32(54321)
   690  		ginkgo.By(fmt.Sprintf("Trying to create a pod(pod1) with hostport %v and hostIP %s and expect scheduled", port, localhost))
   691  		createHostPortPodOnNode(ctx, f, "pod1", ns, localhost, port, v1.ProtocolTCP, nodeSelector, true)
   692  
   693  		ginkgo.By(fmt.Sprintf("Trying to create another pod(pod2) with hostport %v but hostIP %s on the node where pod1 resides and expect it to be scheduled", port, hostIP))
   694  		createHostPortPodOnNode(ctx, f, "pod2", ns, hostIP, port, v1.ProtocolTCP, nodeSelector, true)
   695  
   696  		ginkgo.By(fmt.Sprintf("Trying to create a third pod(pod3) with hostport %v, hostIP %s but using the UDP protocol on the node where pod2 resides", port, hostIP))
   697  		createHostPortPodOnNode(ctx, f, "pod3", ns, hostIP, port, v1.ProtocolUDP, nodeSelector, true)
   698  
   699  	})
   700  
   701  	/*
   702  		Release: v1.16
   703  		Testname: Scheduling, HostPort and Protocol match, HostIPs different but one is default HostIP (0.0.0.0)
   704  		Description: Pods with the same HostPort and Protocol, but different HostIPs, MUST NOT schedule to the
   705  		same node if one of those IPs is the default HostIP of 0.0.0.0, which represents all IPs on the host.
   706  	*/
   707  	framework.ConformanceIt("validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP", func(ctx context.Context) {
   708  		nodeName := GetNodeThatCanRunPod(ctx, f)
   709  		hostIP := getNodeHostIP(ctx, f, nodeName)
   710  		// use a nodeSelector to make sure the test pods get assigned to the same node, so we can explicitly verify whether a hostPort conflict exists
   711  		ginkgo.By("Trying to apply a random label on the found node.")
   712  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   713  		v := "95"
   714  
   715  		nodeSelector := make(map[string]string)
   716  		nodeSelector[k] = v
   717  
   718  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   719  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   720  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   721  
   722  		port := int32(54322)
   723  		ginkgo.By(fmt.Sprintf("Trying to create a pod(pod4) with hostport %v and hostIP 0.0.0.0(empty string here) and expect scheduled", port))
   724  		createHostPortPodOnNode(ctx, f, "pod4", ns, "", port, v1.ProtocolTCP, nodeSelector, true)
   725  
   726  		ginkgo.By(fmt.Sprintf("Trying to create another pod(pod5) with hostport %v but hostIP %s on the node where pod4 resides and expect it not to be scheduled", port, hostIP))
   727  		createHostPortPodOnNode(ctx, f, "pod5", ns, hostIP, port, v1.ProtocolTCP, nodeSelector, false)
   728  	})
   729  
   730  	ginkgo.Context("PodTopologySpread Filtering", func() {
   731  		var nodeNames []string
   732  		topologyKey := "kubernetes.io/e2e-pts-filter"
   733  
   734  		ginkgo.BeforeEach(func(ctx context.Context) {
   735  			if len(nodeList.Items) < 2 {
   736  				ginkgo.Skip("At least 2 nodes are required to run the test")
   737  			}
   738  			ginkgo.By("Trying to get 2 available nodes which can run pod")
   739  			nodeNames = Get2NodesThatCanRunPod(ctx, f)
   740  			ginkgo.By(fmt.Sprintf("Apply dedicated topologyKey %v for this test on the 2 nodes.", topologyKey))
   741  			for _, nodeName := range nodeNames {
   742  				e2enode.AddOrUpdateLabelOnNode(cs, nodeName, topologyKey, nodeName)
   743  			}
   744  		})
   745  		ginkgo.AfterEach(func() {
   746  			for _, nodeName := range nodeNames {
   747  				e2enode.RemoveLabelOffNode(cs, nodeName, topologyKey)
   748  			}
   749  		})
   750  
   751  		ginkgo.It("validates 4 pods with MaxSkew=1 are evenly distributed into 2 nodes", func(ctx context.Context) {
   752  			podLabel := "e2e-pts-filter"
   753  			replicas := 4
   754  			rsConfig := pauseRSConfig{
   755  				Replicas: int32(replicas),
   756  				PodConfig: pausePodConfig{
   757  					Name:      podLabel,
   758  					Namespace: ns,
   759  					Labels:    map[string]string{podLabel: ""},
   760  					Affinity: &v1.Affinity{
   761  						NodeAffinity: &v1.NodeAffinity{
   762  							RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   763  								NodeSelectorTerms: []v1.NodeSelectorTerm{
   764  									{
   765  										MatchExpressions: []v1.NodeSelectorRequirement{
   766  											{
   767  												Key:      topologyKey,
   768  												Operator: v1.NodeSelectorOpIn,
   769  												Values:   nodeNames,
   770  											},
   771  										},
   772  									},
   773  								},
   774  							},
   775  						},
   776  					},
   777  					TopologySpreadConstraints: []v1.TopologySpreadConstraint{
   778  						{
   779  							MaxSkew:           1,
   780  							TopologyKey:       topologyKey,
   781  							WhenUnsatisfiable: v1.DoNotSchedule,
   782  							LabelSelector: &metav1.LabelSelector{
   783  								MatchExpressions: []metav1.LabelSelectorRequirement{
   784  									{
   785  										Key:      podLabel,
   786  										Operator: metav1.LabelSelectorOpExists,
   787  									},
   788  								},
   789  							},
   790  						},
   791  					},
   792  				},
   793  			}
   794  			runPauseRS(ctx, f, rsConfig)
   795  			podList, err := cs.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   796  			framework.ExpectNoError(err)
   797  			numInNode1, numInNode2 := 0, 0
   798  			for _, pod := range podList.Items {
   799  				if pod.Spec.NodeName == nodeNames[0] {
   800  					numInNode1++
   801  				} else if pod.Spec.NodeName == nodeNames[1] {
   802  					numInNode2++
   803  				}
   804  			}
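        			// With maxSkew=1 and whenUnsatisfiable=DoNotSchedule, the 4 replicas must be
        			// spread evenly across the two labeled nodes, i.e. 2 pods per node.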
   805  			expected := replicas / len(nodeNames)
   806  			gomega.Expect(numInNode1).To(gomega.Equal(expected), fmt.Sprintf("Pods are not distributed as expected on node %q", nodeNames[0]))
   807  			gomega.Expect(numInNode2).To(gomega.Equal(expected), fmt.Sprintf("Pods are not distributed as expected on node %q", nodeNames[1]))
   808  		})
   809  	})
   810  
   811  	ginkgo.It("validates Pods with non-empty schedulingGates are blocked on scheduling", func(ctx context.Context) {
   812  		podLabel := "e2e-scheduling-gates"
   813  		replicas := 3
   814  		ginkgo.By(fmt.Sprintf("Creating a ReplicaSet with replicas=%v, carrying scheduling gates [foo bar]", replicas))
   815  		rsConfig := pauseRSConfig{
   816  			Replicas: int32(replicas),
   817  			PodConfig: pausePodConfig{
   818  				Name:      podLabel,
   819  				Namespace: ns,
   820  				Labels:    map[string]string{podLabel: ""},
   821  				SchedulingGates: []v1.PodSchedulingGate{
   822  					{Name: "foo"},
   823  					{Name: "bar"},
   824  				},
   825  			},
   826  		}
   827  		createPauseRS(ctx, f, rsConfig)
   828  
   829  		ginkgo.By("Expect all pods stay in pending state")
   830  		podList, err := e2epod.WaitForNumberOfPods(ctx, cs, ns, replicas, time.Minute)
   831  		framework.ExpectNoError(err)
   832  		framework.ExpectNoError(e2epod.WaitForPodsSchedulingGated(ctx, cs, ns, replicas, time.Minute))
   833  
   834  		ginkgo.By("Remove one scheduling gate")
   835  		want := []v1.PodSchedulingGate{{Name: "bar"}}
   836  		var pods []*v1.Pod
   837  		for _, pod := range podList.Items {
   838  			clone := pod.DeepCopy()
   839  			clone.Spec.SchedulingGates = want
   840  			live, err := patchPod(cs, &pod, clone)
   841  			framework.ExpectNoError(err)
   842  			pods = append(pods, live)
   843  		}
   844  
   845  		ginkgo.By("Expect all pods carry one scheduling gate and are still in pending state")
   846  		framework.ExpectNoError(e2epod.WaitForPodsWithSchedulingGates(ctx, cs, ns, replicas, time.Minute, want))
   847  		framework.ExpectNoError(e2epod.WaitForPodsSchedulingGated(ctx, cs, ns, replicas, time.Minute))
   848  
   849  		ginkgo.By("Remove the remaining scheduling gates")
   850  		for _, pod := range pods {
   851  			clone := pod.DeepCopy()
   852  			clone.Spec.SchedulingGates = nil
   853  			_, err := patchPod(cs, pod, clone)
   854  			framework.ExpectNoError(err)
   855  		}
   856  
   857  		ginkgo.By("Expect all pods are scheduled and running")
   858  		framework.ExpectNoError(e2epod.WaitForPodsRunning(ctx, cs, ns, replicas, time.Minute))
   859  	})
   860  })
   861  
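        // patchPod computes a strategic merge patch that turns old into new and applies it
        // to the Pod on the server, returning the patched object.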
   862  func patchPod(cs clientset.Interface, old, new *v1.Pod) (*v1.Pod, error) {
   863  	oldData, err := json.Marshal(old)
   864  	if err != nil {
   865  		return nil, err
   866  	}
   867  
   868  	newData, err := json.Marshal(new)
   869  	if err != nil {
   870  		return nil, err
   871  	}
   872  	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
   873  	if err != nil {
   874  		return nil, fmt.Errorf("failed to create merge patch for Pod %q: %w", old.Name, err)
   875  	}
   876  	return cs.CoreV1().Pods(new.Namespace).Patch(context.TODO(), new.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
   877  }
   878  
   879  // printAllPodsOnNode logs the status of all pods that the API server reports on the given node.
   880  func printAllPodsOnNode(ctx context.Context, c clientset.Interface, nodeName string) {
   881  	podList, err := c.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{FieldSelector: "spec.nodeName=" + nodeName})
   882  	if err != nil {
   883  		framework.Logf("Unable to retrieve pods for node %v: %v", nodeName, err)
   884  		return
   885  	}
   886  	for _, p := range podList.Items {
   887  		framework.Logf("%v from %v started at %v (%d container statuses recorded)", p.Name, p.Namespace, p.Status.StartTime, len(p.Status.ContainerStatuses))
   888  		for _, c := range p.Status.ContainerStatuses {
   889  			framework.Logf("\tContainer %v ready: %v, restart count %v",
   890  				c.Name, c.Ready, c.RestartCount)
   891  		}
   892  	}
   893  }
   894  
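        // initPausePod builds (but does not create) a pause Pod object from the given
        // pausePodConfig.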
   895  func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
   896  	var gracePeriod = int64(1)
   897  	pod := &v1.Pod{
   898  		ObjectMeta: metav1.ObjectMeta{
   899  			Name:            conf.Name,
   900  			Namespace:       conf.Namespace,
   901  			Labels:          map[string]string{},
   902  			Annotations:     map[string]string{},
   903  			OwnerReferences: conf.OwnerReferences,
   904  			Finalizers:      conf.Finalizers,
   905  		},
   906  		Spec: v1.PodSpec{
   907  			SecurityContext:           e2epod.GetRestrictedPodSecurityContext(),
   908  			NodeSelector:              conf.NodeSelector,
   909  			Affinity:                  conf.Affinity,
   910  			TopologySpreadConstraints: conf.TopologySpreadConstraints,
   911  			RuntimeClassName:          conf.RuntimeClassHandler,
   912  			Containers: []v1.Container{
   913  				{
   914  					Name:            conf.Name,
   915  					Image:           imageutils.GetPauseImageName(),
   916  					Ports:           conf.Ports,
   917  					SecurityContext: e2epod.GetRestrictedContainerSecurityContext(),
   918  				},
   919  			},
   920  			Tolerations:                   conf.Tolerations,
   921  			PriorityClassName:             conf.PriorityClassName,
   922  			TerminationGracePeriodSeconds: &gracePeriod,
   923  			SchedulingGates:               conf.SchedulingGates,
   924  		},
   925  	}
   926  	for key, value := range conf.Labels {
   927  		pod.ObjectMeta.Labels[key] = value
   928  	}
   929  	for key, value := range conf.Annotations {
   930  		pod.ObjectMeta.Annotations[key] = value
   931  	}
   932  	// TODO: setting the Pod's nodeAffinity instead of setting .spec.nodeName works around the
   933  	// Preemption e2e flake (#88441), but we should investigate deeper to get to the bottom of it.
   934  	if len(conf.NodeName) != 0 {
   935  		e2epod.SetNodeAffinity(&pod.Spec, conf.NodeName)
   936  	}
   937  	if conf.Resources != nil {
   938  		pod.Spec.Containers[0].Resources = *conf.Resources
   939  	}
   940  	if conf.DeletionGracePeriodSeconds != nil {
   941  		pod.ObjectMeta.DeletionGracePeriodSeconds = conf.DeletionGracePeriodSeconds
   942  	}
   943  	return pod
   944  }
   945  
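        // createPausePod creates the pause pod described by conf, defaulting to the
        // framework's namespace when none is set, and fails the test on error.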
   946  func createPausePod(ctx context.Context, f *framework.Framework, conf pausePodConfig) *v1.Pod {
   947  	namespace := conf.Namespace
   948  	if len(namespace) == 0 {
   949  		namespace = f.Namespace.Name
   950  	}
   951  	pod, err := f.ClientSet.CoreV1().Pods(namespace).Create(ctx, initPausePod(f, conf), metav1.CreateOptions{})
   952  	framework.ExpectNoError(err)
   953  	return pod
   954  }
   955  
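        // runPausePod creates a pause pod, waits until it is running, and returns the live
        // Pod object.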
   956  func runPausePod(ctx context.Context, f *framework.Framework, conf pausePodConfig) *v1.Pod {
   957  	pod := createPausePod(ctx, f, conf)
   958  	framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodStartShort))
   959  	pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, conf.Name, metav1.GetOptions{})
   960  	framework.ExpectNoError(err)
   961  	return pod
   962  }
   963  
   964  func runPodAndGetNodeName(ctx context.Context, f *framework.Framework, conf pausePodConfig) string {
   965  	// launch a pod to find a node which can launch a pod. We intentionally do
   966  	// not just take the node list and choose the first of them. Depending on the
   967  	// cluster and the scheduler it might be that a "normal" pod cannot be
   968  	// scheduled onto it.
   969  	pod := runPausePod(ctx, f, conf)
   970  
   971  	ginkgo.By("Explicitly delete pod here to free the resource it takes.")
   972  	err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0))
   973  	framework.ExpectNoError(err)
   974  
   975  	return pod.Spec.NodeName
   976  }
   977  
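        // getRequestedCPU sums the CPU requests (in millicores) of the pod's regular
        // containers; init containers and pod overhead are not counted.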
   978  func getRequestedCPU(pod v1.Pod) int64 {
   979  	var result int64
   980  	for _, container := range pod.Spec.Containers {
   981  		result += container.Resources.Requests.Cpu().MilliValue()
   982  	}
   983  	return result
   984  }
   985  
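        // getRequestedStorageEphemeralStorage sums the ephemeral-storage requests of the
        // pod's regular containers.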
   986  func getRequestedStorageEphemeralStorage(pod v1.Pod) int64 {
   987  	var result int64
   988  	for _, container := range pod.Spec.Containers {
   989  		result += container.Resources.Requests.StorageEphemeral().Value()
   990  	}
   991  	return result
   992  }
   993  
   994  // removeTaintFromNodeAction returns a closure that removes the given taint
   995  // from the given node upon invocation.
   996  func removeTaintFromNodeAction(cs clientset.Interface, nodeName string, testTaint v1.Taint) Action {
   997  	return func(ctx context.Context) error {
   998  		e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint)
   999  		return nil
  1000  	}
  1001  }
  1002  
  1003  // createPausePodAction returns a closure that creates a pause pod upon invocation.
  1004  func createPausePodAction(f *framework.Framework, conf pausePodConfig) Action {
  1005  	return func(ctx context.Context) error {
  1006  		_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(ctx, initPausePod(f, conf), metav1.CreateOptions{})
  1007  		return err
  1008  	}
  1009  }
  1010  
  1011  // WaitForSchedulerAfterAction performs the provided action and then waits for the
  1012  // scheduler to act on the given pod.
  1013  func WaitForSchedulerAfterAction(ctx context.Context, f *framework.Framework, action Action, ns, podName string, expectSuccess bool) {
  1014  	predicate := scheduleFailureEvent(podName)
  1015  	if expectSuccess {
  1016  		predicate = scheduleSuccessEvent(ns, podName, "" /* any node */)
  1017  	}
  1018  	observed, err := observeEventAfterAction(ctx, f.ClientSet, f.Namespace.Name, predicate, action)
  1019  	framework.ExpectNoError(err)
  1020  	if expectSuccess && !observed {
  1021  		framework.Failf("Did not observe success event after performing the supplied action for pod %v", podName)
  1022  	}
  1023  	if !expectSuccess && !observed {
  1024  		framework.Failf("Did not observe failed event after performing the supplied action for pod %v", podName)
  1025  	}
  1026  }
  1027  
  1028  // TODO: upgrade calls in PodAffinity tests when we're able to run them
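        // verifyResult checks that the namespace contains exactly expectedScheduled
        // scheduled pods and expectedNotScheduled unscheduled pods, as classified by
        // GetPodsScheduled.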
  1029  func verifyResult(ctx context.Context, c clientset.Interface, expectedScheduled int, expectedNotScheduled int, ns string) {
  1030  	allPods, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
  1031  	framework.ExpectNoError(err)
  1032  	scheduledPods, notScheduledPods := GetPodsScheduled(workerNodes, allPods)
  1033  
  1034  	gomega.Expect(notScheduledPods).To(gomega.HaveLen(expectedNotScheduled), fmt.Sprintf("Not scheduled Pods: %#v", notScheduledPods))
  1035  	gomega.Expect(scheduledPods).To(gomega.HaveLen(expectedScheduled), fmt.Sprintf("Scheduled Pods: %#v", scheduledPods))
  1036  }
  1037  
  1038  // GetNodeThatCanRunPod launches a pod without a label to find a node that can run it.
  1039  func GetNodeThatCanRunPod(ctx context.Context, f *framework.Framework) string {
  1040  	ginkgo.By("Trying to launch a pod without a label to get a node which can launch it.")
  1041  	return runPodAndGetNodeName(ctx, f, pausePodConfig{Name: "without-label"})
  1042  }
  1043  
  1044  // Get2NodesThatCanRunPod returns the names of two distinct nodes that can each run a pod.
  1045  func Get2NodesThatCanRunPod(ctx context.Context, f *framework.Framework) []string {
  1046  	firstNode := GetNodeThatCanRunPod(ctx, f)
  1047  	ginkgo.By("Trying to launch another pod, excluded from the first node, to get a second node which can launch it.")
  1048  	pod := pausePodConfig{
  1049  		Name: "without-label",
  1050  		Affinity: &v1.Affinity{
  1051  			NodeAffinity: &v1.NodeAffinity{
  1052  				RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
  1053  					NodeSelectorTerms: []v1.NodeSelectorTerm{
  1054  						{
  1055  							MatchFields: []v1.NodeSelectorRequirement{
  1056  								{Key: "metadata.name", Operator: v1.NodeSelectorOpNotIn, Values: []string{firstNode}},
  1057  							},
  1058  						},
  1059  					},
  1060  				},
  1061  			},
  1062  		},
  1063  	}
  1064  	secondNode := runPodAndGetNodeName(ctx, f, pod)
  1065  	return []string{firstNode, secondNode}
  1066  }
  1067  
  1068  func getNodeThatCanRunPodWithoutToleration(ctx context.Context, f *framework.Framework) string {
  1069  	ginkgo.By("Trying to launch a pod without a toleration to get a node which can launch it.")
  1070  	return runPodAndGetNodeName(ctx, f, pausePodConfig{Name: "without-toleration"})
  1071  }
  1072  
  1073  // CreateHostPortPods creates an RC whose pods reserve host port 4321.
  1074  func CreateHostPortPods(ctx context.Context, f *framework.Framework, id string, replicas int, expectRunning bool) {
  1075  	ginkgo.By("Running RC which reserves host port")
  1076  	config := &testutils.RCConfig{
  1077  		Client:    f.ClientSet,
  1078  		Name:      id,
  1079  		Namespace: f.Namespace.Name,
  1080  		Timeout:   defaultTimeout,
  1081  		Image:     imageutils.GetPauseImageName(),
  1082  		Replicas:  replicas,
  1083  		HostPorts: map[string]int{"port1": 4321},
  1084  	}
  1085  	err := e2erc.RunRC(ctx, *config)
  1086  	if expectRunning {
  1087  		framework.ExpectNoError(err)
  1088  	}
  1089  }
  1090  
  1091  // CreateNodeSelectorPods creates an RC whose pods reserve host port 4321 and use the given node selector.
  1092  func CreateNodeSelectorPods(ctx context.Context, f *framework.Framework, id string, replicas int, nodeSelector map[string]string, expectRunning bool) error {
  1093  	ginkgo.By("Running RC which reserves host port and defines node selector")
  1094  
  1095  	config := &testutils.RCConfig{
  1096  		Client:       f.ClientSet,
  1097  		Name:         id,
  1098  		Namespace:    f.Namespace.Name,
  1099  		Timeout:      defaultTimeout,
  1100  		Image:        imageutils.GetPauseImageName(),
  1101  		Replicas:     replicas,
  1102  		HostPorts:    map[string]int{"port1": 4321},
  1103  		NodeSelector: nodeSelector,
  1104  	}
  1105  	err := e2erc.RunRC(ctx, *config)
  1106  	if expectRunning {
  1107  		return err
  1108  	}
  1109  	return nil
  1110  }
  1111  
  1112  // createHostPortPodOnNode creates a pod that uses a hostPort on the node selected by the given nodeSelector;
  1113  // it starts an HTTP server (agnhost netexec) on the exposed port.
  1114  func createHostPortPodOnNode(ctx context.Context, f *framework.Framework, podName, ns, hostIP string, port int32, protocol v1.Protocol, nodeSelector map[string]string, expectScheduled bool) {
  1115  	hostPortPod := &v1.Pod{
  1116  		ObjectMeta: metav1.ObjectMeta{
  1117  			Name: podName,
  1118  		},
  1119  		Spec: v1.PodSpec{
  1120  			Containers: []v1.Container{
  1121  				{
  1122  					Name:  "agnhost",
  1123  					Image: imageutils.GetE2EImage(imageutils.Agnhost),
  1124  					Args:  []string{"netexec", "--http-port=8080", "--udp-port=8080"},
  1125  					Ports: []v1.ContainerPort{
  1126  						{
  1127  							HostPort:      port,
  1128  							ContainerPort: 8080,
  1129  							Protocol:      protocol,
  1130  							HostIP:        hostIP,
  1131  						},
  1132  					},
  1133  					ReadinessProbe: &v1.Probe{
  1134  						ProbeHandler: v1.ProbeHandler{
  1135  							HTTPGet: &v1.HTTPGetAction{
  1136  								Path: "/hostname",
  1137  								Port: intstr.IntOrString{
  1138  									IntVal: int32(8080),
  1139  								},
  1140  								Scheme: v1.URISchemeHTTP,
  1141  							},
  1142  						},
  1143  					},
  1144  				},
  1145  			},
  1146  			NodeSelector: nodeSelector,
  1147  		},
  1148  	}
  1149  	_, err := f.ClientSet.CoreV1().Pods(ns).Create(ctx, hostPortPod, metav1.CreateOptions{})
  1150  	framework.ExpectNoError(err)
  1151  
  1152  	err = e2epod.WaitForPodNotPending(ctx, f.ClientSet, ns, podName)
  1153  	if expectScheduled {
  1154  		framework.ExpectNoError(err)
  1155  	}
  1156  }
  1157  
  1158  // GetPodsScheduled splits the given Pods into those already scheduled onto one of the worker nodes and those not yet scheduled anywhere.
  1159  func GetPodsScheduled(workerNodes sets.Set[string], pods *v1.PodList) (scheduledPods, notScheduledPods []v1.Pod) {
  1160  	for _, pod := range pods.Items {
  1161  		if pod.Spec.NodeName != "" && workerNodes.Has(pod.Spec.NodeName) {
  1162  			_, scheduledCondition := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
  1163  			if scheduledCondition == nil {
  1164  				framework.Failf("Did not find 'PodScheduled' condition for pod %q", pod.Name)
  1165  			}
  1166  			if scheduledCondition.Status != v1.ConditionTrue {
  1167  				framework.Failf("'PodScheduled' condition is not 'True' for pod %q", pod.Name)
  1168  			}
  1169  			scheduledPods = append(scheduledPods, pod)
  1170  		} else if pod.Spec.NodeName == "" {
  1171  			notScheduledPods = append(notScheduledPods, pod)
  1172  		}
  1173  	}
  1174  	return
  1175  }
  1176  
  1177  // getNodeHostIP returns the first internal IP on the node matching the main Cluster IP family
  1178  func getNodeHostIP(ctx context.Context, f *framework.Framework, nodeName string) string {
  1179  	// Get the internal HostIP of the node
  1180  	family := v1.IPv4Protocol
  1181  	if framework.TestContext.ClusterIsIPv6() {
  1182  		family = v1.IPv6Protocol
  1183  	}
  1184  	node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
  1185  	framework.ExpectNoError(err)
  1186  	ips := e2enode.GetAddressesByTypeAndFamily(node, v1.NodeInternalIP, family)
  1187  	gomega.Expect(ips).ToNot(gomega.BeEmpty())
  1188  	return ips[0]
  1189  }
  1190  
