...

Source file src/k8s.io/kubernetes/pkg/controller/tainteviction/taint_eviction_test.go

Documentation: k8s.io/kubernetes/pkg/controller/tainteviction

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tainteviction
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sort"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/google/go-cmp/cmp"
    27  
    28  	corev1 "k8s.io/api/core/v1"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/fields"
    31  	"k8s.io/apimachinery/pkg/labels"
    32  	"k8s.io/apimachinery/pkg/types"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  	"k8s.io/apiserver/pkg/util/feature"
    35  	"k8s.io/client-go/informers"
    36  	"k8s.io/client-go/kubernetes/fake"
    37  	clienttesting "k8s.io/client-go/testing"
    38  	"k8s.io/client-go/tools/cache"
    39  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    40  	"k8s.io/kubernetes/pkg/controller/testutil"
    41  	"k8s.io/kubernetes/pkg/features"
    42  )
    43  
// timeForControllerToProgressForSanityCheck is how long a test sleeps to give
// the controller's goroutines a chance to run before the test concludes that
// nothing panicked.
var timeForControllerToProgressForSanityCheck = 20 * time.Millisecond
    45  
    46  func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNodeNameFunc {
    47  	return func(nodeName string) ([]*corev1.Pod, error) {
    48  		selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
    49  		pods, err := c.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{
    50  			FieldSelector: selector.String(),
    51  			LabelSelector: labels.Everything().String(),
    52  		})
    53  		if err != nil {
    54  			return []*corev1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
    55  		}
    56  		rPods := make([]*corev1.Pod, len(pods.Items))
    57  		for i := range pods.Items {
    58  			rPods[i] = &pods.Items[i]
    59  		}
    60  		return rPods, nil
    61  	}
    62  }
    63  
    64  func createNoExecuteTaint(index int) corev1.Taint {
    65  	now := metav1.Now()
    66  	return corev1.Taint{
    67  		Key:       "testTaint" + fmt.Sprintf("%v", index),
    68  		Value:     "test" + fmt.Sprintf("%v", index),
    69  		Effect:    corev1.TaintEffectNoExecute,
    70  		TimeAdded: &now,
    71  	}
    72  }
    73  
    74  func addToleration(pod *corev1.Pod, index int, duration int64) *corev1.Pod {
    75  	if pod.Annotations == nil {
    76  		pod.Annotations = map[string]string{}
    77  	}
    78  	if duration < 0 {
    79  		pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute}}
    80  
    81  	} else {
    82  		pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &duration}}
    83  	}
    84  	return pod
    85  }
    86  
    87  func addTaintsToNode(node *corev1.Node, key, value string, indices []int) *corev1.Node {
    88  	taints := []corev1.Taint{}
    89  	for _, index := range indices {
    90  		taints = append(taints, createNoExecuteTaint(index))
    91  	}
    92  	node.Spec.Taints = taints
    93  	return node
    94  }
    95  
// alwaysReady is a stub that unconditionally reports true; setupNewController
// installs it as the controller's podListerSynced and nodeListerSynced.
var alwaysReady = func() bool { return true }
    97  
    98  func setupNewController(ctx context.Context, fakeClientSet *fake.Clientset) (*Controller, cache.Indexer, cache.Indexer) {
    99  	informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0)
   100  	podIndexer := informerFactory.Core().V1().Pods().Informer().GetIndexer()
   101  	nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer()
   102  	mgr, _ := New(ctx, fakeClientSet, informerFactory.Core().V1().Pods(), informerFactory.Core().V1().Nodes(), "taint-eviction-controller")
   103  	mgr.podListerSynced = alwaysReady
   104  	mgr.nodeListerSynced = alwaysReady
   105  	mgr.getPodsAssignedToNode = getPodsAssignedToNode(ctx, fakeClientSet)
   106  	return mgr, podIndexer, nodeIndexer
   107  }
   108  
   109  type timestampedPod struct {
   110  	names     []string
   111  	timestamp time.Duration
   112  }
   113  
   114  type durationSlice []timestampedPod
   115  
   116  func (a durationSlice) Len() int           { return len(a) }
   117  func (a durationSlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   118  func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp }
   119  
   120  func TestFilterNoExecuteTaints(t *testing.T) {
   121  	taints := []corev1.Taint{
   122  		{
   123  			Key:    "one",
   124  			Value:  "one",
   125  			Effect: corev1.TaintEffectNoExecute,
   126  		},
   127  		{
   128  			Key:    "two",
   129  			Value:  "two",
   130  			Effect: corev1.TaintEffectNoSchedule,
   131  		},
   132  	}
   133  	taints = getNoExecuteTaints(taints)
   134  	if len(taints) != 1 || taints[0].Key != "one" {
   135  		t.Errorf("Filtering doesn't work. Got %v", taints)
   136  	}
   137  }
   138  
   139  func TestCreatePod(t *testing.T) {
   140  	testCases := []struct {
   141  		description                   string
   142  		pod                           *corev1.Pod
   143  		taintedNodes                  map[string][]corev1.Taint
   144  		expectPatch                   bool
   145  		expectDelete                  bool
   146  		enablePodDisruptionConditions bool
   147  	}{
   148  		{
   149  			description:  "not scheduled - ignore",
   150  			pod:          testutil.NewPod("pod1", ""),
   151  			taintedNodes: map[string][]corev1.Taint{},
   152  			expectDelete: false,
   153  		},
   154  		{
   155  			description:  "scheduled on untainted Node",
   156  			pod:          testutil.NewPod("pod1", "node1"),
   157  			taintedNodes: map[string][]corev1.Taint{},
   158  			expectDelete: false,
   159  		},
   160  		{
   161  			description: "schedule on tainted Node",
   162  			pod:         testutil.NewPod("pod1", "node1"),
   163  			taintedNodes: map[string][]corev1.Taint{
   164  				"node1": {createNoExecuteTaint(1)},
   165  			},
   166  			expectDelete: true,
   167  		},
   168  		{
   169  			description: "schedule on tainted Node; PodDisruptionConditions enabled",
   170  			pod:         testutil.NewPod("pod1", "node1"),
   171  			taintedNodes: map[string][]corev1.Taint{
   172  				"node1": {createNoExecuteTaint(1)},
   173  			},
   174  			expectPatch:                   true,
   175  			expectDelete:                  true,
   176  			enablePodDisruptionConditions: true,
   177  		},
   178  		{
   179  			description: "schedule on tainted Node with finite toleration",
   180  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   181  			taintedNodes: map[string][]corev1.Taint{
   182  				"node1": {createNoExecuteTaint(1)},
   183  			},
   184  			expectDelete: false,
   185  		},
   186  		{
   187  			description: "schedule on tainted Node with infinite toleration",
   188  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   189  			taintedNodes: map[string][]corev1.Taint{
   190  				"node1": {createNoExecuteTaint(1)},
   191  			},
   192  			expectDelete: false,
   193  		},
   194  		{
   195  			description: "schedule on tainted Node with infinite ivalid toleration",
   196  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 2, -1),
   197  			taintedNodes: map[string][]corev1.Taint{
   198  				"node1": {createNoExecuteTaint(1)},
   199  			},
   200  			expectDelete: true,
   201  		},
   202  	}
   203  
   204  	for _, item := range testCases {
   205  		t.Run(item.description, func(t *testing.T) {
   206  			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)()
   207  			ctx, cancel := context.WithCancel(context.Background())
   208  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.pod}})
   209  			controller, podIndexer, _ := setupNewController(ctx, fakeClientset)
   210  			controller.recorder = testutil.NewFakeRecorder()
   211  			go controller.Run(ctx)
   212  			controller.taintedNodes = item.taintedNodes
   213  
   214  			podIndexer.Add(item.pod)
   215  			controller.PodUpdated(nil, item.pod)
   216  
   217  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   218  
   219  			cancel()
   220  		})
   221  	}
   222  }
   223  
   224  func TestDeletePod(t *testing.T) {
   225  	ctx, cancel := context.WithCancel(context.Background())
   226  	defer cancel()
   227  
   228  	fakeClientset := fake.NewSimpleClientset()
   229  	controller, _, _ := setupNewController(ctx, fakeClientset)
   230  	controller.recorder = testutil.NewFakeRecorder()
   231  	go controller.Run(ctx)
   232  	controller.taintedNodes = map[string][]corev1.Taint{
   233  		"node1": {createNoExecuteTaint(1)},
   234  	}
   235  	controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil)
   236  	// wait a bit to see if nothing will panic
   237  	time.Sleep(timeForControllerToProgressForSanityCheck)
   238  }
   239  
   240  func TestUpdatePod(t *testing.T) {
   241  	testCases := []struct {
   242  		description                   string
   243  		prevPod                       *corev1.Pod
   244  		awaitForScheduledEviction     bool
   245  		newPod                        *corev1.Pod
   246  		taintedNodes                  map[string][]corev1.Taint
   247  		expectPatch                   bool
   248  		expectDelete                  bool
   249  		enablePodDisruptionConditions bool
   250  	}{
   251  		{
   252  			description: "scheduling onto tainted Node results in patch and delete when PodDisruptionConditions enabled",
   253  			prevPod:     testutil.NewPod("pod1", ""),
   254  			newPod:      testutil.NewPod("pod1", "node1"),
   255  			taintedNodes: map[string][]corev1.Taint{
   256  				"node1": {createNoExecuteTaint(1)},
   257  			},
   258  			expectPatch:                   true,
   259  			expectDelete:                  true,
   260  			enablePodDisruptionConditions: true,
   261  		},
   262  		{
   263  			description: "scheduling onto tainted Node",
   264  			prevPod:     testutil.NewPod("pod1", ""),
   265  			newPod:      testutil.NewPod("pod1", "node1"),
   266  			taintedNodes: map[string][]corev1.Taint{
   267  				"node1": {createNoExecuteTaint(1)},
   268  			},
   269  			expectDelete: true,
   270  		},
   271  		{
   272  			description: "scheduling onto tainted Node with toleration",
   273  			prevPod:     addToleration(testutil.NewPod("pod1", ""), 1, -1),
   274  			newPod:      addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   275  			taintedNodes: map[string][]corev1.Taint{
   276  				"node1": {createNoExecuteTaint(1)},
   277  			},
   278  			expectDelete: false,
   279  		},
   280  		{
   281  			description:               "removing toleration",
   282  			prevPod:                   addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   283  			newPod:                    testutil.NewPod("pod1", "node1"),
   284  			awaitForScheduledEviction: true,
   285  			taintedNodes: map[string][]corev1.Taint{
   286  				"node1": {createNoExecuteTaint(1)},
   287  			},
   288  			expectDelete: true,
   289  		},
   290  		{
   291  			description:               "lengthening toleration shouldn't work",
   292  			prevPod:                   addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
   293  			newPod:                    addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   294  			awaitForScheduledEviction: true,
   295  			taintedNodes: map[string][]corev1.Taint{
   296  				"node1": {createNoExecuteTaint(1)},
   297  			},
   298  			expectDelete: true,
   299  		},
   300  	}
   301  
   302  	for _, item := range testCases {
   303  		t.Run(item.description, func(t *testing.T) {
   304  			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)()
   305  			ctx, cancel := context.WithCancel(context.Background())
   306  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.prevPod}})
   307  			controller, podIndexer, _ := setupNewController(context.TODO(), fakeClientset)
   308  			controller.recorder = testutil.NewFakeRecorder()
   309  			controller.taintedNodes = item.taintedNodes
   310  			go controller.Run(ctx)
   311  
   312  			podIndexer.Add(item.prevPod)
   313  			controller.PodUpdated(nil, item.prevPod)
   314  
   315  			if item.awaitForScheduledEviction {
   316  				nsName := types.NamespacedName{Namespace: item.prevPod.Namespace, Name: item.prevPod.Name}
   317  				err := wait.PollImmediate(time.Millisecond*10, time.Second, func() (bool, error) {
   318  					scheduledEviction := controller.taintEvictionQueue.GetWorkerUnsafe(nsName.String())
   319  					return scheduledEviction != nil, nil
   320  				})
   321  				if err != nil {
   322  					t.Fatalf("Failed to await for scheduled eviction: %q", err)
   323  				}
   324  			}
   325  
   326  			podIndexer.Update(item.newPod)
   327  			controller.PodUpdated(item.prevPod, item.newPod)
   328  
   329  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   330  			cancel()
   331  		})
   332  	}
   333  }
   334  
   335  func TestCreateNode(t *testing.T) {
   336  	testCases := []struct {
   337  		description  string
   338  		pods         []corev1.Pod
   339  		node         *corev1.Node
   340  		expectPatch  bool
   341  		expectDelete bool
   342  	}{
   343  		{
   344  			description: "Creating Node matching already assigned Pod",
   345  			pods: []corev1.Pod{
   346  				*testutil.NewPod("pod1", "node1"),
   347  			},
   348  			node:         testutil.NewNode("node1"),
   349  			expectPatch:  false,
   350  			expectDelete: false,
   351  		},
   352  		{
   353  			description: "Creating tainted Node matching already assigned Pod",
   354  			pods: []corev1.Pod{
   355  				*testutil.NewPod("pod1", "node1"),
   356  			},
   357  			node:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   358  			expectPatch:  true,
   359  			expectDelete: true,
   360  		},
   361  		{
   362  			description: "Creating tainted Node matching already assigned tolerating Pod",
   363  			pods: []corev1.Pod{
   364  				*addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   365  			},
   366  			node:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   367  			expectPatch:  false,
   368  			expectDelete: false,
   369  		},
   370  	}
   371  
   372  	for _, item := range testCases {
   373  		ctx, cancel := context.WithCancel(context.Background())
   374  		fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   375  		controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   376  		nodeIndexer.Add(item.node)
   377  		controller.recorder = testutil.NewFakeRecorder()
   378  		go controller.Run(ctx)
   379  		controller.NodeUpdated(nil, item.node)
   380  
   381  		verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   382  
   383  		cancel()
   384  	}
   385  }
   386  
   387  func TestDeleteNode(t *testing.T) {
   388  	ctx, cancel := context.WithCancel(context.Background())
   389  	fakeClientset := fake.NewSimpleClientset()
   390  	controller, _, _ := setupNewController(ctx, fakeClientset)
   391  	controller.recorder = testutil.NewFakeRecorder()
   392  	controller.taintedNodes = map[string][]corev1.Taint{
   393  		"node1": {createNoExecuteTaint(1)},
   394  	}
   395  	go controller.Run(ctx)
   396  	controller.NodeUpdated(testutil.NewNode("node1"), nil)
   397  
   398  	// await until controller.taintedNodes is empty
   399  	err := wait.PollImmediate(10*time.Millisecond, time.Second, func() (bool, error) {
   400  		controller.taintedNodesLock.Lock()
   401  		defer controller.taintedNodesLock.Unlock()
   402  		_, ok := controller.taintedNodes["node1"]
   403  		return !ok, nil
   404  	})
   405  	if err != nil {
   406  		t.Errorf("Failed to await for processing node deleted: %q", err)
   407  	}
   408  	cancel()
   409  }
   410  
   411  func TestUpdateNode(t *testing.T) {
   412  	testCases := []struct {
   413  		description                   string
   414  		pods                          []corev1.Pod
   415  		oldNode                       *corev1.Node
   416  		newNode                       *corev1.Node
   417  		expectPatch                   bool
   418  		expectDelete                  bool
   419  		additionalSleep               time.Duration
   420  		enablePodDisruptionConditions bool
   421  	}{
   422  		{
   423  			description: "Added taint, expect node patched and deleted when PodDisruptionConditions is enabled",
   424  			pods: []corev1.Pod{
   425  				*testutil.NewPod("pod1", "node1"),
   426  			},
   427  			oldNode:                       testutil.NewNode("node1"),
   428  			newNode:                       addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   429  			expectPatch:                   true,
   430  			expectDelete:                  true,
   431  			enablePodDisruptionConditions: true,
   432  		},
   433  		{
   434  			description: "Added taint",
   435  			pods: []corev1.Pod{
   436  				*testutil.NewPod("pod1", "node1"),
   437  			},
   438  			oldNode:      testutil.NewNode("node1"),
   439  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   440  			expectDelete: true,
   441  		},
   442  		{
   443  			description: "Added tolerated taint",
   444  			pods: []corev1.Pod{
   445  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   446  			},
   447  			oldNode:      testutil.NewNode("node1"),
   448  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   449  			expectDelete: false,
   450  		},
   451  		{
   452  			description: "Only one added taint tolerated",
   453  			pods: []corev1.Pod{
   454  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   455  			},
   456  			oldNode:      testutil.NewNode("node1"),
   457  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   458  			expectDelete: true,
   459  		},
   460  		{
   461  			description: "Taint removed",
   462  			pods: []corev1.Pod{
   463  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
   464  			},
   465  			oldNode:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   466  			newNode:         testutil.NewNode("node1"),
   467  			expectDelete:    false,
   468  			additionalSleep: 1500 * time.Millisecond,
   469  		},
   470  		{
   471  			description: "Pod with multiple tolerations are evicted when first one runs out",
   472  			pods: []corev1.Pod{
   473  				{
   474  					ObjectMeta: metav1.ObjectMeta{
   475  						Namespace: "default",
   476  						Name:      "pod1",
   477  					},
   478  					Spec: corev1.PodSpec{
   479  						NodeName: "node1",
   480  						Tolerations: []corev1.Toleration{
   481  							{Key: "testTaint1", Value: "test1", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]},
   482  							{Key: "testTaint2", Value: "test2", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]},
   483  						},
   484  					},
   485  					Status: corev1.PodStatus{
   486  						Conditions: []corev1.PodCondition{
   487  							{
   488  								Type:   corev1.PodReady,
   489  								Status: corev1.ConditionTrue,
   490  							},
   491  						},
   492  					},
   493  				},
   494  			},
   495  			oldNode:      testutil.NewNode("node1"),
   496  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   497  			expectDelete: true,
   498  		},
   499  	}
   500  
   501  	for _, item := range testCases {
   502  		t.Run(item.description, func(t *testing.T) {
   503  			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)()
   504  			ctx, cancel := context.WithCancel(context.Background())
   505  			defer cancel()
   506  
   507  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   508  			controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   509  			nodeIndexer.Add(item.newNode)
   510  			controller.recorder = testutil.NewFakeRecorder()
   511  			go controller.Run(ctx)
   512  			controller.NodeUpdated(item.oldNode, item.newNode)
   513  
   514  			if item.additionalSleep > 0 {
   515  				time.Sleep(item.additionalSleep)
   516  			}
   517  
   518  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   519  		})
   520  	}
   521  }
   522  
   523  func TestUpdateNodeWithMultipleTaints(t *testing.T) {
   524  	taint1 := createNoExecuteTaint(1)
   525  	taint2 := createNoExecuteTaint(2)
   526  
   527  	minute := int64(60)
   528  	pod := testutil.NewPod("pod1", "node1")
   529  	pod.Spec.Tolerations = []corev1.Toleration{
   530  		{Key: taint1.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute},
   531  		{Key: taint2.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &minute},
   532  	}
   533  	podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
   534  
   535  	untaintedNode := testutil.NewNode("node1")
   536  
   537  	doubleTaintedNode := testutil.NewNode("node1")
   538  	doubleTaintedNode.Spec.Taints = []corev1.Taint{taint1, taint2}
   539  
   540  	singleTaintedNode := testutil.NewNode("node1")
   541  	singleTaintedNode.Spec.Taints = []corev1.Taint{taint1}
   542  
   543  	ctx, cancel := context.WithCancel(context.TODO())
   544  	fakeClientset := fake.NewSimpleClientset(pod)
   545  	controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   546  	controller.recorder = testutil.NewFakeRecorder()
   547  	go controller.Run(ctx)
   548  
   549  	// no taint
   550  	nodeIndexer.Add(untaintedNode)
   551  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   552  	// verify pod is not queued for deletion
   553  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   554  		t.Fatalf("pod queued for deletion with no taints")
   555  	}
   556  
   557  	// no taint -> infinitely tolerated taint
   558  	nodeIndexer.Update(singleTaintedNode)
   559  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   560  	// verify pod is not queued for deletion
   561  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   562  		t.Fatalf("pod queued for deletion with permanently tolerated taint")
   563  	}
   564  
   565  	// infinitely tolerated taint -> temporarily tolerated taint
   566  	nodeIndexer.Update(doubleTaintedNode)
   567  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   568  	// verify pod is queued for deletion
   569  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil {
   570  		t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint")
   571  	}
   572  
   573  	// temporarily tolerated taint -> infinitely tolerated taint
   574  	nodeIndexer.Update(singleTaintedNode)
   575  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   576  	// verify pod is not queued for deletion
   577  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   578  		t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint")
   579  	}
   580  
   581  	// verify pod is not deleted
   582  	for _, action := range fakeClientset.Actions() {
   583  		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
   584  			t.Error("Unexpected deletion")
   585  		}
   586  	}
   587  	cancel()
   588  }
   589  
   590  func TestUpdateNodeWithMultiplePods(t *testing.T) {
   591  	testCases := []struct {
   592  		description         string
   593  		pods                []corev1.Pod
   594  		oldNode             *corev1.Node
   595  		newNode             *corev1.Node
   596  		expectedDeleteTimes durationSlice
   597  	}{
   598  		{
   599  			description: "Pods with different toleration times are evicted appropriately",
   600  			pods: []corev1.Pod{
   601  				*testutil.NewPod("pod1", "node1"),
   602  				*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
   603  				*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
   604  			},
   605  			oldNode: testutil.NewNode("node1"),
   606  			newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   607  			expectedDeleteTimes: durationSlice{
   608  				{[]string{"pod1"}, 0},
   609  				{[]string{"pod2"}, time.Second},
   610  			},
   611  		},
   612  		{
   613  			description: "Evict all pods not matching all taints instantly",
   614  			pods: []corev1.Pod{
   615  				*testutil.NewPod("pod1", "node1"),
   616  				*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
   617  				*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
   618  			},
   619  			oldNode: testutil.NewNode("node1"),
   620  			newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   621  			expectedDeleteTimes: durationSlice{
   622  				{[]string{"pod1", "pod2", "pod3"}, 0},
   623  			},
   624  		},
   625  	}
   626  
   627  	for _, item := range testCases {
   628  		t.Run(item.description, func(t *testing.T) {
   629  			t.Logf("Starting testcase %q", item.description)
   630  			ctx, cancel := context.WithCancel(context.Background())
   631  			defer cancel()
   632  
   633  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   634  			sort.Sort(item.expectedDeleteTimes)
   635  			controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   636  			nodeIndexer.Add(item.newNode)
   637  			controller.recorder = testutil.NewFakeRecorder()
   638  			go controller.Run(ctx)
   639  			controller.NodeUpdated(item.oldNode, item.newNode)
   640  
   641  			startedAt := time.Now()
   642  			for i := range item.expectedDeleteTimes {
   643  				if i == 0 || item.expectedDeleteTimes[i-1].timestamp != item.expectedDeleteTimes[i].timestamp {
   644  					// compute a grace duration to give controller time to process updates. Choose big
   645  					// enough intervals in the test cases above to avoid flakes.
   646  					var increment time.Duration
   647  					if i == len(item.expectedDeleteTimes)-1 || item.expectedDeleteTimes[i+1].timestamp == item.expectedDeleteTimes[i].timestamp {
   648  						increment = 500 * time.Millisecond
   649  					} else {
   650  						increment = ((item.expectedDeleteTimes[i+1].timestamp - item.expectedDeleteTimes[i].timestamp) / time.Duration(2))
   651  					}
   652  
   653  					sleepTime := item.expectedDeleteTimes[i].timestamp - time.Since(startedAt) + increment
   654  					if sleepTime < 0 {
   655  						sleepTime = 0
   656  					}
   657  					t.Logf("Sleeping for %v", sleepTime)
   658  					time.Sleep(sleepTime)
   659  				}
   660  
   661  				for delay, podName := range item.expectedDeleteTimes[i].names {
   662  					deleted := false
   663  					for _, action := range fakeClientset.Actions() {
   664  						deleteAction, ok := action.(clienttesting.DeleteActionImpl)
   665  						if !ok {
   666  							t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
   667  							continue
   668  						}
   669  						if deleteAction.GetResource().Resource != "pods" {
   670  							continue
   671  						}
   672  						if podName == deleteAction.GetName() {
   673  							deleted = true
   674  						}
   675  					}
   676  					if !deleted {
   677  						t.Errorf("Failed to deleted pod %v after %v", podName, delay)
   678  					}
   679  				}
   680  				for _, action := range fakeClientset.Actions() {
   681  					deleteAction, ok := action.(clienttesting.DeleteActionImpl)
   682  					if !ok {
   683  						t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
   684  						continue
   685  					}
   686  					if deleteAction.GetResource().Resource != "pods" {
   687  						continue
   688  					}
   689  					deletedPodName := deleteAction.GetName()
   690  					expected := false
   691  					for _, podName := range item.expectedDeleteTimes[i].names {
   692  						if podName == deletedPodName {
   693  							expected = true
   694  						}
   695  					}
   696  					if !expected {
   697  						t.Errorf("Pod %v was deleted even though it shouldn't have", deletedPodName)
   698  					}
   699  				}
   700  				fakeClientset.ClearActions()
   701  			}
   702  		})
   703  	}
   704  }
   705  
   706  func TestGetMinTolerationTime(t *testing.T) {
   707  	one := int64(1)
   708  	two := int64(2)
   709  	oneSec := 1 * time.Second
   710  
   711  	tests := []struct {
   712  		tolerations []corev1.Toleration
   713  		expected    time.Duration
   714  	}{
   715  		{
   716  			tolerations: []corev1.Toleration{},
   717  			expected:    0,
   718  		},
   719  		{
   720  			tolerations: []corev1.Toleration{
   721  				{
   722  					TolerationSeconds: nil,
   723  				},
   724  			},
   725  			expected: -1,
   726  		},
   727  		{
   728  			tolerations: []corev1.Toleration{
   729  				{
   730  					TolerationSeconds: &one,
   731  				},
   732  				{
   733  					TolerationSeconds: &two,
   734  				},
   735  			},
   736  			expected: oneSec,
   737  		},
   738  
   739  		{
   740  			tolerations: []corev1.Toleration{
   741  				{
   742  					TolerationSeconds: &one,
   743  				},
   744  				{
   745  					TolerationSeconds: nil,
   746  				},
   747  			},
   748  			expected: oneSec,
   749  		},
   750  		{
   751  			tolerations: []corev1.Toleration{
   752  				{
   753  					TolerationSeconds: nil,
   754  				},
   755  				{
   756  					TolerationSeconds: &one,
   757  				},
   758  			},
   759  			expected: oneSec,
   760  		},
   761  	}
   762  
   763  	for _, test := range tests {
   764  		got := getMinTolerationTime(test.tolerations)
   765  		if got != test.expected {
   766  			t.Errorf("Incorrect min toleration time: got %v, expected %v", got, test.expected)
   767  		}
   768  	}
   769  }
   770  
// TestEventualConsistency verifies that if getPodsAssignedToNode returns incomplete
// data (e.g. due to watch latency), the remaining pods are reconciled eventually.
// This scenario is partially covered by TestUpdatePod, but given this is an important
// property of TaintManager, it's better to have an explicit test for it.
   775  func TestEventualConsistency(t *testing.T) {
   776  	testCases := []struct {
   777  		description  string
   778  		pods         []corev1.Pod
   779  		prevPod      *corev1.Pod
   780  		newPod       *corev1.Pod
   781  		oldNode      *corev1.Node
   782  		newNode      *corev1.Node
   783  		expectPatch  bool
   784  		expectDelete bool
   785  	}{
   786  		{
   787  			description: "existing pod2 scheduled onto tainted Node",
   788  			pods: []corev1.Pod{
   789  				*testutil.NewPod("pod1", "node1"),
   790  			},
   791  			prevPod:      testutil.NewPod("pod2", ""),
   792  			newPod:       testutil.NewPod("pod2", "node1"),
   793  			oldNode:      testutil.NewNode("node1"),
   794  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   795  			expectPatch:  true,
   796  			expectDelete: true,
   797  		},
   798  		{
   799  			description: "existing pod2 with taint toleration scheduled onto tainted Node",
   800  			pods: []corev1.Pod{
   801  				*testutil.NewPod("pod1", "node1"),
   802  			},
   803  			prevPod:      addToleration(testutil.NewPod("pod2", ""), 1, 100),
   804  			newPod:       addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
   805  			oldNode:      testutil.NewNode("node1"),
   806  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   807  			expectPatch:  true,
   808  			expectDelete: true,
   809  		},
   810  		{
   811  			description: "new pod2 created on tainted Node",
   812  			pods: []corev1.Pod{
   813  				*testutil.NewPod("pod1", "node1"),
   814  			},
   815  			prevPod:      nil,
   816  			newPod:       testutil.NewPod("pod2", "node1"),
   817  			oldNode:      testutil.NewNode("node1"),
   818  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   819  			expectPatch:  true,
   820  			expectDelete: true,
   821  		},
   822  		{
   823  			description: "new pod2 with tait toleration created on tainted Node",
   824  			pods: []corev1.Pod{
   825  				*testutil.NewPod("pod1", "node1"),
   826  			},
   827  			prevPod:      nil,
   828  			newPod:       addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
   829  			oldNode:      testutil.NewNode("node1"),
   830  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   831  			expectPatch:  true,
   832  			expectDelete: true,
   833  		},
   834  	}
   835  
   836  	for _, item := range testCases {
   837  		t.Run(item.description, func(t *testing.T) {
   838  			ctx, cancel := context.WithCancel(context.Background())
   839  			defer cancel()
   840  
   841  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   842  			controller, podIndexer, nodeIndexer := setupNewController(ctx, fakeClientset)
   843  			nodeIndexer.Add(item.newNode)
   844  			controller.recorder = testutil.NewFakeRecorder()
   845  			go controller.Run(ctx)
   846  
   847  			if item.prevPod != nil {
   848  				podIndexer.Add(item.prevPod)
   849  				controller.PodUpdated(nil, item.prevPod)
   850  			}
   851  
   852  			// First we simulate NodeUpdate that should delete 'pod1'. It doesn't know about 'pod2' yet.
   853  			controller.NodeUpdated(item.oldNode, item.newNode)
   854  
   855  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   856  			fakeClientset.ClearActions()
   857  
   858  			// And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well.
   859  			podIndexer.Update(item.newPod)
   860  			controller.PodUpdated(item.prevPod, item.newPod)
   861  			// wait a bit
   862  			time.Sleep(timeForControllerToProgressForSanityCheck)
   863  		})
   864  	}
   865  }
   866  
   867  func verifyPodActions(t *testing.T, description string, fakeClientset *fake.Clientset, expectPatch, expectDelete bool) {
   868  	t.Helper()
   869  	podPatched := false
   870  	podDeleted := false
   871  	// use Poll instead of PollImmediate to give some processing time to the controller that the expected
   872  	// actions are likely to be already sent
   873  	err := wait.Poll(10*time.Millisecond, 5*time.Second, func() (bool, error) {
   874  		for _, action := range fakeClientset.Actions() {
   875  			if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" {
   876  				podPatched = true
   877  			}
   878  			if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
   879  				podDeleted = true
   880  			}
   881  		}
   882  		return podPatched == expectPatch && podDeleted == expectDelete, nil
   883  	})
   884  	if err != nil {
   885  		t.Errorf("Failed waiting for the expected actions: %q", err)
   886  	}
   887  	if podPatched != expectPatch {
   888  		t.Errorf("[%v]Unexpected test result. Expected patch %v, got %v", description, expectPatch, podPatched)
   889  	}
   890  	if podDeleted != expectDelete {
   891  		t.Errorf("[%v]Unexpected test result. Expected delete %v, got %v", description, expectDelete, podDeleted)
   892  	}
   893  }
   894  
   895  // TestPodDeletionEvent Verify that the output events are as expected
   896  func TestPodDeletionEvent(t *testing.T) {
   897  	f := func(path cmp.Path) bool {
   898  		switch path.String() {
   899  		// These fields change at runtime, so ignore it
   900  		case "LastTimestamp", "FirstTimestamp", "ObjectMeta.Name":
   901  			return true
   902  		}
   903  		return false
   904  	}
   905  
   906  	t.Run("emitPodDeletionEvent", func(t *testing.T) {
   907  		controller := &Controller{}
   908  		recorder := testutil.NewFakeRecorder()
   909  		controller.recorder = recorder
   910  		controller.emitPodDeletionEvent(types.NamespacedName{
   911  			Name:      "test",
   912  			Namespace: "test",
   913  		})
   914  		want := []*corev1.Event{
   915  			{
   916  				ObjectMeta: metav1.ObjectMeta{
   917  					Namespace: "test",
   918  				},
   919  				InvolvedObject: corev1.ObjectReference{
   920  					Kind:       "Pod",
   921  					APIVersion: "v1",
   922  					Namespace:  "test",
   923  					Name:       "test",
   924  				},
   925  				Reason:  "TaintManagerEviction",
   926  				Type:    "Normal",
   927  				Count:   1,
   928  				Message: "Marking for deletion Pod test/test",
   929  				Source:  corev1.EventSource{Component: "nodeControllerTest"},
   930  			},
   931  		}
   932  		if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 {
   933  			t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff)
   934  		}
   935  	})
   936  
   937  	t.Run("emitCancelPodDeletionEvent", func(t *testing.T) {
   938  		controller := &Controller{}
   939  		recorder := testutil.NewFakeRecorder()
   940  		controller.recorder = recorder
   941  		controller.emitCancelPodDeletionEvent(types.NamespacedName{
   942  			Name:      "test",
   943  			Namespace: "test",
   944  		})
   945  		want := []*corev1.Event{
   946  			{
   947  				ObjectMeta: metav1.ObjectMeta{
   948  					Namespace: "test",
   949  				},
   950  				InvolvedObject: corev1.ObjectReference{
   951  					Kind:       "Pod",
   952  					APIVersion: "v1",
   953  					Namespace:  "test",
   954  					Name:       "test",
   955  				},
   956  				Reason:  "TaintManagerEviction",
   957  				Type:    "Normal",
   958  				Count:   1,
   959  				Message: "Cancelling deletion of Pod test/test",
   960  				Source:  corev1.EventSource{Component: "nodeControllerTest"},
   961  			},
   962  		}
   963  		if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 {
   964  			t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff)
   965  		}
   966  	})
   967  }
   968  

View as plain text