...

Source file src/k8s.io/kubernetes/pkg/controller/podgc/gc_controller_test.go

Documentation: k8s.io/kubernetes/pkg/controller/podgc

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package podgc
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/google/go-cmp/cmp"
    26  	"github.com/google/go-cmp/cmp/cmpopts"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  	"k8s.io/apimachinery/pkg/util/strategicpatch"
    33  	"k8s.io/apimachinery/pkg/util/wait"
    34  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    35  	"k8s.io/client-go/informers"
    36  	coreinformers "k8s.io/client-go/informers/core/v1"
    37  	clientset "k8s.io/client-go/kubernetes"
    38  	"k8s.io/client-go/kubernetes/fake"
    39  	clienttesting "k8s.io/client-go/testing"
    40  	"k8s.io/client-go/util/workqueue"
    41  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    42  	metricstestutil "k8s.io/component-base/metrics/testutil"
    43  	"k8s.io/klog/v2/ktesting"
    44  	"k8s.io/kubernetes/pkg/controller"
    45  	"k8s.io/kubernetes/pkg/controller/podgc/metrics"
    46  	"k8s.io/kubernetes/pkg/controller/testutil"
    47  	"k8s.io/kubernetes/pkg/features"
    48  	"k8s.io/kubernetes/pkg/kubelet/eviction"
    49  	testingclock "k8s.io/utils/clock/testing"
    50  	"k8s.io/utils/pointer"
    51  )
    52  
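        // alwaysReady stands in for the informer cache-sync check so gc can run without starting the shared informers.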
    53  func alwaysReady() bool { return true }
    54  
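        // NewFromClient builds a PodGCController backed by the given fake client and returns it together with
        // the pod and node informers so tests can seed the informer stores directly.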
    55  func NewFromClient(ctx context.Context, kubeClient clientset.Interface, terminatedPodThreshold int) (*PodGCController, coreinformers.PodInformer, coreinformers.NodeInformer) {
    56  	informerFactory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
    57  	podInformer := informerFactory.Core().V1().Pods()
    58  	nodeInformer := informerFactory.Core().V1().Nodes()
    59  	controller := NewPodGC(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold)
    60  	controller.podListerSynced = alwaysReady
    61  	return controller, podInformer, nodeInformer
    62  }
    63  
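        // TestGCTerminated verifies that terminated (Failed or Succeeded) pods are garbage collected once their
        // number exceeds the configured threshold, oldest first, and that pods terminated by eviction are
        // collected regardless of the threshold.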
    64  func TestGCTerminated(t *testing.T) {
    65  	type nameToPhase struct {
    66  		name   string
    67  		phase  v1.PodPhase
    68  		reason string
    69  	}
    70  
    71  	testCases := []struct {
    72  		name                          string
    73  		pods                          []nameToPhase
    74  		threshold                     int
    75  		deletedPodNames               sets.String
    76  		patchedPodNames               sets.String
    77  		enablePodDisruptionConditions bool
    78  	}{
    79  		{
    80  			name: "delete pod a which is PodFailed and pod b which is PodSucceeded; PodDisruptionConditions enabled",
    81  			pods: []nameToPhase{
    82  				{name: "a", phase: v1.PodFailed},
    83  				{name: "b", phase: v1.PodSucceeded},
    84  				{name: "c", phase: v1.PodFailed},
    85  			},
    86  			threshold:                     1,
    87  			patchedPodNames:               sets.NewString(),
    88  			deletedPodNames:               sets.NewString("a", "b"),
    89  			enablePodDisruptionConditions: true,
    90  		},
    91  		{
    92  			name: "threshold = 0, disables terminated pod deletion",
    93  			pods: []nameToPhase{
    94  				{name: "a", phase: v1.PodFailed},
    95  				{name: "b", phase: v1.PodSucceeded},
    96  			},
    97  			threshold: 0,
    98  			// threshold = 0 disables terminated pod deletion
    99  			deletedPodNames: sets.NewString(),
   100  		},
   101  		{
   102  			name: "threshold = 1, delete pod a which is PodFailed and pod b which is PodSucceeded",
   103  			pods: []nameToPhase{
   104  				{name: "a", phase: v1.PodFailed},
   105  				{name: "b", phase: v1.PodSucceeded},
   106  				{name: "c", phase: v1.PodFailed},
   107  			},
   108  			threshold:       1,
   109  			deletedPodNames: sets.NewString("a", "b"),
   110  		},
   111  		{
   112  			name: "threshold = 1, delete pod b which is PodSucceeded",
   113  			pods: []nameToPhase{
   114  				{name: "a", phase: v1.PodRunning},
   115  				{name: "b", phase: v1.PodSucceeded},
   116  				{name: "c", phase: v1.PodFailed},
   117  			},
   118  			threshold:       1,
   119  			deletedPodNames: sets.NewString("b"),
   120  		},
   121  		{
   122  			name: "threshold = 1, delete pod a which is PodFailed",
   123  			pods: []nameToPhase{
   124  				{name: "a", phase: v1.PodFailed},
   125  				{name: "b", phase: v1.PodSucceeded},
   126  			},
   127  			threshold:       1,
   128  			deletedPodNames: sets.NewString("a"),
   129  		},
   130  		{
   131  			name: "threshold = 5, don't delete pod",
   132  			pods: []nameToPhase{
   133  				{name: "a", phase: v1.PodFailed},
   134  				{name: "b", phase: v1.PodSucceeded},
   135  			},
   136  			threshold:       5,
   137  			deletedPodNames: sets.NewString(),
   138  		},
   139  		{
   140  			pods: []nameToPhase{
   141  				{name: "a", phase: v1.PodFailed},
   142  				{name: "b", phase: v1.PodSucceeded},
   143  				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
   144  			},
   145  			threshold:       1,
   146  			deletedPodNames: sets.NewString("c", "a"),
   147  		},
   148  		{
   149  			pods: []nameToPhase{
   150  				{name: "a", phase: v1.PodRunning},
   151  				{name: "b", phase: v1.PodSucceeded},
   152  				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
   153  			},
   154  			threshold:       1,
   155  			deletedPodNames: sets.NewString("c"),
   156  		},
   157  	}
   158  	for _, test := range testCases {
   159  		t.Run(test.name, func(t *testing.T) {
   160  			_, ctx := ktesting.NewTestContext(t)
   161  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
   162  			creationTime := time.Unix(0, 0)
   163  			nodes := []*v1.Node{testutil.NewNode("node")}
   164  
   165  			pods := make([]*v1.Pod, 0, len(test.pods))
   166  			for _, pod := range test.pods {
   167  				creationTime = creationTime.Add(1 * time.Hour)
   168  				pods = append(pods, &v1.Pod{
   169  					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime}},
   170  					Status:     v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
   171  					Spec:       v1.PodSpec{NodeName: "node"},
   172  				})
   173  			}
   174  			client := setupNewSimpleClient(nodes, pods)
   175  			gcc, podInformer, _ := NewFromClient(ctx, client, test.threshold)
   176  			for _, pod := range pods {
   177  				podInformer.Informer().GetStore().Add(pod)
   178  			}
   179  
   180  			gcc.gc(ctx)
   181  
   182  			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
   183  		})
   184  	}
   185  
    186  	// The test cases above delete 9 pods in total, so the DeletingPodsTotal metric for PodGCReasonTerminated should read 9.
   187  	testDeletingPodsMetrics(t, 9, metrics.PodGCReasonTerminated)
   188  }
   189  
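        // makePod returns a minimal pod in the default namespace bound to nodeName and in the given phase.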
   190  func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
   191  	return &v1.Pod{
   192  		ObjectMeta: metav1.ObjectMeta{
   193  			Name:      name,
   194  			Namespace: metav1.NamespaceDefault,
   195  		},
   196  		Spec:   v1.PodSpec{NodeName: nodeName},
   197  		Status: v1.PodStatus{Phase: phase},
   198  	}
   199  }
   200  
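        // waitForAdded polls until the delaying queue reaches the given depth, failing after a 10s timeout.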
   201  func waitForAdded(q workqueue.DelayingInterface, depth int) error {
   202  	return wait.Poll(1*time.Millisecond, 10*time.Second, func() (done bool, err error) {
   203  		if q.Len() == depth {
   204  			return true, nil
   205  		}
   206  
   207  		return false, nil
   208  	})
   209  }
   210  
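        // TestGCOrphaned verifies that pods bound to nodes that no longer exist are deleted, but only after the
        // node has been missing for the full quarantine period and its absence is confirmed against the API server.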
   211  func TestGCOrphaned(t *testing.T) {
   212  	testCases := []struct {
   213  		name                          string
   214  		initialClientNodes            []*v1.Node
   215  		initialInformerNodes          []*v1.Node
   216  		delay                         time.Duration
   217  		addedClientNodes              []*v1.Node
   218  		deletedClientNodes            []*v1.Node
   219  		addedInformerNodes            []*v1.Node
   220  		deletedInformerNodes          []*v1.Node
   221  		pods                          []*v1.Pod
   222  		itemsInQueue                  int
   223  		deletedPodNames               sets.String
   224  		patchedPodNames               sets.String
   225  		enablePodDisruptionConditions bool
   226  	}{
   227  		{
   228  			name: "nodes present in lister",
   229  			initialInformerNodes: []*v1.Node{
   230  				testutil.NewNode("existing1"),
   231  				testutil.NewNode("existing2"),
   232  			},
   233  			delay: 2 * quarantineTime,
   234  			pods: []*v1.Pod{
   235  				makePod("a", "existing1", v1.PodRunning),
   236  				makePod("b", "existing2", v1.PodFailed),
   237  				makePod("c", "existing2", v1.PodSucceeded),
   238  			},
   239  			itemsInQueue:    0,
   240  			deletedPodNames: sets.NewString(),
   241  		},
   242  		{
   243  			name: "nodes present in client",
   244  			initialClientNodes: []*v1.Node{
   245  				testutil.NewNode("existing1"),
   246  				testutil.NewNode("existing2"),
   247  			},
   248  			delay: 2 * quarantineTime,
   249  			pods: []*v1.Pod{
   250  				makePod("a", "existing1", v1.PodRunning),
   251  				makePod("b", "existing2", v1.PodFailed),
   252  				makePod("c", "existing2", v1.PodSucceeded),
   253  			},
   254  			itemsInQueue:    2,
   255  			deletedPodNames: sets.NewString(),
   256  		},
   257  		{
   258  			name:  "no nodes",
   259  			delay: 2 * quarantineTime,
   260  			pods: []*v1.Pod{
   261  				makePod("a", "deleted", v1.PodFailed),
   262  				makePod("b", "deleted", v1.PodSucceeded),
   263  			},
   264  			itemsInQueue:    1,
   265  			deletedPodNames: sets.NewString("a", "b"),
   266  		},
   267  		{
   268  			name:  "no nodes with PodDisruptionConditions enabled",
   269  			delay: 2 * quarantineTime,
   270  			pods: []*v1.Pod{
   271  				makePod("a", "deleted", v1.PodFailed),
   272  				makePod("b", "deleted", v1.PodSucceeded),
   273  				makePod("c", "deleted", v1.PodRunning),
   274  			},
   275  			itemsInQueue:                  1,
   276  			deletedPodNames:               sets.NewString("a", "b", "c"),
   277  			patchedPodNames:               sets.NewString("c"),
   278  			enablePodDisruptionConditions: true,
   279  		},
   280  		{
   281  			name:  "quarantine not finished",
   282  			delay: quarantineTime / 2,
   283  			pods: []*v1.Pod{
   284  				makePod("a", "deleted", v1.PodFailed),
   285  			},
   286  			itemsInQueue:    0,
   287  			deletedPodNames: sets.NewString(),
   288  		},
   289  		{
   290  			name:                 "wrong nodes",
   291  			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
   292  			delay:                2 * quarantineTime,
   293  			pods: []*v1.Pod{
   294  				makePod("a", "deleted", v1.PodRunning),
   295  			},
   296  			itemsInQueue:    1,
   297  			deletedPodNames: sets.NewString("a"),
   298  			patchedPodNames: sets.NewString("a"),
   299  		},
   300  		{
   301  			name:                 "some nodes missing",
   302  			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
   303  			delay:                2 * quarantineTime,
   304  			pods: []*v1.Pod{
   305  				makePod("a", "deleted", v1.PodFailed),
   306  				makePod("b", "existing", v1.PodFailed),
   307  				makePod("c", "deleted", v1.PodSucceeded),
   308  				makePod("d", "deleted", v1.PodRunning),
   309  			},
   310  			itemsInQueue:    1,
   311  			deletedPodNames: sets.NewString("a", "c", "d"),
   312  			patchedPodNames: sets.NewString("d"),
   313  		},
   314  		{
   315  			name:             "node added to client after quarantine",
   316  			delay:            2 * quarantineTime,
   317  			addedClientNodes: []*v1.Node{testutil.NewNode("node")},
   318  			pods: []*v1.Pod{
   319  				makePod("a", "node", v1.PodRunning),
   320  			},
   321  			itemsInQueue:    1,
   322  			deletedPodNames: sets.NewString(),
   323  		},
   324  		{
   325  			name:               "node added to informer after quarantine",
   326  			delay:              2 * quarantineTime,
   327  			addedInformerNodes: []*v1.Node{testutil.NewNode("node")},
   328  			pods: []*v1.Pod{
   329  				makePod("a", "node", v1.PodFailed),
   330  			},
   331  			itemsInQueue:    1,
   332  			deletedPodNames: sets.NewString(),
   333  		},
   334  		{
    335  			// The client should never lag behind the informer;
    336  			// this test case is more of a sanity check.
   337  			name:               "node deleted from client after quarantine",
   338  			initialClientNodes: []*v1.Node{testutil.NewNode("node")},
   339  			delay:              2 * quarantineTime,
   340  			deletedClientNodes: []*v1.Node{testutil.NewNode("node")},
   341  			pods: []*v1.Pod{
   342  				makePod("a", "node", v1.PodFailed),
   343  			},
   344  			itemsInQueue:    1,
   345  			deletedPodNames: sets.NewString("a"),
   346  		},
   347  		{
   348  			name:                 "node deleted from informer after quarantine",
   349  			initialInformerNodes: []*v1.Node{testutil.NewNode("node")},
   350  			delay:                2 * quarantineTime,
   351  			deletedInformerNodes: []*v1.Node{testutil.NewNode("node")},
   352  			pods: []*v1.Pod{
   353  				makePod("a", "node", v1.PodSucceeded),
   354  			},
   355  			itemsInQueue:    0,
   356  			deletedPodNames: sets.NewString(),
   357  		},
   358  	}
   359  
   360  	for _, test := range testCases {
   361  		t.Run(test.name, func(t *testing.T) {
   362  			_, ctx := ktesting.NewTestContext(t)
   363  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
   364  			nodes := make([]*v1.Node, 0, len(test.initialClientNodes))
   365  			for _, node := range test.initialClientNodes {
   366  				nodes = append(nodes, node)
   367  			}
   368  			pods := make([]*v1.Pod, 0, len(test.pods))
   369  			for _, pod := range test.pods {
   370  				pods = append(pods, pod)
   371  			}
   372  			client := setupNewSimpleClient(nodes, pods)
   373  			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)
   374  			for _, node := range test.initialInformerNodes {
   375  				nodeInformer.Informer().GetStore().Add(node)
   376  			}
   377  			for _, pod := range test.pods {
   378  				podInformer.Informer().GetStore().Add(pod)
   379  			}
    380  			// Replace the node queue with one driven by a fake clock so the test controls when the quarantine period elapses.
   381  			fakeClock := testingclock.NewFakeClock(time.Now())
   382  			gcc.nodeQueue.ShutDown()
   383  			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")
   384  
   385  			// First GC of orphaned pods
   386  			gcc.gc(ctx)
   387  			deletedPodNames := getDeletedPodNames(client)
   388  
   389  			if len(deletedPodNames) > 0 {
   390  				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
   391  			}
   392  
   393  			// Move clock forward
   394  			fakeClock.Step(test.delay)
   395  			// Wait for queue goroutine to process items
   396  			if test.itemsInQueue > 0 {
   397  				err := waitForAdded(gcc.nodeQueue, test.itemsInQueue)
   398  				if err != nil {
   399  					t.Errorf("wrong number of items in the node queue.\n\texpected: %v\n\tactual: %v",
   400  						test.itemsInQueue, gcc.nodeQueue.Len())
   401  				}
   402  			}
   403  
   404  			// Execute planned nodes changes
   405  			for _, node := range test.addedClientNodes {
   406  				client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
   407  			}
   408  			for _, node := range test.deletedClientNodes {
   409  				client.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{})
   410  			}
   411  			for _, node := range test.addedInformerNodes {
   412  				nodeInformer.Informer().GetStore().Add(node)
   413  			}
   414  			for _, node := range test.deletedInformerNodes {
   415  				nodeInformer.Informer().GetStore().Delete(node)
   416  			}
   417  
   418  			// Actual pod deletion
   419  			gcc.gc(context.TODO())
   420  			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
   421  		})
   422  	}
   423  
    424  	// The test cases above delete 10 pods in total, so the DeletingPodsTotal metric for PodGCReasonOrphaned should read 10.
   425  	testDeletingPodsMetrics(t, 10, metrics.PodGCReasonOrphaned)
   426  }
   427  
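        // TestGCUnscheduledTerminating verifies that terminating pods which were never scheduled (empty NodeName)
        // are force deleted, while pods that are not terminating or that are already scheduled are left alone.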
   428  func TestGCUnscheduledTerminating(t *testing.T) {
   429  	type nameToPhase struct {
   430  		name              string
   431  		phase             v1.PodPhase
   432  		deletionTimeStamp *metav1.Time
   433  		nodeName          string
   434  	}
   435  
   436  	testCases := []struct {
   437  		name                          string
   438  		pods                          []nameToPhase
   439  		deletedPodNames               sets.String
   440  		patchedPodNames               sets.String
   441  		enablePodDisruptionConditions bool
   442  	}{
   443  		{
    444  			name: "Unscheduled pod in any phase must be deleted and the phase of the running pod is changed to Failed; PodDisruptionConditions enabled",
   445  			pods: []nameToPhase{
   446  				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   447  				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   448  				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   449  			},
   450  			deletedPodNames:               sets.NewString("a", "b", "c"),
   451  			patchedPodNames:               sets.NewString("c"),
   452  			enablePodDisruptionConditions: true,
   453  		},
   454  		{
   455  			name: "Unscheduled pod in any phase must be deleted",
   456  			pods: []nameToPhase{
   457  				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   458  				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   459  				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
   460  			},
   461  			deletedPodNames: sets.NewString("a", "b", "c"),
   462  			patchedPodNames: sets.NewString("c"),
   463  		},
   464  		{
   465  			name: "Scheduled pod in any phase must not be deleted",
   466  			pods: []nameToPhase{
   467  				{name: "a", phase: v1.PodFailed, deletionTimeStamp: nil, nodeName: ""},
   468  				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: nil, nodeName: "node"},
   469  				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "node"},
   470  			},
   471  			deletedPodNames: sets.NewString(),
   472  		},
   473  	}
   474  
   475  	for _, test := range testCases {
   476  		t.Run(test.name, func(t *testing.T) {
   477  			_, ctx := ktesting.NewTestContext(t)
   478  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
   479  			creationTime := time.Unix(0, 0)
   480  
   481  			pods := make([]*v1.Pod, 0, len(test.pods))
   482  			for _, pod := range test.pods {
   483  				creationTime = creationTime.Add(1 * time.Hour)
   484  				pods = append(pods, &v1.Pod{
   485  					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
   486  						DeletionTimestamp: pod.deletionTimeStamp},
   487  					Status: v1.PodStatus{Phase: pod.phase},
   488  					Spec:   v1.PodSpec{NodeName: pod.nodeName},
   489  				})
   490  			}
   491  			nodes := []*v1.Node{}
   492  			client := setupNewSimpleClient(nodes, pods)
   493  			gcc, podInformer, _ := NewFromClient(ctx, client, -1)
   494  
   495  			for _, pod := range pods {
   496  				podInformer.Informer().GetStore().Add(pod)
   497  			}
   498  
   499  			pods, err := podInformer.Lister().List(labels.Everything())
   500  			if err != nil {
   501  				t.Errorf("Error while listing all Pods: %v", err)
   502  				return
   503  			}
   504  			gcc.gcUnscheduledTerminating(ctx, pods)
   505  			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
   506  		})
   507  	}
   508  
    509  	// The test cases above delete 6 pods in total, so the DeletingPodsTotal metric for PodGCReasonTerminatingUnscheduled should read 6.
   510  	testDeletingPodsMetrics(t, 6, metrics.PodGCReasonTerminatingUnscheduled)
   511  }
   512  
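        // TestGCTerminating verifies that terminating pods on not-ready nodes are force deleted only when the node
        // carries the out-of-service taint, regardless of the taint's effect or value.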
   513  func TestGCTerminating(t *testing.T) {
   514  	type node struct {
   515  		name           string
   516  		readyCondition v1.ConditionStatus
   517  		taints         []v1.Taint
   518  	}
   519  
   520  	type nameToPodConfig struct {
   521  		name              string
   522  		phase             v1.PodPhase
   523  		deletionTimeStamp *metav1.Time
   524  		nodeName          string
   525  	}
   526  
   527  	testCases := []struct {
   528  		name                          string
   529  		pods                          []nameToPodConfig
   530  		nodes                         []node
   531  		deletedPodNames               sets.String
   532  		patchedPodNames               sets.String
   533  		enablePodDisruptionConditions bool
   534  	}{
   535  		{
   536  			name: "pods have deletion timestamp set and the corresponding nodes are not ready",
   537  			nodes: []node{
   538  				{name: "worker-0", readyCondition: v1.ConditionFalse},
   539  				{name: "worker-1", readyCondition: v1.ConditionFalse},
   540  			},
   541  			pods: []nameToPodConfig{
   542  				{name: "a", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
   543  				{name: "b", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
   544  			},
   545  			deletedPodNames: sets.NewString(),
   546  		},
   547  
   548  		{
    549  			name: "some pods have deletion timestamp and/or phase set and some of the corresponding not-ready nodes have an " +
    550  				"out-of-service taint",
   551  			nodes: []node{
   552  				// terminated pods on this node should be force deleted
   553  				{name: "worker-0", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
   554  					Effect: v1.TaintEffectNoExecute}}},
   555  				// terminated pods on this node should not be force deleted
   556  				{name: "worker-1", readyCondition: v1.ConditionFalse},
   557  				// terminated pods on this node should not be force deleted
   558  				{name: "worker-2", readyCondition: v1.ConditionTrue},
   559  				// terminated pods on this node should be force deleted
   560  				{name: "worker-3", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
   561  					Effect: v1.TaintEffectNoSchedule}}},
   562  				// terminated pods on this node should be force deleted
   563  				{name: "worker-4", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
   564  					Effect: v1.TaintEffectPreferNoSchedule}}},
   565  				// terminated pods on this node should be force deleted
   566  				{name: "worker-5", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
   567  					Value: "any-value", Effect: v1.TaintEffectNoExecute}}},
   568  			},
   569  			pods: []nameToPodConfig{
   570  				// pods a1, b1, c1, d1 and e1 are on node worker-0
   571  				{name: "a1", nodeName: "worker-0"},
   572  				{name: "b1", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
   573  				{name: "c1", phase: v1.PodPending, nodeName: "worker-0"},
   574  				{name: "d1", phase: v1.PodRunning, nodeName: "worker-0"},
   575  				{name: "e1", phase: v1.PodUnknown, nodeName: "worker-0"},
   576  
   577  				// pods a2, b2, c2, d2 and e2 are on node worker-1
   578  				{name: "a2", nodeName: "worker-1"},
   579  				{name: "b2", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
   580  				{name: "c2", phase: v1.PodPending, nodeName: "worker-1"},
   581  				{name: "d2", phase: v1.PodRunning, nodeName: "worker-1"},
   582  				{name: "e2", phase: v1.PodUnknown, nodeName: "worker-1"},
   583  
   584  				// pods a3, b3, c3, d3 and e3 are on node worker-2
   585  				{name: "a3", nodeName: "worker-2"},
   586  				{name: "b3", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-2"},
   587  				{name: "c3", phase: v1.PodPending, nodeName: "worker-2"},
   588  				{name: "d3", phase: v1.PodRunning, nodeName: "worker-2"},
   589  				{name: "e3", phase: v1.PodUnknown, nodeName: "worker-2"},
   590  
   591  				// pods a4, b4, c4, d4 and e4 are on node worker-3
   592  				{name: "a4", nodeName: "worker-3"},
   593  				{name: "b4", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-3"},
   594  				{name: "c4", phase: v1.PodPending, nodeName: "worker-3"},
   595  				{name: "d4", phase: v1.PodRunning, nodeName: "worker-3"},
   596  				{name: "e4", phase: v1.PodUnknown, nodeName: "worker-3"},
   597  
   598  				// pods a5, b5, c5, d5 and e5 are on node worker-4
   599  				{name: "a5", nodeName: "worker-3"},
   600  				{name: "b5", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-4"},
   601  				{name: "c5", phase: v1.PodPending, nodeName: "worker-4"},
   602  				{name: "d5", phase: v1.PodRunning, nodeName: "worker-4"},
   603  				{name: "e5", phase: v1.PodUnknown, nodeName: "worker-4"},
   604  
   605  				// pods a6, b6, c6, d6 and e6 are on node worker-5
   606  				{name: "a6", nodeName: "worker-5"},
   607  				{name: "b6", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-5"},
   608  				{name: "c6", phase: v1.PodPending, nodeName: "worker-5"},
   609  				{name: "d6", phase: v1.PodRunning, nodeName: "worker-5"},
   610  				{name: "e6", phase: v1.PodUnknown, nodeName: "worker-5"},
   611  			},
   612  			deletedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
   613  			patchedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
   614  		},
   615  		{
    616  			name: "pods deleted from node tainted out-of-service; PodDisruptionConditions enabled",
   617  			nodes: []node{
   618  				{name: "worker", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
   619  					Effect: v1.TaintEffectNoExecute}}},
   620  			},
   621  			pods: []nameToPodConfig{
   622  				{name: "a", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
   623  				{name: "b", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
   624  				{name: "c", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
   625  			},
   626  			deletedPodNames:               sets.NewString("a", "b", "c"),
   627  			patchedPodNames:               sets.NewString("a"),
   628  			enablePodDisruptionConditions: true,
   629  		},
   630  	}
   631  	for _, test := range testCases {
   632  		t.Run(test.name, func(t *testing.T) {
   633  			_, ctx := ktesting.NewTestContext(t)
   634  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
   635  
   636  			creationTime := time.Unix(0, 0)
   637  			nodes := make([]*v1.Node, 0, len(test.nodes))
   638  			for _, node := range test.nodes {
   639  				creationTime = creationTime.Add(2 * time.Hour)
   640  				nodes = append(nodes, &v1.Node{
   641  					ObjectMeta: metav1.ObjectMeta{Name: node.name, CreationTimestamp: metav1.Time{Time: creationTime}},
   642  					Spec: v1.NodeSpec{
   643  						Taints: node.taints,
   644  					},
   645  					Status: v1.NodeStatus{
   646  						Conditions: []v1.NodeCondition{
   647  							{
   648  								Type:   v1.NodeReady,
   649  								Status: node.readyCondition,
   650  							},
   651  						},
   652  					},
   653  				})
   654  			}
   655  			pods := make([]*v1.Pod, 0, len(test.pods))
   656  			for _, pod := range test.pods {
   657  				creationTime = creationTime.Add(1 * time.Hour)
   658  				pods = append(pods, &v1.Pod{
   659  					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
   660  						DeletionTimestamp: pod.deletionTimeStamp},
   661  					Status: v1.PodStatus{Phase: pod.phase},
   662  					Spec:   v1.PodSpec{NodeName: pod.nodeName},
   663  				})
   664  			}
   665  			client := setupNewSimpleClient(nodes, pods)
   666  			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)
   667  
   668  			for _, pod := range pods {
   669  				podInformer.Informer().GetStore().Add(pod)
   670  			}
   671  			for _, node := range nodes {
   672  				nodeInformer.Informer().GetStore().Add(node)
   673  			}
   674  
   675  			gcc.gc(ctx)
   676  			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
   677  		})
   678  	}
    679  	// The test cases above delete 7 pods in total, so the DeletingPodsTotal metric for PodGCReasonTerminatingOutOfService should read 7.
   680  	testDeletingPodsMetrics(t, 7, metrics.PodGCReasonTerminatingOutOfService)
   681  }
   682  
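        // TestGCInspectingPatchedPodBeforeDeletion verifies that an orphaned running pod is patched to Failed with a
        // DisruptionTarget condition before being force deleted with a grace period of 0.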
   683  func TestGCInspectingPatchedPodBeforeDeletion(t *testing.T) {
   684  	testCases := []struct {
   685  		name                 string
   686  		pod                  *v1.Pod
   687  		expectedPatchedPod   *v1.Pod
   688  		expectedDeleteAction *clienttesting.DeleteActionImpl
   689  	}{
   690  		{
   691  			name: "orphaned pod should have DisruptionTarget condition added before deletion",
   692  			pod: &v1.Pod{
   693  				ObjectMeta: metav1.ObjectMeta{
   694  					Namespace: "default",
   695  					Name:      "testPod",
   696  				},
   697  				Spec: v1.PodSpec{
   698  					NodeName: "deletedNode",
   699  				},
   700  				Status: v1.PodStatus{
   701  					Phase: v1.PodRunning,
   702  					Conditions: []v1.PodCondition{
   703  						{
   704  							Type:   v1.PodReady,
   705  							Status: v1.ConditionTrue,
   706  						},
   707  					},
   708  				},
   709  			},
   710  			expectedPatchedPod: &v1.Pod{
   711  				ObjectMeta: metav1.ObjectMeta{
   712  					Namespace: "default",
   713  					Name:      "testPod",
   714  				},
   715  				Spec: v1.PodSpec{
   716  					NodeName: "deletedNode",
   717  				},
   718  				Status: v1.PodStatus{
   719  					Phase: v1.PodFailed,
   720  					Conditions: []v1.PodCondition{
   721  						{
   722  							Type:   v1.PodReady,
   723  							Status: v1.ConditionTrue,
   724  						},
   725  						{
   726  							Type:    v1.DisruptionTarget,
   727  							Status:  v1.ConditionTrue,
   728  							Reason:  "DeletionByPodGC",
   729  							Message: "PodGC: node no longer exists",
   730  						},
   731  					},
   732  				},
   733  			},
   734  			expectedDeleteAction: &clienttesting.DeleteActionImpl{
   735  				Name:          "testPod",
   736  				DeleteOptions: metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64(0)},
   737  			},
   738  		},
   739  	}
   740  
   741  	for _, test := range testCases {
   742  		t.Run(test.name, func(t *testing.T) {
   743  			_, ctx := ktesting.NewTestContext(t)
   744  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()
   745  
   746  			pods := []*v1.Pod{test.pod}
   747  
   748  			client := setupNewSimpleClient(nil, pods)
   749  			gcc, podInformer, _ := NewFromClient(ctx, client, -1)
   750  			gcc.quarantineTime = time.Duration(-1)
   751  			podInformer.Informer().GetStore().Add(test.pod)
   752  			gcc.gc(ctx)
   753  
   754  			actions := client.Actions()
   755  
   756  			var patchAction clienttesting.PatchAction
   757  			var deleteAction clienttesting.DeleteAction
   758  
   759  			for _, action := range actions {
   760  				if action.GetVerb() == "patch" {
   761  					patchAction = action.(clienttesting.PatchAction)
   762  				}
   763  
   764  				if action.GetVerb() == "delete" {
   765  					deleteAction = action.(clienttesting.DeleteAction)
   766  				}
   767  			}
   768  
   769  			if patchAction != nil && test.expectedPatchedPod == nil {
    770  				t.Fatalf("Pod was patched but expectedPatchedPod is nil")
   771  			}
   772  			if test.expectedPatchedPod != nil {
   773  				patchedPodBytes := patchAction.GetPatch()
   774  				originalPod, err := json.Marshal(test.pod)
   775  				if err != nil {
   776  					t.Fatalf("Failed to marshal original pod %#v: %v", originalPod, err)
   777  				}
   778  				updated, err := strategicpatch.StrategicMergePatch(originalPod, patchedPodBytes, v1.Pod{})
   779  				if err != nil {
   780  					t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patchedPodBytes, originalPod, err)
   781  				}
   782  
   783  				updatedPod := &v1.Pod{}
   784  				if err := json.Unmarshal(updated, updatedPod); err != nil {
   785  					t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
   786  				}
   787  
   788  				if diff := cmp.Diff(test.expectedPatchedPod, updatedPod, cmpopts.IgnoreFields(v1.Pod{}, "TypeMeta"), cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
   789  					t.Fatalf("Unexpected diff on pod (-want,+got):\n%s", diff)
   790  				}
   791  			}
   792  
   793  			if deleteAction != nil && test.expectedDeleteAction == nil {
   794  				t.Fatalf("Pod was deleted but expectedDeleteAction is nil")
   795  			}
   796  			if test.expectedDeleteAction != nil {
   797  				if diff := cmp.Diff(*test.expectedDeleteAction, deleteAction, cmpopts.IgnoreFields(clienttesting.DeleteActionImpl{}, "ActionImpl")); diff != "" {
   798  					t.Fatalf("Unexpected diff on deleteAction (-want,+got):\n%s", diff)
   799  				}
   800  			}
   801  		})
   802  	}
   803  }
   804  
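        // verifyDeletedAndPatchedPods compares the pod names deleted and patched through the fake client against the expected sets.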
   805  func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
   806  	t.Helper()
   807  	deletedPodNames := getDeletedPodNames(client)
   808  	if diff := cmp.Diff(wantDeletedPodNames, deletedPodNames); diff != "" {
   809  		t.Errorf("Deleted pod names (-want,+got):\n%s", diff)
   810  	}
   811  	patchedPodNames := getPatchedPodNames(client)
   812  	if diff := cmp.Diff(wantPatchedPodNames, patchedPodNames); diff != "" {
   813  		t.Errorf("Patched pod names (-want,+got):\n%s", diff)
   814  	}
   815  }
   816  
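        // testDeletingPodsMetrics asserts that the cumulative DeletingPodsTotal counter for the given reason equals
        // total and that DeletingPodsErrorTotal has stayed at zero.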
   817  func testDeletingPodsMetrics(t *testing.T, total int, reason string) {
   818  	t.Helper()
   819  
   820  	actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsTotal.WithLabelValues(metav1.NamespaceDefault, reason))
   821  	if err != nil {
    822  		t.Errorf("Error getting actualDeletingPodsTotal: %v", err)
   823  	}
   824  	if actualDeletingPodsTotal != float64(total) {
    825  		t.Errorf("Expected DeletingPodsTotal to be %d, got %v", total, actualDeletingPodsTotal)
   826  	}
   827  
   828  	actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsErrorTotal.WithLabelValues("", reason))
   829  	if err != nil {
    830  		t.Errorf("Error getting actualDeletingPodsErrorTotal: %v", err)
   831  	}
   832  	if actualDeletingPodsErrorTotal != float64(0) {
    833  		t.Errorf("Expected DeletingPodsErrorTotal to be %d, got %v", 0, actualDeletingPodsErrorTotal)
   834  	}
   835  }
   836  
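        // setupNewSimpleClient returns a fake clientset pre-populated with the given nodes and pods.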
   837  func setupNewSimpleClient(nodes []*v1.Node, pods []*v1.Pod) *fake.Clientset {
   838  	podList := &v1.PodList{}
   839  	for _, podItem := range pods {
   840  		podList.Items = append(podList.Items, *podItem)
   841  	}
   842  	nodeList := &v1.NodeList{}
   843  	for _, nodeItem := range nodes {
   844  		nodeList.Items = append(nodeList.Items, *nodeItem)
   845  	}
   846  	return fake.NewSimpleClientset(nodeList, podList)
   847  }
   848  
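        // getDeletedPodNames collects the names of all pods deleted through the fake client.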
   849  func getDeletedPodNames(client *fake.Clientset) sets.String {
   850  	deletedPodNames := sets.NewString()
   851  	for _, action := range client.Actions() {
   852  		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
   853  			deleteAction := action.(clienttesting.DeleteAction)
   854  			deletedPodNames.Insert(deleteAction.GetName())
   855  		}
   856  	}
   857  	return deletedPodNames
   858  }
   859  
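        // getPatchedPodNames collects the names of all pods patched through the fake client.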
   860  func getPatchedPodNames(client *fake.Clientset) sets.String {
   861  	patchedPodNames := sets.NewString()
   862  	for _, action := range client.Actions() {
   863  		if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" {
   864  			patchAction := action.(clienttesting.PatchAction)
   865  			patchedPodNames.Insert(patchAction.GetName())
   866  		}
   867  	}
   868  	return patchedPodNames
   869  }
   870  
