...

Source file src/k8s.io/kubernetes/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go

Documentation: k8s.io/kubernetes/pkg/kubelet/nodeshutdown

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2020 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package nodeshutdown
    21  
    22  import (
    23  	"fmt"
    24  	"os"
    25  	"strings"
    26  	"sync"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/google/go-cmp/cmp"
    31  	"github.com/google/go-cmp/cmp/cmpopts"
    32  	"github.com/stretchr/testify/assert"
    33  	v1 "k8s.io/api/core/v1"
    34  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    35  	"k8s.io/apimachinery/pkg/types"
    36  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    37  	"k8s.io/client-go/tools/record"
    38  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    39  	"k8s.io/klog/v2/ktesting"
    40  	_ "k8s.io/klog/v2/ktesting/init" // activate ktesting command line flags
    41  	"k8s.io/kubernetes/pkg/apis/scheduling"
    42  	pkgfeatures "k8s.io/kubernetes/pkg/features"
    43  	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    44  	"k8s.io/kubernetes/pkg/kubelet/eviction"
    45  	"k8s.io/kubernetes/pkg/kubelet/nodeshutdown/systemd"
    46  	"k8s.io/kubernetes/pkg/kubelet/prober"
    47  	probetest "k8s.io/kubernetes/pkg/kubelet/prober/testing"
    48  	"k8s.io/utils/clock"
    49  	testingclock "k8s.io/utils/clock/testing"
    50  )
    51  
// lock serializes the tests' reassignment of the package-level systemDbus
// hook: TestManager and TestRestart hold it from the moment they install a
// fake systemDbus until manager.Start() has returned.
var lock sync.Mutex
    54  
    55  type fakeDbus struct {
    56  	currentInhibitDelay        time.Duration
    57  	overrideSystemInhibitDelay time.Duration
    58  	shutdownChan               chan bool
    59  
    60  	didInhibitShutdown      bool
    61  	didOverrideInhibitDelay bool
    62  }
    63  
    64  func (f *fakeDbus) CurrentInhibitDelay() (time.Duration, error) {
    65  	if f.didOverrideInhibitDelay {
    66  		return f.overrideSystemInhibitDelay, nil
    67  	}
    68  	return f.currentInhibitDelay, nil
    69  }
    70  
    71  func (f *fakeDbus) InhibitShutdown() (systemd.InhibitLock, error) {
    72  	f.didInhibitShutdown = true
    73  	return systemd.InhibitLock(0), nil
    74  }
    75  
    76  func (f *fakeDbus) ReleaseInhibitLock(lock systemd.InhibitLock) error {
    77  	return nil
    78  }
    79  
    80  func (f *fakeDbus) ReloadLogindConf() error {
    81  	return nil
    82  }
    83  
    84  func (f *fakeDbus) MonitorShutdown() (<-chan bool, error) {
    85  	return f.shutdownChan, nil
    86  }
    87  
    88  func (f *fakeDbus) OverrideInhibitDelay(inhibitDelayMax time.Duration) error {
    89  	f.didOverrideInhibitDelay = true
    90  	return nil
    91  }
    92  
    93  func makePod(name string, priority int32, terminationGracePeriod *int64) *v1.Pod {
    94  	return &v1.Pod{
    95  		ObjectMeta: metav1.ObjectMeta{
    96  			Name: name,
    97  			UID:  types.UID(name),
    98  		},
    99  		Spec: v1.PodSpec{
   100  			Priority:                      &priority,
   101  			TerminationGracePeriodSeconds: terminationGracePeriod,
   102  		},
   103  	}
   104  }
   105  
   106  func TestManager(t *testing.T) {
   107  	systemDbusTmp := systemDbus
   108  	defer func() {
   109  		systemDbus = systemDbusTmp
   110  	}()
   111  	normalPodNoGracePeriod := makePod("normal-pod-nil-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil /* terminationGracePeriod */)
   112  	criticalPodNoGracePeriod := makePod("critical-pod-nil-grace-period", scheduling.SystemCriticalPriority, nil /* terminationGracePeriod */)
   113  
   114  	shortGracePeriod := int64(2)
   115  	normalPodGracePeriod := makePod("normal-pod-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, &shortGracePeriod /* terminationGracePeriod */)
   116  	criticalPodGracePeriod := makePod("critical-pod-grace-period", scheduling.SystemCriticalPriority, &shortGracePeriod /* terminationGracePeriod */)
   117  
   118  	longGracePeriod := int64(1000)
   119  	normalPodLongGracePeriod := makePod("normal-pod-long-grace-period", scheduling.DefaultPriorityWhenNoDefaultClassExists, &longGracePeriod /* terminationGracePeriod */)
   120  
   121  	var tests = []struct {
   122  		desc                             string
   123  		activePods                       []*v1.Pod
   124  		shutdownGracePeriodRequested     time.Duration
   125  		shutdownGracePeriodCriticalPods  time.Duration
   126  		systemInhibitDelay               time.Duration
   127  		overrideSystemInhibitDelay       time.Duration
   128  		enablePodDisruptionConditions    bool
   129  		expectedDidOverrideInhibitDelay  bool
   130  		expectedPodToGracePeriodOverride map[string]int64
   131  		expectedError                    error
   132  		expectedPodStatuses              map[string]v1.PodStatus
   133  	}{
   134  		{
   135  			desc: "verify pod status; PodDisruptionConditions enabled",
   136  			activePods: []*v1.Pod{
   137  				{
   138  					ObjectMeta: metav1.ObjectMeta{Name: "running-pod"},
   139  					Spec:       v1.PodSpec{},
   140  					Status: v1.PodStatus{
   141  						Phase: v1.PodRunning,
   142  					},
   143  				},
   144  				{
   145  					ObjectMeta: metav1.ObjectMeta{Name: "failed-pod"},
   146  					Spec:       v1.PodSpec{},
   147  					Status: v1.PodStatus{
   148  						Phase: v1.PodFailed,
   149  					},
   150  				},
   151  				{
   152  					ObjectMeta: metav1.ObjectMeta{Name: "succeeded-pod"},
   153  					Spec:       v1.PodSpec{},
   154  					Status: v1.PodStatus{
   155  						Phase: v1.PodSucceeded,
   156  					},
   157  				},
   158  			},
   159  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   160  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   161  			systemInhibitDelay:               time.Duration(40 * time.Second),
   162  			overrideSystemInhibitDelay:       time.Duration(40 * time.Second),
   163  			enablePodDisruptionConditions:    true,
   164  			expectedDidOverrideInhibitDelay:  false,
   165  			expectedPodToGracePeriodOverride: map[string]int64{"running-pod": 20, "failed-pod": 20, "succeeded-pod": 20},
   166  			expectedPodStatuses: map[string]v1.PodStatus{
   167  				"running-pod": {
   168  					Phase:   v1.PodFailed,
   169  					Message: "Pod was terminated in response to imminent node shutdown.",
   170  					Reason:  "Terminated",
   171  					Conditions: []v1.PodCondition{
   172  						{
   173  							Type:    v1.DisruptionTarget,
   174  							Status:  v1.ConditionTrue,
   175  							Reason:  "TerminationByKubelet",
   176  							Message: "Pod was terminated in response to imminent node shutdown.",
   177  						},
   178  					},
   179  				},
   180  				"failed-pod": {
   181  					Phase:   v1.PodFailed,
   182  					Message: "Pod was terminated in response to imminent node shutdown.",
   183  					Reason:  "Terminated",
   184  					Conditions: []v1.PodCondition{
   185  						{
   186  							Type:    v1.DisruptionTarget,
   187  							Status:  v1.ConditionTrue,
   188  							Reason:  "TerminationByKubelet",
   189  							Message: "Pod was terminated in response to imminent node shutdown.",
   190  						},
   191  					},
   192  				},
   193  				"succeeded-pod": {
   194  					Phase:   v1.PodSucceeded,
   195  					Message: "Pod was terminated in response to imminent node shutdown.",
   196  					Reason:  "Terminated",
   197  					Conditions: []v1.PodCondition{
   198  						{
   199  							Type:    v1.DisruptionTarget,
   200  							Status:  v1.ConditionTrue,
   201  							Reason:  "TerminationByKubelet",
   202  							Message: "Pod was terminated in response to imminent node shutdown.",
   203  						},
   204  					},
   205  				},
   206  			},
   207  		},
   208  		{
   209  			desc:                             "no override (total=30s, critical=10s)",
   210  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   211  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   212  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   213  			systemInhibitDelay:               time.Duration(40 * time.Second),
   214  			overrideSystemInhibitDelay:       time.Duration(40 * time.Second),
   215  			enablePodDisruptionConditions:    false,
   216  			expectedDidOverrideInhibitDelay:  false,
   217  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 20, "critical-pod-nil-grace-period": 10},
   218  			expectedPodStatuses: map[string]v1.PodStatus{
   219  				"normal-pod-nil-grace-period": {
   220  					Phase:   v1.PodFailed,
   221  					Message: "Pod was terminated in response to imminent node shutdown.",
   222  					Reason:  "Terminated",
   223  				},
   224  				"critical-pod-nil-grace-period": {
   225  					Phase:   v1.PodFailed,
   226  					Message: "Pod was terminated in response to imminent node shutdown.",
   227  					Reason:  "Terminated",
   228  				},
   229  			},
   230  		},
   231  		{
   232  			desc:                             "no override (total=30s, critical=10s) pods with terminationGracePeriod and without",
   233  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod, normalPodGracePeriod, criticalPodGracePeriod},
   234  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   235  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   236  			systemInhibitDelay:               time.Duration(40 * time.Second),
   237  			overrideSystemInhibitDelay:       time.Duration(40 * time.Second),
   238  			expectedDidOverrideInhibitDelay:  false,
   239  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 20, "critical-pod-nil-grace-period": 10, "normal-pod-grace-period": 2, "critical-pod-grace-period": 2},
   240  		},
   241  		{
   242  			desc:                             "no override (total=30s, critical=10s) pod with long terminationGracePeriod is overridden",
   243  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod, normalPodGracePeriod, criticalPodGracePeriod, normalPodLongGracePeriod},
   244  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   245  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   246  			systemInhibitDelay:               time.Duration(40 * time.Second),
   247  			overrideSystemInhibitDelay:       time.Duration(40 * time.Second),
   248  			expectedDidOverrideInhibitDelay:  false,
   249  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 20, "critical-pod-nil-grace-period": 10, "normal-pod-grace-period": 2, "critical-pod-grace-period": 2, "normal-pod-long-grace-period": 20},
   250  		},
   251  		{
   252  			desc:                             "no override (total=30, critical=0)",
   253  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   254  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   255  			shutdownGracePeriodCriticalPods:  time.Duration(0 * time.Second),
   256  			systemInhibitDelay:               time.Duration(40 * time.Second),
   257  			overrideSystemInhibitDelay:       time.Duration(40 * time.Second),
   258  			expectedDidOverrideInhibitDelay:  false,
   259  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 30, "critical-pod-nil-grace-period": 0},
   260  		},
   261  		{
   262  			desc:                             "override successful (total=30, critical=10)",
   263  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   264  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   265  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   266  			systemInhibitDelay:               time.Duration(5 * time.Second),
   267  			overrideSystemInhibitDelay:       time.Duration(30 * time.Second),
   268  			expectedDidOverrideInhibitDelay:  true,
   269  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 20, "critical-pod-nil-grace-period": 10},
   270  		},
   271  		{
   272  			desc:                             "override unsuccessful",
   273  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   274  			shutdownGracePeriodRequested:     time.Duration(30 * time.Second),
   275  			shutdownGracePeriodCriticalPods:  time.Duration(10 * time.Second),
   276  			systemInhibitDelay:               time.Duration(5 * time.Second),
   277  			overrideSystemInhibitDelay:       time.Duration(5 * time.Second),
   278  			expectedDidOverrideInhibitDelay:  true,
   279  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 5, "critical-pod-nil-grace-period": 0},
   280  			expectedError:                    fmt.Errorf("unable to update logind InhibitDelayMaxSec to 30s (ShutdownGracePeriod), current value of InhibitDelayMaxSec (5s) is less than requested ShutdownGracePeriod"),
   281  		},
   282  		{
   283  			desc:                            "override unsuccessful, zero time",
   284  			activePods:                      []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   285  			shutdownGracePeriodRequested:    time.Duration(5 * time.Second),
   286  			shutdownGracePeriodCriticalPods: time.Duration(5 * time.Second),
   287  			systemInhibitDelay:              time.Duration(0 * time.Second),
   288  			overrideSystemInhibitDelay:      time.Duration(0 * time.Second),
   289  			expectedError:                   fmt.Errorf("unable to update logind InhibitDelayMaxSec to 5s (ShutdownGracePeriod), current value of InhibitDelayMaxSec (0s) is less than requested ShutdownGracePeriod"),
   290  		},
   291  		{
   292  			desc:                             "no override, all time to critical pods",
   293  			activePods:                       []*v1.Pod{normalPodNoGracePeriod, criticalPodNoGracePeriod},
   294  			shutdownGracePeriodRequested:     time.Duration(5 * time.Second),
   295  			shutdownGracePeriodCriticalPods:  time.Duration(5 * time.Second),
   296  			systemInhibitDelay:               time.Duration(5 * time.Second),
   297  			overrideSystemInhibitDelay:       time.Duration(5 * time.Second),
   298  			expectedDidOverrideInhibitDelay:  false,
   299  			expectedPodToGracePeriodOverride: map[string]int64{"normal-pod-nil-grace-period": 0, "critical-pod-nil-grace-period": 5},
   300  		},
   301  	}
   302  
   303  	for _, tc := range tests {
   304  		t.Run(tc.desc, func(t *testing.T) {
   305  			logger, _ := ktesting.NewTestContext(t)
   306  
   307  			activePodsFunc := func() []*v1.Pod {
   308  				return tc.activePods
   309  			}
   310  
   311  			type PodKillInfo struct {
   312  				Name        string
   313  				GracePeriod int64
   314  			}
   315  
   316  			podKillChan := make(chan PodKillInfo, 1)
   317  			killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(podStatus *v1.PodStatus)) error {
   318  				var gracePeriod int64
   319  				if gracePeriodOverride != nil {
   320  					gracePeriod = *gracePeriodOverride
   321  				}
   322  				fn(&pod.Status)
   323  				podKillChan <- PodKillInfo{Name: pod.Name, GracePeriod: gracePeriod}
   324  				return nil
   325  			}
   326  
   327  			fakeShutdownChan := make(chan bool)
   328  			fakeDbus := &fakeDbus{currentInhibitDelay: tc.systemInhibitDelay, shutdownChan: fakeShutdownChan, overrideSystemInhibitDelay: tc.overrideSystemInhibitDelay}
   329  
   330  			lock.Lock()
   331  			systemDbus = func() (dbusInhibiter, error) {
   332  				return fakeDbus, nil
   333  			}
   334  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.PodDisruptionConditions, tc.enablePodDisruptionConditions)()
   335  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, true)()
   336  
   337  			proberManager := probetest.FakeManager{}
   338  			fakeRecorder := &record.FakeRecorder{}
   339  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   340  			manager, _ := NewManager(&Config{
   341  				Logger:                          logger,
   342  				ProbeManager:                    proberManager,
   343  				Recorder:                        fakeRecorder,
   344  				NodeRef:                         nodeRef,
   345  				GetPodsFunc:                     activePodsFunc,
   346  				KillPodFunc:                     killPodsFunc,
   347  				SyncNodeStatusFunc:              func() {},
   348  				ShutdownGracePeriodRequested:    tc.shutdownGracePeriodRequested,
   349  				ShutdownGracePeriodCriticalPods: tc.shutdownGracePeriodCriticalPods,
   350  				Clock:                           testingclock.NewFakeClock(time.Now()),
   351  				StateDirectory:                  os.TempDir(),
   352  			})
   353  
   354  			err := manager.Start()
   355  			lock.Unlock()
   356  
   357  			if tc.expectedError != nil {
   358  				if err == nil {
   359  					t.Errorf("unexpected error message. Got: <nil> want %s", tc.expectedError.Error())
   360  				} else if !strings.Contains(err.Error(), tc.expectedError.Error()) {
   361  					t.Errorf("unexpected error message. Got: %s want %s", err.Error(), tc.expectedError.Error())
   362  				}
   363  			} else {
   364  				assert.NoError(t, err, "expected manager.Start() to not return error")
   365  				assert.True(t, fakeDbus.didInhibitShutdown, "expected that manager inhibited shutdown")
   366  				assert.NoError(t, manager.ShutdownStatus(), "expected that manager does not return error since shutdown is not active")
   367  				assert.Equal(t, manager.Admit(nil).Admit, true)
   368  
   369  				// Send fake shutdown event
   370  				select {
   371  				case fakeShutdownChan <- true:
   372  				case <-time.After(1 * time.Second):
   373  					t.Fatal()
   374  				}
   375  
   376  				// Wait for all the pods to be killed
   377  				killedPodsToGracePeriods := map[string]int64{}
   378  				for i := 0; i < len(tc.activePods); i++ {
   379  					select {
   380  					case podKillInfo := <-podKillChan:
   381  						killedPodsToGracePeriods[podKillInfo.Name] = podKillInfo.GracePeriod
   382  						continue
   383  					case <-time.After(1 * time.Second):
   384  						t.Fatal()
   385  					}
   386  				}
   387  
   388  				assert.Error(t, manager.ShutdownStatus(), "expected that manager returns error since shutdown is active")
   389  				assert.Equal(t, manager.Admit(nil).Admit, false)
   390  				assert.Equal(t, tc.expectedPodToGracePeriodOverride, killedPodsToGracePeriods)
   391  				assert.Equal(t, tc.expectedDidOverrideInhibitDelay, fakeDbus.didOverrideInhibitDelay, "override system inhibit delay differs")
   392  				if tc.expectedPodStatuses != nil {
   393  					for _, pod := range tc.activePods {
   394  						if diff := cmp.Diff(tc.expectedPodStatuses[pod.Name], pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
   395  							t.Errorf("Unexpected PodStatus: (-want,+got):\n%s", diff)
   396  						}
   397  					}
   398  				}
   399  			}
   400  		})
   401  	}
   402  }
   403  
   404  func TestFeatureEnabled(t *testing.T) {
   405  	var tests = []struct {
   406  		desc                         string
   407  		shutdownGracePeriodRequested time.Duration
   408  		featureGateEnabled           bool
   409  		expectEnabled                bool
   410  	}{
   411  		{
   412  			desc:                         "shutdownGracePeriodRequested 0; disables feature",
   413  			shutdownGracePeriodRequested: time.Duration(0 * time.Second),
   414  			featureGateEnabled:           true,
   415  			expectEnabled:                false,
   416  		},
   417  		{
   418  			desc:                         "feature gate disabled; disables feature",
   419  			shutdownGracePeriodRequested: time.Duration(100 * time.Second),
   420  			featureGateEnabled:           false,
   421  			expectEnabled:                false,
   422  		},
   423  		{
   424  			desc:                         "feature gate enabled; shutdownGracePeriodRequested > 0; enables feature",
   425  			shutdownGracePeriodRequested: time.Duration(100 * time.Second),
   426  			featureGateEnabled:           true,
   427  			expectEnabled:                true,
   428  		},
   429  	}
   430  	for _, tc := range tests {
   431  		t.Run(tc.desc, func(t *testing.T) {
   432  			logger, _ := ktesting.NewTestContext(t)
   433  			activePodsFunc := func() []*v1.Pod {
   434  				return nil
   435  			}
   436  			killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
   437  				return nil
   438  			}
   439  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)()
   440  
   441  			proberManager := probetest.FakeManager{}
   442  			fakeRecorder := &record.FakeRecorder{}
   443  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   444  
   445  			manager, _ := NewManager(&Config{
   446  				Logger:                          logger,
   447  				ProbeManager:                    proberManager,
   448  				Recorder:                        fakeRecorder,
   449  				NodeRef:                         nodeRef,
   450  				GetPodsFunc:                     activePodsFunc,
   451  				KillPodFunc:                     killPodsFunc,
   452  				SyncNodeStatusFunc:              func() {},
   453  				ShutdownGracePeriodRequested:    tc.shutdownGracePeriodRequested,
   454  				ShutdownGracePeriodCriticalPods: 0,
   455  				StateDirectory:                  os.TempDir(),
   456  			})
   457  			assert.Equal(t, tc.expectEnabled, manager != managerStub{})
   458  		})
   459  	}
   460  }
   461  
   462  func TestRestart(t *testing.T) {
   463  	logger, _ := ktesting.NewTestContext(t)
   464  	systemDbusTmp := systemDbus
   465  	defer func() {
   466  		systemDbus = systemDbusTmp
   467  	}()
   468  
   469  	shutdownGracePeriodRequested := 30 * time.Second
   470  	shutdownGracePeriodCriticalPods := 10 * time.Second
   471  	systemInhibitDelay := 40 * time.Second
   472  	overrideSystemInhibitDelay := 40 * time.Second
   473  	activePodsFunc := func() []*v1.Pod {
   474  		return nil
   475  	}
   476  	killPodsFunc := func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
   477  		return nil
   478  	}
   479  	syncNodeStatus := func() {}
   480  
   481  	var shutdownChan chan bool
   482  	var shutdownChanMut sync.Mutex
   483  	var connChan = make(chan struct{}, 1)
   484  
   485  	lock.Lock()
   486  	systemDbus = func() (dbusInhibiter, error) {
   487  		defer func() {
   488  			connChan <- struct{}{}
   489  		}()
   490  		ch := make(chan bool)
   491  		shutdownChanMut.Lock()
   492  		shutdownChan = ch
   493  		shutdownChanMut.Unlock()
   494  		dbus := &fakeDbus{currentInhibitDelay: systemInhibitDelay, shutdownChan: ch, overrideSystemInhibitDelay: overrideSystemInhibitDelay}
   495  		return dbus, nil
   496  	}
   497  
   498  	proberManager := probetest.FakeManager{}
   499  	fakeRecorder := &record.FakeRecorder{}
   500  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   501  	manager, _ := NewManager(&Config{
   502  		Logger:                          logger,
   503  		ProbeManager:                    proberManager,
   504  		Recorder:                        fakeRecorder,
   505  		NodeRef:                         nodeRef,
   506  		GetPodsFunc:                     activePodsFunc,
   507  		KillPodFunc:                     killPodsFunc,
   508  		SyncNodeStatusFunc:              syncNodeStatus,
   509  		ShutdownGracePeriodRequested:    shutdownGracePeriodRequested,
   510  		ShutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods,
   511  		StateDirectory:                  os.TempDir(),
   512  	})
   513  
   514  	err := manager.Start()
   515  	lock.Unlock()
   516  
   517  	if err != nil {
   518  		t.Errorf("unexpected error: %v", err)
   519  	}
   520  
   521  	for i := 0; i != 3; i++ {
   522  		select {
   523  		case <-time.After(dbusReconnectPeriod * 5):
   524  			t.Fatal("wait dbus connect timeout")
   525  		case <-connChan:
   526  		}
   527  
   528  		shutdownChanMut.Lock()
   529  		close(shutdownChan)
   530  		shutdownChanMut.Unlock()
   531  	}
   532  }
   533  
   534  func Test_migrateConfig(t *testing.T) {
   535  	type shutdownConfig struct {
   536  		shutdownGracePeriodRequested    time.Duration
   537  		shutdownGracePeriodCriticalPods time.Duration
   538  	}
   539  	tests := []struct {
   540  		name string
   541  		args shutdownConfig
   542  		want []kubeletconfig.ShutdownGracePeriodByPodPriority
   543  	}{
   544  		{
   545  			name: "both shutdownGracePeriodRequested and shutdownGracePeriodCriticalPods",
   546  			args: shutdownConfig{
   547  				shutdownGracePeriodRequested:    300 * time.Second,
   548  				shutdownGracePeriodCriticalPods: 120 * time.Second,
   549  			},
   550  			want: []kubeletconfig.ShutdownGracePeriodByPodPriority{
   551  				{
   552  					Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
   553  					ShutdownGracePeriodSeconds: 180,
   554  				},
   555  				{
   556  					Priority:                   scheduling.SystemCriticalPriority,
   557  					ShutdownGracePeriodSeconds: 120,
   558  				},
   559  			},
   560  		},
   561  		{
   562  			name: "only shutdownGracePeriodRequested",
   563  			args: shutdownConfig{
   564  				shutdownGracePeriodRequested:    100 * time.Second,
   565  				shutdownGracePeriodCriticalPods: 0 * time.Second,
   566  			},
   567  			want: []kubeletconfig.ShutdownGracePeriodByPodPriority{
   568  				{
   569  					Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
   570  					ShutdownGracePeriodSeconds: 100,
   571  				},
   572  				{
   573  					Priority:                   scheduling.SystemCriticalPriority,
   574  					ShutdownGracePeriodSeconds: 0,
   575  				},
   576  			},
   577  		},
   578  		{
   579  			name: "empty configuration",
   580  			args: shutdownConfig{
   581  				shutdownGracePeriodRequested:    0 * time.Second,
   582  				shutdownGracePeriodCriticalPods: 0 * time.Second,
   583  			},
   584  			want: nil,
   585  		},
   586  		{
   587  			name: "wrong configuration",
   588  			args: shutdownConfig{
   589  				shutdownGracePeriodRequested:    1 * time.Second,
   590  				shutdownGracePeriodCriticalPods: 100 * time.Second,
   591  			},
   592  			want: nil,
   593  		},
   594  	}
   595  	for _, tt := range tests {
   596  		t.Run(tt.name, func(t *testing.T) {
   597  			if got := migrateConfig(tt.args.shutdownGracePeriodRequested, tt.args.shutdownGracePeriodCriticalPods); !assert.Equal(t, tt.want, got) {
   598  				t.Errorf("migrateConfig() = %v, want %v", got, tt.want)
   599  			}
   600  		})
   601  	}
   602  }
   603  
   604  func Test_groupByPriority(t *testing.T) {
   605  	type args struct {
   606  		shutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
   607  		pods                             []*v1.Pod
   608  	}
   609  	tests := []struct {
   610  		name string
   611  		args args
   612  		want []podShutdownGroup
   613  	}{
   614  		{
   615  			name: "migrate config",
   616  			args: args{
   617  				shutdownGracePeriodByPodPriority: migrateConfig(300*time.Second /* shutdownGracePeriodRequested */, 120*time.Second /* shutdownGracePeriodCriticalPods */),
   618  				pods: []*v1.Pod{
   619  					makePod("normal-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil),
   620  					makePod("highest-user-definable-pod", scheduling.HighestUserDefinablePriority, nil),
   621  					makePod("critical-pod", scheduling.SystemCriticalPriority, nil),
   622  				},
   623  			},
   624  			want: []podShutdownGroup{
   625  				{
   626  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   627  						Priority:                   scheduling.DefaultPriorityWhenNoDefaultClassExists,
   628  						ShutdownGracePeriodSeconds: 180,
   629  					},
   630  					Pods: []*v1.Pod{
   631  						makePod("normal-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, nil),
   632  						makePod("highest-user-definable-pod", scheduling.HighestUserDefinablePriority, nil),
   633  					},
   634  				},
   635  				{
   636  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   637  						Priority:                   scheduling.SystemCriticalPriority,
   638  						ShutdownGracePeriodSeconds: 120,
   639  					},
   640  					Pods: []*v1.Pod{
   641  						makePod("critical-pod", scheduling.SystemCriticalPriority, nil),
   642  					},
   643  				},
   644  			},
   645  		},
   646  		{
   647  			name: "pod priority",
   648  			args: args{
   649  				shutdownGracePeriodByPodPriority: []kubeletconfig.ShutdownGracePeriodByPodPriority{
   650  					{
   651  						Priority:                   1,
   652  						ShutdownGracePeriodSeconds: 10,
   653  					},
   654  					{
   655  						Priority:                   2,
   656  						ShutdownGracePeriodSeconds: 20,
   657  					},
   658  					{
   659  						Priority:                   3,
   660  						ShutdownGracePeriodSeconds: 30,
   661  					},
   662  					{
   663  						Priority:                   4,
   664  						ShutdownGracePeriodSeconds: 40,
   665  					},
   666  				},
   667  				pods: []*v1.Pod{
   668  					makePod("pod-0", 0, nil),
   669  					makePod("pod-1", 1, nil),
   670  					makePod("pod-2", 2, nil),
   671  					makePod("pod-3", 3, nil),
   672  					makePod("pod-4", 4, nil),
   673  					makePod("pod-5", 5, nil),
   674  				},
   675  			},
   676  			want: []podShutdownGroup{
   677  				{
   678  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   679  						Priority:                   1,
   680  						ShutdownGracePeriodSeconds: 10,
   681  					},
   682  					Pods: []*v1.Pod{
   683  						makePod("pod-0", 0, nil),
   684  						makePod("pod-1", 1, nil),
   685  					},
   686  				},
   687  				{
   688  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   689  						Priority:                   2,
   690  						ShutdownGracePeriodSeconds: 20,
   691  					},
   692  					Pods: []*v1.Pod{
   693  						makePod("pod-2", 2, nil),
   694  					},
   695  				},
   696  				{
   697  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   698  						Priority:                   3,
   699  						ShutdownGracePeriodSeconds: 30,
   700  					},
   701  					Pods: []*v1.Pod{
   702  						makePod("pod-3", 3, nil),
   703  					},
   704  				},
   705  				{
   706  					ShutdownGracePeriodByPodPriority: kubeletconfig.ShutdownGracePeriodByPodPriority{
   707  						Priority:                   4,
   708  						ShutdownGracePeriodSeconds: 40,
   709  					},
   710  					Pods: []*v1.Pod{
   711  						makePod("pod-4", 4, nil),
   712  						makePod("pod-5", 5, nil),
   713  					},
   714  				},
   715  			},
   716  		},
   717  	}
   718  	for _, tt := range tests {
   719  		t.Run(tt.name, func(t *testing.T) {
   720  			if got := groupByPriority(tt.args.shutdownGracePeriodByPodPriority, tt.args.pods); !assert.Equal(t, tt.want, got) {
   721  				t.Errorf("groupByPriority() = %v, want %v", got, tt.want)
   722  			}
   723  		})
   724  	}
   725  }
   726  
   727  func Test_managerImpl_processShutdownEvent(t *testing.T) {
   728  	var (
   729  		probeManager   = probetest.FakeManager{}
   730  		fakeRecorder   = &record.FakeRecorder{}
   731  		syncNodeStatus = func() {}
   732  		nodeRef        = &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   733  		fakeclock      = testingclock.NewFakeClock(time.Now())
   734  	)
   735  
   736  	type fields struct {
   737  		recorder                         record.EventRecorder
   738  		nodeRef                          *v1.ObjectReference
   739  		probeManager                     prober.Manager
   740  		shutdownGracePeriodByPodPriority []kubeletconfig.ShutdownGracePeriodByPodPriority
   741  		getPods                          eviction.ActivePodsFunc
   742  		killPodFunc                      eviction.KillPodFunc
   743  		syncNodeStatus                   func()
   744  		dbusCon                          dbusInhibiter
   745  		inhibitLock                      systemd.InhibitLock
   746  		nodeShuttingDownNow              bool
   747  		clock                            clock.Clock
   748  	}
   749  	tests := []struct {
   750  		name                   string
   751  		fields                 fields
   752  		wantErr                bool
   753  		expectedOutputContains string
   754  	}{
   755  		{
   756  			name: "kill pod func take too long",
   757  			fields: fields{
   758  				recorder:     fakeRecorder,
   759  				nodeRef:      nodeRef,
   760  				probeManager: probeManager,
   761  				shutdownGracePeriodByPodPriority: []kubeletconfig.ShutdownGracePeriodByPodPriority{
   762  					{
   763  						Priority:                   1,
   764  						ShutdownGracePeriodSeconds: 10,
   765  					},
   766  					{
   767  						Priority:                   2,
   768  						ShutdownGracePeriodSeconds: 20,
   769  					},
   770  				},
   771  				getPods: func() []*v1.Pod {
   772  					return []*v1.Pod{
   773  						makePod("normal-pod", 1, nil),
   774  						makePod("critical-pod", 2, nil),
   775  					}
   776  				},
   777  				killPodFunc: func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
   778  					fakeclock.Step(60 * time.Second)
   779  					return nil
   780  				},
   781  				syncNodeStatus: syncNodeStatus,
   782  				clock:          fakeclock,
   783  				dbusCon:        &fakeDbus{},
   784  			},
   785  			wantErr:                false,
   786  			expectedOutputContains: "Shutdown manager pod killing time out",
   787  		},
   788  	}
   789  
   790  	for _, tt := range tests {
   791  		t.Run(tt.name, func(t *testing.T) {
   792  			logger := ktesting.NewLogger(t,
   793  				ktesting.NewConfig(
   794  					ktesting.BufferLogs(true),
   795  				),
   796  			)
   797  			m := &managerImpl{
   798  				logger:                           logger,
   799  				recorder:                         tt.fields.recorder,
   800  				nodeRef:                          tt.fields.nodeRef,
   801  				probeManager:                     tt.fields.probeManager,
   802  				shutdownGracePeriodByPodPriority: tt.fields.shutdownGracePeriodByPodPriority,
   803  				getPods:                          tt.fields.getPods,
   804  				killPodFunc:                      tt.fields.killPodFunc,
   805  				syncNodeStatus:                   tt.fields.syncNodeStatus,
   806  				dbusCon:                          tt.fields.dbusCon,
   807  				inhibitLock:                      tt.fields.inhibitLock,
   808  				nodeShuttingDownMutex:            sync.Mutex{},
   809  				nodeShuttingDownNow:              tt.fields.nodeShuttingDownNow,
   810  				clock:                            tt.fields.clock,
   811  			}
   812  			if err := m.processShutdownEvent(); (err != nil) != tt.wantErr {
   813  				t.Errorf("managerImpl.processShutdownEvent() error = %v, wantErr %v", err, tt.wantErr)
   814  			}
   815  
   816  			underlier, ok := logger.GetSink().(ktesting.Underlier)
   817  			if !ok {
   818  				t.Fatalf("Should have had a ktesting LogSink, got %T", logger.GetSink())
   819  			}
   820  
   821  			log := underlier.GetBuffer().String()
   822  			if !strings.Contains(log, tt.expectedOutputContains) {
   823  				// Log will be shown on failure. To see it
   824  				// during a successful run use "go test -v".
   825  				t.Errorf("managerImpl.processShutdownEvent() should have logged %s, see actual output above.", tt.expectedOutputContains)
   826  			}
   827  		})
   828  	}
   829  }
   830  

View as plain text