...

Source file src/k8s.io/kubernetes/pkg/controller/job/backoff_utils_test.go

Documentation: k8s.io/kubernetes/pkg/controller/job

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package job
    18  
    19  import (
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/google/go-cmp/cmp"
    24  	v1 "k8s.io/api/core/v1"
    25  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    26  	"k8s.io/klog/v2/ktesting"
    27  	clocktesting "k8s.io/utils/clock/testing"
    28  	"k8s.io/utils/ptr"
    29  )
    30  
    31  func TestNewBackoffRecord(t *testing.T) {
    32  	emptyStoreInitializer := func(*backoffStore) {}
    33  	defaultTestTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
    34  	testCases := map[string]struct {
    35  		storeInitializer  func(*backoffStore)
    36  		uncounted         uncountedTerminatedPods
    37  		newSucceededPods  []metav1.Time
    38  		newFailedPods     []metav1.Time
    39  		wantBackoffRecord backoffRecord
    40  	}{
    41  		"Empty backoff store and one new failure": {
    42  			storeInitializer: emptyStoreInitializer,
    43  			newSucceededPods: []metav1.Time{},
    44  			newFailedPods: []metav1.Time{
    45  				defaultTestTime,
    46  			},
    47  			wantBackoffRecord: backoffRecord{
    48  				key:                      "key",
    49  				lastFailureTime:          &defaultTestTime.Time,
    50  				failuresAfterLastSuccess: 1,
    51  			},
    52  		},
    53  		"Empty backoff store and two new failures": {
    54  			storeInitializer: emptyStoreInitializer,
    55  			newSucceededPods: []metav1.Time{},
    56  			newFailedPods: []metav1.Time{
    57  				defaultTestTime,
    58  				metav1.NewTime(defaultTestTime.Add(-1 * time.Millisecond)),
    59  			},
    60  			wantBackoffRecord: backoffRecord{
    61  				key:                      "key",
    62  				lastFailureTime:          &defaultTestTime.Time,
    63  				failuresAfterLastSuccess: 2,
    64  			},
    65  		},
    66  		"Empty backoff store, two failures followed by success": {
    67  			storeInitializer: emptyStoreInitializer,
    68  			newSucceededPods: []metav1.Time{
    69  				defaultTestTime,
    70  			},
    71  			newFailedPods: []metav1.Time{
    72  				metav1.NewTime(defaultTestTime.Add(-2 * time.Millisecond)),
    73  				metav1.NewTime(defaultTestTime.Add(-1 * time.Millisecond)),
    74  			},
    75  			wantBackoffRecord: backoffRecord{
    76  				key:                      "key",
    77  				failuresAfterLastSuccess: 0,
    78  			},
    79  		},
    80  		"Empty backoff store, two failures, one success and two more failures": {
    81  			storeInitializer: emptyStoreInitializer,
    82  			newSucceededPods: []metav1.Time{
    83  				metav1.NewTime(defaultTestTime.Add(-2 * time.Millisecond)),
    84  			},
    85  			newFailedPods: []metav1.Time{
    86  				defaultTestTime,
    87  				metav1.NewTime(defaultTestTime.Add(-4 * time.Millisecond)),
    88  				metav1.NewTime(defaultTestTime.Add(-3 * time.Millisecond)),
    89  				metav1.NewTime(defaultTestTime.Add(-1 * time.Millisecond)),
    90  			},
    91  			wantBackoffRecord: backoffRecord{
    92  				key:                      "key",
    93  				lastFailureTime:          &defaultTestTime.Time,
    94  				failuresAfterLastSuccess: 2,
    95  			},
    96  		},
    97  		"Backoff store having failure count 2 and one new failure": {
    98  			storeInitializer: func(bis *backoffStore) {
    99  				bis.updateBackoffRecord(backoffRecord{
   100  					key:                      "key",
   101  					failuresAfterLastSuccess: 2,
   102  					lastFailureTime:          nil,
   103  				})
   104  			},
   105  			newSucceededPods: []metav1.Time{},
   106  			newFailedPods: []metav1.Time{
   107  				defaultTestTime,
   108  			},
   109  			wantBackoffRecord: backoffRecord{
   110  				key:                      "key",
   111  				lastFailureTime:          &defaultTestTime.Time,
   112  				failuresAfterLastSuccess: 3,
   113  			},
   114  		},
   115  		"Empty backoff store with success and failure at same timestamp": {
   116  			storeInitializer: emptyStoreInitializer,
   117  			newSucceededPods: []metav1.Time{
   118  				defaultTestTime,
   119  			},
   120  			newFailedPods: []metav1.Time{
   121  				defaultTestTime,
   122  			},
   123  			wantBackoffRecord: backoffRecord{
   124  				key:                      "key",
   125  				failuresAfterLastSuccess: 0,
   126  			},
   127  		},
   128  		"Empty backoff store with no success/failure": {
   129  			storeInitializer: emptyStoreInitializer,
   130  			newSucceededPods: []metav1.Time{},
   131  			newFailedPods:    []metav1.Time{},
   132  			wantBackoffRecord: backoffRecord{
   133  				key:                      "key",
   134  				failuresAfterLastSuccess: 0,
   135  			},
   136  		},
   137  		"Empty backoff store with one success": {
   138  			storeInitializer: emptyStoreInitializer,
   139  			newSucceededPods: []metav1.Time{
   140  				defaultTestTime,
   141  			},
   142  			newFailedPods: []metav1.Time{},
   143  			wantBackoffRecord: backoffRecord{
   144  				key:                      "key",
   145  				failuresAfterLastSuccess: 0,
   146  			},
   147  		},
   148  	}
   149  
   150  	for name, tc := range testCases {
   151  		t.Run(name, func(t *testing.T) {
   152  			backoffRecordStore := newBackoffStore()
   153  			tc.storeInitializer(backoffRecordStore)
   154  
   155  			newSucceededPods := []*v1.Pod{}
   156  			newFailedPods := []*v1.Pod{}
   157  
   158  			for _, finishTime := range tc.newSucceededPods {
   159  				newSucceededPods = append(newSucceededPods, &v1.Pod{
   160  					ObjectMeta: metav1.ObjectMeta{},
   161  					Status: v1.PodStatus{
   162  						Phase: v1.PodSucceeded,
   163  						ContainerStatuses: []v1.ContainerStatus{
   164  							{
   165  								State: v1.ContainerState{
   166  									Terminated: &v1.ContainerStateTerminated{
   167  										FinishedAt: finishTime,
   168  									},
   169  								},
   170  							},
   171  						},
   172  					},
   173  				})
   174  			}
   175  
   176  			for _, finishTime := range tc.newFailedPods {
   177  				newFailedPods = append(newFailedPods, &v1.Pod{
   178  					ObjectMeta: metav1.ObjectMeta{},
   179  					Status: v1.PodStatus{
   180  						Phase: v1.PodFailed,
   181  						ContainerStatuses: []v1.ContainerStatus{
   182  							{
   183  								State: v1.ContainerState{
   184  									Terminated: &v1.ContainerStateTerminated{
   185  										FinishedAt: finishTime,
   186  									},
   187  								},
   188  							},
   189  						},
   190  					},
   191  				})
   192  			}
   193  
   194  			backoffRecord := backoffRecordStore.newBackoffRecord("key", newSucceededPods, newFailedPods)
   195  			if diff := cmp.Diff(tc.wantBackoffRecord, backoffRecord, cmp.AllowUnexported(backoffRecord)); diff != "" {
   196  				t.Errorf("backoffRecord not matching; (-want,+got): %v", diff)
   197  			}
   198  		})
   199  	}
   200  }
   201  
   202  func TestGetFinishedTime(t *testing.T) {
   203  	defaultTestTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
   204  	defaultTestTimeMinus30s := defaultTestTime.Add(-30 * time.Second)
   205  	testCases := map[string]struct {
   206  		pod            v1.Pod
   207  		wantFinishTime time.Time
   208  	}{
   209  		"Pod with multiple containers and all containers terminated": {
   210  			pod: v1.Pod{
   211  				Status: v1.PodStatus{
   212  					ContainerStatuses: []v1.ContainerStatus{
   213  						{
   214  							State: v1.ContainerState{
   215  								Terminated: &v1.ContainerStateTerminated{FinishedAt: metav1.NewTime(defaultTestTime.Add(-1 * time.Second))},
   216  							},
   217  						},
   218  						{
   219  							State: v1.ContainerState{
   220  								Terminated: &v1.ContainerStateTerminated{FinishedAt: metav1.NewTime(defaultTestTime)},
   221  							},
   222  						},
   223  						{
   224  							State: v1.ContainerState{
   225  								Terminated: &v1.ContainerStateTerminated{FinishedAt: metav1.NewTime(defaultTestTime.Add(-2 * time.Second))},
   226  							},
   227  						},
   228  					},
   229  				},
   230  			},
   231  			wantFinishTime: defaultTestTime,
   232  		},
   233  		"Pod with multiple containers; two containers in terminated state and one in running state; fallback to deletionTimestamp": {
   234  			pod: v1.Pod{
   235  				Status: v1.PodStatus{
   236  					ContainerStatuses: []v1.ContainerStatus{
   237  						{
   238  							State: v1.ContainerState{
   239  								Terminated: &v1.ContainerStateTerminated{FinishedAt: metav1.NewTime(defaultTestTime.Add(-1 * time.Second))},
   240  							},
   241  						},
   242  						{
   243  							State: v1.ContainerState{
   244  								Running: &v1.ContainerStateRunning{},
   245  							},
   246  						},
   247  						{
   248  							State: v1.ContainerState{
   249  								Terminated: &v1.ContainerStateTerminated{FinishedAt: metav1.NewTime(defaultTestTime.Add(-2 * time.Second))},
   250  							},
   251  						},
   252  					},
   253  				},
   254  				ObjectMeta: metav1.ObjectMeta{
   255  					DeletionTimestamp: &metav1.Time{Time: defaultTestTime},
   256  				},
   257  			},
   258  			wantFinishTime: defaultTestTime,
   259  		},
   260  		"fallback to deletionTimestamp": {
   261  			pod: v1.Pod{
   262  				Status: v1.PodStatus{
   263  					ContainerStatuses: []v1.ContainerStatus{
   264  						{
   265  							State: v1.ContainerState{
   266  								Running: &v1.ContainerStateRunning{},
   267  							},
   268  						},
   269  					},
   270  				},
   271  				ObjectMeta: metav1.ObjectMeta{
   272  					DeletionTimestamp: &metav1.Time{Time: defaultTestTime},
   273  				},
   274  			},
   275  			wantFinishTime: defaultTestTime,
   276  		},
   277  		"fallback to deletionTimestamp, decremented by grace period": {
   278  			pod: v1.Pod{
   279  				Status: v1.PodStatus{
   280  					ContainerStatuses: []v1.ContainerStatus{
   281  						{
   282  							State: v1.ContainerState{
   283  								Running: &v1.ContainerStateRunning{},
   284  							},
   285  						},
   286  					},
   287  				},
   288  				ObjectMeta: metav1.ObjectMeta{
   289  					DeletionTimestamp:          &metav1.Time{Time: defaultTestTime},
   290  					DeletionGracePeriodSeconds: ptr.To[int64](30),
   291  				},
   292  			},
   293  			wantFinishTime: defaultTestTimeMinus30s,
   294  		},
   295  		"fallback to PodReady.LastTransitionTime when status of the condition is False": {
   296  			pod: v1.Pod{
   297  				Status: v1.PodStatus{
   298  					ContainerStatuses: []v1.ContainerStatus{
   299  						{
   300  							State: v1.ContainerState{
   301  								Terminated: &v1.ContainerStateTerminated{},
   302  							},
   303  						},
   304  					},
   305  					Conditions: []v1.PodCondition{
   306  						{
   307  							Type:               v1.PodReady,
   308  							Status:             v1.ConditionFalse,
   309  							Reason:             "PodFailed",
   310  							LastTransitionTime: metav1.Time{Time: defaultTestTime},
   311  						},
   312  					},
   313  				},
   314  			},
   315  			wantFinishTime: defaultTestTime,
   316  		},
   317  		"skip fallback to PodReady.LastTransitionTime when status of the condition is True": {
   318  			pod: v1.Pod{
   319  				Status: v1.PodStatus{
   320  					ContainerStatuses: []v1.ContainerStatus{
   321  						{
   322  							State: v1.ContainerState{
   323  								Terminated: &v1.ContainerStateTerminated{},
   324  							},
   325  						},
   326  					},
   327  					Conditions: []v1.PodCondition{
   328  						{
   329  							Type:               v1.PodReady,
   330  							Status:             v1.ConditionTrue,
   331  							LastTransitionTime: metav1.Time{Time: defaultTestTimeMinus30s},
   332  						},
   333  					},
   334  				},
   335  				ObjectMeta: metav1.ObjectMeta{
   336  					DeletionTimestamp: &metav1.Time{Time: defaultTestTime},
   337  				},
   338  			},
   339  			wantFinishTime: defaultTestTime,
   340  		},
   341  		"fallback to creationTimestamp": {
   342  			pod: v1.Pod{
   343  				Status: v1.PodStatus{
   344  					ContainerStatuses: []v1.ContainerStatus{
   345  						{
   346  							State: v1.ContainerState{
   347  								Terminated: &v1.ContainerStateTerminated{},
   348  							},
   349  						},
   350  					},
   351  				},
   352  				ObjectMeta: metav1.ObjectMeta{
   353  					CreationTimestamp: metav1.Time{Time: defaultTestTime},
   354  				},
   355  			},
   356  			wantFinishTime: defaultTestTime,
   357  		},
   358  	}
   359  
   360  	for name, tc := range testCases {
   361  		t.Run(name, func(t *testing.T) {
   362  			f := getFinishedTime(&tc.pod)
   363  			if !f.Equal(tc.wantFinishTime) {
   364  				t.Errorf("Expected value of finishedTime %v; got %v", tc.wantFinishTime, f)
   365  			}
   366  		})
   367  	}
   368  }
   369  
   370  func TestGetRemainingBackoffTime(t *testing.T) {
   371  	defaultTestTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
   372  	testCases := map[string]struct {
   373  		backoffRecord  backoffRecord
   374  		currentTime    time.Time
   375  		maxBackoff     time.Duration
   376  		defaultBackoff time.Duration
   377  		wantDuration   time.Duration
   378  	}{
   379  		"no failures": {
   380  			backoffRecord: backoffRecord{
   381  				lastFailureTime:          nil,
   382  				failuresAfterLastSuccess: 0,
   383  			},
   384  			defaultBackoff: 5 * time.Second,
   385  			maxBackoff:     700 * time.Second,
   386  			wantDuration:   0 * time.Second,
   387  		},
   388  		"one failure; current time and failure time are same": {
   389  			backoffRecord: backoffRecord{
   390  				lastFailureTime:          &defaultTestTime.Time,
   391  				failuresAfterLastSuccess: 1,
   392  			},
   393  			currentTime:    defaultTestTime.Time,
   394  			defaultBackoff: 5 * time.Second,
   395  			maxBackoff:     700 * time.Second,
   396  			wantDuration:   5 * time.Second,
   397  		},
   398  		"one failure; current time == 1 second + failure time": {
   399  			backoffRecord: backoffRecord{
   400  				lastFailureTime:          &defaultTestTime.Time,
   401  				failuresAfterLastSuccess: 1,
   402  			},
   403  			currentTime:    defaultTestTime.Time.Add(time.Second),
   404  			defaultBackoff: 5 * time.Second,
   405  			maxBackoff:     700 * time.Second,
   406  			wantDuration:   4 * time.Second,
   407  		},
   408  		"one failure; current time == expected backoff time": {
   409  			backoffRecord: backoffRecord{
   410  				lastFailureTime:          &defaultTestTime.Time,
   411  				failuresAfterLastSuccess: 1,
   412  			},
   413  			currentTime:    defaultTestTime.Time.Add(5 * time.Second),
   414  			defaultBackoff: 5 * time.Second,
   415  			maxBackoff:     700 * time.Second,
   416  			wantDuration:   0 * time.Second,
   417  		},
   418  		"one failure; current time == expected backoff time + 1 Second": {
   419  			backoffRecord: backoffRecord{
   420  				lastFailureTime:          &defaultTestTime.Time,
   421  				failuresAfterLastSuccess: 1,
   422  			},
   423  			currentTime:    defaultTestTime.Time.Add(6 * time.Second),
   424  			defaultBackoff: 5 * time.Second,
   425  			maxBackoff:     700 * time.Second,
   426  			wantDuration:   0 * time.Second,
   427  		},
   428  		"three failures; current time and failure time are same": {
   429  			backoffRecord: backoffRecord{
   430  				lastFailureTime:          &defaultTestTime.Time,
   431  				failuresAfterLastSuccess: 3,
   432  			},
   433  			currentTime:    defaultTestTime.Time,
   434  			defaultBackoff: 5 * time.Second,
   435  			maxBackoff:     700 * time.Second,
   436  			wantDuration:   20 * time.Second,
   437  		},
   438  		"eight failures; current time and failure time are same; backoff not exceeding maxBackoff": {
   439  			backoffRecord: backoffRecord{
   440  				lastFailureTime:          &defaultTestTime.Time,
   441  				failuresAfterLastSuccess: 8,
   442  			},
   443  			currentTime:    defaultTestTime.Time,
   444  			defaultBackoff: 5 * time.Second,
   445  			maxBackoff:     700 * time.Second,
   446  			wantDuration:   640 * time.Second,
   447  		},
   448  		"nine failures; current time and failure time are same; backoff exceeding maxBackoff": {
   449  			backoffRecord: backoffRecord{
   450  				lastFailureTime:          &defaultTestTime.Time,
   451  				failuresAfterLastSuccess: 9,
   452  			},
   453  			currentTime:    defaultTestTime.Time,
   454  			defaultBackoff: 5 * time.Second,
   455  			maxBackoff:     700 * time.Second,
   456  			wantDuration:   700 * time.Second,
   457  		},
   458  	}
   459  
   460  	for name, tc := range testCases {
   461  		t.Run(name, func(t *testing.T) {
   462  			fakeClock := clocktesting.NewFakeClock(tc.currentTime.Truncate(time.Second))
   463  			d := tc.backoffRecord.getRemainingTime(fakeClock, tc.defaultBackoff, tc.maxBackoff)
   464  			if d.Seconds() != tc.wantDuration.Seconds() {
   465  				t.Errorf("Expected value of duration %v; got %v", tc.wantDuration, d)
   466  			}
   467  		})
   468  	}
   469  }
   470  
   471  func TestGetRemainingBackoffTimePerIndex(t *testing.T) {
   472  	defaultTestTime := metav1.NewTime(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC))
   473  	testCases := map[string]struct {
   474  		currentTime    time.Time
   475  		maxBackoff     time.Duration
   476  		defaultBackoff time.Duration
   477  		lastFailedPod  *v1.Pod
   478  		wantDuration   time.Duration
   479  	}{
   480  		"no failures": {
   481  			lastFailedPod:  nil,
   482  			defaultBackoff: 5 * time.Second,
   483  			maxBackoff:     700 * time.Second,
   484  			wantDuration:   0 * time.Second,
   485  		},
   486  		"two prev failures; current time and failure time are same": {
   487  			lastFailedPod:  buildPod().phase(v1.PodFailed).indexFailureCount("2").customDeletionTimestamp(defaultTestTime.Time).Pod,
   488  			currentTime:    defaultTestTime.Time,
   489  			defaultBackoff: 5 * time.Second,
   490  			maxBackoff:     700 * time.Second,
   491  			wantDuration:   20 * time.Second,
   492  		},
   493  		"one prev failure counted and one ignored; current time and failure time are same": {
   494  			lastFailedPod:  buildPod().phase(v1.PodFailed).indexFailureCount("1").indexIgnoredFailureCount("1").customDeletionTimestamp(defaultTestTime.Time).Pod,
   495  			currentTime:    defaultTestTime.Time,
   496  			defaultBackoff: 5 * time.Second,
   497  			maxBackoff:     700 * time.Second,
   498  			wantDuration:   20 * time.Second,
   499  		},
   500  	}
   501  
   502  	for name, tc := range testCases {
   503  		t.Run(name, func(t *testing.T) {
   504  			logger, _ := ktesting.NewTestContext(t)
   505  			fakeClock := clocktesting.NewFakeClock(tc.currentTime.Truncate(time.Second))
   506  			d := getRemainingTimePerIndex(logger, fakeClock, tc.defaultBackoff, tc.maxBackoff, tc.lastFailedPod)
   507  			if d.Seconds() != tc.wantDuration.Seconds() {
   508  				t.Errorf("Expected value of duration %v; got %v", tc.wantDuration, d)
   509  			}
   510  		})
   511  	}
   512  }
   513  

View as plain text