...

Source file src/k8s.io/kubernetes/pkg/controller/job/pod_failure_policy_test.go

Documentation: k8s.io/kubernetes/pkg/controller/job

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package job
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/google/go-cmp/cmp"
    23  	batch "k8s.io/api/batch/v1"
    24  	v1 "k8s.io/api/core/v1"
    25  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    26  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    27  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    28  	_ "k8s.io/kubernetes/pkg/apis/core/install"
    29  	"k8s.io/kubernetes/pkg/features"
    30  	"k8s.io/utils/ptr"
    31  )
    32  
    33  func TestMatchPodFailurePolicy(t *testing.T) {
    34  	validPodObjectMeta := metav1.ObjectMeta{
    35  		Namespace: "default",
    36  		Name:      "mypod",
    37  	}
    38  	ignore := batch.PodFailurePolicyActionIgnore
    39  	failJob := batch.PodFailurePolicyActionFailJob
    40  	failIndex := batch.PodFailurePolicyActionFailIndex
    41  	count := batch.PodFailurePolicyActionCount
    42  
    43  	testCases := map[string]struct {
    44  		enableJobBackoffLimitPerIndex bool
    45  		podFailurePolicy              *batch.PodFailurePolicy
    46  		failedPod                     *v1.Pod
    47  		wantJobFailureMessage         *string
    48  		wantCountFailed               bool
    49  		wantAction                    *batch.PodFailurePolicyAction
    50  	}{
    51  		"unknown action for rule matching by exit codes - skip rule with unknown action": {
    52  			podFailurePolicy: &batch.PodFailurePolicy{
    53  				Rules: []batch.PodFailurePolicyRule{
    54  					{
    55  						Action: "UnknownAction",
    56  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
    57  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
    58  							Values:   []int32{1, 2},
    59  						},
    60  					},
    61  					{
    62  						Action: batch.PodFailurePolicyActionFailJob,
    63  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
    64  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
    65  							Values:   []int32{2, 3},
    66  						},
    67  					},
    68  				},
    69  			},
    70  			failedPod: &v1.Pod{
    71  				ObjectMeta: validPodObjectMeta,
    72  				Status: v1.PodStatus{
    73  					Phase: v1.PodFailed,
    74  					ContainerStatuses: []v1.ContainerStatus{
    75  						{
    76  							Name: "main-container",
    77  							State: v1.ContainerState{
    78  								Terminated: &v1.ContainerStateTerminated{
    79  									ExitCode: 2,
    80  								},
    81  							},
    82  						},
    83  					},
    84  				},
    85  			},
    86  			wantJobFailureMessage: ptr.To("Container main-container for pod default/mypod failed with exit code 2 matching FailJob rule at index 1"),
    87  			wantCountFailed:       true,
    88  			wantAction:            &failJob,
    89  		},
    90  		"unknown action for rule matching by pod conditions - skip rule with unknown action": {
    91  			podFailurePolicy: &batch.PodFailurePolicy{
    92  				Rules: []batch.PodFailurePolicyRule{
    93  					{
    94  						Action: "UnkonwnAction",
    95  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
    96  							{
    97  								Type:   v1.DisruptionTarget,
    98  								Status: v1.ConditionTrue,
    99  							},
   100  						},
   101  					},
   102  					{
   103  						Action: batch.PodFailurePolicyActionIgnore,
   104  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   105  							{
   106  								Type:   v1.DisruptionTarget,
   107  								Status: v1.ConditionTrue,
   108  							},
   109  						},
   110  					},
   111  				},
   112  			},
   113  			failedPod: &v1.Pod{
   114  				ObjectMeta: validPodObjectMeta,
   115  				Status: v1.PodStatus{
   116  					Phase: v1.PodFailed,
   117  					Conditions: []v1.PodCondition{
   118  						{
   119  							Type:   v1.DisruptionTarget,
   120  							Status: v1.ConditionTrue,
   121  						},
   122  					},
   123  				},
   124  			},
   125  			wantJobFailureMessage: nil,
   126  			wantCountFailed:       false,
   127  			wantAction:            &ignore,
   128  		},
   129  		"unknown operator - rule with unknown action is skipped for onExitCodes": {
   130  			podFailurePolicy: &batch.PodFailurePolicy{
   131  				Rules: []batch.PodFailurePolicyRule{
   132  					{
   133  						Action: batch.PodFailurePolicyActionIgnore,
   134  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   135  							Operator: "UnknownOperator",
   136  							Values:   []int32{1, 2},
   137  						},
   138  					},
   139  					{
   140  						Action: batch.PodFailurePolicyActionFailJob,
   141  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   142  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   143  							Values:   []int32{2, 3},
   144  						},
   145  					},
   146  				},
   147  			},
   148  			failedPod: &v1.Pod{
   149  				ObjectMeta: validPodObjectMeta,
   150  				Status: v1.PodStatus{
   151  					Phase: v1.PodFailed,
   152  					ContainerStatuses: []v1.ContainerStatus{
   153  						{
   154  							Name: "main-container",
   155  							State: v1.ContainerState{
   156  								Terminated: &v1.ContainerStateTerminated{
   157  									ExitCode: 2,
   158  								},
   159  							},
   160  						},
   161  					},
   162  				},
   163  			},
   164  			wantJobFailureMessage: ptr.To("Container main-container for pod default/mypod failed with exit code 2 matching FailJob rule at index 1"),
   165  			wantCountFailed:       true,
   166  			wantAction:            &failJob,
   167  		},
   168  		"no policy rules": {
   169  			podFailurePolicy: nil,
   170  			failedPod: &v1.Pod{
   171  				ObjectMeta: validPodObjectMeta,
   172  				Status: v1.PodStatus{
   173  					Phase: v1.PodFailed,
   174  					ContainerStatuses: []v1.ContainerStatus{
   175  						{
   176  							State: v1.ContainerState{
   177  								Terminated: &v1.ContainerStateTerminated{
   178  									ExitCode: 2,
   179  								},
   180  							},
   181  						},
   182  					},
   183  				},
   184  			},
   185  			wantJobFailureMessage: nil,
   186  			wantCountFailed:       true,
   187  		},
   188  		"ignore rule matched for exit codes": {
   189  			podFailurePolicy: &batch.PodFailurePolicy{
   190  				Rules: []batch.PodFailurePolicyRule{
   191  					{
   192  						Action: batch.PodFailurePolicyActionIgnore,
   193  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   194  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   195  							Values:   []int32{1, 2, 3},
   196  						},
   197  					},
   198  				},
   199  			},
   200  			failedPod: &v1.Pod{
   201  				ObjectMeta: validPodObjectMeta,
   202  				Status: v1.PodStatus{
   203  					Phase: v1.PodFailed,
   204  					ContainerStatuses: []v1.ContainerStatus{
   205  						{
   206  							State: v1.ContainerState{
   207  								Terminated: &v1.ContainerStateTerminated{
   208  									ExitCode: 2,
   209  								},
   210  							},
   211  						},
   212  					},
   213  				},
   214  			},
   215  			wantJobFailureMessage: nil,
   216  			wantCountFailed:       false,
   217  			wantAction:            &ignore,
   218  		},
   219  		"FailJob rule matched for exit codes": {
   220  			podFailurePolicy: &batch.PodFailurePolicy{
   221  				Rules: []batch.PodFailurePolicyRule{
   222  					{
   223  						Action: batch.PodFailurePolicyActionFailJob,
   224  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   225  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   226  							Values:   []int32{1, 2, 3},
   227  						},
   228  					},
   229  				},
   230  			},
   231  			failedPod: &v1.Pod{
   232  				ObjectMeta: validPodObjectMeta,
   233  				Status: v1.PodStatus{
   234  					Phase: v1.PodFailed,
   235  					ContainerStatuses: []v1.ContainerStatus{
   236  						{
   237  							Name: "main-container",
   238  							State: v1.ContainerState{
   239  								Terminated: &v1.ContainerStateTerminated{
   240  									ExitCode: 2,
   241  								},
   242  							},
   243  						},
   244  					},
   245  				},
   246  			},
   247  			wantJobFailureMessage: ptr.To("Container main-container for pod default/mypod failed with exit code 2 matching FailJob rule at index 0"),
   248  			wantCountFailed:       true,
   249  			wantAction:            &failJob,
   250  		},
   251  		"successful containers are skipped by the rules": {
   252  			podFailurePolicy: &batch.PodFailurePolicy{
   253  				Rules: []batch.PodFailurePolicyRule{
   254  					{
   255  						Action: batch.PodFailurePolicyActionFailJob,
   256  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   257  							Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
   258  							Values:   []int32{111},
   259  						},
   260  					},
   261  				},
   262  			},
   263  			failedPod: &v1.Pod{
   264  				ObjectMeta: validPodObjectMeta,
   265  				Status: v1.PodStatus{
   266  					Phase: v1.PodFailed,
   267  					InitContainerStatuses: []v1.ContainerStatus{
   268  						{
   269  							Name: "init-container",
   270  							State: v1.ContainerState{
   271  								Terminated: &v1.ContainerStateTerminated{
   272  									ExitCode: 0,
   273  								},
   274  							},
   275  						},
   276  					},
   277  					ContainerStatuses: []v1.ContainerStatus{
   278  						{
   279  							Name: "main-container",
   280  							State: v1.ContainerState{
   281  								Terminated: &v1.ContainerStateTerminated{
   282  									ExitCode: 111,
   283  								},
   284  							},
   285  						},
   286  						{
   287  							Name: "suppport-container",
   288  							State: v1.ContainerState{
   289  								Terminated: &v1.ContainerStateTerminated{
   290  									ExitCode: 0,
   291  								},
   292  							},
   293  						},
   294  					},
   295  				},
   296  			},
   297  			wantJobFailureMessage: nil,
   298  			wantCountFailed:       true,
   299  		},
   300  		"FailIndex rule matched for exit codes; JobBackoffLimitPerIndex enabled": {
   301  			enableJobBackoffLimitPerIndex: true,
   302  			podFailurePolicy: &batch.PodFailurePolicy{
   303  				Rules: []batch.PodFailurePolicyRule{
   304  					{
   305  						Action: batch.PodFailurePolicyActionFailIndex,
   306  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   307  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   308  							Values:   []int32{1, 2, 3},
   309  						},
   310  					},
   311  				},
   312  			},
   313  			failedPod: &v1.Pod{
   314  				ObjectMeta: validPodObjectMeta,
   315  				Status: v1.PodStatus{
   316  					Phase: v1.PodFailed,
   317  					ContainerStatuses: []v1.ContainerStatus{
   318  						{
   319  							State: v1.ContainerState{
   320  								Terminated: &v1.ContainerStateTerminated{
   321  									ExitCode: 2,
   322  								},
   323  							},
   324  						},
   325  					},
   326  				},
   327  			},
   328  			wantCountFailed: true,
   329  			wantAction:      &failIndex,
   330  		},
   331  		"FailIndex rule matched for exit codes; JobBackoffLimitPerIndex disabled": {
   332  			enableJobBackoffLimitPerIndex: false,
   333  			podFailurePolicy: &batch.PodFailurePolicy{
   334  				Rules: []batch.PodFailurePolicyRule{
   335  					{
   336  						Action: batch.PodFailurePolicyActionFailIndex,
   337  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   338  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   339  							Values:   []int32{1, 2, 3},
   340  						},
   341  					},
   342  				},
   343  			},
   344  			failedPod: &v1.Pod{
   345  				ObjectMeta: validPodObjectMeta,
   346  				Status: v1.PodStatus{
   347  					Phase: v1.PodFailed,
   348  					ContainerStatuses: []v1.ContainerStatus{
   349  						{
   350  							State: v1.ContainerState{
   351  								Terminated: &v1.ContainerStateTerminated{
   352  									ExitCode: 2,
   353  								},
   354  							},
   355  						},
   356  					},
   357  				},
   358  			},
   359  			wantCountFailed: true,
   360  			wantAction:      nil,
   361  		},
   362  		"pod failure policy with NotIn operator and value 0": {
   363  			podFailurePolicy: &batch.PodFailurePolicy{
   364  				Rules: []batch.PodFailurePolicyRule{
   365  					{
   366  						Action: batch.PodFailurePolicyActionFailJob,
   367  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   368  							Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
   369  							Values:   []int32{0},
   370  						},
   371  					},
   372  				},
   373  			},
   374  			failedPod: &v1.Pod{
   375  				ObjectMeta: validPodObjectMeta,
   376  				Status: v1.PodStatus{
   377  					Phase: v1.PodFailed,
   378  					ContainerStatuses: []v1.ContainerStatus{
   379  						{
   380  							Name: "main-container",
   381  							State: v1.ContainerState{
   382  								Terminated: &v1.ContainerStateTerminated{
   383  									ExitCode: 1,
   384  								},
   385  							},
   386  						},
   387  						{
   388  							Name: "suppport-container",
   389  							State: v1.ContainerState{
   390  								Terminated: &v1.ContainerStateTerminated{
   391  									ExitCode: 0,
   392  								},
   393  							},
   394  						},
   395  					},
   396  				},
   397  			},
   398  			wantJobFailureMessage: ptr.To("Container main-container for pod default/mypod failed with exit code 1 matching FailJob rule at index 0"),
   399  			wantCountFailed:       true,
   400  			wantAction:            &failJob,
   401  		},
   402  		"second jobfail rule matched for exit codes": {
   403  			podFailurePolicy: &batch.PodFailurePolicy{
   404  				Rules: []batch.PodFailurePolicyRule{
   405  					{
   406  						Action: batch.PodFailurePolicyActionCount,
   407  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   408  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   409  							Values:   []int32{1, 2, 3},
   410  						},
   411  					},
   412  					{
   413  						Action: batch.PodFailurePolicyActionFailJob,
   414  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   415  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   416  							Values:   []int32{4, 5, 6},
   417  						},
   418  					},
   419  				},
   420  			},
   421  			failedPod: &v1.Pod{
   422  				ObjectMeta: validPodObjectMeta,
   423  				Status: v1.PodStatus{
   424  					Phase: v1.PodFailed,
   425  					ContainerStatuses: []v1.ContainerStatus{
   426  						{
   427  							Name: "main-container",
   428  							State: v1.ContainerState{
   429  								Terminated: &v1.ContainerStateTerminated{
   430  									ExitCode: 6,
   431  								},
   432  							},
   433  						},
   434  					},
   435  				},
   436  			},
   437  			wantJobFailureMessage: ptr.To("Container main-container for pod default/mypod failed with exit code 6 matching FailJob rule at index 1"),
   438  			wantCountFailed:       true,
   439  			wantAction:            &failJob,
   440  		},
   441  		"count rule matched for exit codes": {
   442  			podFailurePolicy: &batch.PodFailurePolicy{
   443  				Rules: []batch.PodFailurePolicyRule{
   444  					{
   445  						Action: batch.PodFailurePolicyActionCount,
   446  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   447  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   448  							Values:   []int32{1, 2, 3},
   449  						},
   450  					},
   451  				},
   452  			},
   453  			failedPod: &v1.Pod{
   454  				ObjectMeta: validPodObjectMeta,
   455  				Status: v1.PodStatus{
   456  					Phase: v1.PodFailed,
   457  					ContainerStatuses: []v1.ContainerStatus{
   458  						{
   459  							Name: "foo",
   460  						},
   461  						{
   462  							Name: "bar",
   463  							State: v1.ContainerState{
   464  								Terminated: &v1.ContainerStateTerminated{
   465  									ExitCode: 2,
   466  								},
   467  							},
   468  						},
   469  					},
   470  				},
   471  			},
   472  			wantJobFailureMessage: nil,
   473  			wantCountFailed:       true,
   474  			wantAction:            &count,
   475  		},
   476  		"FailIndex rule matched for pod conditions; JobBackoffLimitPerIndex enabled": {
   477  			enableJobBackoffLimitPerIndex: true,
   478  			podFailurePolicy: &batch.PodFailurePolicy{
   479  				Rules: []batch.PodFailurePolicyRule{
   480  					{
   481  						Action: batch.PodFailurePolicyActionFailIndex,
   482  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   483  							{
   484  								Type:   v1.DisruptionTarget,
   485  								Status: v1.ConditionTrue,
   486  							},
   487  						},
   488  					},
   489  				},
   490  			},
   491  			failedPod: &v1.Pod{
   492  				ObjectMeta: validPodObjectMeta,
   493  				Status: v1.PodStatus{
   494  					Phase: v1.PodFailed,
   495  					Conditions: []v1.PodCondition{
   496  						{
   497  							Type:   v1.DisruptionTarget,
   498  							Status: v1.ConditionTrue,
   499  						},
   500  					},
   501  				},
   502  			},
   503  			wantCountFailed: true,
   504  			wantAction:      &failIndex,
   505  		},
   506  		"FailIndex rule matched for pod conditions; JobBackoffLimitPerIndex disabled": {
   507  			enableJobBackoffLimitPerIndex: false,
   508  			podFailurePolicy: &batch.PodFailurePolicy{
   509  				Rules: []batch.PodFailurePolicyRule{
   510  					{
   511  						Action: batch.PodFailurePolicyActionFailIndex,
   512  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   513  							{
   514  								Type:   v1.DisruptionTarget,
   515  								Status: v1.ConditionTrue,
   516  							},
   517  						},
   518  					},
   519  				},
   520  			},
   521  			failedPod: &v1.Pod{
   522  				ObjectMeta: validPodObjectMeta,
   523  				Status: v1.PodStatus{
   524  					Phase: v1.PodFailed,
   525  					Conditions: []v1.PodCondition{
   526  						{
   527  							Type:   v1.DisruptionTarget,
   528  							Status: v1.ConditionTrue,
   529  						},
   530  					},
   531  				},
   532  			},
   533  			wantCountFailed: true,
   534  			wantAction:      nil,
   535  		},
   536  		"ignore rule matched for pod conditions": {
   537  			podFailurePolicy: &batch.PodFailurePolicy{
   538  				Rules: []batch.PodFailurePolicyRule{
   539  					{
   540  						Action: batch.PodFailurePolicyActionIgnore,
   541  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   542  							{
   543  								Type:   v1.DisruptionTarget,
   544  								Status: v1.ConditionTrue,
   545  							},
   546  						},
   547  					},
   548  				},
   549  			},
   550  			failedPod: &v1.Pod{
   551  				ObjectMeta: validPodObjectMeta,
   552  				Status: v1.PodStatus{
   553  					Phase: v1.PodFailed,
   554  					Conditions: []v1.PodCondition{
   555  						{
   556  							Type:   v1.DisruptionTarget,
   557  							Status: v1.ConditionTrue,
   558  						},
   559  					},
   560  				},
   561  			},
   562  			wantJobFailureMessage: nil,
   563  			wantCountFailed:       false,
   564  			wantAction:            &ignore,
   565  		},
   566  		"ignore rule matches by the status=False": {
   567  			podFailurePolicy: &batch.PodFailurePolicy{
   568  				Rules: []batch.PodFailurePolicyRule{
   569  					{
   570  						Action: batch.PodFailurePolicyActionIgnore,
   571  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   572  							{
   573  								Type:   v1.DisruptionTarget,
   574  								Status: v1.ConditionFalse,
   575  							},
   576  						},
   577  					},
   578  				},
   579  			},
   580  			failedPod: &v1.Pod{
   581  				ObjectMeta: validPodObjectMeta,
   582  				Status: v1.PodStatus{
   583  					Phase: v1.PodFailed,
   584  					Conditions: []v1.PodCondition{
   585  						{
   586  							Type:   v1.DisruptionTarget,
   587  							Status: v1.ConditionFalse,
   588  						},
   589  					},
   590  				},
   591  			},
   592  			wantJobFailureMessage: nil,
   593  			wantCountFailed:       false,
   594  			wantAction:            &ignore,
   595  		},
   596  		"ignore rule matches by the status=Unknown": {
   597  			podFailurePolicy: &batch.PodFailurePolicy{
   598  				Rules: []batch.PodFailurePolicyRule{
   599  					{
   600  						Action: batch.PodFailurePolicyActionIgnore,
   601  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   602  							{
   603  								Type:   v1.DisruptionTarget,
   604  								Status: v1.ConditionUnknown,
   605  							},
   606  						},
   607  					},
   608  				},
   609  			},
   610  			failedPod: &v1.Pod{
   611  				ObjectMeta: validPodObjectMeta,
   612  				Status: v1.PodStatus{
   613  					Phase: v1.PodFailed,
   614  					Conditions: []v1.PodCondition{
   615  						{
   616  							Type:   v1.DisruptionTarget,
   617  							Status: v1.ConditionUnknown,
   618  						},
   619  					},
   620  				},
   621  			},
   622  			wantJobFailureMessage: nil,
   623  			wantCountFailed:       false,
   624  			wantAction:            &ignore,
   625  		},
   626  		"ignore rule does not match when status for pattern is False, but actual True": {
   627  			podFailurePolicy: &batch.PodFailurePolicy{
   628  				Rules: []batch.PodFailurePolicyRule{
   629  					{
   630  						Action: batch.PodFailurePolicyActionIgnore,
   631  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   632  							{
   633  								Type:   v1.DisruptionTarget,
   634  								Status: v1.ConditionFalse,
   635  							},
   636  						},
   637  					},
   638  				},
   639  			},
   640  			failedPod: &v1.Pod{
   641  				ObjectMeta: validPodObjectMeta,
   642  				Status: v1.PodStatus{
   643  					Phase: v1.PodFailed,
   644  					Conditions: []v1.PodCondition{
   645  						{
   646  							Type:   v1.DisruptionTarget,
   647  							Status: v1.ConditionTrue,
   648  						},
   649  					},
   650  				},
   651  			},
   652  			wantJobFailureMessage: nil,
   653  			wantCountFailed:       true,
   654  		},
   655  		"ignore rule does not match when status for pattern is True, but actual False": {
   656  			podFailurePolicy: &batch.PodFailurePolicy{
   657  				Rules: []batch.PodFailurePolicyRule{
   658  					{
   659  						Action: batch.PodFailurePolicyActionIgnore,
   660  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   661  							{
   662  								Type:   v1.DisruptionTarget,
   663  								Status: v1.ConditionTrue,
   664  							},
   665  						},
   666  					},
   667  				},
   668  			},
   669  			failedPod: &v1.Pod{
   670  				ObjectMeta: validPodObjectMeta,
   671  				Status: v1.PodStatus{
   672  					Phase: v1.PodFailed,
   673  					Conditions: []v1.PodCondition{
   674  						{
   675  							Type:   v1.DisruptionTarget,
   676  							Status: v1.ConditionFalse,
   677  						},
   678  					},
   679  				},
   680  			},
   681  			wantJobFailureMessage: nil,
   682  			wantCountFailed:       true,
   683  		},
   684  		"default - do not match condition with status=False": {
   685  			podFailurePolicy: &batch.PodFailurePolicy{
   686  				Rules: []batch.PodFailurePolicyRule{
   687  					{
   688  						Action: batch.PodFailurePolicyActionIgnore,
   689  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   690  							{
   691  								Type:   v1.DisruptionTarget,
   692  								Status: v1.ConditionTrue,
   693  							},
   694  						},
   695  					},
   696  				},
   697  			},
   698  			failedPod: &v1.Pod{
   699  				ObjectMeta: validPodObjectMeta,
   700  				Status: v1.PodStatus{
   701  					Phase: v1.PodFailed,
   702  					Conditions: []v1.PodCondition{
   703  						{
   704  							Type:   v1.DisruptionTarget,
   705  							Status: v1.ConditionFalse,
   706  						},
   707  					},
   708  				},
   709  			},
   710  			wantJobFailureMessage: nil,
   711  			wantCountFailed:       true,
   712  		},
   713  		"job fail rule matched for pod conditions": {
   714  			podFailurePolicy: &batch.PodFailurePolicy{
   715  				Rules: []batch.PodFailurePolicyRule{
   716  					{
   717  						Action: batch.PodFailurePolicyActionFailJob,
   718  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   719  							{
   720  								Type:   v1.DisruptionTarget,
   721  								Status: v1.ConditionTrue,
   722  							},
   723  						},
   724  					},
   725  				},
   726  			},
   727  			failedPod: &v1.Pod{
   728  				ObjectMeta: validPodObjectMeta,
   729  				Status: v1.PodStatus{
   730  					Phase: v1.PodFailed,
   731  					Conditions: []v1.PodCondition{
   732  						{
   733  							Type:   v1.DisruptionTarget,
   734  							Status: v1.ConditionTrue,
   735  						},
   736  					},
   737  				},
   738  			},
   739  			wantJobFailureMessage: ptr.To("Pod default/mypod has condition DisruptionTarget matching FailJob rule at index 0"),
   740  			wantCountFailed:       true,
   741  			wantAction:            &failJob,
   742  		},
   743  		"count rule matched for pod conditions": {
   744  			podFailurePolicy: &batch.PodFailurePolicy{
   745  				Rules: []batch.PodFailurePolicyRule{
   746  					{
   747  						Action: batch.PodFailurePolicyActionCount,
   748  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   749  							{
   750  								Type:   v1.DisruptionTarget,
   751  								Status: v1.ConditionTrue,
   752  							},
   753  						},
   754  					},
   755  				},
   756  			},
   757  			failedPod: &v1.Pod{
   758  				ObjectMeta: validPodObjectMeta,
   759  				Status: v1.PodStatus{
   760  					Phase: v1.PodFailed,
   761  					Conditions: []v1.PodCondition{
   762  						{
   763  							Type:   v1.DisruptionTarget,
   764  							Status: v1.ConditionTrue,
   765  						},
   766  					},
   767  				},
   768  			},
   769  			wantJobFailureMessage: nil,
   770  			wantCountFailed:       true,
   771  			wantAction:            &count,
   772  		},
   773  		"no rule matched": {
   774  			podFailurePolicy: &batch.PodFailurePolicy{
   775  				Rules: []batch.PodFailurePolicyRule{
   776  					{
   777  						Action: batch.PodFailurePolicyActionCount,
   778  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   779  							Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
   780  							Values:   []int32{8},
   781  						},
   782  					},
   783  					{
   784  						Action: batch.PodFailurePolicyActionIgnore,
   785  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   786  							Operator: batch.PodFailurePolicyOnExitCodesOpIn,
   787  							Values:   []int32{1, 2, 3},
   788  						},
   789  					},
   790  					{
   791  						Action: batch.PodFailurePolicyActionFailJob,
   792  						OnExitCodes: &batch.PodFailurePolicyOnExitCodesRequirement{
   793  							Operator: batch.PodFailurePolicyOnExitCodesOpNotIn,
   794  							Values:   []int32{5, 6, 7},
   795  						},
   796  					},
   797  					{
   798  						Action: batch.PodFailurePolicyActionCount,
   799  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   800  							{
   801  								Type:   v1.PodConditionType("ResourceLimitExceeded"),
   802  								Status: v1.ConditionTrue,
   803  							},
   804  						},
   805  					},
   806  					{
   807  						Action: batch.PodFailurePolicyActionIgnore,
   808  						OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
   809  							{
   810  								Type:   v1.DisruptionTarget,
   811  								Status: v1.ConditionTrue,
   812  							},
   813  						},
   814  					},
   815  				},
   816  			},
   817  			failedPod: &v1.Pod{
   818  				ObjectMeta: validPodObjectMeta,
   819  				Status: v1.PodStatus{
   820  					Phase: v1.PodFailed,
   821  					ContainerStatuses: []v1.ContainerStatus{
   822  						{
   823  							State: v1.ContainerState{
   824  								Terminated: &v1.ContainerStateTerminated{
   825  									ExitCode: 32,
   826  								},
   827  							},
   828  						},
   829  					},
   830  				},
   831  			},
   832  			wantJobFailureMessage: nil,
   833  			wantCountFailed:       true,
   834  			wantAction:            &count,
   835  		},
   836  	}
   837  	for name, tc := range testCases {
   838  		t.Run(name, func(t *testing.T) {
   839  			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobBackoffLimitPerIndex, tc.enableJobBackoffLimitPerIndex)()
   840  			jobFailMessage, countFailed, action := matchPodFailurePolicy(tc.podFailurePolicy, tc.failedPod)
   841  			if diff := cmp.Diff(tc.wantJobFailureMessage, jobFailMessage); diff != "" {
   842  				t.Errorf("Unexpected job failure message: %s", diff)
   843  			}
   844  			if tc.wantCountFailed != countFailed {
   845  				t.Errorf("Unexpected count failed. want: %v. got: %v", tc.wantCountFailed, countFailed)
   846  			}
   847  			if diff := cmp.Diff(tc.wantAction, action); diff != "" {
   848  				t.Errorf("Unexpected failure policy action: %s", diff)
   849  			}
   850  		})
   851  	}
   852  }
   853  

View as plain text