
Source file src/k8s.io/kubernetes/pkg/kubelet/kubelet_pods.go

Documentation: k8s.io/kubernetes/pkg/kubelet

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     8      http://www.apache.org/licenses/LICENSE-2.0
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    17  package kubelet
    19  import (
    20  	"bytes"
    21  	"context"
    22  	goerrors "errors"
    23  	"fmt"
    24  	"io"
    25  	"net/http"
    26  	"net/url"
    27  	"os"
    28  	"os/exec"
    29  	"os/user"
    30  	"path/filepath"
    31  	"runtime"
    32  	"sort"
    33  	"strconv"
    34  	"strings"
    36  	"github.com/google/go-cmp/cmp"
    37  	v1 "k8s.io/api/core/v1"
    38  	"k8s.io/apimachinery/pkg/api/errors"
    39  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    40  	"k8s.io/apimachinery/pkg/labels"
    41  	"k8s.io/apimachinery/pkg/types"
    42  	"k8s.io/apimachinery/pkg/util/sets"
    43  	utilvalidation "k8s.io/apimachinery/pkg/util/validation"
    44  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    45  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    46  	"k8s.io/klog/v2"
    47  	"k8s.io/kubelet/pkg/cri/streaming/portforward"
    48  	remotecommandserver "k8s.io/kubelet/pkg/cri/streaming/remotecommand"
    49  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    50  	"k8s.io/kubernetes/pkg/api/v1/resource"
    51  	podshelper "k8s.io/kubernetes/pkg/apis/core/pods"
    52  	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    53  	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
    54  	"k8s.io/kubernetes/pkg/features"
    55  	"k8s.io/kubernetes/pkg/fieldpath"
    56  	"k8s.io/kubernetes/pkg/kubelet/cm"
    57  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    58  	"k8s.io/kubernetes/pkg/kubelet/envvars"
    59  	"k8s.io/kubernetes/pkg/kubelet/images"
    60  	"k8s.io/kubernetes/pkg/kubelet/metrics"
    61  	"k8s.io/kubernetes/pkg/kubelet/status"
    62  	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
    63  	"k8s.io/kubernetes/pkg/kubelet/util"
    64  	utilfs "k8s.io/kubernetes/pkg/util/filesystem"
    65  	utilpod "k8s.io/kubernetes/pkg/util/pod"
    66  	volumeutil "k8s.io/kubernetes/pkg/volume/util"
    67  	"k8s.io/kubernetes/pkg/volume/util/hostutil"
    68  	"k8s.io/kubernetes/pkg/volume/util/subpath"
    69  	"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
    70  	volumevalidation "k8s.io/kubernetes/pkg/volume/validation"
    71  	"k8s.io/kubernetes/third_party/forked/golang/expansion"
    72  	utilnet "k8s.io/utils/net"
    73  )
    75  const (
    76  	managedHostsHeader                = "# Kubernetes-managed hosts file.\n"
    77  	managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n"
    78  )
    80  // Container state reason list
    81  const (
    82  	PodInitializing   = "PodInitializing"
    83  	ContainerCreating = "ContainerCreating"
    85  	kubeletUser = "kubelet"
    86  )
    88  // parseGetSubIdsOutput parses the output from the `getsubids` tool, which is used to query subordinate user or group ID ranges for
    89  // a given user or group. getsubids produces a line for each mapping configured.
    90  // Here we expect that there is a single mapping, and the same values are used for the subordinate user and group ID ranges.
    91  // The output is something like:
    92  // $ getsubids kubelet
    93  // 0: kubelet 65536 2147483648
    94  // $ getsubids -g kubelet
    95  // 0: kubelet 65536 2147483648
    96  func parseGetSubIdsOutput(input string) (uint32, uint32, error) {
    97  	lines := strings.Split(strings.Trim(input, "\n"), "\n")
    98  	if len(lines) != 1 {
    99  		return 0, 0, fmt.Errorf("error parsing line %q: it must contain only one line", input)
   100  	}
   102  	parts := strings.Fields(lines[0])
   103  	if len(parts) != 4 {
   104  		return 0, 0, fmt.Errorf("invalid line %q", input)
   105  	}
   107  	// Parsing the numbers
   108  	num1, err := strconv.ParseUint(parts[2], 10, 32)
   109  	if err != nil {
   110  		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
   111  	}
   113  	num2, err := strconv.ParseUint(parts[3], 10, 32)
   114  	if err != nil {
   115  		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
   116  	}
   118  	return uint32(num1), uint32(num2), nil
   119  }
   121  // getKubeletMappings returns the range of IDs that can be used to configure user namespaces.
   122  // If subordinate user or group ID ranges are specified for the kubelet user and the getsubids tool
   123  // is installed, then the single mapping specified both for user and group IDs will be used.
   124  // If the tool is not installed, or there are no IDs configured, the default mapping is returned.
   125  // The default mapping includes the entire IDs range except IDs below 65536.
   126  func (kl *Kubelet) getKubeletMappings() (uint32, uint32, error) {
   127  	// default mappings to return if there is no specific configuration
   128  	const defaultFirstID = 1 << 16
   129  	const defaultLen = 1<<32 - defaultFirstID
   131  	if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) {
   132  		return defaultFirstID, defaultLen, nil
   133  	}
   135  	_, err := user.Lookup(kubeletUser)
   136  	if err != nil {
   137  		var unknownUserErr user.UnknownUserError
   138  		if goerrors.As(err, &unknownUserErr) {
   139  			// if the user is not found, we assume that the user is not configured
   140  			return defaultFirstID, defaultLen, nil
   141  		}
   142  		return 0, 0, err
   143  	}
   145  	execName := "getsubids"
   146  	cmd, err := exec.LookPath(execName)
   147  	if err != nil {
   148  		if os.IsNotExist(err) {
   149  			klog.V(2).InfoS("Could not find executable, default mappings will be used for the user namespaces", "executable", execName, "err", err)
   150  			return defaultFirstID, defaultLen, nil
   151  		}
   152  		return 0, 0, err
   153  	}
   154  	outUids, err := exec.Command(cmd, kubeletUser).Output()
   155  	if err != nil {
   156  		return 0, 0, fmt.Errorf("error retrieving additional ids for user %q", kubeletUser)
   157  	}
   158  	outGids, err := exec.Command(cmd, "-g", kubeletUser).Output()
   159  	if err != nil {
   160  		return 0, 0, fmt.Errorf("error retrieving additional gids for user %q", kubeletUser)
   161  	}
   162  	if string(outUids) != string(outGids) {
   163  		return 0, 0, fmt.Errorf("mismatched subuids and subgids for user %q", kubeletUser)
   164  	}
   165  	return parseGetSubIdsOutput(string(outUids))
   166  }
   168  // Get a list of pods that have data directories.
   169  func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
   170  	podInfos, err := os.ReadDir(kl.getPodsDir())
   171  	if err != nil {
   172  		return nil, err
   173  	}
   174  	pods := []types.UID{}
   175  	for i := range podInfos {
   176  		if podInfos[i].IsDir() {
   177  			pods = append(pods, types.UID(podInfos[i].Name()))
   178  		}
   179  	}
   180  	return pods, nil
   181  }
   183  // GetActivePods returns pods that have been admitted to the kubelet that
   184  // are not fully terminated. This is mapped to the "desired state" of the
   185  // kubelet - what pods should be running.
   186  //
   187  // WARNING: Currently this list does not include pods that have been force
   188  // deleted but may still be terminating, which means resources assigned to
   189  // those pods during admission may still be in use. See
   190  // https://github.com/kubernetes/kubernetes/issues/104824
   191  func (kl *Kubelet) GetActivePods() []*v1.Pod {
   192  	allPods := kl.podManager.GetPods()
   193  	activePods := kl.filterOutInactivePods(allPods)
   194  	return activePods
   195  }
   197  // makeBlockVolumes maps the raw block devices specified in the path of the container
   198  // Experimental
   199  func (kl *Kubelet) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVolumes kubecontainer.VolumeMap, blkutil volumepathhandler.BlockVolumePathHandler) ([]kubecontainer.DeviceInfo, error) {
   200  	var devices []kubecontainer.DeviceInfo
   201  	for _, device := range container.VolumeDevices {
   202  		// check path is absolute
   203  		if !utilfs.IsAbs(device.DevicePath) {
   204  			return nil, fmt.Errorf("error DevicePath `%s` must be an absolute path", device.DevicePath)
   205  		}
   206  		vol, ok := podVolumes[device.Name]
   207  		if !ok || vol.BlockVolumeMapper == nil {
   208  			klog.ErrorS(nil, "Block volume cannot be satisfied for container, because the volume is missing or the volume mapper is nil", "containerName", container.Name, "device", device)
   209  			return nil, fmt.Errorf("cannot find volume %q to pass into container %q", device.Name, container.Name)
   210  		}
   211  		// Get a symbolic link associated to a block device under pod device path
   212  		dirPath, volName := vol.BlockVolumeMapper.GetPodDeviceMapPath()
   213  		symlinkPath := filepath.Join(dirPath, volName)
   214  		if islinkExist, checkErr := blkutil.IsSymlinkExist(symlinkPath); checkErr != nil {
   215  			return nil, checkErr
   216  		} else if islinkExist {
   217  			// Check readOnly in PVCVolumeSource and set read only permission if it's true.
   218  			permission := "mrw"
   219  			if vol.ReadOnly {
   220  				permission = "r"
   221  			}
   222  			klog.V(4).InfoS("Device will be attached to container in the corresponding path on host", "containerName", container.Name, "path", symlinkPath)
   223  			devices = append(devices, kubecontainer.DeviceInfo{PathOnHost: symlinkPath, PathInContainer: device.DevicePath, Permissions: permission})
   224  		}
   225  	}
   227  	return devices, nil
   228  }
   230  // shouldMountHostsFile checks if the nodes /etc/hosts should be mounted
   231  // Kubernetes only mounts on /etc/hosts if:
   232  // - container is not an infrastructure (pause) container
   233  // - container is not already mounting on /etc/hosts
   234  // Kubernetes will not mount /etc/hosts if:
   235  // - when the Pod sandbox is being created, its IP is still unknown. Hence, PodIP will not have been set.
   236  // - Windows pod contains a hostProcess container
   237  func shouldMountHostsFile(pod *v1.Pod, podIPs []string) bool {
   238  	shouldMount := len(podIPs) > 0
   239  	if runtime.GOOS == "windows" {
   240  		return shouldMount && !kubecontainer.HasWindowsHostProcessContainer(pod)
   241  	}
   242  	return shouldMount
   243  }
   245  // makeMounts determines the mount points for the given container.
   246  func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar, supportsRRO bool) ([]kubecontainer.Mount, func(), error) {
   247  	mountEtcHostsFile := shouldMountHostsFile(pod, podIPs)
   248  	klog.V(3).InfoS("Creating hosts mount for container", "pod", klog.KObj(pod), "containerName", container.Name, "podIPs", podIPs, "path", mountEtcHostsFile)
   249  	mounts := []kubecontainer.Mount{}
   250  	var cleanupAction func()
   251  	for i, mount := range container.VolumeMounts {
   252  		// do not mount /etc/hosts if container is already mounting on the path
   253  		mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
   254  		vol, ok := podVolumes[mount.Name]
   255  		if !ok || vol.Mounter == nil {
   256  			klog.ErrorS(nil, "Mount cannot be satisfied for the container, because the volume is missing or the volume mounter (vol.Mounter) is nil",
   257  				"containerName", container.Name, "ok", ok, "volumeMounter", mount)
   258  			return nil, cleanupAction, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name)
   259  		}
   261  		relabelVolume := false
   262  		// If the volume supports SELinux and it has not been
   263  		// relabeled already and it is not a read-only volume,
   264  		// relabel it and mark it as labeled
   265  		if vol.Mounter.GetAttributes().Managed && vol.Mounter.GetAttributes().SELinuxRelabel && !vol.SELinuxLabeled {
   266  			vol.SELinuxLabeled = true
   267  			relabelVolume = true
   268  		}
   269  		hostPath, err := volumeutil.GetPath(vol.Mounter)
   270  		if err != nil {
   271  			return nil, cleanupAction, err
   272  		}
   274  		subPath := mount.SubPath
   275  		if mount.SubPathExpr != "" {
   276  			subPath, err = kubecontainer.ExpandContainerVolumeMounts(mount, expandEnvs)
   278  			if err != nil {
   279  				return nil, cleanupAction, err
   280  			}
   281  		}
   283  		if subPath != "" {
   284  			if utilfs.IsAbs(subPath) {
   285  				return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", subPath)
   286  			}
   288  			err = volumevalidation.ValidatePathNoBacksteps(subPath)
   289  			if err != nil {
   290  				return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", subPath, err)
   291  			}
   293  			volumePath := hostPath
   294  			hostPath = filepath.Join(volumePath, subPath)
   296  			if subPathExists, err := hu.PathExists(hostPath); err != nil {
   297  				klog.ErrorS(nil, "Could not determine if subPath exists, will not attempt to change its permissions", "path", hostPath)
   298  			} else if !subPathExists {
   299  				// Create the sub path now because if it's auto-created later when referenced, it may have an
   300  				// incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
   301  				// when the pod specifies an fsGroup, and if the directory is not created here, Docker will
   302  				// later auto-create it with the incorrect mode 0750
   303  				// Make extra care not to escape the volume!
   304  				perm, err := hu.GetMode(volumePath)
   305  				if err != nil {
   306  					return nil, cleanupAction, err
   307  				}
   308  				if err := subpather.SafeMakeDir(subPath, volumePath, perm); err != nil {
   309  					// Don't pass detailed error back to the user because it could give information about host filesystem
   310  					klog.ErrorS(err, "Failed to create subPath directory for volumeMount of the container", "containerName", container.Name, "volumeMountName", mount.Name)
   311  					return nil, cleanupAction, fmt.Errorf("failed to create subPath directory for volumeMount %q of container %q", mount.Name, container.Name)
   312  				}
   313  			}
   314  			hostPath, cleanupAction, err = subpather.PrepareSafeSubpath(subpath.Subpath{
   315  				VolumeMountIndex: i,
   316  				Path:             hostPath,
   317  				VolumeName:       vol.InnerVolumeSpecName,
   318  				VolumePath:       volumePath,
   319  				PodDir:           podDir,
   320  				ContainerName:    container.Name,
   321  			})
   322  			if err != nil {
   323  				// Don't pass detailed error back to the user because it could give information about host filesystem
   324  				klog.ErrorS(err, "Failed to prepare subPath for volumeMount of the container", "containerName", container.Name, "volumeMountName", mount.Name)
   325  				return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name)
   326  			}
   327  		}
   329  		// Docker Volume Mounts fail on Windows if it is not of the form C:/
   330  		if volumeutil.IsWindowsLocalPath(runtime.GOOS, hostPath) {
   331  			hostPath = volumeutil.MakeAbsolutePath(runtime.GOOS, hostPath)
   332  		}
   334  		containerPath := mount.MountPath
   335  		// IsAbs returns false for UNC path/SMB shares/named pipes in Windows. So check for those specifically and skip MakeAbsolutePath
   336  		if !volumeutil.IsWindowsUNCPath(runtime.GOOS, containerPath) && !utilfs.IsAbs(containerPath) {
   337  			containerPath = volumeutil.MakeAbsolutePath(runtime.GOOS, containerPath)
   338  		}
   340  		propagation, err := translateMountPropagation(mount.MountPropagation)
   341  		if err != nil {
   342  			return nil, cleanupAction, err
   343  		}
   344  		klog.V(5).InfoS("Mount has propagation", "pod", klog.KObj(pod), "containerName", container.Name, "volumeMountName", mount.Name, "propagation", propagation)
   345  		mustMountRO := vol.Mounter.GetAttributes().ReadOnly
   347  		rro, err := resolveRecursiveReadOnly(mount, supportsRRO)
   348  		if err != nil {
   349  			return nil, cleanupAction, fmt.Errorf("failed to resolve recursive read-only mode: %w", err)
   350  		}
   351  		if rro && !utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
   352  			return nil, cleanupAction, fmt.Errorf("recursive read-only mount needs feature gate %q to be enabled", features.RecursiveReadOnlyMounts)
   353  		}
   355  		mounts = append(mounts, kubecontainer.Mount{
   356  			Name:              mount.Name,
   357  			ContainerPath:     containerPath,
   358  			HostPath:          hostPath,
   359  			ReadOnly:          mount.ReadOnly || mustMountRO,
   360  			RecursiveReadOnly: rro,
   361  			SELinuxRelabel:    relabelVolume,
   362  			Propagation:       propagation,
   363  		})
   364  	}
   365  	if mountEtcHostsFile {
   366  		hostAliases := pod.Spec.HostAliases
   367  		hostsMount, err := makeHostsMount(podDir, podIPs, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork)
   368  		if err != nil {
   369  			return nil, cleanupAction, err
   370  		}
   371  		mounts = append(mounts, *hostsMount)
   372  	}
   373  	return mounts, cleanupAction, nil
   374  }
   376  // translateMountPropagation transforms v1.MountPropagationMode to
   377  // runtimeapi.MountPropagation.
   378  func translateMountPropagation(mountMode *v1.MountPropagationMode) (runtimeapi.MountPropagation, error) {
   379  	if runtime.GOOS == "windows" {
   380  		// Windows containers doesn't support mount propagation, use private for it.
   381  		// Refer https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation.
   382  		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
   383  	}
   385  	switch {
   386  	case mountMode == nil:
   387  		// PRIVATE is the default
   388  		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
   389  	case *mountMode == v1.MountPropagationHostToContainer:
   390  		return runtimeapi.MountPropagation_PROPAGATION_HOST_TO_CONTAINER, nil
   391  	case *mountMode == v1.MountPropagationBidirectional:
   392  		return runtimeapi.MountPropagation_PROPAGATION_BIDIRECTIONAL, nil
   393  	case *mountMode == v1.MountPropagationNone:
   394  		return runtimeapi.MountPropagation_PROPAGATION_PRIVATE, nil
   395  	default:
   396  		return 0, fmt.Errorf("invalid MountPropagation mode: %q", *mountMode)
   397  	}
   398  }
   400  // getEtcHostsPath returns the full host-side path to a pod's generated /etc/hosts file
   401  func getEtcHostsPath(podDir string) string {
   402  	hostsFilePath := filepath.Join(podDir, "etc-hosts")
   403  	// Volume Mounts fail on Windows if it is not of the form C:/
   404  	return volumeutil.MakeAbsolutePath(runtime.GOOS, hostsFilePath)
   405  }
   407  // makeHostsMount makes the mountpoint for the hosts file that the containers
   408  // in a pod are injected with. podIPs is provided instead of podIP as podIPs
   409  // are present even if dual-stack feature flag is not enabled.
   410  func makeHostsMount(podDir string, podIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) (*kubecontainer.Mount, error) {
   411  	hostsFilePath := getEtcHostsPath(podDir)
   412  	if err := ensureHostsFile(hostsFilePath, podIPs, hostName, hostDomainName, hostAliases, useHostNetwork); err != nil {
   413  		return nil, err
   414  	}
   415  	return &kubecontainer.Mount{
   416  		Name:           "k8s-managed-etc-hosts",
   417  		ContainerPath:  etcHostsPath,
   418  		HostPath:       hostsFilePath,
   419  		ReadOnly:       false,
   420  		SELinuxRelabel: true,
   421  	}, nil
   422  }
   424  // ensureHostsFile ensures that the given host file has an up-to-date ip, host
   425  // name, and domain name.
   426  func ensureHostsFile(fileName string, hostIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias, useHostNetwork bool) error {
   427  	var hostsFileContent []byte
   428  	var err error
   430  	if useHostNetwork {
   431  		// if Pod is using host network, read hosts file from the node's filesystem.
   432  		// `etcHostsPath` references the location of the hosts file on the node.
   433  		// `/etc/hosts` for *nix systems.
   434  		hostsFileContent, err = nodeHostsFileContent(etcHostsPath, hostAliases)
   435  		if err != nil {
   436  			return err
   437  		}
   438  	} else {
   439  		// if Pod is not using host network, create a managed hosts file with Pod IP and other information.
   440  		hostsFileContent = managedHostsFileContent(hostIPs, hostName, hostDomainName, hostAliases)
   441  	}
   443  	hostsFilePerm := os.FileMode(0644)
   444  	if err := os.WriteFile(fileName, hostsFileContent, hostsFilePerm); err != nil {
   445  		return err
   446  	}
   447  	return os.Chmod(fileName, hostsFilePerm)
   448  }
   450  // nodeHostsFileContent reads the content of node's hosts file.
   451  func nodeHostsFileContent(hostsFilePath string, hostAliases []v1.HostAlias) ([]byte, error) {
   452  	hostsFileContent, err := os.ReadFile(hostsFilePath)
   453  	if err != nil {
   454  		return nil, err
   455  	}
   456  	var buffer bytes.Buffer
   457  	buffer.WriteString(managedHostsHeaderWithHostNetwork)
   458  	buffer.Write(hostsFileContent)
   459  	buffer.Write(hostsEntriesFromHostAliases(hostAliases))
   460  	return buffer.Bytes(), nil
   461  }
   463  // managedHostsFileContent generates the content of the managed etc hosts based on Pod IPs and other
   464  // information.
   465  func managedHostsFileContent(hostIPs []string, hostName, hostDomainName string, hostAliases []v1.HostAlias) []byte {
   466  	var buffer bytes.Buffer
   467  	buffer.WriteString(managedHostsHeader)
   468  	buffer.WriteString("\tlocalhost\n")                      // ipv4 localhost
   469  	buffer.WriteString("::1\tlocalhost ip6-localhost ip6-loopback\n") // ipv6 localhost
   470  	buffer.WriteString("fe00::0\tip6-localnet\n")
   471  	buffer.WriteString("fe00::0\tip6-mcastprefix\n")
   472  	buffer.WriteString("fe00::1\tip6-allnodes\n")
   473  	buffer.WriteString("fe00::2\tip6-allrouters\n")
   474  	if len(hostDomainName) > 0 {
   475  		// host entry generated for all IPs in podIPs
   476  		// podIPs field is populated for clusters even
   477  		// dual-stack feature flag is not enabled.
   478  		for _, hostIP := range hostIPs {
   479  			buffer.WriteString(fmt.Sprintf("%s\t%s.%s\t%s\n", hostIP, hostName, hostDomainName, hostName))
   480  		}
   481  	} else {
   482  		for _, hostIP := range hostIPs {
   483  			buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostIP, hostName))
   484  		}
   485  	}
   486  	buffer.Write(hostsEntriesFromHostAliases(hostAliases))
   487  	return buffer.Bytes()
   488  }
   490  func hostsEntriesFromHostAliases(hostAliases []v1.HostAlias) []byte {
   491  	if len(hostAliases) == 0 {
   492  		return []byte{}
   493  	}
   495  	var buffer bytes.Buffer
   496  	buffer.WriteString("\n")
   497  	buffer.WriteString("# Entries added by HostAliases.\n")
   498  	// for each IP, write all aliases onto single line in hosts file
   499  	for _, hostAlias := range hostAliases {
   500  		buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostAlias.IP, strings.Join(hostAlias.Hostnames, "\t")))
   501  	}
   502  	return buffer.Bytes()
   503  }
   505  // truncatePodHostnameIfNeeded truncates the pod hostname if it's longer than 63 chars.
   506  func truncatePodHostnameIfNeeded(podName, hostname string) (string, error) {
   507  	// Cap hostname at 63 chars (specification is 64bytes which is 63 chars and the null terminating char).
   508  	const hostnameMaxLen = 63
   509  	if len(hostname) <= hostnameMaxLen {
   510  		return hostname, nil
   511  	}
   512  	truncated := hostname[:hostnameMaxLen]
   513  	klog.ErrorS(nil, "Hostname for pod was too long, truncated it", "podName", podName, "hostnameMaxLen", hostnameMaxLen, "truncatedHostname", truncated)
   514  	// hostname should not end with '-' or '.'
   515  	truncated = strings.TrimRight(truncated, "-.")
   516  	if len(truncated) == 0 {
   517  		// This should never happen.
   518  		return "", fmt.Errorf("hostname for pod %q was invalid: %q", podName, hostname)
   519  	}
   520  	return truncated, nil
   521  }
   523  // GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
   524  func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod, runtimeHandler string) (*runtimeapi.UserNamespace, error) {
   525  	return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod, runtimeHandler)
   526  }
   528  // GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
   529  // given that pod's spec and annotations or returns an error.
   530  func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {
   531  	clusterDomain := kl.dnsConfigurer.ClusterDomain
   533  	hostname := pod.Name
   534  	if len(pod.Spec.Hostname) > 0 {
   535  		if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Hostname); len(msgs) != 0 {
   536  			return "", "", fmt.Errorf("pod Hostname %q is not a valid DNS label: %s", pod.Spec.Hostname, strings.Join(msgs, ";"))
   537  		}
   538  		hostname = pod.Spec.Hostname
   539  	}
   541  	hostname, err := truncatePodHostnameIfNeeded(pod.Name, hostname)
   542  	if err != nil {
   543  		return "", "", err
   544  	}
   546  	hostDomain := ""
   547  	if len(pod.Spec.Subdomain) > 0 {
   548  		if msgs := utilvalidation.IsDNS1123Label(pod.Spec.Subdomain); len(msgs) != 0 {
   549  			return "", "", fmt.Errorf("pod Subdomain %q is not a valid DNS label: %s", pod.Spec.Subdomain, strings.Join(msgs, ";"))
   550  		}
   551  		hostDomain = fmt.Sprintf("%s.%s.svc.%s", pod.Spec.Subdomain, pod.Namespace, clusterDomain)
   552  	}
   554  	return hostname, hostDomain, nil
   555  }
   557  // GetPodCgroupParent gets pod cgroup parent from container manager.
   558  func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string {
   559  	pcm := kl.containerManager.NewPodContainerManager()
   560  	_, cgroupParent := pcm.GetPodContainerName(pod)
   561  	return cgroupParent
   562  }
   564  // GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
   565  // the container runtime to set parameters for launching a container.
   566  func (kl *Kubelet) GenerateRunContainerOptions(ctx context.Context, pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) (*kubecontainer.RunContainerOptions, func(), error) {
   567  	supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
   569  	opts, err := kl.containerManager.GetResources(pod, container)
   570  	if err != nil {
   571  		return nil, nil, err
   572  	}
   573  	// The value of hostname is the short host name and it is sent to makeMounts to create /etc/hosts file.
   574  	hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod)
   575  	if err != nil {
   576  		return nil, nil, err
   577  	}
   578  	// nodename will be equal to hostname if SetHostnameAsFQDN is nil or false. If SetHostnameFQDN
   579  	// is true and hostDomainName is defined, nodename will be the FQDN (hostname.hostDomainName)
   580  	nodename, err := util.GetNodenameForKernel(hostname, hostDomainName, pod.Spec.SetHostnameAsFQDN)
   581  	if err != nil {
   582  		return nil, nil, err
   583  	}
   584  	opts.Hostname = nodename
   585  	podName := volumeutil.GetUniquePodName(pod)
   586  	volumes := kl.volumeManager.GetMountedVolumesForPod(podName)
   588  	blkutil := volumepathhandler.NewBlockVolumePathHandler()
   589  	blkVolumes, err := kl.makeBlockVolumes(pod, container, volumes, blkutil)
   590  	if err != nil {
   591  		return nil, nil, err
   592  	}
   593  	opts.Devices = append(opts.Devices, blkVolumes...)
   595  	envs, err := kl.makeEnvironmentVariables(pod, container, podIP, podIPs)
   596  	if err != nil {
   597  		return nil, nil, err
   598  	}
   599  	opts.Envs = append(opts.Envs, envs...)
   601  	// only podIPs is sent to makeMounts, as podIPs is populated even if dual-stack feature flag is not enabled.
   602  	mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIPs, volumes, kl.hostutil, kl.subpather, opts.Envs, supportsRRO)
   603  	if err != nil {
   604  		return nil, cleanupAction, err
   605  	}
   606  	opts.Mounts = append(opts.Mounts, mounts...)
   608  	// adding TerminationMessagePath on Windows is only allowed if ContainerD is used. Individual files cannot
   609  	// be mounted as volumes using Docker for Windows.
   610  	if len(container.TerminationMessagePath) != 0 {
   611  		p := kl.getPodContainerDir(pod.UID, container.Name)
   612  		if err := os.MkdirAll(p, 0750); err != nil {
   613  			klog.ErrorS(err, "Error on creating dir", "path", p)
   614  		} else {
   615  			opts.PodContainerDir = p
   616  		}
   617  	}
   619  	return opts, cleanupAction, nil
   620  }
   622  var masterServices = sets.NewString("kubernetes")
   624  // getServiceEnvVarMap makes a map[string]string of env vars for services a
   625  // pod in namespace ns should see.
   626  func (kl *Kubelet) getServiceEnvVarMap(ns string, enableServiceLinks bool) (map[string]string, error) {
   627  	var (
   628  		serviceMap = make(map[string]*v1.Service)
   629  		m          = make(map[string]string)
   630  	)
   632  	// Get all service resources from the master (via a cache),
   633  	// and populate them into service environment variables.
   634  	if kl.serviceLister == nil {
   635  		// Kubelets without masters (e.g. plain GCE ContainerVM) don't set env vars.
   636  		return m, nil
   637  	}
   638  	services, err := kl.serviceLister.List(labels.Everything())
   639  	if err != nil {
   640  		return m, fmt.Errorf("failed to list services when setting up env vars")
   641  	}
   643  	// project the services in namespace ns onto the master services
   644  	for i := range services {
   645  		service := services[i]
   646  		// ignore services where ClusterIP is "None" or empty
   647  		if !v1helper.IsServiceIPSet(service) {
   648  			continue
   649  		}
   650  		serviceName := service.Name
   652  		// We always want to add environment variabled for master services
   653  		// from the default namespace, even if enableServiceLinks is false.
   654  		// We also add environment variables for other services in the same
   655  		// namespace, if enableServiceLinks is true.
   656  		if service.Namespace == metav1.NamespaceDefault && masterServices.Has(serviceName) {
   657  			if _, exists := serviceMap[serviceName]; !exists {
   658  				serviceMap[serviceName] = service
   659  			}
   660  		} else if service.Namespace == ns && enableServiceLinks {
   661  			serviceMap[serviceName] = service
   662  		}
   663  	}
   665  	mappedServices := []*v1.Service{}
   666  	for key := range serviceMap {
   667  		mappedServices = append(mappedServices, serviceMap[key])
   668  	}
   670  	for _, e := range envvars.FromServices(mappedServices) {
   671  		m[e.Name] = e.Value
   672  	}
   673  	return m, nil
   674  }
// makeEnvironmentVariables builds the environment variables for the given
// container of the given pod.
//
// Values are collected from three sources. Service environment variables for
// the pod's namespace (see getServiceEnvVarMap) have the lowest priority, the
// container's EnvFrom sources (ConfigMaps and Secrets) come next, and the
// container's Env entries override both. podIP and podIPs are only passed
// through to podFieldSelectorRuntimeValue for FieldRef sources.
//
// An error is returned when pod.Spec.EnableServiceLinks is nil, when the pod
// is not a static pod and the service lister has not synced yet, or when a
// referenced source cannot be resolved and is not marked optional. The result
// is assembled by ranging over maps, so the order of the returned variables is
// not deterministic.
func (kl *Kubelet) makeEnvironmentVariables(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) ([]kubecontainer.EnvVar, error) {
	if pod.Spec.EnableServiceLinks == nil {
		return nil, fmt.Errorf("nil pod.spec.enableServiceLinks encountered, cannot construct envvars")
	}

	// If the pod originates from the kube-api, then we know that the kube-apiserver is responding and the kubelet's credentials are valid.
	// Knowing this, it is reasonable to wait until the service lister has synchronized at least once before attempting to build
	// a service env var map.  This doesn't prevent the race below from happening entirely, but it does prevent the "obvious"
	// failure case of services simply not having completed a list operation that can reasonably be expected to succeed.
	// One common case this prevents is a kubelet restart reading pods before services and some pod not having the
	// KUBERNETES_SERVICE_HOST injected because we didn't wait a short time for services to sync before proceeding.
	// The KUBERNETES_SERVICE_HOST link is special because it is unconditionally injected into pods and is read by the
	// in-cluster-config for pod clients
	if !kubetypes.IsStaticPod(pod) && !kl.serviceHasSynced() {
		return nil, fmt.Errorf("services have not yet been read at least once, cannot construct envvars")
	}

	// result stays nil until the final append loops, so every early error
	// return below hands back a nil slice.
	var result []kubecontainer.EnvVar
	// Note:  These are added to the docker Config, but are not included in the checksum computed
	// by kubecontainer.HashContainer(...).  That way, we can still determine whether an
	// v1.Container is already running by its hash. (We don't want to restart a container just
	// because some service changed.)
	//
	// Note that there is a race between Kubelet seeing the pod and kubelet seeing the service.
	// To avoid this users can: (1) wait between starting a service and starting; or (2) detect
	// missing service env var and exit and be restarted; or (3) use DNS instead of env vars
	// and keep trying to resolve the DNS name of the service (recommended).
	serviceEnv, err := kl.getServiceEnvVarMap(pod.Namespace, *pod.Spec.EnableServiceLinks)
	if err != nil {
		return result, err
	}

	// configMaps and secrets cache objects by name for the duration of this
	// call so each one is fetched at most once; tmpEnv accumulates the
	// variables derived from EnvFrom and Env.
	var (
		configMaps = make(map[string]*v1.ConfigMap)
		secrets    = make(map[string]*v1.Secret)
		tmpEnv     = make(map[string]string)
	)

	// Env will override EnvFrom variables.
	// Process EnvFrom first then allow Env to replace existing values.
	for _, envFrom := range container.EnvFrom {
		switch {
		case envFrom.ConfigMapRef != nil:
			cm := envFrom.ConfigMapRef
			name := cm.Name
			configMap, ok := configMaps[name]
			if !ok {
				if kl.kubeClient == nil {
					return result, fmt.Errorf("couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
				}
				optional := cm.Optional != nil && *cm.Optional
				configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
				if err != nil {
					if errors.IsNotFound(err) && optional {
						// ignore error when marked optional
						// (continue applies to the enclosing for loop and
						// skips this EnvFrom source)
						continue
					}
					return result, err
				}
				configMaps[name] = configMap
			}

			// Copy every key of the ConfigMap, applying the optional prefix.
			for k, v := range configMap.Data {
				if len(envFrom.Prefix) > 0 {
					k = envFrom.Prefix + k
				}

				tmpEnv[k] = v
			}
		case envFrom.SecretRef != nil:
			s := envFrom.SecretRef
			name := s.Name
			secret, ok := secrets[name]
			if !ok {
				if kl.kubeClient == nil {
					return result, fmt.Errorf("couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
				}
				optional := s.Optional != nil && *s.Optional
				secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
				if err != nil {
					if errors.IsNotFound(err) && optional {
						// ignore error when marked optional
						// (continue applies to the enclosing for loop and
						// skips this EnvFrom source)
						continue
					}
					return result, err
				}
				secrets[name] = secret
			}

			// Copy every key of the Secret, applying the optional prefix;
			// the byte values are converted to strings.
			for k, v := range secret.Data {
				if len(envFrom.Prefix) > 0 {
					k = envFrom.Prefix + k
				}

				tmpEnv[k] = string(v)
			}
		}
	}

	// Determine the final values of variables:
	//
	// 1.  Determine the final value of each variable:
	//     a.  If the variable's Value is set, expand the `$(var)` references to other
	//         variables in the .Value field; the sources of variables are the declared
	//         variables of the container and the service environment variables
	//     b.  If a source is defined for an environment variable, resolve the source
	// 2.  Create the container's environment in the order variables are declared
	// 3.  Add remaining service environment vars
	var (
		mappingFunc = expansion.MappingFuncFor(tmpEnv, serviceEnv)
	)
	for _, envVar := range container.Env {
		// An empty Value falls through to ValueFrom; when neither is set the
		// variable is recorded with an empty string value.
		runtimeVal := envVar.Value
		if runtimeVal != "" {
			// Step 1a: expand variable references
			runtimeVal = expansion.Expand(runtimeVal, mappingFunc)
		} else if envVar.ValueFrom != nil {
			// Step 1b: resolve alternate env var sources
			switch {
			case envVar.ValueFrom.FieldRef != nil:
				runtimeVal, err = kl.podFieldSelectorRuntimeValue(envVar.ValueFrom.FieldRef, pod, podIP, podIPs)
				if err != nil {
					return result, err
				}
			case envVar.ValueFrom.ResourceFieldRef != nil:
				// Note: := declares a case-local err here; the function-level
				// err is not modified inside this case.
				defaultedPod, defaultedContainer, err := kl.defaultPodLimitsForDownwardAPI(pod, container)
				if err != nil {
					return result, err
				}
				runtimeVal, err = containerResourceRuntimeValue(envVar.ValueFrom.ResourceFieldRef, defaultedPod, defaultedContainer)
				if err != nil {
					return result, err
				}
			case envVar.ValueFrom.ConfigMapKeyRef != nil:
				cm := envVar.ValueFrom.ConfigMapKeyRef
				name := cm.Name
				key := cm.Key
				optional := cm.Optional != nil && *cm.Optional
				configMap, ok := configMaps[name]
				if !ok {
					if kl.kubeClient == nil {
						return result, fmt.Errorf("couldn't get configMap %v/%v, no kubeClient defined", pod.Namespace, name)
					}
					configMap, err = kl.configMapManager.GetConfigMap(pod.Namespace, name)
					if err != nil {
						if errors.IsNotFound(err) && optional {
							// ignore error when marked optional
							// (the variable is omitted entirely because the
							// assignment to tmpEnv below is skipped)
							continue
						}
						return result, err
					}
					configMaps[name] = configMap
				}
				runtimeVal, ok = configMap.Data[key]
				if !ok {
					// A missing key is tolerated only for optional references.
					if optional {
						continue
					}
					return result, fmt.Errorf("couldn't find key %v in ConfigMap %v/%v", key, pod.Namespace, name)
				}
			case envVar.ValueFrom.SecretKeyRef != nil:
				s := envVar.ValueFrom.SecretKeyRef
				name := s.Name
				key := s.Key
				optional := s.Optional != nil && *s.Optional
				secret, ok := secrets[name]
				if !ok {
					if kl.kubeClient == nil {
						return result, fmt.Errorf("couldn't get secret %v/%v, no kubeClient defined", pod.Namespace, name)
					}
					secret, err = kl.secretManager.GetSecret(pod.Namespace, name)
					if err != nil {
						if errors.IsNotFound(err) && optional {
							// ignore error when marked optional
							// (the variable is omitted entirely because the
							// assignment to tmpEnv below is skipped)
							continue
						}
						return result, err
					}
					secrets[name] = secret
				}
				runtimeValBytes, ok := secret.Data[key]
				if !ok {
					// A missing key is tolerated only for optional references.
					if optional {
						continue
					}
					return result, fmt.Errorf("couldn't find key %v in Secret %v/%v", key, pod.Namespace, name)
				}
				runtimeVal = string(runtimeValBytes)
			}
		}

		// Later Env entries (and Env over EnvFrom) replace earlier values
		// stored under the same name.
		tmpEnv[envVar.Name] = runtimeVal
	}

	// Append the env vars
	for k, v := range tmpEnv {
		result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
	}

	// Append remaining service env vars.
	for k, v := range serviceEnv {
		// Accesses apiserver+Pods.
		// So, the master may set service env vars, or kubelet may.  In case both are doing
		// it, we skip the key from the kubelet-generated ones so we don't have duplicate
		// env vars.
		// TODO: remove this next line once all platforms use apiserver+Pods.
		if _, present := tmpEnv[k]; !present {
			result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
		}
	}
	return result, nil
}
   890  // podFieldSelectorRuntimeValue returns the runtime value of the given
   891  // selector for a pod.
   892  func (kl *Kubelet) podFieldSelectorRuntimeValue(fs *v1.ObjectFieldSelector, pod *v1.Pod, podIP string, podIPs []string) (string, error) {
   893  	internalFieldPath, _, err := podshelper.ConvertDownwardAPIFieldLabel(fs.APIVersion, fs.FieldPath, "")
   894  	if err != nil {
   895  		return "", err
   896  	}
   898  	// make podIPs order match node IP family preference #97979
   899  	podIPs = kl.sortPodIPs(podIPs)
   900  	if len(podIPs) > 0 {
   901  		podIP = podIPs[0]
   902  	}
   904  	switch internalFieldPath {
   905  	case "spec.nodeName":
   906  		return pod.Spec.NodeName, nil
   907  	case "spec.serviceAccountName":
   908  		return pod.Spec.ServiceAccountName, nil
   909  	case "status.hostIP":
   910  		hostIPs, err := kl.getHostIPsAnyWay()
   911  		if err != nil {
   912  			return "", err
   913  		}
   914  		return hostIPs[0].String(), nil
   915  	case "status.hostIPs":
   916  		if !utilfeature.DefaultFeatureGate.Enabled(features.PodHostIPs) {
   917  			return "", nil
   918  		}
   919  		hostIPs, err := kl.getHostIPsAnyWay()
   920  		if err != nil {
   921  			return "", err
   922  		}
   923  		ips := make([]string, 0, len(hostIPs))
   924  		for _, ip := range hostIPs {
   925  			ips = append(ips, ip.String())
   926  		}
   927  		return strings.Join(ips, ","), nil
   928  	case "status.podIP":
   929  		return podIP, nil
   930  	case "status.podIPs":
   931  		return strings.Join(podIPs, ","), nil
   932  	}
   933  	return fieldpath.ExtractFieldPathAsString(pod, internalFieldPath)
   934  }
   936  // containerResourceRuntimeValue returns the value of the provided container resource
   937  func containerResourceRuntimeValue(fs *v1.ResourceFieldSelector, pod *v1.Pod, container *v1.Container) (string, error) {
   938  	containerName := fs.ContainerName
   939  	if len(containerName) == 0 {
   940  		return resource.ExtractContainerResourceValue(fs, container)
   941  	}
   942  	return resource.ExtractResourceValueByContainerName(fs, pod, containerName)
   943  }
   945  // killPod instructs the container runtime to kill the pod. This method requires that
   946  // the pod status contains the result of the last syncPod, otherwise it may fail to
   947  // terminate newly created containers and sandboxes.
   948  func (kl *Kubelet) killPod(ctx context.Context, pod *v1.Pod, p kubecontainer.Pod, gracePeriodOverride *int64) error {
   949  	// Call the container runtime KillPod method which stops all known running containers of the pod
   950  	if err := kl.containerRuntime.KillPod(ctx, pod, p, gracePeriodOverride); err != nil {
   951  		return err
   952  	}
   953  	if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
   954  		klog.V(2).InfoS("Failed to update QoS cgroups while killing pod", "err", err)
   955  	}
   956  	return nil
   957  }
   959  // makePodDataDirs creates the dirs for the pod datas.
   960  func (kl *Kubelet) makePodDataDirs(pod *v1.Pod) error {
   961  	uid := pod.UID
   962  	if err := os.MkdirAll(kl.getPodDir(uid), 0750); err != nil && !os.IsExist(err) {
   963  		return err
   964  	}
   965  	if err := os.MkdirAll(kl.getPodVolumesDir(uid), 0750); err != nil && !os.IsExist(err) {
   966  		return err
   967  	}
   968  	if err := os.MkdirAll(kl.getPodPluginsDir(uid), 0750); err != nil && !os.IsExist(err) {
   969  		return err
   970  	}
   971  	return nil
   972  }
   974  // getPullSecretsForPod inspects the Pod and retrieves the referenced pull
   975  // secrets.
   976  func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
   977  	pullSecrets := []v1.Secret{}
   978  	failedPullSecrets := []string{}
   980  	for _, secretRef := range pod.Spec.ImagePullSecrets {
   981  		if len(secretRef.Name) == 0 {
   982  			// API validation permitted entries with empty names (https://issue.k8s.io/99454#issuecomment-787838112).
   983  			// Ignore to avoid unnecessary warnings.
   984  			continue
   985  		}
   986  		secret, err := kl.secretManager.GetSecret(pod.Namespace, secretRef.Name)
   987  		if err != nil {
   988  			klog.InfoS("Unable to retrieve pull secret, the image pull may not succeed.", "pod", klog.KObj(pod), "secret", klog.KObj(secret), "err", err)
   989  			failedPullSecrets = append(failedPullSecrets, secretRef.Name)
   990  			continue
   991  		}
   993  		pullSecrets = append(pullSecrets, *secret)
   994  	}
   996  	if len(failedPullSecrets) > 0 {
   997  		kl.recorder.Eventf(pod, v1.EventTypeWarning, "FailedToRetrieveImagePullSecret", "Unable to retrieve some image pull secrets (%s); attempting to pull the image may not succeed.", strings.Join(failedPullSecrets, ", "))
   998  	}
  1000  	return pullSecrets
  1001  }
  1003  // PodCouldHaveRunningContainers returns true if the pod with the given UID could still have running
  1004  // containers. This returns false if the pod has not yet been started or the pod is unknown.
  1005  func (kl *Kubelet) PodCouldHaveRunningContainers(pod *v1.Pod) bool {
  1006  	if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
  1007  		return true
  1008  	}
  1010  	// Check if pod might need to unprepare resources before termination
  1011  	// NOTE: This is a temporary solution. This call is here to avoid changing
  1012  	// status manager and its tests.
  1013  	// TODO: extend PodDeletionSafetyProvider interface and implement it
  1014  	// in a separate Kubelet method.
  1015  	if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
  1016  		if kl.containerManager.PodMightNeedToUnprepareResources(pod.UID) {
  1017  			return true
  1018  		}
  1019  	}
  1020  	return false
  1021  }
  1023  // PodIsFinished returns true if SyncTerminatedPod is finished, ie.
  1024  // all required node-level resources that a pod was consuming have
  1025  // been reclaimed by the kubelet.
  1026  func (kl *Kubelet) PodIsFinished(pod *v1.Pod) bool {
  1027  	return kl.podWorkers.ShouldPodBeFinished(pod.UID)
  1028  }
  1030  // filterOutInactivePods returns pods that are not in a terminal phase
  1031  // or are known to be fully terminated. This method should only be used
  1032  // when the set of pods being filtered is upstream of the pod worker, i.e.
  1033  // the pods the pod manager is aware of.
  1034  func (kl *Kubelet) filterOutInactivePods(pods []*v1.Pod) []*v1.Pod {
  1035  	filteredPods := make([]*v1.Pod, 0, len(pods))
  1036  	for _, p := range pods {
  1037  		// if a pod is fully terminated by UID, it should be excluded from the
  1038  		// list of pods
  1039  		if kl.podWorkers.IsPodKnownTerminated(p.UID) {
  1040  			continue
  1041  		}
  1043  		// terminal pods are considered inactive UNLESS they are actively terminating
  1044  		if kl.isAdmittedPodTerminal(p) && !kl.podWorkers.IsPodTerminationRequested(p.UID) {
  1045  			continue
  1046  		}
  1048  		filteredPods = append(filteredPods, p)
  1049  	}
  1050  	return filteredPods
  1051  }
  1053  // isAdmittedPodTerminal returns true if the provided config source pod is in
  1054  // a terminal phase, or if the Kubelet has already indicated the pod has reached
  1055  // a terminal phase but the config source has not accepted it yet. This method
  1056  // should only be used within the pod configuration loops that notify the pod
  1057  // worker, other components should treat the pod worker as authoritative.
  1058  func (kl *Kubelet) isAdmittedPodTerminal(pod *v1.Pod) bool {
  1059  	// pods are considered inactive if the config source has observed a
  1060  	// terminal phase (if the Kubelet recorded that the pod reached a terminal
  1061  	// phase the pod should never be restarted)
  1062  	if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
  1063  		return true
  1064  	}
  1065  	// a pod that has been marked terminal within the Kubelet is considered
  1066  	// inactive (may have been rejected by Kubelet admission)
  1067  	if status, ok := kl.statusManager.GetPodStatus(pod.UID); ok {
  1068  		if status.Phase == v1.PodSucceeded || status.Phase == v1.PodFailed {
  1069  			return true
  1070  		}
  1071  	}
  1072  	return false
  1073  }
  1075  // removeOrphanedPodStatuses removes obsolete entries in podStatus where
  1076  // the pod is no longer considered bound to this node.
  1077  func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Pod) {
  1078  	podUIDs := make(map[types.UID]bool)
  1079  	for _, pod := range pods {
  1080  		podUIDs[pod.UID] = true
  1081  	}
  1082  	for _, pod := range mirrorPods {
  1083  		podUIDs[pod.UID] = true
  1084  	}
  1085  	kl.statusManager.RemoveOrphanedStatuses(podUIDs)
  1086  }
  1088  // HandlePodCleanups performs a series of cleanup work, including terminating
  1089  // pod workers, killing unwanted pods, and removing orphaned volumes/pod
  1090  // directories. No config changes are sent to pod workers while this method
  1091  // is executing which means no new pods can appear. After this method completes
  1092  // the desired state of the kubelet should be reconciled with the actual state
  1093  // in the pod worker and other pod-related components.
  1094  //
  1095  // This function is executed by the main sync loop, so it must execute quickly
  1096  // and all nested calls should be asynchronous. Any slow reconciliation actions
  1097  // should be performed by other components (like the volume manager). The duration
  1098  // of this call is the minimum latency for static pods to be restarted if they
  1099  // are updated with a fixed UID (most should use a dynamic UID), and no config
  1100  // updates are delivered to the pod workers while this method is running.
  1101  func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
  1102  	// The kubelet lacks checkpointing, so we need to introspect the set of pods
  1103  	// in the cgroup tree prior to inspecting the set of pods in our pod manager.
  1104  	// this ensures our view of the cgroup tree does not mistakenly observe pods
  1105  	// that are added after the fact...
  1106  	var (
  1107  		cgroupPods map[types.UID]cm.CgroupName
  1108  		err        error
  1109  	)
  1110  	if kl.cgroupsPerQOS {
  1111  		pcm := kl.containerManager.NewPodContainerManager()
  1112  		cgroupPods, err = pcm.GetAllPodsFromCgroups()
  1113  		if err != nil {
  1114  			return fmt.Errorf("failed to get list of pods that still exist on cgroup mounts: %v", err)
  1115  		}
  1116  	}
  1118  	allPods, mirrorPods, orphanedMirrorPodFullnames := kl.podManager.GetPodsAndMirrorPods()
  1120  	// Pod phase progresses monotonically. Once a pod has reached a final state,
  1121  	// it should never leave regardless of the restart policy. The statuses
  1122  	// of such pods should not be changed, and there is no need to sync them.
  1123  	// TODO: the logic here does not handle two cases:
  1124  	//   1. If the containers were removed immediately after they died, kubelet
  1125  	//      may fail to generate correct statuses, let alone filtering correctly.
  1126  	//   2. If kubelet restarted before writing the terminated status for a pod
  1127  	//      to the apiserver, it could still restart the terminated pod (even
  1128  	//      though the pod was not considered terminated by the apiserver).
  1129  	// These two conditions could be alleviated by checkpointing kubelet.
  1131  	// Stop the workers for terminated pods not in the config source
  1132  	klog.V(3).InfoS("Clean up pod workers for terminated pods")
  1133  	workingPods := kl.podWorkers.SyncKnownPods(allPods)
  1135  	// Reconcile: At this point the pod workers have been pruned to the set of
  1136  	// desired pods. Pods that must be restarted due to UID reuse, or leftover
  1137  	// pods from previous runs, are not known to the pod worker.
  1139  	allPodsByUID := make(map[types.UID]*v1.Pod)
  1140  	for _, pod := range allPods {
  1141  		allPodsByUID[pod.UID] = pod
  1142  	}
  1144  	// Identify the set of pods that have workers, which should be all pods
  1145  	// from config that are not terminated, as well as any terminating pods
  1146  	// that have already been removed from config. Pods that are terminating
  1147  	// will be added to possiblyRunningPods, to prevent overly aggressive
  1148  	// cleanup of pod cgroups.
  1149  	stringIfTrue := func(t bool) string {
  1150  		if t {
  1151  			return "true"
  1152  		}
  1153  		return ""
  1154  	}
  1155  	runningPods := make(map[types.UID]sets.Empty)
  1156  	possiblyRunningPods := make(map[types.UID]sets.Empty)
  1157  	for uid, sync := range workingPods {
  1158  		switch sync.State {
  1159  		case SyncPod:
  1160  			runningPods[uid] = struct{}{}
  1161  			possiblyRunningPods[uid] = struct{}{}
  1162  		case TerminatingPod:
  1163  			possiblyRunningPods[uid] = struct{}{}
  1164  		default:
  1165  		}
  1166  	}
  1168  	// Retrieve the list of running containers from the runtime to perform cleanup.
  1169  	// We need the latest state to avoid delaying restarts of static pods that reuse
  1170  	// a UID.
  1171  	if err := kl.runtimeCache.ForceUpdateIfOlder(ctx, kl.clock.Now()); err != nil {
  1172  		klog.ErrorS(err, "Error listing containers")
  1173  		return err
  1174  	}
  1175  	runningRuntimePods, err := kl.runtimeCache.GetPods(ctx)
  1176  	if err != nil {
  1177  		klog.ErrorS(err, "Error listing containers")
  1178  		return err
  1179  	}
  1181  	// Stop probing pods that are not running
  1182  	klog.V(3).InfoS("Clean up probes for terminated pods")
  1183  	kl.probeManager.CleanupPods(possiblyRunningPods)
  1185  	// Remove orphaned pod statuses not in the total list of known config pods
  1186  	klog.V(3).InfoS("Clean up orphaned pod statuses")
  1187  	kl.removeOrphanedPodStatuses(allPods, mirrorPods)
  1189  	// Remove orphaned pod user namespace allocations (if any).
  1190  	klog.V(3).InfoS("Clean up orphaned pod user namespace allocations")
  1191  	if err = kl.usernsManager.CleanupOrphanedPodUsernsAllocations(allPods, runningRuntimePods); err != nil {
  1192  		klog.ErrorS(err, "Failed cleaning up orphaned pod user namespaces allocations")
  1193  	}
  1195  	// Remove orphaned volumes from pods that are known not to have any
  1196  	// containers. Note that we pass all pods (including terminated pods) to
  1197  	// the function, so that we don't remove volumes associated with terminated
  1198  	// but not yet deleted pods.
  1199  	// TODO: this method could more aggressively cleanup terminated pods
  1200  	// in the future (volumes, mount dirs, logs, and containers could all be
  1201  	// better separated)
  1202  	klog.V(3).InfoS("Clean up orphaned pod directories")
  1203  	err = kl.cleanupOrphanedPodDirs(allPods, runningRuntimePods)
  1204  	if err != nil {
  1205  		// We want all cleanup tasks to be run even if one of them failed. So
  1206  		// we just log an error here and continue other cleanup tasks.
  1207  		// This also applies to the other clean up tasks.
  1208  		klog.ErrorS(err, "Failed cleaning up orphaned pod directories")
  1209  	}
  1211  	// Remove any orphaned mirror pods (mirror pods are tracked by name via the
  1212  	// pod worker)
  1213  	klog.V(3).InfoS("Clean up orphaned mirror pods")
  1214  	for _, podFullname := range orphanedMirrorPodFullnames {
  1215  		if !kl.podWorkers.IsPodForMirrorPodTerminatingByFullName(podFullname) {
  1216  			_, err := kl.mirrorPodClient.DeleteMirrorPod(podFullname, nil)
  1217  			if err != nil {
  1218  				klog.ErrorS(err, "Encountered error when deleting mirror pod", "podName", podFullname)
  1219  			} else {
  1220  				klog.V(3).InfoS("Deleted mirror pod", "podName", podFullname)
  1221  			}
  1222  		}
  1223  	}
  1225  	// After pruning pod workers for terminated pods get the list of active pods for
  1226  	// metrics and to determine restarts.
  1227  	activePods := kl.filterOutInactivePods(allPods)
  1228  	allRegularPods, allStaticPods := splitPodsByStatic(allPods)
  1229  	activeRegularPods, activeStaticPods := splitPodsByStatic(activePods)
  1230  	metrics.DesiredPodCount.WithLabelValues("").Set(float64(len(allRegularPods)))
  1231  	metrics.DesiredPodCount.WithLabelValues("true").Set(float64(len(allStaticPods)))
  1232  	metrics.ActivePodCount.WithLabelValues("").Set(float64(len(activeRegularPods)))
  1233  	metrics.ActivePodCount.WithLabelValues("true").Set(float64(len(activeStaticPods)))
  1234  	metrics.MirrorPodCount.Set(float64(len(mirrorPods)))
  1236  	// At this point, the pod worker is aware of which pods are not desired (SyncKnownPods).
  1237  	// We now look through the set of active pods for those that the pod worker is not aware of
  1238  	// and deliver an update. The most common reason a pod is not known is because the pod was
  1239  	// deleted and recreated with the same UID while the pod worker was driving its lifecycle (very
  1240  	// very rare for API pods, common for static pods with fixed UIDs). Containers that may still
  1241  	// be running from a previous execution must be reconciled by the pod worker's sync method.
  1242  	// We must use active pods because that is the set of admitted pods (podManager includes pods
  1243  	// that will never be run, and statusManager tracks already rejected pods).
  1244  	var restartCount, restartCountStatic int
  1245  	for _, desiredPod := range activePods {
  1246  		if _, knownPod := workingPods[desiredPod.UID]; knownPod {
  1247  			continue
  1248  		}
  1250  		klog.V(3).InfoS("Pod will be restarted because it is in the desired set and not known to the pod workers (likely due to UID reuse)", "podUID", desiredPod.UID)
  1251  		isStatic := kubetypes.IsStaticPod(desiredPod)
  1252  		pod, mirrorPod, wasMirror := kl.podManager.GetPodAndMirrorPod(desiredPod)
  1253  		if pod == nil || wasMirror {
  1254  			klog.V(2).InfoS("Programmer error, restartable pod was a mirror pod but activePods should never contain a mirror pod", "podUID", desiredPod.UID)
  1255  			continue
  1256  		}
  1257  		kl.podWorkers.UpdatePod(UpdatePodOptions{
  1258  			UpdateType: kubetypes.SyncPodCreate,
  1259  			Pod:        pod,
  1260  			MirrorPod:  mirrorPod,
  1261  		})
  1263  		// the desired pod is now known as well
  1264  		workingPods[desiredPod.UID] = PodWorkerSync{State: SyncPod, HasConfig: true, Static: isStatic}
  1265  		if isStatic {
  1266  			// restartable static pods are the normal case
  1267  			restartCountStatic++
  1268  		} else {
  1269  			// almost certainly means shenanigans, as API pods should never have the same UID after being deleted and recreated
  1270  			// unless there is a major API violation
  1271  			restartCount++
  1272  		}
  1273  	}
  1274  	metrics.RestartedPodTotal.WithLabelValues("true").Add(float64(restartCountStatic))
  1275  	metrics.RestartedPodTotal.WithLabelValues("").Add(float64(restartCount))
  1277  	// Complete termination of deleted pods that are not runtime pods (don't have
  1278  	// running containers), are terminal, and are not known to pod workers.
  1279  	// An example is pods rejected during kubelet admission that have never
  1280  	// started before (i.e. does not have an orphaned pod).
  1281  	// Adding the pods with SyncPodKill to pod workers allows to proceed with
  1282  	// force-deletion of such pods, yet preventing re-entry of the routine in the
  1283  	// next invocation of HandlePodCleanups.
  1284  	for _, pod := range kl.filterTerminalPodsToDelete(allPods, runningRuntimePods, workingPods) {
  1285  		klog.V(3).InfoS("Handling termination and deletion of the pod to pod workers", "pod", klog.KObj(pod), "podUID", pod.UID)
  1286  		kl.podWorkers.UpdatePod(UpdatePodOptions{
  1287  			UpdateType: kubetypes.SyncPodKill,
  1288  			Pod:        pod,
  1289  		})
  1290  	}
  1292  	// Finally, terminate any pods that are observed in the runtime but not present in the list of
  1293  	// known running pods from config. If we do terminate running runtime pods that will happen
  1294  	// asynchronously in the background and those will be processed in the next invocation of
  1295  	// HandlePodCleanups.
  1296  	var orphanCount int
  1297  	for _, runningPod := range runningRuntimePods {
  1298  		// If there are orphaned pod resources in CRI that are unknown to the pod worker, terminate them
  1299  		// now. Since housekeeping is exclusive to other pod worker updates, we know that no pods have
  1300  		// been added to the pod worker in the meantime. Note that pods that are not visible in the runtime
  1301  		// but which were previously known are terminated by SyncKnownPods().
  1302  		_, knownPod := workingPods[runningPod.ID]
  1303  		if !knownPod {
  1304  			one := int64(1)
  1305  			killPodOptions := &KillPodOptions{
  1306  				PodTerminationGracePeriodSecondsOverride: &one,
  1307  			}
  1308  			klog.V(2).InfoS("Clean up containers for orphaned pod we had not seen before", "podUID", runningPod.ID, "killPodOptions", killPodOptions)
  1309  			kl.podWorkers.UpdatePod(UpdatePodOptions{
  1310  				UpdateType:     kubetypes.SyncPodKill,
  1311  				RunningPod:     runningPod,
  1312  				KillPodOptions: killPodOptions,
  1313  			})
  1315  			// the running pod is now known as well
  1316  			workingPods[runningPod.ID] = PodWorkerSync{State: TerminatingPod, Orphan: true}
  1317  			orphanCount++
  1318  		}
  1319  	}
  1320  	metrics.OrphanedRuntimePodTotal.Add(float64(orphanCount))
  1322  	// Now that we have recorded any terminating pods, and added new pods that should be running,
  1323  	// record a summary here. Not all possible combinations of PodWorkerSync values are valid.
  1324  	counts := make(map[PodWorkerSync]int)
  1325  	for _, sync := range workingPods {
  1326  		counts[sync]++
  1327  	}
  1328  	for validSync, configState := range map[PodWorkerSync]string{
  1329  		{HasConfig: true, Static: true}:                "desired",
  1330  		{HasConfig: true, Static: false}:               "desired",
  1331  		{Orphan: true, HasConfig: true, Static: true}:  "orphan",
  1332  		{Orphan: true, HasConfig: true, Static: false}: "orphan",
  1333  		{Orphan: true, HasConfig: false}:               "runtime_only",
  1334  	} {
  1335  		for _, state := range []PodWorkerState{SyncPod, TerminatingPod, TerminatedPod} {
  1336  			validSync.State = state
  1337  			count := counts[validSync]
  1338  			delete(counts, validSync)
  1339  			staticString := stringIfTrue(validSync.Static)
  1340  			if !validSync.HasConfig {
  1341  				staticString = "unknown"
  1342  			}
  1343  			metrics.WorkingPodCount.WithLabelValues(state.String(), configState, staticString).Set(float64(count))
  1344  		}
  1345  	}
  1346  	if len(counts) > 0 {
  1347  		// in case a combination is lost
  1348  		klog.V(3).InfoS("Programmer error, did not report a kubelet_working_pods metric for a value returned by SyncKnownPods", "counts", counts)
  1349  	}
  1351  	// Remove any cgroups in the hierarchy for pods that are definitely no longer
  1352  	// running (not in the container runtime).
  1353  	if kl.cgroupsPerQOS {
  1354  		pcm := kl.containerManager.NewPodContainerManager()
  1355  		klog.V(3).InfoS("Clean up orphaned pod cgroups")
  1356  		kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, possiblyRunningPods)
  1357  	}
  1359  	// Cleanup any backoff entries.
  1360  	kl.backOff.GC()
  1361  	return nil
  1362  }
  1364  // filterTerminalPodsToDelete returns terminal pods which are ready to be
  1365  // deleted by the status manager, but are not in pod workers.
  1366  // First, the check for deletionTimestamp is a performance optimization as we
  1367  // don't need to do anything with terminal pods without deletionTimestamp.
  1368  // Second, the check for terminal pods is to avoid race conditions of triggering
  1369  // deletion on Pending pods which are not yet added to pod workers.
  1370  // Third, the check to skip pods known to pod workers is that the lifecycle of
  1371  // such pods is already handled by pod workers.
  1372  // Finally, we skip runtime pods as their termination is handled separately in
  1373  // the HandlePodCleanups routine.
  1374  func (kl *Kubelet) filterTerminalPodsToDelete(allPods []*v1.Pod, runningRuntimePods []*kubecontainer.Pod, workingPods map[types.UID]PodWorkerSync) map[types.UID]*v1.Pod {
  1375  	terminalPodsToDelete := make(map[types.UID]*v1.Pod)
  1376  	for _, pod := range allPods {
  1377  		if pod.DeletionTimestamp == nil {
  1378  			// skip pods which don't have a deletion timestamp
  1379  			continue
  1380  		}
  1381  		if !podutil.IsPodPhaseTerminal(pod.Status.Phase) {
  1382  			// skip the non-terminal pods
  1383  			continue
  1384  		}
  1385  		if _, knownPod := workingPods[pod.UID]; knownPod {
  1386  			// skip pods known to pod workers
  1387  			continue
  1388  		}
  1389  		terminalPodsToDelete[pod.UID] = pod
  1390  	}
  1391  	for _, runningRuntimePod := range runningRuntimePods {
  1392  		// skip running runtime pods - they are handled by a dedicated routine
  1393  		// which terminates the containers
  1394  		delete(terminalPodsToDelete, runningRuntimePod.ID)
  1395  	}
  1396  	return terminalPodsToDelete
  1397  }
  1399  // splitPodsByStatic separates a list of desired pods from the pod manager into
  1400  // regular or static pods. Mirror pods are not valid config sources (a mirror pod
  1401  // being created cannot cause the Kubelet to start running a static pod) and are
  1402  // excluded.
  1403  func splitPodsByStatic(pods []*v1.Pod) (regular, static []*v1.Pod) {
  1404  	regular, static = make([]*v1.Pod, 0, len(pods)), make([]*v1.Pod, 0, len(pods))
  1405  	for _, pod := range pods {
  1406  		if kubetypes.IsMirrorPod(pod) {
  1407  			continue
  1408  		}
  1409  		if kubetypes.IsStaticPod(pod) {
  1410  			static = append(static, pod)
  1411  		} else {
  1412  			regular = append(regular, pod)
  1413  		}
  1414  	}
  1415  	return regular, static
  1416  }
  1418  // validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state
  1419  // of the container. The previous flag will only return the logs for the last terminated container, otherwise, the current
  1420  // running container is preferred over a previous termination. If info about the container is not available then a specific
  1421  // error is returned to the end user.
  1422  func (kl *Kubelet) validateContainerLogStatus(podName string, podStatus *v1.PodStatus, containerName string, previous bool) (containerID kubecontainer.ContainerID, err error) {
  1423  	var cID string
  1425  	cStatus, found := podutil.GetContainerStatus(podStatus.ContainerStatuses, containerName)
  1426  	if !found {
  1427  		cStatus, found = podutil.GetContainerStatus(podStatus.InitContainerStatuses, containerName)
  1428  	}
  1429  	if !found {
  1430  		cStatus, found = podutil.GetContainerStatus(podStatus.EphemeralContainerStatuses, containerName)
  1431  	}
  1432  	if !found {
  1433  		return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is not available", containerName, podName)
  1434  	}
  1435  	lastState := cStatus.LastTerminationState
  1436  	waiting, running, terminated := cStatus.State.Waiting, cStatus.State.Running, cStatus.State.Terminated
  1438  	switch {
  1439  	case previous:
  1440  		if lastState.Terminated == nil || lastState.Terminated.ContainerID == "" {
  1441  			return kubecontainer.ContainerID{}, fmt.Errorf("previous terminated container %q in pod %q not found", containerName, podName)
  1442  		}
  1443  		cID = lastState.Terminated.ContainerID
  1445  	case running != nil:
  1446  		cID = cStatus.ContainerID
  1448  	case terminated != nil:
  1449  		// in cases where the next container didn't start, terminated.ContainerID will be empty, so get logs from the lastState.Terminated.
  1450  		if terminated.ContainerID == "" {
  1451  			if lastState.Terminated != nil && lastState.Terminated.ContainerID != "" {
  1452  				cID = lastState.Terminated.ContainerID
  1453  			} else {
  1454  				return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName)
  1455  			}
  1456  		} else {
  1457  			cID = terminated.ContainerID
  1458  		}
  1460  	case lastState.Terminated != nil:
  1461  		if lastState.Terminated.ContainerID == "" {
  1462  			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is terminated", containerName, podName)
  1463  		}
  1464  		cID = lastState.Terminated.ContainerID
  1466  	case waiting != nil:
  1467  		// output some info for the most common pending failures
  1468  		switch reason := waiting.Reason; reason {
  1469  		case images.ErrImagePull.Error():
  1470  			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: image can't be pulled", containerName, podName)
  1471  		case images.ErrImagePullBackOff.Error():
  1472  			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: trying and failing to pull image", containerName, podName)
  1473  		default:
  1474  			return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start: %v", containerName, podName, reason)
  1475  		}
  1476  	default:
  1477  		// unrecognized state
  1478  		return kubecontainer.ContainerID{}, fmt.Errorf("container %q in pod %q is waiting to start - no logs yet", containerName, podName)
  1479  	}
  1481  	return kubecontainer.ParseContainerID(cID), nil
  1482  }
  1484  // GetKubeletContainerLogs returns logs from the container
  1485  // TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt
  1486  // or all of them.
  1487  func (kl *Kubelet) GetKubeletContainerLogs(ctx context.Context, podFullName, containerName string, logOptions *v1.PodLogOptions, stdout, stderr io.Writer) error {
  1488  	// Pod workers periodically write status to statusManager. If status is not
  1489  	// cached there, something is wrong (or kubelet just restarted and hasn't
  1490  	// caught up yet). Just assume the pod is not ready yet.
  1491  	name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
  1492  	if err != nil {
  1493  		return fmt.Errorf("unable to parse pod full name %q: %v", podFullName, err)
  1494  	}
  1496  	pod, ok := kl.GetPodByName(namespace, name)
  1497  	if !ok {
  1498  		return fmt.Errorf("pod %q cannot be found - no logs available", name)
  1499  	}
  1501  	// TODO: this should be using the podWorker's pod store as authoritative, since
  1502  	// the mirrorPod might still exist, the pod may have been force deleted but
  1503  	// is still terminating (users should be able to view logs of force deleted static pods
  1504  	// based on full name).
  1505  	var podUID types.UID
  1506  	pod, mirrorPod, wasMirror := kl.podManager.GetPodAndMirrorPod(pod)
  1507  	if wasMirror {
  1508  		if pod == nil {
  1509  			return fmt.Errorf("mirror pod %q does not have a corresponding pod", name)
  1510  		}
  1511  		podUID = mirrorPod.UID
  1512  	} else {
  1513  		podUID = pod.UID
  1514  	}
  1516  	podStatus, found := kl.statusManager.GetPodStatus(podUID)
  1517  	if !found {
  1518  		// If there is no cached status, use the status from the
  1519  		// config source (apiserver). This is useful if kubelet
  1520  		// has recently been restarted.
  1521  		podStatus = pod.Status
  1522  	}
  1524  	// TODO: Consolidate the logic here with kuberuntime.GetContainerLogs, here we convert container name to containerID,
  1525  	// but inside kuberuntime we convert container id back to container name and restart count.
  1526  	// TODO: After separate container log lifecycle management, we should get log based on the existing log files
  1527  	// instead of container status.
  1528  	containerID, err := kl.validateContainerLogStatus(pod.Name, &podStatus, containerName, logOptions.Previous)
  1529  	if err != nil {
  1530  		return err
  1531  	}
  1533  	// Do a zero-byte write to stdout before handing off to the container runtime.
  1534  	// This ensures at least one Write call is made to the writer when copying starts,
  1535  	// even if we then block waiting for log output from the container.
  1536  	if _, err := stdout.Write([]byte{}); err != nil {
  1537  		return err
  1538  	}
  1540  	return kl.containerRuntime.GetContainerLogs(ctx, pod, containerID, logOptions, stdout, stderr)
  1541  }
  1543  // getPhase returns the phase of a pod given its container info.
  1544  func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.PodPhase {
  1545  	spec := pod.Spec
  1546  	pendingInitialization := 0
  1547  	failedInitialization := 0
  1549  	// regular init containers
  1550  	for _, container := range spec.InitContainers {
  1551  		if kubetypes.IsRestartableInitContainer(&container) {
  1552  			// Skip the restartable init containers here to handle them separately as
  1553  			// they are slightly different from the init containers in terms of the
  1554  			// pod phase.
  1555  			continue
  1556  		}
  1558  		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
  1559  		if !ok {
  1560  			pendingInitialization++
  1561  			continue
  1562  		}
  1564  		switch {
  1565  		case containerStatus.State.Running != nil:
  1566  			pendingInitialization++
  1567  		case containerStatus.State.Terminated != nil:
  1568  			if containerStatus.State.Terminated.ExitCode != 0 {
  1569  				failedInitialization++
  1570  			}
  1571  		case containerStatus.State.Waiting != nil:
  1572  			if containerStatus.LastTerminationState.Terminated != nil {
  1573  				if containerStatus.LastTerminationState.Terminated.ExitCode != 0 {
  1574  					failedInitialization++
  1575  				}
  1576  			} else {
  1577  				pendingInitialization++
  1578  			}
  1579  		default:
  1580  			pendingInitialization++
  1581  		}
  1582  	}
  1584  	// counters for restartable init and regular containers
  1585  	unknown := 0
  1586  	running := 0
  1587  	waiting := 0
  1588  	stopped := 0
  1589  	succeeded := 0
  1591  	// restartable init containers
  1592  	for _, container := range spec.InitContainers {
  1593  		if !kubetypes.IsRestartableInitContainer(&container) {
  1594  			// Skip the regular init containers, as they have been handled above.
  1595  			continue
  1596  		}
  1597  		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
  1598  		if !ok {
  1599  			unknown++
  1600  			continue
  1601  		}
  1603  		switch {
  1604  		case containerStatus.State.Running != nil:
  1605  			if containerStatus.Started == nil || !*containerStatus.Started {
  1606  				pendingInitialization++
  1607  			}
  1608  			running++
  1609  		case containerStatus.State.Terminated != nil:
  1610  			// Do nothing here, as terminated restartable init containers are not
  1611  			// taken into account for the pod phase.
  1612  		case containerStatus.State.Waiting != nil:
  1613  			if containerStatus.LastTerminationState.Terminated != nil {
  1614  				// Do nothing here, as terminated restartable init containers are not
  1615  				// taken into account for the pod phase.
  1616  			} else {
  1617  				pendingInitialization++
  1618  				waiting++
  1619  			}
  1620  		default:
  1621  			pendingInitialization++
  1622  			unknown++
  1623  		}
  1624  	}
  1626  	for _, container := range spec.Containers {
  1627  		containerStatus, ok := podutil.GetContainerStatus(info, container.Name)
  1628  		if !ok {
  1629  			unknown++
  1630  			continue
  1631  		}
  1633  		switch {
  1634  		case containerStatus.State.Running != nil:
  1635  			running++
  1636  		case containerStatus.State.Terminated != nil:
  1637  			stopped++
  1638  			if containerStatus.State.Terminated.ExitCode == 0 {
  1639  				succeeded++
  1640  			}
  1641  		case containerStatus.State.Waiting != nil:
  1642  			if containerStatus.LastTerminationState.Terminated != nil {
  1643  				stopped++
  1644  			} else {
  1645  				waiting++
  1646  			}
  1647  		default:
  1648  			unknown++
  1649  		}
  1650  	}
  1652  	if failedInitialization > 0 && spec.RestartPolicy == v1.RestartPolicyNever {
  1653  		return v1.PodFailed
  1654  	}
  1656  	switch {
  1657  	case pendingInitialization > 0 &&
  1658  		// This is needed to handle the case where the pod has been initialized but
  1659  		// the restartable init containers are restarting and the pod should not be
  1660  		// placed back into v1.PodPending since the regular containers have run.
  1661  		!kubecontainer.HasAnyRegularContainerStarted(&spec, info):
  1662  		fallthrough
  1663  	case waiting > 0:
  1664  		klog.V(5).InfoS("Pod waiting > 0, pending")
  1665  		// One or more containers has not been started
  1666  		return v1.PodPending
  1667  	case running > 0 && unknown == 0:
  1668  		// All containers have been started, and at least
  1669  		// one container is running
  1670  		return v1.PodRunning
  1671  	case running == 0 && stopped > 0 && unknown == 0:
  1672  		// The pod is terminal so its containers won't be restarted regardless
  1673  		// of the restart policy.
  1674  		if podIsTerminal {
  1675  			// TODO(#116484): Also assign terminal phase to static pods.
  1676  			if !kubetypes.IsStaticPod(pod) {
  1677  				// All regular containers are terminated in success and all restartable
  1678  				// init containers are stopped.
  1679  				if stopped == succeeded {
  1680  					return v1.PodSucceeded
  1681  				}
  1682  				// There is at least one failure
  1683  				return v1.PodFailed
  1684  			}
  1685  		}
  1686  		// All containers are terminated
  1687  		if spec.RestartPolicy == v1.RestartPolicyAlways {
  1688  			// All containers are in the process of restarting
  1689  			return v1.PodRunning
  1690  		}
  1691  		if stopped == succeeded {
  1692  			// RestartPolicy is not Always, all containers are terminated in success
  1693  			// and all restartable init containers are stopped.
  1694  			return v1.PodSucceeded
  1695  		}
  1696  		if spec.RestartPolicy == v1.RestartPolicyNever {
  1697  			// RestartPolicy is Never, and all containers are
  1698  			// terminated with at least one in failure
  1699  			return v1.PodFailed
  1700  		}
  1701  		// RestartPolicy is OnFailure, and at least one in failure
  1702  		// and in the process of restarting
  1703  		return v1.PodRunning
  1704  	default:
  1705  		klog.V(5).InfoS("Pod default case, pending")
  1706  		return v1.PodPending
  1707  	}
  1708  }
  1710  func deleteCustomResourceFromResourceRequirements(target *v1.ResourceRequirements) {
  1711  	for resource := range target.Limits {
  1712  		if resource != v1.ResourceCPU && resource != v1.ResourceMemory && resource != v1.ResourceEphemeralStorage {
  1713  			delete(target.Limits, resource)
  1714  		}
  1715  	}
  1716  	for resource := range target.Requests {
  1717  		if resource != v1.ResourceCPU && resource != v1.ResourceMemory && resource != v1.ResourceEphemeralStorage {
  1718  			delete(target.Requests, resource)
  1719  		}
  1720  	}
  1721  }
  1723  func (kl *Kubelet) determinePodResizeStatus(pod *v1.Pod, podStatus *v1.PodStatus) v1.PodResizeStatus {
  1724  	var podResizeStatus v1.PodResizeStatus
  1725  	specStatusDiffer := false
  1726  	for _, c := range pod.Spec.Containers {
  1727  		if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok {
  1728  			cResourceCopy := c.Resources.DeepCopy()
  1729  			// for both requests and limits, we only compare the cpu, memory and ephemeralstorage
  1730  			// which are included in convertToAPIContainerStatuses
  1731  			deleteCustomResourceFromResourceRequirements(cResourceCopy)
  1732  			csResourceCopy := cs.Resources.DeepCopy()
  1733  			if csResourceCopy != nil && !cmp.Equal(*cResourceCopy, *csResourceCopy) {
  1734  				specStatusDiffer = true
  1735  				break
  1736  			}
  1737  		}
  1738  	}
  1739  	if !specStatusDiffer {
  1740  		// Clear last resize state from checkpoint
  1741  		if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil {
  1742  			klog.ErrorS(err, "SetPodResizeStatus failed", "pod", pod.Name)
  1743  		}
  1744  	} else {
  1745  		if resizeStatus, found := kl.statusManager.GetPodResizeStatus(string(pod.UID)); found {
  1746  			podResizeStatus = resizeStatus
  1747  		}
  1748  	}
  1749  	return podResizeStatus
  1750  }
  1752  // generateAPIPodStatus creates the final API pod status for a pod, given the
  1753  // internal pod status. This method should only be called from within sync*Pod methods.
  1754  func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podIsTerminal bool) v1.PodStatus {
  1755  	klog.V(3).InfoS("Generating pod status", "podIsTerminal", podIsTerminal, "pod", klog.KObj(pod))
  1756  	// use the previous pod status, or the api status, as the basis for this pod
  1757  	oldPodStatus, found := kl.statusManager.GetPodStatus(pod.UID)
  1758  	if !found {
  1759  		oldPodStatus = pod.Status
  1760  	}
  1761  	s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus)
  1762  	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
  1763  		s.Resize = kl.determinePodResizeStatus(pod, s)
  1764  	}
  1765  	// calculate the next phase and preserve reason
  1766  	allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
  1767  	s.Phase = getPhase(pod, allStatus, podIsTerminal)
  1768  	klog.V(4).InfoS("Got phase for pod", "pod", klog.KObj(pod), "oldPhase", oldPodStatus.Phase, "phase", s.Phase)
  1770  	// Perform a three-way merge between the statuses from the status manager,
  1771  	// runtime, and generated status to ensure terminal status is correctly set.
  1772  	if s.Phase != v1.PodFailed && s.Phase != v1.PodSucceeded {
  1773  		switch {
  1774  		case oldPodStatus.Phase == v1.PodFailed || oldPodStatus.Phase == v1.PodSucceeded:
  1775  			klog.V(4).InfoS("Status manager phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", oldPodStatus.Phase)
  1776  			s.Phase = oldPodStatus.Phase
  1777  		case pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded:
  1778  			klog.V(4).InfoS("API phase was terminal, updating phase to match", "pod", klog.KObj(pod), "phase", pod.Status.Phase)
  1779  			s.Phase = pod.Status.Phase
  1780  		}
  1781  	}
  1783  	if s.Phase == oldPodStatus.Phase {
  1784  		// preserve the reason and message which is associated with the phase
  1785  		s.Reason = oldPodStatus.Reason
  1786  		s.Message = oldPodStatus.Message
  1787  		if len(s.Reason) == 0 {
  1788  			s.Reason = pod.Status.Reason
  1789  		}
  1790  		if len(s.Message) == 0 {
  1791  			s.Message = pod.Status.Message
  1792  		}
  1793  	}
  1795  	// check if an internal module has requested the pod is evicted and override the reason and message
  1796  	for _, podSyncHandler := range kl.PodSyncHandlers {
  1797  		if result := podSyncHandler.ShouldEvict(pod); result.Evict {
  1798  			s.Phase = v1.PodFailed
  1799  			s.Reason = result.Reason
  1800  			s.Message = result.Message
  1801  			break
  1802  		}
  1803  	}
  1805  	// pods are not allowed to transition out of terminal phases
  1806  	if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
  1807  		// API server shows terminal phase; transitions are not allowed
  1808  		if s.Phase != pod.Status.Phase {
  1809  			klog.ErrorS(nil, "Pod attempted illegal phase transition", "pod", klog.KObj(pod), "originalStatusPhase", pod.Status.Phase, "apiStatusPhase", s.Phase, "apiStatus", s)
  1810  			// Force back to phase from the API server
  1811  			s.Phase = pod.Status.Phase
  1812  		}
  1813  	}
  1815  	// ensure the probe managers have up to date status for containers
  1816  	kl.probeManager.UpdatePodStatus(pod, s)
  1818  	// preserve all conditions not owned by the kubelet
  1819  	s.Conditions = make([]v1.PodCondition, 0, len(pod.Status.Conditions)+1)
  1820  	for _, c := range pod.Status.Conditions {
  1821  		if !kubetypes.PodConditionByKubelet(c.Type) {
  1822  			s.Conditions = append(s.Conditions, c)
  1823  		}
  1824  	}
  1826  	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
  1827  		// copy over the pod disruption conditions from state which is already
  1828  		// updated during the eviciton (due to either node resource pressure or
  1829  		// node graceful shutdown). We do not re-generate the conditions based
  1830  		// on the container statuses as they are added based on one-time events.
  1831  		cType := v1.DisruptionTarget
  1832  		if _, condition := podutil.GetPodConditionFromList(oldPodStatus.Conditions, cType); condition != nil {
  1833  			s.Conditions = utilpod.ReplaceOrAppendPodCondition(s.Conditions, condition)
  1834  		}
  1835  	}
  1837  	// set all Kubelet-owned conditions
  1838  	if utilfeature.DefaultFeatureGate.Enabled(features.PodReadyToStartContainersCondition) {
  1839  		s.Conditions = append(s.Conditions, status.GeneratePodReadyToStartContainersCondition(pod, podStatus))
  1840  	}
  1841  	allContainerStatuses := append(s.InitContainerStatuses, s.ContainerStatuses...)
  1842  	s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(&pod.Spec, allContainerStatuses, s.Phase))
  1843  	s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(&pod.Spec, s.Conditions, allContainerStatuses, s.Phase))
  1844  	s.Conditions = append(s.Conditions, status.GenerateContainersReadyCondition(&pod.Spec, allContainerStatuses, s.Phase))
  1845  	s.Conditions = append(s.Conditions, v1.PodCondition{
  1846  		Type:   v1.PodScheduled,
  1847  		Status: v1.ConditionTrue,
  1848  	})
  1849  	// set HostIP/HostIPs and initialize PodIP/PodIPs for host network pods
  1850  	if kl.kubeClient != nil {
  1851  		hostIPs, err := kl.getHostIPsAnyWay()
  1852  		if err != nil {
  1853  			klog.V(4).InfoS("Cannot get host IPs", "err", err)
  1854  		} else {
  1855  			if s.HostIP != "" {
  1856  				if utilnet.IPFamilyOfString(s.HostIP) != utilnet.IPFamilyOf(hostIPs[0]) {
  1857  					kl.recorder.Eventf(pod, v1.EventTypeWarning, "HostIPsIPFamilyMismatch",
  1858  						"Kubelet detected an IPv%s node IP (%s), but the cloud provider selected an IPv%s node IP (%s); pass an explicit `--node-ip` to kubelet to fix this.",
  1859  						utilnet.IPFamilyOfString(s.HostIP), s.HostIP, utilnet.IPFamilyOf(hostIPs[0]), hostIPs[0].String())
  1860  				}
  1861  			}
  1862  			s.HostIP = hostIPs[0].String()
  1863  			if utilfeature.DefaultFeatureGate.Enabled(features.PodHostIPs) {
  1864  				s.HostIPs = []v1.HostIP{{IP: s.HostIP}}
  1865  				if len(hostIPs) == 2 {
  1866  					s.HostIPs = append(s.HostIPs, v1.HostIP{IP: hostIPs[1].String()})
  1867  				}
  1868  			}
  1870  			// HostNetwork Pods inherit the node IPs as PodIPs. They are immutable once set,
  1871  			// other than that if the node becomes dual-stack, we add the secondary IP.
  1872  			if kubecontainer.IsHostNetworkPod(pod) {
  1873  				// Primary IP is not set
  1874  				if s.PodIP == "" {
  1875  					s.PodIP = hostIPs[0].String()
  1876  					s.PodIPs = []v1.PodIP{{IP: s.PodIP}}
  1877  				}
  1878  				// Secondary IP is not set #105320
  1879  				if len(hostIPs) == 2 && len(s.PodIPs) == 1 {
  1880  					if utilnet.IPFamilyOfString(s.PodIPs[0].IP) != utilnet.IPFamilyOf(hostIPs[1]) {
  1881  						s.PodIPs = append(s.PodIPs, v1.PodIP{IP: hostIPs[1].String()})
  1882  					}
  1883  				}
  1884  			}
  1885  		}
  1886  	}
  1888  	return *s
  1889  }
  1891  // sortPodIPs return the PodIPs sorted and truncated by the cluster IP family preference.
  1892  // The runtime pod status may have an arbitrary number of IPs, in an arbitrary order.
  1893  // PodIPs are obtained by: func (m *kubeGenericRuntimeManager) determinePodSandboxIPs()
  1894  // Pick out the first returned IP of the same IP family as the node IP
  1895  // first, followed by the first IP of the opposite IP family (if any)
  1896  // and use them for the Pod.Status.PodIPs and the Downward API environment variables
  1897  func (kl *Kubelet) sortPodIPs(podIPs []string) []string {
  1898  	ips := make([]string, 0, 2)
  1899  	var validPrimaryIP, validSecondaryIP func(ip string) bool
  1900  	if len(kl.nodeIPs) == 0 || utilnet.IsIPv4(kl.nodeIPs[0]) {
  1901  		validPrimaryIP = utilnet.IsIPv4String
  1902  		validSecondaryIP = utilnet.IsIPv6String
  1903  	} else {
  1904  		validPrimaryIP = utilnet.IsIPv6String
  1905  		validSecondaryIP = utilnet.IsIPv4String
  1906  	}
  1907  	for _, ip := range podIPs {
  1908  		if validPrimaryIP(ip) {
  1909  			ips = append(ips, ip)
  1910  			break
  1911  		}
  1912  	}
  1913  	for _, ip := range podIPs {
  1914  		if validSecondaryIP(ip) {
  1915  			ips = append(ips, ip)
  1916  			break
  1917  		}
  1918  	}
  1919  	return ips
  1920  }
  1922  // convertStatusToAPIStatus initialize an api PodStatus for the given pod from
  1923  // the given internal pod status and the previous state of the pod from the API.
  1924  // It is purely transformative and does not alter the kubelet state at all.
  1925  func (kl *Kubelet) convertStatusToAPIStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus, oldPodStatus v1.PodStatus) *v1.PodStatus {
  1926  	var apiPodStatus v1.PodStatus
  1928  	// copy pod status IPs to avoid race conditions with PodStatus #102806
  1929  	podIPs := make([]string, len(podStatus.IPs))
  1930  	copy(podIPs, podStatus.IPs)
  1932  	// make podIPs order match node IP family preference #97979
  1933  	podIPs = kl.sortPodIPs(podIPs)
  1934  	for _, ip := range podIPs {
  1935  		apiPodStatus.PodIPs = append(apiPodStatus.PodIPs, v1.PodIP{IP: ip})
  1936  	}
  1937  	if len(apiPodStatus.PodIPs) > 0 {
  1938  		apiPodStatus.PodIP = apiPodStatus.PodIPs[0].IP
  1939  	}
  1941  	// set status for Pods created on versions of kube older than 1.6
  1942  	apiPodStatus.QOSClass = v1qos.GetPodQOS(pod)
  1944  	apiPodStatus.ContainerStatuses = kl.convertToAPIContainerStatuses(
  1945  		pod, podStatus,
  1946  		oldPodStatus.ContainerStatuses,
  1947  		pod.Spec.Containers,
  1948  		len(pod.Spec.InitContainers) > 0,
  1949  		false,
  1950  	)
  1951  	apiPodStatus.InitContainerStatuses = kl.convertToAPIContainerStatuses(
  1952  		pod, podStatus,
  1953  		oldPodStatus.InitContainerStatuses,
  1954  		pod.Spec.InitContainers,
  1955  		len(pod.Spec.InitContainers) > 0,
  1956  		true,
  1957  	)
  1958  	var ecSpecs []v1.Container
  1959  	for i := range pod.Spec.EphemeralContainers {
  1960  		ecSpecs = append(ecSpecs, v1.Container(pod.Spec.EphemeralContainers[i].EphemeralContainerCommon))
  1961  	}
  1963  	// #80875: By now we've iterated podStatus 3 times. We could refactor this to make a single
  1964  	// pass through podStatus.ContainerStatuses
  1965  	apiPodStatus.EphemeralContainerStatuses = kl.convertToAPIContainerStatuses(
  1966  		pod, podStatus,
  1967  		oldPodStatus.EphemeralContainerStatuses,
  1968  		ecSpecs,
  1969  		len(pod.Spec.InitContainers) > 0,
  1970  		false,
  1971  	)
  1973  	return &apiPodStatus
  1974  }
  1976  // convertToAPIContainerStatuses converts the given internal container
  1977  // statuses into API container statuses.
  1978  func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecontainer.PodStatus, previousStatus []v1.ContainerStatus, containers []v1.Container, hasInitContainers, isInitContainer bool) []v1.ContainerStatus {
  1979  	convertContainerStatus := func(cs *kubecontainer.Status, oldStatus *v1.ContainerStatus) *v1.ContainerStatus {
  1980  		cid := cs.ID.String()
  1981  		status := &v1.ContainerStatus{
  1982  			Name:         cs.Name,
  1983  			RestartCount: int32(cs.RestartCount),
  1984  			Image:        cs.Image,
  1985  			// Converting the digested image ref to the Kubernetes public
  1986  			// ContainerStatus.ImageID is historically intentional and should
  1987  			// not change.
  1988  			ImageID:     cs.ImageRef,
  1989  			ContainerID: cid,
  1990  		}
  1991  		if oldStatus != nil {
  1992  			status.VolumeMounts = oldStatus.VolumeMounts // immutable
  1993  		}
  1994  		switch {
  1995  		case cs.State == kubecontainer.ContainerStateRunning:
  1996  			status.State.Running = &v1.ContainerStateRunning{StartedAt: metav1.NewTime(cs.StartedAt)}
  1997  		case cs.State == kubecontainer.ContainerStateCreated:
  1998  			// containers that are created but not running are "waiting to be running"
  1999  			status.State.Waiting = &v1.ContainerStateWaiting{}
  2000  		case cs.State == kubecontainer.ContainerStateExited:
  2001  			status.State.Terminated = &v1.ContainerStateTerminated{
  2002  				ExitCode:    int32(cs.ExitCode),
  2003  				Reason:      cs.Reason,
  2004  				Message:     cs.Message,
  2005  				StartedAt:   metav1.NewTime(cs.StartedAt),
  2006  				FinishedAt:  metav1.NewTime(cs.FinishedAt),
  2007  				ContainerID: cid,
  2008  			}
  2010  		case cs.State == kubecontainer.ContainerStateUnknown &&
  2011  			oldStatus != nil && // we have an old status
  2012  			oldStatus.State.Running != nil: // our previous status was running
  2013  			// if this happens, then we know that this container was previously running and isn't anymore (assuming the CRI isn't failing to return running containers).
  2014  			// you can imagine this happening in cases where a container failed and the kubelet didn't ask about it in time to see the result.
  2015  			// in this case, the container should not to into waiting state immediately because that can make cases like runonce pods actually run
  2016  			// twice. "container never ran" is different than "container ran and failed".  This is handled differently in the kubelet
  2017  			// and it is handled differently in higher order logic like crashloop detection and handling
  2018  			status.State.Terminated = &v1.ContainerStateTerminated{
  2019  				Reason:   "ContainerStatusUnknown",
  2020  				Message:  "The container could not be located when the pod was terminated",
  2021  				ExitCode: 137, // this code indicates an error
  2022  			}
  2023  			// the restart count normally comes from the CRI (see near the top of this method), but since this is being added explicitly
  2024  			// for the case where the CRI did not return a status, we need to manually increment the restart count to be accurate.
  2025  			status.RestartCount = oldStatus.RestartCount + 1
  2027  		default:
  2028  			// this collapses any unknown state to container waiting.  If any container is waiting, then the pod status moves to pending even if it is running.
  2029  			// if I'm reading this correctly, then any failure to read status on any container results in the entire pod going pending even if the containers
  2030  			// are actually running.
  2031  			// see https://github.com/kubernetes/kubernetes/blob/5d1b3e26af73dde33ecb6a3e69fb5876ceab192f/pkg/kubelet/kuberuntime/kuberuntime_container.go#L497 to
  2032  			// https://github.com/kubernetes/kubernetes/blob/8976e3620f8963e72084971d9d4decbd026bf49f/pkg/kubelet/kuberuntime/helpers.go#L58-L71
  2033  			// and interpreted here https://github.com/kubernetes/kubernetes/blob/b27e78f590a0d43e4a23ca3b2bf1739ca4c6e109/pkg/kubelet/kubelet_pods.go#L1434-L1439
  2034  			status.State.Waiting = &v1.ContainerStateWaiting{}
  2035  		}
  2036  		return status
  2037  	}
  2039  	convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements {
  2040  		var requests, limits v1.ResourceList
  2041  		// oldStatus should always exist if container is running
  2042  		oldStatus, oldStatusFound := oldStatuses[cName]
  2043  		// Initialize limits/requests from container's spec upon transition to Running state
  2044  		// For cpu & memory, values queried from runtime via CRI always supercedes spec values
  2045  		// For ephemeral-storage, a running container's status.limit/request equals spec.limit/request
  2046  		determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) {
  2047  			if oldStatusFound {
  2048  				if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID {
  2049  					if r, exists := v1ContainerResource[rName]; exists {
  2050  						resource[rName] = r.DeepCopy()
  2051  					}
  2052  				} else {
  2053  					if oldStatusResource != nil {
  2054  						if r, exists := oldStatusResource[rName]; exists {
  2055  							resource[rName] = r.DeepCopy()
  2056  						}
  2057  					}
  2058  				}
  2059  			}
  2060  		}
  2061  		container := kubecontainer.GetContainerSpec(pod, cName)
  2062  		// AllocatedResources values come from checkpoint. It is the source-of-truth.
  2063  		found := false
  2064  		status.AllocatedResources, found = kl.statusManager.GetContainerResourceAllocation(string(pod.UID), cName)
  2065  		if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found {
  2066  			// Log error and fallback to AllocatedResources in oldStatus if it exists
  2067  			klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName)
  2068  			if oldStatusFound {
  2069  				status.AllocatedResources = oldStatus.AllocatedResources
  2070  			}
  2071  		}
  2072  		if oldStatus.Resources == nil {
  2073  			oldStatus.Resources = &v1.ResourceRequirements{}
  2074  		}
  2075  		// Convert Limits
  2076  		if container.Resources.Limits != nil {
  2077  			limits = make(v1.ResourceList)
  2078  			if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil {
  2079  				limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy()
  2080  			} else {
  2081  				determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits)
  2082  			}
  2083  			if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil {
  2084  				limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy()
  2085  			} else {
  2086  				determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits)
  2087  			}
  2088  			if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found {
  2089  				limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
  2090  			}
  2091  		}
  2092  		// Convert Requests
  2093  		if status.AllocatedResources != nil {
  2094  			requests = make(v1.ResourceList)
  2095  			if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
  2096  				requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
  2097  			} else {
  2098  				determineResource(v1.ResourceCPU, status.AllocatedResources, oldStatus.Resources.Requests, requests)
  2099  			}
  2100  			if memory, found := status.AllocatedResources[v1.ResourceMemory]; found {
  2101  				requests[v1.ResourceMemory] = memory.DeepCopy()
  2102  			}
  2103  			if ephemeralStorage, found := status.AllocatedResources[v1.ResourceEphemeralStorage]; found {
  2104  				requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
  2105  			}
  2106  		}
  2107  		//TODO(vinaykul,derekwaynecarr,InPlacePodVerticalScaling): Update this to include extended resources in
  2108  		// addition to CPU, memory, ephemeral storage. Add test case for extended resources.
  2109  		resources := &v1.ResourceRequirements{
  2110  			Limits:   limits,
  2111  			Requests: requests,
  2112  		}
  2113  		return resources
  2114  	}
  2116  	// Fetch old containers statuses from old pod status.
  2117  	oldStatuses := make(map[string]v1.ContainerStatus, len(containers))
  2118  	for _, status := range previousStatus {
  2119  		oldStatuses[status.Name] = status
  2120  	}
  2122  	// Set all container statuses to default waiting state
  2123  	statuses := make(map[string]*v1.ContainerStatus, len(containers))
  2124  	defaultWaitingState := v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: ContainerCreating}}
  2125  	if hasInitContainers {
  2126  		defaultWaitingState = v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: PodInitializing}}
  2127  	}
  2129  	supportsRRO := kl.runtimeClassSupportsRecursiveReadOnlyMounts(pod)
  2131  	for _, container := range containers {
  2132  		status := &v1.ContainerStatus{
  2133  			Name:  container.Name,
  2134  			Image: container.Image,
  2135  			State: defaultWaitingState,
  2136  		}
  2137  		// status.VolumeMounts cannot be propagated from kubecontainer.Status
  2138  		// because the CRI API is unaware of the volume names.
  2139  		if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
  2140  			for _, vol := range container.VolumeMounts {
  2141  				volStatus := v1.VolumeMountStatus{
  2142  					Name:      vol.Name,
  2143  					MountPath: vol.MountPath,
  2144  					ReadOnly:  vol.ReadOnly,
  2145  				}
  2146  				if vol.ReadOnly {
  2147  					rroMode := v1.RecursiveReadOnlyDisabled
  2148  					if b, err := resolveRecursiveReadOnly(vol, supportsRRO); err != nil {
  2149  						klog.ErrorS(err, "failed to resolve recursive read-only mode", "mode", *vol.RecursiveReadOnly)
  2150  					} else if b {
  2151  						if utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
  2152  							rroMode = v1.RecursiveReadOnlyEnabled
  2153  						} else {
  2154  							klog.ErrorS(nil, "recursive read-only mount needs feature gate to be enabled",
  2155  								"featureGate", features.RecursiveReadOnlyMounts)
  2156  						}
  2157  					}
  2158  					volStatus.RecursiveReadOnly = &rroMode // Disabled or Enabled
  2159  				}
  2160  				status.VolumeMounts = append(status.VolumeMounts, volStatus)
  2161  			}
  2162  		}
  2163  		oldStatus, found := oldStatuses[container.Name]
  2164  		if found {
  2165  			if oldStatus.State.Terminated != nil {
  2166  				status = &oldStatus
  2167  			} else {
  2168  				// Apply some values from the old statuses as the default values.
  2169  				status.RestartCount = oldStatus.RestartCount
  2170  				status.LastTerminationState = oldStatus.LastTerminationState
  2171  			}
  2172  		}
  2173  		statuses[container.Name] = status
  2174  	}
  2176  	for _, container := range containers {
  2177  		found := false
  2178  		for _, cStatus := range podStatus.ContainerStatuses {
  2179  			if container.Name == cStatus.Name {
  2180  				found = true
  2181  				break
  2182  			}
  2183  		}
  2184  		if found {
  2185  			continue
  2186  		}
  2187  		// if no container is found, then assuming it should be waiting seems plausible, but the status code requires
  2188  		// that a previous termination be present.  If we're offline long enough or something removed the container, then
  2189  		// the previous termination may not be present.  This next code block ensures that if the container was previously running
  2190  		// then when that container status disappears, we can infer that it terminated even if we don't know the status code.
  2191  		// By setting the lasttermination state we are able to leave the container status waiting and present more accurate
  2192  		// data via the API.
  2194  		oldStatus, ok := oldStatuses[container.Name]
  2195  		if !ok {
  2196  			continue
  2197  		}
  2198  		if oldStatus.State.Terminated != nil {
  2199  			// if the old container status was terminated, the lasttermination status is correct
  2200  			continue
  2201  		}
  2202  		if oldStatus.State.Running == nil {
  2203  			// if the old container status isn't running, then waiting is an appropriate status and we have nothing to do
  2204  			continue
  2205  		}
  2207  		// If we're here, we know the pod was previously running, but doesn't have a terminated status. We will check now to
  2208  		// see if it's in a pending state.
  2209  		status := statuses[container.Name]
  2210  		// If the status we're about to write indicates the default, the Waiting status will force this pod back into Pending.
  2211  		// That isn't true, we know the pod was previously running.
  2212  		isDefaultWaitingStatus := status.State.Waiting != nil && status.State.Waiting.Reason == ContainerCreating
  2213  		if hasInitContainers {
  2214  			isDefaultWaitingStatus = status.State.Waiting != nil && status.State.Waiting.Reason == PodInitializing
  2215  		}
  2216  		if !isDefaultWaitingStatus {
  2217  			// the status was written, don't override
  2218  			continue
  2219  		}
  2220  		if status.LastTerminationState.Terminated != nil {
  2221  			// if we already have a termination state, nothing to do
  2222  			continue
  2223  		}
  2225  		// setting this value ensures that we show as stopped here, not as waiting:
  2226  		// https://github.com/kubernetes/kubernetes/blob/90c9f7b3e198e82a756a68ffeac978a00d606e55/pkg/kubelet/kubelet_pods.go#L1440-L1445
  2227  		// This prevents the pod from becoming pending
  2228  		status.LastTerminationState.Terminated = &v1.ContainerStateTerminated{
  2229  			Reason:   "ContainerStatusUnknown",
  2230  			Message:  "The container could not be located when the pod was deleted.  The container used to be Running",
  2231  			ExitCode: 137,
  2232  		}
  2234  		// If the pod was not deleted, then it's been restarted. Increment restart count.
  2235  		if pod.DeletionTimestamp == nil {
  2236  			status.RestartCount += 1
  2237  		}
  2239  		statuses[container.Name] = status
  2240  	}
  2242  	// Copy the slice before sorting it
  2243  	containerStatusesCopy := make([]*kubecontainer.Status, len(podStatus.ContainerStatuses))
  2244  	copy(containerStatusesCopy, podStatus.ContainerStatuses)
  2246  	// Make the latest container status come first.
  2247  	sort.Sort(sort.Reverse(kubecontainer.SortContainerStatusesByCreationTime(containerStatusesCopy)))
  2248  	// Set container statuses according to the statuses seen in pod status
  2249  	containerSeen := map[string]int{}
  2250  	for _, cStatus := range containerStatusesCopy {
  2251  		cName := cStatus.Name
  2252  		if _, ok := statuses[cName]; !ok {
  2253  			// This would also ignore the infra container.
  2254  			continue
  2255  		}
  2256  		if containerSeen[cName] >= 2 {
  2257  			continue
  2258  		}
  2259  		var oldStatusPtr *v1.ContainerStatus
  2260  		if oldStatus, ok := oldStatuses[cName]; ok {
  2261  			oldStatusPtr = &oldStatus
  2262  		}
  2263  		status := convertContainerStatus(cStatus, oldStatusPtr)
  2264  		if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
  2265  			if status.State.Running != nil {
  2266  				status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses)
  2267  			}
  2268  		}
  2269  		if containerSeen[cName] == 0 {
  2270  			statuses[cName] = status
  2271  		} else {
  2272  			statuses[cName].LastTerminationState = status.State
  2273  		}
  2274  		containerSeen[cName] = containerSeen[cName] + 1
  2275  	}
  2277  	// Handle the containers failed to be started, which should be in Waiting state.
  2278  	for _, container := range containers {
  2279  		if isInitContainer {
  2280  			// If the init container is terminated with exit code 0, it won't be restarted.
  2281  			// TODO(random-liu): Handle this in a cleaner way.
  2282  			s := podStatus.FindContainerStatusByName(container.Name)
  2283  			if s != nil && s.State == kubecontainer.ContainerStateExited && s.ExitCode == 0 {
  2284  				continue
  2285  			}
  2286  		}
  2287  		// If a container should be restarted in next syncpod, it is *Waiting*.
  2288  		if !kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
  2289  			continue
  2290  		}
  2291  		status := statuses[container.Name]
  2292  		reason, ok := kl.reasonCache.Get(pod.UID, container.Name)
  2293  		if !ok {
  2294  			// In fact, we could also apply Waiting state here, but it is less informative,
  2295  			// and the container will be restarted soon, so we prefer the original state here.
  2296  			// Note that with the current implementation of ShouldContainerBeRestarted the original state here
  2297  			// could be:
  2298  			//   * Waiting: There is no associated historical container and start failure reason record.
  2299  			//   * Terminated: The container is terminated.
  2300  			continue
  2301  		}
  2302  		if status.State.Terminated != nil {
  2303  			status.LastTerminationState = status.State
  2304  		}
  2305  		status.State = v1.ContainerState{
  2306  			Waiting: &v1.ContainerStateWaiting{
  2307  				Reason:  reason.Err.Error(),
  2308  				Message: reason.Message,
  2309  			},
  2310  		}
  2311  		statuses[container.Name] = status
  2312  	}
  2314  	// Sort the container statuses since clients of this interface expect the list
  2315  	// of containers in a pod has a deterministic order.
  2316  	if isInitContainer {
  2317  		return kubetypes.SortStatusesOfInitContainers(pod, statuses)
  2318  	}
  2319  	containerStatuses := make([]v1.ContainerStatus, 0, len(statuses))
  2320  	for _, status := range statuses {
  2321  		containerStatuses = append(containerStatuses, *status)
  2322  	}
  2324  	sort.Sort(kubetypes.SortedContainerStatuses(containerStatuses))
  2325  	return containerStatuses
  2326  }
  2328  // ServeLogs returns logs of current machine.
  2329  func (kl *Kubelet) ServeLogs(w http.ResponseWriter, req *http.Request) {
  2330  	// TODO: allowlist logs we are willing to serve
  2331  	kl.logServer.ServeHTTP(w, req)
  2332  }
  2334  // findContainer finds and returns the container with the given pod ID, full name, and container name.
  2335  // It returns nil if not found.
  2336  func (kl *Kubelet) findContainer(ctx context.Context, podFullName string, podUID types.UID, containerName string) (*kubecontainer.Container, error) {
  2337  	pods, err := kl.containerRuntime.GetPods(ctx, false)
  2338  	if err != nil {
  2339  		return nil, err
  2340  	}
  2341  	// Resolve and type convert back again.
  2342  	// We need the static pod UID but the kubecontainer API works with types.UID.
  2343  	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
  2344  	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
  2345  	return pod.FindContainerByName(containerName), nil
  2346  }
  2348  // RunInContainer runs a command in a container, returns the combined stdout, stderr as an array of bytes
  2349  func (kl *Kubelet) RunInContainer(ctx context.Context, podFullName string, podUID types.UID, containerName string, cmd []string) ([]byte, error) {
  2350  	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
  2351  	if err != nil {
  2352  		return nil, err
  2353  	}
  2354  	if container == nil {
  2355  		return nil, fmt.Errorf("container not found (%q)", containerName)
  2356  	}
  2357  	// TODO(tallclair): Pass a proper timeout value.
  2358  	return kl.runner.RunInContainer(ctx, container.ID, cmd, 0)
  2359  }
  2361  // GetExec gets the URL the exec will be served from, or nil if the Kubelet will serve it.
  2362  func (kl *Kubelet) GetExec(ctx context.Context, podFullName string, podUID types.UID, containerName string, cmd []string, streamOpts remotecommandserver.Options) (*url.URL, error) {
  2363  	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
  2364  	if err != nil {
  2365  		return nil, err
  2366  	}
  2367  	if container == nil {
  2368  		return nil, fmt.Errorf("container not found (%q)", containerName)
  2369  	}
  2370  	return kl.streamingRuntime.GetExec(ctx, container.ID, cmd, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, streamOpts.TTY)
  2371  }
  2373  // GetAttach gets the URL the attach will be served from, or nil if the Kubelet will serve it.
  2374  func (kl *Kubelet) GetAttach(ctx context.Context, podFullName string, podUID types.UID, containerName string, streamOpts remotecommandserver.Options) (*url.URL, error) {
  2375  	container, err := kl.findContainer(ctx, podFullName, podUID, containerName)
  2376  	if err != nil {
  2377  		return nil, err
  2378  	}
  2379  	if container == nil {
  2380  		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
  2381  	}
  2383  	// The TTY setting for attach must match the TTY setting in the initial container configuration,
  2384  	// since whether the process is running in a TTY cannot be changed after it has started.  We
  2385  	// need the api.Pod to get the TTY status.
  2386  	pod, found := kl.GetPodByFullName(podFullName)
  2387  	if !found || (string(podUID) != "" && pod.UID != podUID) {
  2388  		return nil, fmt.Errorf("pod %s not found", podFullName)
  2389  	}
  2390  	containerSpec := kubecontainer.GetContainerSpec(pod, containerName)
  2391  	if containerSpec == nil {
  2392  		return nil, fmt.Errorf("container %s not found in pod %s", containerName, podFullName)
  2393  	}
  2394  	tty := containerSpec.TTY
  2396  	return kl.streamingRuntime.GetAttach(ctx, container.ID, streamOpts.Stdin, streamOpts.Stdout, streamOpts.Stderr, tty)
  2397  }
  2399  // GetPortForward gets the URL the port-forward will be served from, or nil if the Kubelet will serve it.
  2400  func (kl *Kubelet) GetPortForward(ctx context.Context, podName, podNamespace string, podUID types.UID, portForwardOpts portforward.V4Options) (*url.URL, error) {
  2401  	pods, err := kl.containerRuntime.GetPods(ctx, false)
  2402  	if err != nil {
  2403  		return nil, err
  2404  	}
  2405  	// Resolve and type convert back again.
  2406  	// We need the static pod UID but the kubecontainer API works with types.UID.
  2407  	podUID = types.UID(kl.podManager.TranslatePodUID(podUID))
  2408  	podFullName := kubecontainer.BuildPodFullName(podName, podNamespace)
  2409  	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
  2410  	if pod.IsEmpty() {
  2411  		return nil, fmt.Errorf("pod not found (%q)", podFullName)
  2412  	}
  2414  	return kl.streamingRuntime.GetPortForward(ctx, podName, podNamespace, podUID, portForwardOpts.Ports)
  2415  }
  2417  // cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
  2418  // it reconciles the cached state of cgroupPods with the specified list of runningPods
  2419  func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, possiblyRunningPods map[types.UID]sets.Empty) {
  2420  	// Iterate over all the found pods to verify if they should be running
  2421  	for uid, val := range cgroupPods {
  2422  		// if the pod is in the running set, its not a candidate for cleanup
  2423  		if _, ok := possiblyRunningPods[uid]; ok {
  2424  			continue
  2425  		}
  2427  		// If volumes have not been unmounted/detached, do not delete the cgroup
  2428  		// so any memory backed volumes don't have their charges propagated to the
  2429  		// parent croup.  If the volumes still exist, reduce the cpu shares for any
  2430  		// process in the cgroup to the minimum value while we wait.  if the kubelet
  2431  		// is configured to keep terminated volumes, we will delete the cgroup and not block.
  2432  		if podVolumesExist := kl.podVolumesExist(uid); podVolumesExist && !kl.keepTerminatedPodVolumes {
  2433  			klog.V(3).InfoS("Orphaned pod found, but volumes not yet removed.  Reducing cpu to minimum", "podUID", uid)
  2434  			if err := pcm.ReduceCPULimits(val); err != nil {
  2435  				klog.InfoS("Failed to reduce cpu time for pod pending volume cleanup", "podUID", uid, "err", err)
  2436  			}
  2437  			continue
  2438  		}
  2439  		klog.V(3).InfoS("Orphaned pod found, removing pod cgroups", "podUID", uid)
  2440  		// Destroy all cgroups of pod that should not be running,
  2441  		// by first killing all the attached processes to these cgroups.
  2442  		// We ignore errors thrown by the method, as the housekeeping loop would
  2443  		// again try to delete these unwanted pod cgroups
  2444  		go pcm.Destroy(val)
  2445  	}
  2446  }
  2448  func (kl *Kubelet) runtimeClassSupportsRecursiveReadOnlyMounts(pod *v1.Pod) bool {
  2449  	if kl.runtimeClassManager == nil {
  2450  		return false
  2451  	}
  2452  	runtimeHandlerName, err := kl.runtimeClassManager.LookupRuntimeHandler(pod.Spec.RuntimeClassName)
  2453  	if err != nil {
  2454  		klog.ErrorS(err, "failed to look up the runtime handler", "runtimeClassName", pod.Spec.RuntimeClassName)
  2455  		return false
  2456  	}
  2457  	runtimeHandlers := kl.runtimeState.runtimeHandlers()
  2458  	return runtimeHandlerSupportsRecursiveReadOnlyMounts(runtimeHandlerName, runtimeHandlers)
  2459  }
  2461  // runtimeHandlerSupportsRecursiveReadOnlyMounts checks whether the runtime handler supports recursive read-only mounts.
  2462  // The kubelet feature gate is not checked here.
  2463  func runtimeHandlerSupportsRecursiveReadOnlyMounts(runtimeHandlerName string, runtimeHandlers []kubecontainer.RuntimeHandler) bool {
  2464  	if len(runtimeHandlers) == 0 {
  2465  		// The runtime does not support returning the handler list.
  2466  		// No need to print a warning here.
  2467  		return false
  2468  	}
  2469  	for _, h := range runtimeHandlers {
  2470  		if h.Name == runtimeHandlerName {
  2471  			return h.SupportsRecursiveReadOnlyMounts
  2472  		}
  2473  	}
  2474  	klog.ErrorS(nil, "Unknown runtime handler", "runtimeHandlerName", runtimeHandlerName)
  2475  	return false
  2476  }
  2478  // resolveRecursiveReadOnly resolves the recursive read-only mount mode.
  2479  func resolveRecursiveReadOnly(m v1.VolumeMount, runtimeSupportsRRO bool) (bool, error) {
  2480  	if m.RecursiveReadOnly == nil || *m.RecursiveReadOnly == v1.RecursiveReadOnlyDisabled {
  2481  		return false, nil
  2482  	}
  2483  	if !m.ReadOnly {
  2484  		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not read-only", m.Name)
  2485  	}
  2486  	if m.MountPropagation != nil && *m.MountPropagation != v1.MountPropagationNone {
  2487  		return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not compatible with propagation %q",
  2488  			m.Name, *m.MountPropagation)
  2489  	}
  2490  	switch rroMode := *m.RecursiveReadOnly; rroMode {
  2491  	case v1.RecursiveReadOnlyIfPossible:
  2492  		return runtimeSupportsRRO, nil
  2493  	case v1.RecursiveReadOnlyEnabled:
  2494  		if !runtimeSupportsRRO {
  2495  			return false, fmt.Errorf("volume %q requested recursive read-only mode, but it is not supported by the runtime", m.Name)
  2496  		}
  2497  		return true, nil
  2498  	default:
  2499  		return false, fmt.Errorf("unknown recursive read-only mode %q", rroMode)
  2500  	}
  2501  }

View as plain text