...

Source file src/k8s.io/kubernetes/pkg/kubelet/volumemanager/cache/desired_state_of_world.go

Documentation: k8s.io/kubernetes/pkg/kubelet/volumemanager/cache

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Package cache implements data structures used by the kubelet volume manager to
    19  keep track of attached volumes and the pods that mounted them.
    20  */
    21  package cache
    22  
    23  import (
    24  	"fmt"
    25  	"sync"
    26  	"time"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	"k8s.io/apimachinery/pkg/util/sets"
    31  	"k8s.io/apiserver/pkg/util/feature"
    32  	"k8s.io/component-base/metrics"
    33  	"k8s.io/klog/v2"
    34  	"k8s.io/kubernetes/pkg/volume/csi"
    35  
    36  	resourcehelper "k8s.io/kubernetes/pkg/api/v1/resource"
    37  	"k8s.io/kubernetes/pkg/features"
    38  	"k8s.io/kubernetes/pkg/volume"
    39  	"k8s.io/kubernetes/pkg/volume/util"
    40  	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
    41  	"k8s.io/kubernetes/pkg/volume/util/types"
    42  )
    43  
// DesiredStateOfWorld defines a set of thread-safe operations for the kubelet
// volume manager's desired state of the world cache.
// This cache contains volumes->pods i.e. a set of all volumes that should be
// attached to this node and the pods that reference them and should mount the
// volume.
// Note: This is distinct from the DesiredStateOfWorld implemented by the
// attach/detach controller. They both keep track of different objects. This
// contains kubelet volume manager specific state.
type DesiredStateOfWorld interface {
	// AddPodToVolume adds the given pod to the given volume in the cache
	// indicating the specified pod should mount the specified volume.
	// A unique volumeName is generated from the volumeSpec and returned on
	// success.
	// If no volume plugin can support the given volumeSpec or more than one
	// plugin can support it, an error is returned.
	// If a volume with the name volumeName does not exist in the list of
	// volumes that should be attached to this node, the volume is implicitly
	// added.
	// If a pod with the same unique name already exists under the specified
	// volume, its entry is refreshed with the newly supplied values.
	AddPodToVolume(podName types.UniquePodName, pod *v1.Pod, volumeSpec *volume.Spec, outerVolumeSpecName string, volumeGidValue string, seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error)

	// MarkVolumesReportedInUse sets the ReportedInUse value to true for the
	// reportedVolumes. For volumes not in the reportedVolumes list, the
	// ReportedInUse value is reset to false. The default ReportedInUse value
	// for a newly created volume is false.
	// When set to true this value indicates that the volume was successfully
	// added to the VolumesInUse field in the node's status. Mount operation needs
	// to check this value before issuing the operation.
	// If a volume in the reportedVolumes list does not exist in the list of
	// volumes that should be attached to this node, it is skipped without error.
	MarkVolumesReportedInUse(reportedVolumes []v1.UniqueVolumeName)

	// DeletePodFromVolume removes the given pod from the given volume in the
	// cache indicating the specified pod no longer requires the specified
	// volume. Any errors stored for the pod are dropped as well.
	// If a pod with the same unique name does not exist under the specified
	// volume, the volume is left untouched.
	// If a volume with the name volumeName does not exist in the list of
	// attached volumes, no volume is modified.
	// If after deleting the pod, the specified volume contains no other child
	// pods, the volume is also deleted.
	DeletePodFromVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName)

	// VolumeExists returns true if the given volume exists in the list of
	// volumes that should be attached to this node.
	// When the SELinuxMountReadWriteOncePod feature gate is enabled, the
	// volume must additionally be tracked with an SELinux mount label equal
	// to seLinuxMountContext; otherwise false is returned.
	VolumeExists(volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// PodExistsInVolume returns true if the given pod exists in the list of
	// podsToMount for the given volume in the cache.
	// If a pod with the same unique name does not exist under the specified
	// volume, false is returned.
	// If a volume with the name volumeName does not exist in the list of
	// attached volumes, false is returned.
	// When the SELinuxMountReadWriteOncePod feature gate is enabled and the
	// volume is tracked with a different SELinux mount label than
	// seLinuxMountContext, false is returned.
	PodExistsInVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// GetVolumesToMount generates and returns a list of volumes that should be
	// attached to this node and the pods they should be mounted to based on the
	// current desired state of the world.
	GetVolumesToMount() []VolumeToMount

	// GetPods generates and returns a map keyed by the unique name of every
	// pod that is referenced by at least one volume. This map can be used to
	// determine which pods are currently in the desired state of world.
	GetPods() map[types.UniquePodName]bool

	// VolumeExistsWithSpecName returns true if the given volume specified with the
	// volume spec name (a.k.a., InnerVolumeSpecName) exists in the list of
	// volumes that should be attached to this node.
	// If a pod with the same name does not exist under the specified
	// volume, false is returned.
	VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool

	// AddErrorToPod adds the given error to the given pod in the cache.
	// It will be returned by subsequent PopPodErrors().
	// Each error string is stored only once.
	AddErrorToPod(podName types.UniquePodName, err string)

	// PopPodErrors returns accumulated errors on a given pod and clears
	// them.
	PopPodErrors(podName types.UniquePodName) []string

	// GetPodsWithErrors returns names of pods that have stored errors.
	GetPodsWithErrors() []types.UniquePodName

	// MarkVolumeAttachability updates the volume's attachability for a given volume.
	// Volumes that are not in the cache are ignored.
	MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool)

	// UpdatePersistentVolumeSize updates persistentVolumeSize in desired state of the world
	// so that it can be compared against the actual size and volume expansion
	// performed if necessary.
	UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity)
}
   139  
// VolumeToMount represents a volume that is attached to this node and needs to
// be mounted to PodName. It embeds operationexecutor.VolumeToMount, so all of
// that type's fields are promoted onto this one.
type VolumeToMount struct {
	operationexecutor.VolumeToMount
}
   145  
   146  // NewDesiredStateOfWorld returns a new instance of DesiredStateOfWorld.
   147  func NewDesiredStateOfWorld(volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator util.SELinuxLabelTranslator) DesiredStateOfWorld {
   148  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   149  		registerSELinuxMetrics()
   150  	}
   151  	return &desiredStateOfWorld{
   152  		volumesToMount:    make(map[v1.UniqueVolumeName]volumeToMount),
   153  		volumePluginMgr:   volumePluginMgr,
   154  		podErrors:         make(map[types.UniquePodName]sets.String),
   155  		seLinuxTranslator: seLinuxTranslator,
   156  	}
   157  }
   158  
// desiredStateOfWorld is the implementation of DesiredStateOfWorld returned by
// NewDesiredStateOfWorld.
type desiredStateOfWorld struct {
	// volumesToMount is a map containing the set of volumes that should be
	// attached to this node and mounted to the pods referencing it. The key in
	// the map is the name of the volume and the value is a volume object
	// containing more information about the volume.
	volumesToMount map[v1.UniqueVolumeName]volumeToMount
	// volumePluginMgr is the volume plugin manager used to create volume
	// plugin objects.
	volumePluginMgr *volume.VolumePluginMgr
	// podErrors are errors caught by desiredStateOfWorldPopulator about volumes for a given pod.
	podErrors map[types.UniquePodName]sets.String
	// seLinuxTranslator translates v1.SELinuxOptions to a file SELinux label.
	seLinuxTranslator util.SELinuxLabelTranslator

	// The embedded lock is taken by every exported method before it touches
	// volumesToMount or podErrors (read lock for pure lookups, write lock
	// otherwise).
	sync.RWMutex
}
   175  
// volumeToMount represents a volume that should be attached to this node,
// and mounted to podsToMount.
type volumeToMount struct {
	// volumeName contains the unique identifier for this volume.
	volumeName v1.UniqueVolumeName

	// podsToMount is a map containing the set of pods that reference this
	// volume and should mount it once it is attached. The key in the map is
	// the name of the pod and the value is a pod object containing more
	// information about the pod.
	podsToMount map[types.UniquePodName]podToMount

	// pluginIsAttachable indicates that the plugin for this volume implements
	// the volume.Attacher interface
	pluginIsAttachable bool

	// pluginIsDeviceMountable indicates that the plugin for this volume implements
	// the volume.DeviceMounter interface
	pluginIsDeviceMountable bool

	// volumeGidValue contains the value of the GID annotation, if present.
	volumeGidValue string

	// reportedInUse indicates that the volume was successfully added to the
	// VolumesInUse field in the node's status.
	reportedInUse bool

	// desiredSizeLimit indicates the desired upper bound on the size of the volume
	// (if so implemented)
	desiredSizeLimit *resource.Quantity

	// persistentVolumeSize records desired size of a persistent volume.
	// Usually this value reflects size recorded in pv.Spec.Capacity
	persistentVolumeSize *resource.Quantity

	// effectiveSELinuxMountFileLabel is the SELinux label that will be applied to the volume using mount options.
	// If empty, then:
	// - either the context+label is unknown (assigned randomly by the container runtime)
	// - or the volume plugin responsible for this volume does not support mounting with -o context
	// - or the volume is not ReadWriteOncePod
	// - or the OS does not support SELinux
	// In all cases, the SELinux context does not matter when mounting the volume.
	effectiveSELinuxMountFileLabel string

	// originalSELinuxLabel is the SELinux label that would be used if SELinux mount was supported for all access modes.
	// For RWOP volumes it's the same as effectiveSELinuxMountFileLabel.
	// It is used only to report potential SELinux mismatch metrics.
	// If empty, then:
	// - either the context+label is unknown (assigned randomly by the container runtime)
	// - or the volume plugin responsible for this volume does not support mounting with -o context
	// - or the OS does not support SELinux
	originalSELinuxLabel string
}
   229  
// podToMount represents a pod that references the underlying volume and
// should mount it once it is attached.
type podToMount struct {
	// podName contains the name of this pod.
	podName types.UniquePodName

	// Pod to mount the volume to. Used to create NewMounter.
	pod *v1.Pod

	// volume spec containing the specification for this volume. Used to
	// generate the volume plugin object, and passed to plugin methods.
	// For non-PVC volumes this is the same as defined in the pod object. For
	// PVC volumes it is from the dereferenced PV object.
	volumeSpec *volume.Spec

	// outerVolumeSpecName is the volume.Spec.Name() of the volume as referenced
	// directly in the pod. If the volume was referenced through a persistent
	// volume claim, this contains the volume.Spec.Name() of the persistent
	// volume claim
	outerVolumeSpecName string
	// mountRequestTime stores the time at which the mount was requested.
	mountRequestTime time.Time
}
   253  
const (
	// Maximum errors to be stored per pod in desiredStateOfWorld.podErrors to
	// prevent unbounded growth.
	maxPodErrors = 10
)
   259  
// AddPodToVolume records that the given pod should mount the volume described
// by volumeSpec and returns the unique volume name it is tracked under.
//
// The whole operation runs under the write lock. It:
//   - resolves the volume plugin for volumeSpec (error if none is found),
//   - derives the unique volume name (plugin-reported for attachable or
//     device-mountable volumes, pod-scoped otherwise),
//   - computes the expected SELinux label via getSELinuxLabel,
//   - creates the volume entry if it is not tracked yet (volume-level values
//     such as volumeGidValue are only recorded at that point),
//   - reports an SELinux label conflict when a new pod joins an existing
//     volume with a different label (this may return an error),
//   - stores or refreshes the podToMount entry for the pod.
func (dsw *desiredStateOfWorld) AddPodToVolume(
	podName types.UniquePodName,
	pod *v1.Pod,
	volumeSpec *volume.Spec,
	outerVolumeSpecName string,
	volumeGidValue string,
	seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error) {
	dsw.Lock()
	defer dsw.Unlock()

	volumePlugin, err := dsw.volumePluginMgr.FindPluginBySpec(volumeSpec)
	if err != nil || volumePlugin == nil {
		return "", fmt.Errorf(
			"failed to get Plugin from volumeSpec for volume %q err=%v",
			volumeSpec.Name(),
			err)
	}
	// Both values are only used as metric label values below.
	volumePluginName := getVolumePluginNameWithDriver(volumePlugin, volumeSpec)
	accessMode := getVolumeAccessMode(volumeSpec)

	var volumeName v1.UniqueVolumeName

	// The unique volume name used depends on whether the volume is attachable/device-mountable
	// or not.
	attachable := util.IsAttachableVolume(volumeSpec, dsw.volumePluginMgr)
	deviceMountable := util.IsDeviceMountableVolume(volumeSpec, dsw.volumePluginMgr)
	if attachable || deviceMountable {
		// For attachable/device-mountable volumes, use the unique volume name as reported by
		// the plugin.
		volumeName, err =
			util.GetUniqueVolumeNameFromSpec(volumePlugin, volumeSpec)
		if err != nil {
			return "", fmt.Errorf(
				"failed to GetUniqueVolumeNameFromSpec for volumeSpec %q using volume plugin %q err=%v",
				volumeSpec.Name(),
				volumePlugin.GetPluginName(),
				err)
		}
	} else {
		// For non-attachable and non-device-mountable volumes, generate a unique name based on the pod
		// namespace and name and the name of the volume within the pod.
		volumeName = util.GetUniqueVolumeNameFromSpecWithPod(podName, volumePlugin, volumeSpec)
	}

	seLinuxFileLabel, pluginSupportsSELinuxContextMount, err := dsw.getSELinuxLabel(volumeSpec, seLinuxContainerContexts)
	if err != nil {
		return "", err
	}
	klog.V(4).InfoS("expected volume SELinux label context", "volume", volumeSpec.Name(), "label", seLinuxFileLabel)

	if _, volumeExists := dsw.volumesToMount[volumeName]; !volumeExists {
		var sizeLimit *resource.Quantity
		if volumeSpec.Volume != nil {
			if util.IsLocalEphemeralVolume(*volumeSpec.Volume) {
				// Start from the pod's ephemeral-storage limit and tighten it
				// to the emptyDir size limit when that one is set, positive,
				// and either smaller or the pod has no limit (zero).
				podLimits := resourcehelper.PodLimits(pod, resourcehelper.PodResourcesOptions{})
				ephemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
				sizeLimit = resource.NewQuantity(ephemeralStorageLimit.Value(), resource.BinarySI)
				if volumeSpec.Volume.EmptyDir != nil &&
					volumeSpec.Volume.EmptyDir.SizeLimit != nil &&
					volumeSpec.Volume.EmptyDir.SizeLimit.Value() > 0 &&
					(sizeLimit.Value() == 0 || volumeSpec.Volume.EmptyDir.SizeLimit.Value() < sizeLimit.Value()) {
					sizeLimit = resource.NewQuantity(volumeSpec.Volume.EmptyDir.SizeLimit.Value(), resource.BinarySI)
				}
			}
		}
		effectiveSELinuxMountLabel := seLinuxFileLabel
		if !util.VolumeSupportsSELinuxMount(volumeSpec) {
			// Clear SELinux label for the volume with unsupported access modes.
			klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name())
			effectiveSELinuxMountLabel = ""
		}
		// The metric is keyed on the original (uncleared) label.
		if seLinuxFileLabel != "" {
			seLinuxVolumesAdmitted.WithLabelValues(volumePluginName, accessMode).Add(1.0)
		}
		vmt := volumeToMount{
			volumeName:                     volumeName,
			podsToMount:                    make(map[types.UniquePodName]podToMount),
			pluginIsAttachable:             attachable,
			pluginIsDeviceMountable:        deviceMountable,
			volumeGidValue:                 volumeGidValue,
			reportedInUse:                  false,
			desiredSizeLimit:               sizeLimit,
			effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel,
			originalSELinuxLabel:           seLinuxFileLabel,
		}
		// record desired size of the volume
		if volumeSpec.PersistentVolume != nil {
			pvCap := volumeSpec.PersistentVolume.Spec.Capacity.Storage()
			if pvCap != nil {
				// Store a private copy rather than a pointer into the PV object.
				pvCapCopy := pvCap.DeepCopy()
				vmt.persistentVolumeSize = &pvCapCopy
			}
		}
		dsw.volumesToMount[volumeName] = vmt
	}

	// Keep the original request time for a pod that is already tracked,
	// unless the plugin asks for a remount, in which case the time is reset.
	oldPodMount, ok := dsw.volumesToMount[volumeName].podsToMount[podName]
	mountRequestTime := time.Now()
	if ok && !volumePlugin.RequiresRemount(volumeSpec) {
		mountRequestTime = oldPodMount.mountRequestTime
	}

	if !ok {
		// The volume exists, but not with this pod.
		// It will be added below as podToMount, now just report SELinux metric.
		if pluginSupportsSELinuxContextMount {
			existingVolume := dsw.volumesToMount[volumeName]
			if seLinuxFileLabel != existingVolume.originalSELinuxLabel {
				fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), existingVolume.originalSELinuxLabel, seLinuxFileLabel)
				supported := util.VolumeSupportsSELinuxMount(volumeSpec)
				// Returns the error only when the volume supports SELinux
				// mounts; otherwise it is counted as a warning and dropped.
				err := handleSELinuxMetricError(
					fullErr,
					supported,
					seLinuxVolumeContextMismatchWarnings.WithLabelValues(volumePluginName, accessMode),
					seLinuxVolumeContextMismatchErrors.WithLabelValues(volumePluginName, accessMode))
				if err != nil {
					return "", err
				}
			}
		}
	}

	// Create new podToMount object. If it already exists, it is refreshed with
	// updated values (this is required for volumes that require remounting on
	// pod update, like Downward API volumes).
	dsw.volumesToMount[volumeName].podsToMount[podName] = podToMount{
		podName:             podName,
		pod:                 pod,
		volumeSpec:          volumeSpec,
		outerVolumeSpecName: outerVolumeSpecName,
		mountRequestTime:    mountRequestTime,
	}
	return volumeName, nil
}
   394  
   395  func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions) (string, bool, error) {
   396  	var seLinuxFileLabel string
   397  	var pluginSupportsSELinuxContextMount bool
   398  
   399  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   400  		var err error
   401  
   402  		if !dsw.seLinuxTranslator.SELinuxEnabled() {
   403  			return "", false, nil
   404  		}
   405  
   406  		pluginSupportsSELinuxContextMount, err = dsw.getSELinuxMountSupport(volumeSpec)
   407  		if err != nil {
   408  			return "", false, err
   409  		}
   410  		seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec)
   411  		if pluginSupportsSELinuxContextMount {
   412  			// Ensure that a volume that can be mounted with "-o context=XYZ" is
   413  			// used only by containers with the same SELinux contexts.
   414  			for _, containerContext := range seLinuxContainerContexts {
   415  				newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext)
   416  				if err != nil {
   417  					fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %s", containerContext, err)
   418  					accessMode := getVolumeAccessMode(volumeSpec)
   419  					err := handleSELinuxMetricError(
   420  						fullErr,
   421  						seLinuxSupported,
   422  						seLinuxContainerContextWarnings.WithLabelValues(accessMode),
   423  						seLinuxContainerContextErrors.WithLabelValues(accessMode))
   424  					if err != nil {
   425  						return "", false, err
   426  					}
   427  				}
   428  				if seLinuxFileLabel == "" {
   429  					seLinuxFileLabel = newLabel
   430  					continue
   431  				}
   432  				if seLinuxFileLabel != newLabel {
   433  					accessMode := getVolumeAccessMode(volumeSpec)
   434  
   435  					fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel)
   436  					err := handleSELinuxMetricError(
   437  						fullErr,
   438  						seLinuxSupported,
   439  						seLinuxPodContextMismatchWarnings.WithLabelValues(accessMode),
   440  						seLinuxPodContextMismatchErrors.WithLabelValues(accessMode))
   441  					if err != nil {
   442  						return "", false, err
   443  					}
   444  				}
   445  			}
   446  		} else {
   447  			// Volume plugin does not support SELinux context mount.
   448  			// DSW will track this volume with SELinux label "", i.e. no mount with
   449  			// -o context.
   450  			seLinuxFileLabel = ""
   451  		}
   452  	}
   453  	return seLinuxFileLabel, pluginSupportsSELinuxContextMount, nil
   454  }
   455  
   456  func (dsw *desiredStateOfWorld) MarkVolumesReportedInUse(
   457  	reportedVolumes []v1.UniqueVolumeName) {
   458  	dsw.Lock()
   459  	defer dsw.Unlock()
   460  
   461  	reportedVolumesMap := make(
   462  		map[v1.UniqueVolumeName]bool, len(reportedVolumes) /* capacity */)
   463  
   464  	for _, reportedVolume := range reportedVolumes {
   465  		reportedVolumesMap[reportedVolume] = true
   466  	}
   467  
   468  	for volumeName, volumeObj := range dsw.volumesToMount {
   469  		_, volumeReported := reportedVolumesMap[volumeName]
   470  		volumeObj.reportedInUse = volumeReported
   471  		dsw.volumesToMount[volumeName] = volumeObj
   472  	}
   473  }
   474  
   475  func (dsw *desiredStateOfWorld) DeletePodFromVolume(
   476  	podName types.UniquePodName, volumeName v1.UniqueVolumeName) {
   477  	dsw.Lock()
   478  	defer dsw.Unlock()
   479  
   480  	delete(dsw.podErrors, podName)
   481  
   482  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   483  	if !volumeExists {
   484  		return
   485  	}
   486  
   487  	if _, podExists := volumeObj.podsToMount[podName]; !podExists {
   488  		return
   489  	}
   490  
   491  	// Delete pod if it exists
   492  	delete(dsw.volumesToMount[volumeName].podsToMount, podName)
   493  
   494  	if len(dsw.volumesToMount[volumeName].podsToMount) == 0 {
   495  		// Delete volume if no child pods left
   496  		delete(dsw.volumesToMount, volumeName)
   497  	}
   498  }
   499  
   500  // UpdatePersistentVolumeSize updates last known PV size. This is used for volume expansion and
   501  // should be only used for persistent volumes.
   502  func (dsw *desiredStateOfWorld) UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity) {
   503  	dsw.Lock()
   504  	defer dsw.Unlock()
   505  
   506  	vol, volExists := dsw.volumesToMount[volumeName]
   507  	if volExists {
   508  		vol.persistentVolumeSize = size
   509  		dsw.volumesToMount[volumeName] = vol
   510  	}
   511  }
   512  
   513  func (dsw *desiredStateOfWorld) VolumeExists(
   514  	volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool {
   515  	dsw.RLock()
   516  	defer dsw.RUnlock()
   517  
   518  	vol, volumeExists := dsw.volumesToMount[volumeName]
   519  	if !volumeExists {
   520  		return false
   521  	}
   522  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   523  		// Handling two volumes with the same name and different SELinux context
   524  		// as two *different* volumes here. Because if a volume is mounted with
   525  		// an old SELinux context, it must be unmounted first and then mounted again
   526  		// with the new context.
   527  		//
   528  		// This will happen when a pod A with context alpha_t runs and is being
   529  		// terminated by kubelet and its volumes are being torn down, while a
   530  		// pod B with context beta_t is already scheduled on the same node,
   531  		// using the same volumes
   532  		// The volumes from Pod A must be fully unmounted (incl. UnmountDevice)
   533  		// and mounted with new SELinux mount options for pod B.
   534  		// Without SELinux, kubelet can (and often does) reuse device mounted
   535  		// for A.
   536  		return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext
   537  	}
   538  	return true
   539  }
   540  
   541  func (dsw *desiredStateOfWorld) PodExistsInVolume(
   542  	podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountOption string) bool {
   543  	dsw.RLock()
   544  	defer dsw.RUnlock()
   545  
   546  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   547  	if !volumeExists {
   548  		return false
   549  	}
   550  
   551  	if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
   552  		if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption {
   553  			// The volume is in DSW, but with a different SELinux mount option.
   554  			// Report it as unused, so the volume is unmounted and mounted back
   555  			// with the right SELinux option.
   556  			return false
   557  		}
   558  	}
   559  
   560  	_, podExists := volumeObj.podsToMount[podName]
   561  	return podExists
   562  }
   563  
   564  func (dsw *desiredStateOfWorld) VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool {
   565  	dsw.RLock()
   566  	defer dsw.RUnlock()
   567  	for _, volumeObj := range dsw.volumesToMount {
   568  		if podObj, podExists := volumeObj.podsToMount[podName]; podExists {
   569  			if podObj.volumeSpec.Name() == volumeSpecName {
   570  				return true
   571  			}
   572  		}
   573  	}
   574  	return false
   575  }
   576  
   577  func (dsw *desiredStateOfWorld) GetPods() map[types.UniquePodName]bool {
   578  	dsw.RLock()
   579  	defer dsw.RUnlock()
   580  
   581  	podList := make(map[types.UniquePodName]bool)
   582  	for _, volumeObj := range dsw.volumesToMount {
   583  		for podName := range volumeObj.podsToMount {
   584  			podList[podName] = true
   585  		}
   586  	}
   587  	return podList
   588  }
   589  
   590  func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount {
   591  	dsw.RLock()
   592  	defer dsw.RUnlock()
   593  
   594  	volumesToMount := make([]VolumeToMount, 0 /* len */, len(dsw.volumesToMount) /* cap */)
   595  	for volumeName, volumeObj := range dsw.volumesToMount {
   596  		for podName, podObj := range volumeObj.podsToMount {
   597  			vmt := VolumeToMount{
   598  				VolumeToMount: operationexecutor.VolumeToMount{
   599  					VolumeName:              volumeName,
   600  					PodName:                 podName,
   601  					Pod:                     podObj.pod,
   602  					VolumeSpec:              podObj.volumeSpec,
   603  					PluginIsAttachable:      volumeObj.pluginIsAttachable,
   604  					PluginIsDeviceMountable: volumeObj.pluginIsDeviceMountable,
   605  					OuterVolumeSpecName:     podObj.outerVolumeSpecName,
   606  					VolumeGidValue:          volumeObj.volumeGidValue,
   607  					ReportedInUse:           volumeObj.reportedInUse,
   608  					MountRequestTime:        podObj.mountRequestTime,
   609  					DesiredSizeLimit:        volumeObj.desiredSizeLimit,
   610  					SELinuxLabel:            volumeObj.effectiveSELinuxMountFileLabel,
   611  				},
   612  			}
   613  			if volumeObj.persistentVolumeSize != nil {
   614  				vmt.DesiredPersistentVolumeSize = volumeObj.persistentVolumeSize.DeepCopy()
   615  			}
   616  			volumesToMount = append(volumesToMount, vmt)
   617  		}
   618  	}
   619  	return volumesToMount
   620  }
   621  
   622  func (dsw *desiredStateOfWorld) AddErrorToPod(podName types.UniquePodName, err string) {
   623  	dsw.Lock()
   624  	defer dsw.Unlock()
   625  
   626  	if errs, found := dsw.podErrors[podName]; found {
   627  		if errs.Len() <= maxPodErrors {
   628  			errs.Insert(err)
   629  		}
   630  		return
   631  	}
   632  	dsw.podErrors[podName] = sets.NewString(err)
   633  }
   634  
   635  func (dsw *desiredStateOfWorld) PopPodErrors(podName types.UniquePodName) []string {
   636  	dsw.Lock()
   637  	defer dsw.Unlock()
   638  
   639  	if errs, found := dsw.podErrors[podName]; found {
   640  		delete(dsw.podErrors, podName)
   641  		return errs.List()
   642  	}
   643  	return []string{}
   644  }
   645  
   646  func (dsw *desiredStateOfWorld) GetPodsWithErrors() []types.UniquePodName {
   647  	dsw.RLock()
   648  	defer dsw.RUnlock()
   649  
   650  	pods := make([]types.UniquePodName, 0, len(dsw.podErrors))
   651  	for podName := range dsw.podErrors {
   652  		pods = append(pods, podName)
   653  	}
   654  	return pods
   655  }
   656  
   657  func (dsw *desiredStateOfWorld) MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool) {
   658  	dsw.Lock()
   659  	defer dsw.Unlock()
   660  	volumeObj, volumeExists := dsw.volumesToMount[volumeName]
   661  	if !volumeExists {
   662  		return
   663  	}
   664  	volumeObj.pluginIsAttachable = attachable
   665  	dsw.volumesToMount[volumeName] = volumeObj
   666  }
   667  
   668  func (dsw *desiredStateOfWorld) getSELinuxMountSupport(volumeSpec *volume.Spec) (bool, error) {
   669  	return util.SupportsSELinuxContextMount(volumeSpec, dsw.volumePluginMgr)
   670  }
   671  
   672  // Based on isRWOP, bump the right warning / error metric and either consume the error or return it.
   673  func handleSELinuxMetricError(err error, seLinuxSupported bool, warningMetric, errorMetric metrics.GaugeMetric) error {
   674  	if seLinuxSupported {
   675  		errorMetric.Add(1.0)
   676  		return err
   677  	}
   678  
   679  	// This is not an error yet, but it will be when support for other access modes is added.
   680  	warningMetric.Add(1.0)
   681  	klog.V(4).ErrorS(err, "Please report this error in https://github.com/kubernetes/enhancements/issues/1710, together with full Pod yaml file")
   682  	return nil
   683  }
   684  
   685  // Return the volume plugin name, together with the CSI driver name if it's a CSI volume.
   686  func getVolumePluginNameWithDriver(plugin volume.VolumePlugin, spec *volume.Spec) string {
   687  	pluginName := plugin.GetPluginName()
   688  	if pluginName != csi.CSIPluginName {
   689  		return pluginName
   690  	}
   691  
   692  	// It's a CSI volume
   693  	driverName, err := csi.GetCSIDriverName(spec)
   694  	if err != nil {
   695  		// In theory this is unreachable - such volume would not pass validation.
   696  		klog.V(4).ErrorS(err, "failed to get CSI driver name from volume spec")
   697  		driverName = "unknown"
   698  	}
   699  	// `/` is used to separate plugin + CSI driver in util.GetUniqueVolumeName() too
   700  	return pluginName + "/" + driverName
   701  }
   702  
   703  func getVolumeAccessMode(spec *volume.Spec) string {
   704  	if spec.PersistentVolume == nil {
   705  		// In-line volumes in pod do not have a specific access mode, using "inline".
   706  		return "inline"
   707  	}
   708  	// For purpose of this PR, report only the "highest" access mode in this order: RWX (highest priority), ROX, RWO, RWOP (lowest priority
   709  	pv := spec.PersistentVolume
   710  	if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteMany) {
   711  		return "RWX"
   712  	}
   713  	if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadOnlyMany) {
   714  		return "ROX"
   715  	}
   716  	if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOnce) {
   717  		return "RWO"
   718  	}
   719  	if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOncePod) {
   720  		return "RWOP"
   721  	}
   722  	// This should not happen, validation does not allow empty or unknown AccessModes.
   723  	return ""
   724  }
   725  

View as plain text