1
16
17
21 package cache
22
23 import (
24 "fmt"
25 "sync"
26 "time"
27
28 v1 "k8s.io/api/core/v1"
29 "k8s.io/apimachinery/pkg/api/resource"
30 "k8s.io/apimachinery/pkg/util/sets"
31 "k8s.io/apiserver/pkg/util/feature"
32 "k8s.io/component-base/metrics"
33 "k8s.io/klog/v2"
34 "k8s.io/kubernetes/pkg/volume/csi"
35
36 resourcehelper "k8s.io/kubernetes/pkg/api/v1/resource"
37 "k8s.io/kubernetes/pkg/features"
38 "k8s.io/kubernetes/pkg/volume"
39 "k8s.io/kubernetes/pkg/volume/util"
40 "k8s.io/kubernetes/pkg/volume/util/operationexecutor"
41 "k8s.io/kubernetes/pkg/volume/util/types"
42 )
43
44
45
46
47
48
49
50
51
// DesiredStateOfWorld describes the operations available on the cache of
// volumes that should be mounted and the pods that reference each of them.
// The implementation in this file guards every operation with a read/write
// mutex.
type DesiredStateOfWorld interface {
	// AddPodToVolume records that the given pod wants the volume described
	// by volumeSpec, creating the volume entry if it is not cached yet.
	// The unique volume name is derived from the spec alone for attachable
	// or device-mountable volumes, and from the pod name plus the spec
	// otherwise. It returns that unique volume name, or an error when no
	// plugin matches the spec, the unique name cannot be computed, or an
	// SELinux label problem is reported as an error.
	AddPodToVolume(podName types.UniquePodName, pod *v1.Pod, volumeSpec *volume.Spec, outerVolumeSpecName string, volumeGidValue string, seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error)

	// MarkVolumesReportedInUse sets the reported-in-use flag to true for
	// every cached volume named in reportedVolumes and to false for every
	// other cached volume. Names that are not cached are ignored.
	MarkVolumesReportedInUse(reportedVolumes []v1.UniqueVolumeName)

	// DeletePodFromVolume removes the pod from the volume's pod list and
	// discards any errors recorded for the pod. When the volume is left
	// without pods, the volume entry itself is removed. If the volume or
	// the pod is not cached, only the recorded pod errors are dropped.
	DeletePodFromVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName)

	// VolumeExists reports whether the volume is cached. When the
	// SELinuxMountReadWriteOncePod feature gate is enabled, the cached
	// volume's effective SELinux mount label must also equal
	// seLinuxMountContext.
	VolumeExists(volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// PodExistsInVolume reports whether the pod is listed under the cached
	// volume. When the SELinuxMountReadWriteOncePod feature gate is
	// enabled, the cached volume's effective SELinux mount label must also
	// equal seLinuxMountContext.
	PodExistsInVolume(podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool

	// GetVolumesToMount returns one entry for every (volume, pod) pair in
	// the cache.
	GetVolumesToMount() []VolumeToMount

	// GetPods returns the names of all pods listed under at least one
	// cached volume; every returned key maps to true.
	GetPods() map[types.UniquePodName]bool

	// VolumeExistsWithSpecName reports whether the pod is listed under any
	// cached volume with a volume spec whose name equals volumeSpecName.
	VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool

	// AddErrorToPod records an error message for the pod. Messages are
	// kept as a set and the number retained per pod is bounded (see
	// maxPodErrors).
	AddErrorToPod(podName types.UniquePodName, err string)

	// PopPodErrors returns the error messages recorded for the pod and
	// clears them. It returns an empty slice when nothing is recorded.
	PopPodErrors(podName types.UniquePodName) []string

	// GetPodsWithErrors returns the names of pods that currently have
	// recorded errors.
	GetPodsWithErrors() []types.UniquePodName

	// MarkVolumeAttachability overwrites the attachable flag of a cached
	// volume. It does nothing when the volume is not cached.
	MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool)

	// UpdatePersistentVolumeSize replaces the persistent volume size
	// recorded for a cached volume. It does nothing when the volume is not
	// cached.
	UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity)
}
139
140
141
// VolumeToMount is the element type returned by GetVolumesToMount. It embeds
// operationexecutor.VolumeToMount, so all of that type's fields are promoted.
type VolumeToMount struct {
	operationexecutor.VolumeToMount
}
145
146
147 func NewDesiredStateOfWorld(volumePluginMgr *volume.VolumePluginMgr, seLinuxTranslator util.SELinuxLabelTranslator) DesiredStateOfWorld {
148 if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
149 registerSELinuxMetrics()
150 }
151 return &desiredStateOfWorld{
152 volumesToMount: make(map[v1.UniqueVolumeName]volumeToMount),
153 volumePluginMgr: volumePluginMgr,
154 podErrors: make(map[types.UniquePodName]sets.String),
155 seLinuxTranslator: seLinuxTranslator,
156 }
157 }
158
// desiredStateOfWorld is the DesiredStateOfWorld implementation in this file.
type desiredStateOfWorld struct {
	// volumesToMount holds the cached volumes, keyed by unique volume name.
	// Each entry carries the pods that reference the volume.
	volumesToMount map[v1.UniqueVolumeName]volumeToMount

	// volumePluginMgr is used to find the plugin for a volume spec and to
	// query attachability, device-mountability and SELinux mount support.
	volumePluginMgr *volume.VolumePluginMgr

	// podErrors holds the error messages recorded per pod.
	podErrors map[types.UniquePodName]sets.String

	// seLinuxTranslator reports whether SELinux is enabled and converts
	// SELinux options into file labels.
	seLinuxTranslator util.SELinuxLabelTranslator

	// The embedded RWMutex is taken by every exported method on this type.
	sync.RWMutex
}
175
176
177
// volumeToMount is the per-volume record stored in
// desiredStateOfWorld.volumesToMount.
type volumeToMount struct {
	// volumeName is the unique name under which the volume is cached.
	volumeName v1.UniqueVolumeName

	// podsToMount lists the pods that reference this volume, keyed by pod
	// name. The volume entry is deleted when this map becomes empty in
	// DeletePodFromVolume.
	podsToMount map[types.UniquePodName]podToMount

	// pluginIsAttachable is set from util.IsAttachableVolume when the
	// volume is first added and may be overwritten by
	// MarkVolumeAttachability.
	pluginIsAttachable bool

	// pluginIsDeviceMountable is set from util.IsDeviceMountableVolume
	// when the volume is first added.
	pluginIsDeviceMountable bool

	// volumeGidValue is the GID value passed to AddPodToVolume by the call
	// that created this volume entry.
	volumeGidValue string

	// reportedInUse starts as false and is updated by
	// MarkVolumesReportedInUse.
	reportedInUse bool

	// desiredSizeLimit is computed for local ephemeral volumes from the
	// pod's ephemeral-storage limit and the EmptyDir size limit; it is nil
	// for other volumes.
	desiredSizeLimit *resource.Quantity

	// persistentVolumeSize is a copy of the PersistentVolume's storage
	// capacity taken when the volume is first added, or the value later
	// supplied to UpdatePersistentVolumeSize. It is nil when the spec has
	// no PersistentVolume.
	persistentVolumeSize *resource.Quantity

	// effectiveSELinuxMountFileLabel is the label exposed through
	// GetVolumesToMount and compared in VolumeExists and
	// PodExistsInVolume. It is empty when
	// util.VolumeSupportsSELinuxMount reports false for the volume spec.
	effectiveSELinuxMountFileLabel string

	// originalSELinuxLabel is the label computed from the container
	// SELinux contexts when the volume was first added. It is kept even
	// when the effective label is cleared, and AddPodToVolume compares it
	// against the label of each newly added pod to detect conflicts.
	originalSELinuxLabel string
}
229
230
231
// podToMount is the per-pod record stored in volumeToMount.podsToMount.
type podToMount struct {
	// podName is the unique name of the pod referencing the volume.
	podName types.UniquePodName

	// pod is the pod object passed to AddPodToVolume.
	pod *v1.Pod

	// volumeSpec is the volume spec passed to AddPodToVolume for this pod.
	// Its Name() is what VolumeExistsWithSpecName matches against.
	volumeSpec *volume.Spec

	// outerVolumeSpecName is the caller-supplied name passed to
	// AddPodToVolume; it is copied into VolumeToMount.OuterVolumeSpecName
	// by GetVolumesToMount.
	outerVolumeSpecName string

	// mountRequestTime is the time of the AddPodToVolume call. A repeated
	// call for the same pod keeps the earlier time unless the plugin
	// reports that the volume requires remounting.
	mountRequestTime time.Time
}
253
const (
	// maxPodErrors is the threshold AddErrorToPod checks before recording
	// another error message for a pod; it keeps
	// desiredStateOfWorld.podErrors from growing without bound.
	maxPodErrors = 10
)
259
260 func (dsw *desiredStateOfWorld) AddPodToVolume(
261 podName types.UniquePodName,
262 pod *v1.Pod,
263 volumeSpec *volume.Spec,
264 outerVolumeSpecName string,
265 volumeGidValue string,
266 seLinuxContainerContexts []*v1.SELinuxOptions) (v1.UniqueVolumeName, error) {
267 dsw.Lock()
268 defer dsw.Unlock()
269
270 volumePlugin, err := dsw.volumePluginMgr.FindPluginBySpec(volumeSpec)
271 if err != nil || volumePlugin == nil {
272 return "", fmt.Errorf(
273 "failed to get Plugin from volumeSpec for volume %q err=%v",
274 volumeSpec.Name(),
275 err)
276 }
277 volumePluginName := getVolumePluginNameWithDriver(volumePlugin, volumeSpec)
278 accessMode := getVolumeAccessMode(volumeSpec)
279
280 var volumeName v1.UniqueVolumeName
281
282
283
284 attachable := util.IsAttachableVolume(volumeSpec, dsw.volumePluginMgr)
285 deviceMountable := util.IsDeviceMountableVolume(volumeSpec, dsw.volumePluginMgr)
286 if attachable || deviceMountable {
287
288
289 volumeName, err =
290 util.GetUniqueVolumeNameFromSpec(volumePlugin, volumeSpec)
291 if err != nil {
292 return "", fmt.Errorf(
293 "failed to GetUniqueVolumeNameFromSpec for volumeSpec %q using volume plugin %q err=%v",
294 volumeSpec.Name(),
295 volumePlugin.GetPluginName(),
296 err)
297 }
298 } else {
299
300
301 volumeName = util.GetUniqueVolumeNameFromSpecWithPod(podName, volumePlugin, volumeSpec)
302 }
303
304 seLinuxFileLabel, pluginSupportsSELinuxContextMount, err := dsw.getSELinuxLabel(volumeSpec, seLinuxContainerContexts)
305 if err != nil {
306 return "", err
307 }
308 klog.V(4).InfoS("expected volume SELinux label context", "volume", volumeSpec.Name(), "label", seLinuxFileLabel)
309
310 if _, volumeExists := dsw.volumesToMount[volumeName]; !volumeExists {
311 var sizeLimit *resource.Quantity
312 if volumeSpec.Volume != nil {
313 if util.IsLocalEphemeralVolume(*volumeSpec.Volume) {
314 podLimits := resourcehelper.PodLimits(pod, resourcehelper.PodResourcesOptions{})
315 ephemeralStorageLimit := podLimits[v1.ResourceEphemeralStorage]
316 sizeLimit = resource.NewQuantity(ephemeralStorageLimit.Value(), resource.BinarySI)
317 if volumeSpec.Volume.EmptyDir != nil &&
318 volumeSpec.Volume.EmptyDir.SizeLimit != nil &&
319 volumeSpec.Volume.EmptyDir.SizeLimit.Value() > 0 &&
320 (sizeLimit.Value() == 0 || volumeSpec.Volume.EmptyDir.SizeLimit.Value() < sizeLimit.Value()) {
321 sizeLimit = resource.NewQuantity(volumeSpec.Volume.EmptyDir.SizeLimit.Value(), resource.BinarySI)
322 }
323 }
324 }
325 effectiveSELinuxMountLabel := seLinuxFileLabel
326 if !util.VolumeSupportsSELinuxMount(volumeSpec) {
327
328 klog.V(4).InfoS("volume does not support SELinux context mount, clearing the expected label", "volume", volumeSpec.Name())
329 effectiveSELinuxMountLabel = ""
330 }
331 if seLinuxFileLabel != "" {
332 seLinuxVolumesAdmitted.WithLabelValues(volumePluginName, accessMode).Add(1.0)
333 }
334 vmt := volumeToMount{
335 volumeName: volumeName,
336 podsToMount: make(map[types.UniquePodName]podToMount),
337 pluginIsAttachable: attachable,
338 pluginIsDeviceMountable: deviceMountable,
339 volumeGidValue: volumeGidValue,
340 reportedInUse: false,
341 desiredSizeLimit: sizeLimit,
342 effectiveSELinuxMountFileLabel: effectiveSELinuxMountLabel,
343 originalSELinuxLabel: seLinuxFileLabel,
344 }
345
346 if volumeSpec.PersistentVolume != nil {
347 pvCap := volumeSpec.PersistentVolume.Spec.Capacity.Storage()
348 if pvCap != nil {
349 pvCapCopy := pvCap.DeepCopy()
350 vmt.persistentVolumeSize = &pvCapCopy
351 }
352 }
353 dsw.volumesToMount[volumeName] = vmt
354 }
355
356 oldPodMount, ok := dsw.volumesToMount[volumeName].podsToMount[podName]
357 mountRequestTime := time.Now()
358 if ok && !volumePlugin.RequiresRemount(volumeSpec) {
359 mountRequestTime = oldPodMount.mountRequestTime
360 }
361
362 if !ok {
363
364
365 if pluginSupportsSELinuxContextMount {
366 existingVolume := dsw.volumesToMount[volumeName]
367 if seLinuxFileLabel != existingVolume.originalSELinuxLabel {
368 fullErr := fmt.Errorf("conflicting SELinux labels of volume %s: %q and %q", volumeSpec.Name(), existingVolume.originalSELinuxLabel, seLinuxFileLabel)
369 supported := util.VolumeSupportsSELinuxMount(volumeSpec)
370 err := handleSELinuxMetricError(
371 fullErr,
372 supported,
373 seLinuxVolumeContextMismatchWarnings.WithLabelValues(volumePluginName, accessMode),
374 seLinuxVolumeContextMismatchErrors.WithLabelValues(volumePluginName, accessMode))
375 if err != nil {
376 return "", err
377 }
378 }
379 }
380 }
381
382
383
384
385 dsw.volumesToMount[volumeName].podsToMount[podName] = podToMount{
386 podName: podName,
387 pod: pod,
388 volumeSpec: volumeSpec,
389 outerVolumeSpecName: outerVolumeSpecName,
390 mountRequestTime: mountRequestTime,
391 }
392 return volumeName, nil
393 }
394
395 func (dsw *desiredStateOfWorld) getSELinuxLabel(volumeSpec *volume.Spec, seLinuxContainerContexts []*v1.SELinuxOptions) (string, bool, error) {
396 var seLinuxFileLabel string
397 var pluginSupportsSELinuxContextMount bool
398
399 if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
400 var err error
401
402 if !dsw.seLinuxTranslator.SELinuxEnabled() {
403 return "", false, nil
404 }
405
406 pluginSupportsSELinuxContextMount, err = dsw.getSELinuxMountSupport(volumeSpec)
407 if err != nil {
408 return "", false, err
409 }
410 seLinuxSupported := util.VolumeSupportsSELinuxMount(volumeSpec)
411 if pluginSupportsSELinuxContextMount {
412
413
414 for _, containerContext := range seLinuxContainerContexts {
415 newLabel, err := dsw.seLinuxTranslator.SELinuxOptionsToFileLabel(containerContext)
416 if err != nil {
417 fullErr := fmt.Errorf("failed to construct SELinux label from context %q: %s", containerContext, err)
418 accessMode := getVolumeAccessMode(volumeSpec)
419 err := handleSELinuxMetricError(
420 fullErr,
421 seLinuxSupported,
422 seLinuxContainerContextWarnings.WithLabelValues(accessMode),
423 seLinuxContainerContextErrors.WithLabelValues(accessMode))
424 if err != nil {
425 return "", false, err
426 }
427 }
428 if seLinuxFileLabel == "" {
429 seLinuxFileLabel = newLabel
430 continue
431 }
432 if seLinuxFileLabel != newLabel {
433 accessMode := getVolumeAccessMode(volumeSpec)
434
435 fullErr := fmt.Errorf("volume %s is used with two different SELinux contexts in the same pod: %q, %q", volumeSpec.Name(), seLinuxFileLabel, newLabel)
436 err := handleSELinuxMetricError(
437 fullErr,
438 seLinuxSupported,
439 seLinuxPodContextMismatchWarnings.WithLabelValues(accessMode),
440 seLinuxPodContextMismatchErrors.WithLabelValues(accessMode))
441 if err != nil {
442 return "", false, err
443 }
444 }
445 }
446 } else {
447
448
449
450 seLinuxFileLabel = ""
451 }
452 }
453 return seLinuxFileLabel, pluginSupportsSELinuxContextMount, nil
454 }
455
456 func (dsw *desiredStateOfWorld) MarkVolumesReportedInUse(
457 reportedVolumes []v1.UniqueVolumeName) {
458 dsw.Lock()
459 defer dsw.Unlock()
460
461 reportedVolumesMap := make(
462 map[v1.UniqueVolumeName]bool, len(reportedVolumes) )
463
464 for _, reportedVolume := range reportedVolumes {
465 reportedVolumesMap[reportedVolume] = true
466 }
467
468 for volumeName, volumeObj := range dsw.volumesToMount {
469 _, volumeReported := reportedVolumesMap[volumeName]
470 volumeObj.reportedInUse = volumeReported
471 dsw.volumesToMount[volumeName] = volumeObj
472 }
473 }
474
475 func (dsw *desiredStateOfWorld) DeletePodFromVolume(
476 podName types.UniquePodName, volumeName v1.UniqueVolumeName) {
477 dsw.Lock()
478 defer dsw.Unlock()
479
480 delete(dsw.podErrors, podName)
481
482 volumeObj, volumeExists := dsw.volumesToMount[volumeName]
483 if !volumeExists {
484 return
485 }
486
487 if _, podExists := volumeObj.podsToMount[podName]; !podExists {
488 return
489 }
490
491
492 delete(dsw.volumesToMount[volumeName].podsToMount, podName)
493
494 if len(dsw.volumesToMount[volumeName].podsToMount) == 0 {
495
496 delete(dsw.volumesToMount, volumeName)
497 }
498 }
499
500
501
502 func (dsw *desiredStateOfWorld) UpdatePersistentVolumeSize(volumeName v1.UniqueVolumeName, size *resource.Quantity) {
503 dsw.Lock()
504 defer dsw.Unlock()
505
506 vol, volExists := dsw.volumesToMount[volumeName]
507 if volExists {
508 vol.persistentVolumeSize = size
509 dsw.volumesToMount[volumeName] = vol
510 }
511 }
512
513 func (dsw *desiredStateOfWorld) VolumeExists(
514 volumeName v1.UniqueVolumeName, seLinuxMountContext string) bool {
515 dsw.RLock()
516 defer dsw.RUnlock()
517
518 vol, volumeExists := dsw.volumesToMount[volumeName]
519 if !volumeExists {
520 return false
521 }
522 if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
523
524
525
526
527
528
529
530
531
532
533
534
535
536 return vol.effectiveSELinuxMountFileLabel == seLinuxMountContext
537 }
538 return true
539 }
540
541 func (dsw *desiredStateOfWorld) PodExistsInVolume(
542 podName types.UniquePodName, volumeName v1.UniqueVolumeName, seLinuxMountOption string) bool {
543 dsw.RLock()
544 defer dsw.RUnlock()
545
546 volumeObj, volumeExists := dsw.volumesToMount[volumeName]
547 if !volumeExists {
548 return false
549 }
550
551 if feature.DefaultFeatureGate.Enabled(features.SELinuxMountReadWriteOncePod) {
552 if volumeObj.effectiveSELinuxMountFileLabel != seLinuxMountOption {
553
554
555
556 return false
557 }
558 }
559
560 _, podExists := volumeObj.podsToMount[podName]
561 return podExists
562 }
563
564 func (dsw *desiredStateOfWorld) VolumeExistsWithSpecName(podName types.UniquePodName, volumeSpecName string) bool {
565 dsw.RLock()
566 defer dsw.RUnlock()
567 for _, volumeObj := range dsw.volumesToMount {
568 if podObj, podExists := volumeObj.podsToMount[podName]; podExists {
569 if podObj.volumeSpec.Name() == volumeSpecName {
570 return true
571 }
572 }
573 }
574 return false
575 }
576
577 func (dsw *desiredStateOfWorld) GetPods() map[types.UniquePodName]bool {
578 dsw.RLock()
579 defer dsw.RUnlock()
580
581 podList := make(map[types.UniquePodName]bool)
582 for _, volumeObj := range dsw.volumesToMount {
583 for podName := range volumeObj.podsToMount {
584 podList[podName] = true
585 }
586 }
587 return podList
588 }
589
590 func (dsw *desiredStateOfWorld) GetVolumesToMount() []VolumeToMount {
591 dsw.RLock()
592 defer dsw.RUnlock()
593
594 volumesToMount := make([]VolumeToMount, 0 , len(dsw.volumesToMount) )
595 for volumeName, volumeObj := range dsw.volumesToMount {
596 for podName, podObj := range volumeObj.podsToMount {
597 vmt := VolumeToMount{
598 VolumeToMount: operationexecutor.VolumeToMount{
599 VolumeName: volumeName,
600 PodName: podName,
601 Pod: podObj.pod,
602 VolumeSpec: podObj.volumeSpec,
603 PluginIsAttachable: volumeObj.pluginIsAttachable,
604 PluginIsDeviceMountable: volumeObj.pluginIsDeviceMountable,
605 OuterVolumeSpecName: podObj.outerVolumeSpecName,
606 VolumeGidValue: volumeObj.volumeGidValue,
607 ReportedInUse: volumeObj.reportedInUse,
608 MountRequestTime: podObj.mountRequestTime,
609 DesiredSizeLimit: volumeObj.desiredSizeLimit,
610 SELinuxLabel: volumeObj.effectiveSELinuxMountFileLabel,
611 },
612 }
613 if volumeObj.persistentVolumeSize != nil {
614 vmt.DesiredPersistentVolumeSize = volumeObj.persistentVolumeSize.DeepCopy()
615 }
616 volumesToMount = append(volumesToMount, vmt)
617 }
618 }
619 return volumesToMount
620 }
621
622 func (dsw *desiredStateOfWorld) AddErrorToPod(podName types.UniquePodName, err string) {
623 dsw.Lock()
624 defer dsw.Unlock()
625
626 if errs, found := dsw.podErrors[podName]; found {
627 if errs.Len() <= maxPodErrors {
628 errs.Insert(err)
629 }
630 return
631 }
632 dsw.podErrors[podName] = sets.NewString(err)
633 }
634
635 func (dsw *desiredStateOfWorld) PopPodErrors(podName types.UniquePodName) []string {
636 dsw.Lock()
637 defer dsw.Unlock()
638
639 if errs, found := dsw.podErrors[podName]; found {
640 delete(dsw.podErrors, podName)
641 return errs.List()
642 }
643 return []string{}
644 }
645
646 func (dsw *desiredStateOfWorld) GetPodsWithErrors() []types.UniquePodName {
647 dsw.RLock()
648 defer dsw.RUnlock()
649
650 pods := make([]types.UniquePodName, 0, len(dsw.podErrors))
651 for podName := range dsw.podErrors {
652 pods = append(pods, podName)
653 }
654 return pods
655 }
656
657 func (dsw *desiredStateOfWorld) MarkVolumeAttachability(volumeName v1.UniqueVolumeName, attachable bool) {
658 dsw.Lock()
659 defer dsw.Unlock()
660 volumeObj, volumeExists := dsw.volumesToMount[volumeName]
661 if !volumeExists {
662 return
663 }
664 volumeObj.pluginIsAttachable = attachable
665 dsw.volumesToMount[volumeName] = volumeObj
666 }
667
668 func (dsw *desiredStateOfWorld) getSELinuxMountSupport(volumeSpec *volume.Spec) (bool, error) {
669 return util.SupportsSELinuxContextMount(volumeSpec, dsw.volumePluginMgr)
670 }
671
672
673 func handleSELinuxMetricError(err error, seLinuxSupported bool, warningMetric, errorMetric metrics.GaugeMetric) error {
674 if seLinuxSupported {
675 errorMetric.Add(1.0)
676 return err
677 }
678
679
680 warningMetric.Add(1.0)
681 klog.V(4).ErrorS(err, "Please report this error in https://github.com/kubernetes/enhancements/issues/1710, together with full Pod yaml file")
682 return nil
683 }
684
685
686 func getVolumePluginNameWithDriver(plugin volume.VolumePlugin, spec *volume.Spec) string {
687 pluginName := plugin.GetPluginName()
688 if pluginName != csi.CSIPluginName {
689 return pluginName
690 }
691
692
693 driverName, err := csi.GetCSIDriverName(spec)
694 if err != nil {
695
696 klog.V(4).ErrorS(err, "failed to get CSI driver name from volume spec")
697 driverName = "unknown"
698 }
699
700 return pluginName + "/" + driverName
701 }
702
703 func getVolumeAccessMode(spec *volume.Spec) string {
704 if spec.PersistentVolume == nil {
705
706 return "inline"
707 }
708
709 pv := spec.PersistentVolume
710 if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteMany) {
711 return "RWX"
712 }
713 if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadOnlyMany) {
714 return "ROX"
715 }
716 if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOnce) {
717 return "RWO"
718 }
719 if util.ContainsAccessMode(pv.Spec.AccessModes, v1.ReadWriteOncePod) {
720 return "RWOP"
721 }
722
723 return ""
724 }
725
View as plain text