1
16
17 package noderestriction
18
19 import (
20 "context"
21 "errors"
22 "fmt"
23 "io"
24 "strings"
25
26 "github.com/google/go-cmp/cmp"
27 v1 "k8s.io/api/core/v1"
28 apiequality "k8s.io/apimachinery/pkg/api/equality"
29 apierrors "k8s.io/apimachinery/pkg/api/errors"
30 "k8s.io/apimachinery/pkg/api/meta"
31 "k8s.io/apimachinery/pkg/labels"
32 "k8s.io/apimachinery/pkg/util/sets"
33 "k8s.io/apiserver/pkg/admission"
34 apiserveradmission "k8s.io/apiserver/pkg/admission/initializer"
35 "k8s.io/client-go/informers"
36 corev1lister "k8s.io/client-go/listers/core/v1"
37 "k8s.io/component-base/featuregate"
38 kubeletapis "k8s.io/kubelet/pkg/apis"
39 podutil "k8s.io/kubernetes/pkg/api/pod"
40 authenticationapi "k8s.io/kubernetes/pkg/apis/authentication"
41 coordapi "k8s.io/kubernetes/pkg/apis/coordination"
42 api "k8s.io/kubernetes/pkg/apis/core"
43 "k8s.io/kubernetes/pkg/apis/policy"
44 "k8s.io/kubernetes/pkg/apis/resource"
45 storage "k8s.io/kubernetes/pkg/apis/storage"
46 "k8s.io/kubernetes/pkg/auth/nodeidentifier"
47 "k8s.io/kubernetes/pkg/features"
48 )
49
50
// PluginName is the name under which Register registers this admission
// plugin; it is also used as the prefix of the errors returned by
// ValidateInitialization.
const PluginName = "NodeRestriction"
52
53
54 func Register(plugins *admission.Plugins) {
55 plugins.Register(PluginName, func(config io.Reader) (admission.Interface, error) {
56 return NewPlugin(nodeidentifier.NewDefaultNodeIdentifier()), nil
57 })
58 }
59
60
61
62 func NewPlugin(nodeIdentifier nodeidentifier.NodeIdentifier) *Plugin {
63 return &Plugin{
64 Handler: admission.NewHandler(admission.Create, admission.Update, admission.Delete),
65 nodeIdentifier: nodeIdentifier,
66 }
67 }
68
69
// Plugin holds the state of the NodeRestriction admission plugin.
type Plugin struct {
	// Handler is the embedded admission handler created in NewPlugin.
	*admission.Handler
	// nodeIdentifier maps the requesting user to a node name (see Admit).
	nodeIdentifier nodeidentifier.NodeIdentifier
	// podsGetter looks up existing pods by namespace and name.
	podsGetter corev1lister.PodLister
	// nodesGetter looks up existing nodes by name.
	nodesGetter corev1lister.NodeLister

	// expansionRecoveryEnabled mirrors the RecoverVolumeExpansionFailure
	// feature gate; it widens the set of PVC status fields a node may change.
	expansionRecoveryEnabled bool
	// dynamicResourceAllocationEnabled mirrors the DynamicResourceAllocation
	// feature gate. It is set in InspectFeatureGates but is not read in the
	// visible portion of this file.
	dynamicResourceAllocationEnabled bool
}
79
80 var (
81 _ admission.Interface = &Plugin{}
82 _ apiserveradmission.WantsExternalKubeInformerFactory = &Plugin{}
83 _ apiserveradmission.WantsFeatures = &Plugin{}
84 )
85
86
87 func (p *Plugin) InspectFeatureGates(featureGates featuregate.FeatureGate) {
88 p.expansionRecoveryEnabled = featureGates.Enabled(features.RecoverVolumeExpansionFailure)
89 p.dynamicResourceAllocationEnabled = featureGates.Enabled(features.DynamicResourceAllocation)
90 }
91
92
93 func (p *Plugin) SetExternalKubeInformerFactory(f informers.SharedInformerFactory) {
94 p.podsGetter = f.Core().V1().Pods().Lister()
95 p.nodesGetter = f.Core().V1().Nodes().Lister()
96 }
97
98
99 func (p *Plugin) ValidateInitialization() error {
100 if p.nodeIdentifier == nil {
101 return fmt.Errorf("%s requires a node identifier", PluginName)
102 }
103 if p.podsGetter == nil {
104 return fmt.Errorf("%s requires a pod getter", PluginName)
105 }
106 if p.nodesGetter == nil {
107 return fmt.Errorf("%s requires a node getter", PluginName)
108 }
109 return nil
110 }
111
// Group/resource identifiers that Admit compares against the resource of the
// incoming request to decide which admit* helper handles it.
var (
	podResource           = api.Resource("pods")
	nodeResource          = api.Resource("nodes")
	pvcResource           = api.Resource("persistentvolumeclaims")
	svcacctResource       = api.Resource("serviceaccounts")
	leaseResource         = coordapi.Resource("leases")
	csiNodeResource       = storage.Resource("csinodes")
	resourceSliceResource = resource.Resource("resourceslices")
)
121
122
123 func (p *Plugin) Admit(ctx context.Context, a admission.Attributes, o admission.ObjectInterfaces) error {
124 nodeName, isNode := p.nodeIdentifier.NodeIdentity(a.GetUserInfo())
125
126
127 if !isNode {
128 return nil
129 }
130
131 if len(nodeName) == 0 {
132
133 return admission.NewForbidden(a, fmt.Errorf("could not determine node from user %q", a.GetUserInfo().GetName()))
134 }
135
136
137
138 switch a.GetResource().GroupResource() {
139 case podResource:
140 switch a.GetSubresource() {
141 case "":
142 return p.admitPod(nodeName, a)
143 case "status":
144 return p.admitPodStatus(nodeName, a)
145 case "eviction":
146 return p.admitPodEviction(nodeName, a)
147 default:
148 return admission.NewForbidden(a, fmt.Errorf("unexpected pod subresource %q, only 'status' and 'eviction' are allowed", a.GetSubresource()))
149 }
150
151 case nodeResource:
152 return p.admitNode(nodeName, a)
153
154 case pvcResource:
155 switch a.GetSubresource() {
156 case "status":
157 return p.admitPVCStatus(nodeName, a)
158 default:
159 return admission.NewForbidden(a, fmt.Errorf("may only update PVC status"))
160 }
161
162 case svcacctResource:
163 return p.admitServiceAccount(nodeName, a)
164
165 case leaseResource:
166 return p.admitLease(nodeName, a)
167
168 case csiNodeResource:
169 return p.admitCSINode(nodeName, a)
170
171 case resourceSliceResource:
172 return p.admitResourceSlice(nodeName, a)
173
174 default:
175 return nil
176 }
177 }
178
179
180
181 func (p *Plugin) admitPod(nodeName string, a admission.Attributes) error {
182 switch a.GetOperation() {
183 case admission.Create:
184 return p.admitPodCreate(nodeName, a)
185
186 case admission.Delete:
187
188 existingPod, err := p.podsGetter.Pods(a.GetNamespace()).Get(a.GetName())
189 if apierrors.IsNotFound(err) {
190 return err
191 }
192 if err != nil {
193 return admission.NewForbidden(a, err)
194 }
195
196 if existingPod.Spec.NodeName != nodeName {
197 return admission.NewForbidden(a, fmt.Errorf("node %q can only delete pods with spec.nodeName set to itself", nodeName))
198 }
199 return nil
200
201 default:
202 return admission.NewForbidden(a, fmt.Errorf("unexpected operation %q, node %q can only create and delete mirror pods", a.GetOperation(), nodeName))
203 }
204 }
205
206 func (p *Plugin) admitPodCreate(nodeName string, a admission.Attributes) error {
207
208 pod, ok := a.GetObject().(*api.Pod)
209 if !ok {
210 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
211 }
212
213
214 if _, isMirrorPod := pod.Annotations[api.MirrorPodAnnotationKey]; !isMirrorPod {
215 return admission.NewForbidden(a, fmt.Errorf("pod does not have %q annotation, node %q can only create mirror pods", api.MirrorPodAnnotationKey, nodeName))
216 }
217
218
219 if pod.Spec.NodeName != nodeName {
220 return admission.NewForbidden(a, fmt.Errorf("node %q can only create pods with spec.nodeName set to itself", nodeName))
221 }
222 if len(pod.OwnerReferences) > 1 {
223 return admission.NewForbidden(a, fmt.Errorf("node %q can only create pods with a single owner reference set to itself", nodeName))
224 }
225 if len(pod.OwnerReferences) == 0 {
226 return admission.NewForbidden(a, fmt.Errorf("node %q can only create pods with an owner reference set to itself", nodeName))
227 }
228 if len(pod.OwnerReferences) == 1 {
229 owner := pod.OwnerReferences[0]
230 if owner.APIVersion != v1.SchemeGroupVersion.String() ||
231 owner.Kind != "Node" ||
232 owner.Name != nodeName {
233 return admission.NewForbidden(a, fmt.Errorf("node %q can only create pods with an owner reference set to itself", nodeName))
234 }
235 if owner.Controller == nil || !*owner.Controller {
236 return admission.NewForbidden(a, fmt.Errorf("node %q can only create pods with a controller owner reference set to itself", nodeName))
237 }
238 if owner.BlockOwnerDeletion != nil && *owner.BlockOwnerDeletion {
239 return admission.NewForbidden(a, fmt.Errorf("node %q must not set blockOwnerDeletion on an owner reference", nodeName))
240 }
241
242
243 node, err := p.nodesGetter.Get(nodeName)
244 if apierrors.IsNotFound(err) {
245 return err
246 }
247 if err != nil {
248 return admission.NewForbidden(a, fmt.Errorf("error looking up node %s to verify uid: %v", nodeName, err))
249 }
250 if owner.UID != node.UID {
251 return admission.NewForbidden(a, fmt.Errorf("node %s UID mismatch: expected %s got %s", nodeName, owner.UID, node.UID))
252 }
253 }
254
255
256 if pod.Spec.ServiceAccountName != "" {
257 return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference a service account", nodeName))
258 }
259 hasSecrets := false
260 podutil.VisitPodSecretNames(pod, func(name string) (shouldContinue bool) { hasSecrets = true; return false }, podutil.AllContainers)
261 if hasSecrets {
262 return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference secrets", nodeName))
263 }
264 hasConfigMaps := false
265 podutil.VisitPodConfigmapNames(pod, func(name string) (shouldContinue bool) { hasConfigMaps = true; return false }, podutil.AllContainers)
266 if hasConfigMaps {
267 return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference configmaps", nodeName))
268 }
269
270 for _, vol := range pod.Spec.Volumes {
271 if vol.VolumeSource.Projected != nil {
272 for _, src := range vol.VolumeSource.Projected.Sources {
273 if src.ClusterTrustBundle != nil {
274 return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference clustertrustbundles", nodeName))
275 }
276 }
277 }
278 }
279
280 for _, v := range pod.Spec.Volumes {
281 if v.PersistentVolumeClaim != nil {
282 return admission.NewForbidden(a, fmt.Errorf("node %q can not create pods that reference persistentvolumeclaims", nodeName))
283 }
284 }
285
286 return nil
287 }
288
289
290
291 func (p *Plugin) admitPodStatus(nodeName string, a admission.Attributes) error {
292 switch a.GetOperation() {
293 case admission.Update:
294
295 oldPod, ok := a.GetOldObject().(*api.Pod)
296 if !ok {
297 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetOldObject()))
298 }
299
300 if oldPod.Spec.NodeName != nodeName {
301 return admission.NewForbidden(a, fmt.Errorf("node %q can only update pod status for pods with spec.nodeName set to itself", nodeName))
302 }
303 newPod, ok := a.GetObject().(*api.Pod)
304 if !ok {
305 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
306 }
307 if !labels.Equals(oldPod.Labels, newPod.Labels) {
308 return admission.NewForbidden(a, fmt.Errorf("node %q cannot update labels through pod status", nodeName))
309 }
310 if !resourceClaimStatusesEqual(oldPod.Status.ResourceClaimStatuses, newPod.Status.ResourceClaimStatuses) {
311 return admission.NewForbidden(a, fmt.Errorf("node %q cannot update resource claim statues", nodeName))
312 }
313 return nil
314
315 default:
316 return admission.NewForbidden(a, fmt.Errorf("unexpected operation %q", a.GetOperation()))
317 }
318 }
319
320 func resourceClaimStatusesEqual(statusA, statusB []api.PodResourceClaimStatus) bool {
321 if len(statusA) != len(statusB) {
322 return false
323 }
324
325
326
327 for i := range statusA {
328 if statusA[i].Name != statusB[i].Name {
329 return false
330 }
331 claimNameA := statusA[i].ResourceClaimName
332 claimNameB := statusB[i].ResourceClaimName
333 if (claimNameA == nil) != (claimNameB == nil) {
334 return false
335 }
336 if claimNameA != nil && *claimNameA != *claimNameB {
337 return false
338 }
339 }
340 return true
341 }
342
343
344 func (p *Plugin) admitPodEviction(nodeName string, a admission.Attributes) error {
345 switch a.GetOperation() {
346 case admission.Create:
347
348 eviction, ok := a.GetObject().(*policy.Eviction)
349 if !ok {
350 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
351 }
352
353 podName := a.GetName()
354 if len(podName) == 0 {
355 if len(eviction.Name) == 0 {
356 return admission.NewForbidden(a, fmt.Errorf("could not determine pod from request data"))
357 }
358 podName = eviction.Name
359 }
360
361 existingPod, err := p.podsGetter.Pods(a.GetNamespace()).Get(podName)
362 if apierrors.IsNotFound(err) {
363 return err
364 }
365 if err != nil {
366 return admission.NewForbidden(a, err)
367 }
368
369 if existingPod.Spec.NodeName != nodeName {
370 return admission.NewForbidden(a, fmt.Errorf("node %s can only evict pods with spec.nodeName set to itself", nodeName))
371 }
372 return nil
373
374 default:
375 return admission.NewForbidden(a, fmt.Errorf("unexpected operation %s", a.GetOperation()))
376 }
377 }
378
379 func (p *Plugin) admitPVCStatus(nodeName string, a admission.Attributes) error {
380 switch a.GetOperation() {
381 case admission.Update:
382 oldPVC, ok := a.GetOldObject().(*api.PersistentVolumeClaim)
383 if !ok {
384 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetOldObject()))
385 }
386
387 newPVC, ok := a.GetObject().(*api.PersistentVolumeClaim)
388 if !ok {
389 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
390 }
391
392
393 oldPVC = oldPVC.DeepCopy()
394 newPVC = newPVC.DeepCopy()
395
396
397
398 oldPVC.ObjectMeta.ResourceVersion = ""
399 newPVC.ObjectMeta.ResourceVersion = ""
400
401 oldPVC.Status.Capacity = nil
402 newPVC.Status.Capacity = nil
403
404 oldPVC.Status.Conditions = nil
405 newPVC.Status.Conditions = nil
406
407 if p.expansionRecoveryEnabled {
408 oldPVC.Status.AllocatedResourceStatuses = nil
409 newPVC.Status.AllocatedResourceStatuses = nil
410
411 oldPVC.Status.AllocatedResources = nil
412 newPVC.Status.AllocatedResources = nil
413 }
414
415
416
417
418 oldPVC.ObjectMeta.ManagedFields = nil
419 newPVC.ObjectMeta.ManagedFields = nil
420
421
422 if !apiequality.Semantic.DeepEqual(oldPVC, newPVC) {
423 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to update fields other than status.quantity and status.conditions: %v", nodeName, cmp.Diff(oldPVC, newPVC)))
424 }
425
426 return nil
427
428 default:
429 return admission.NewForbidden(a, fmt.Errorf("unexpected operation %q", a.GetOperation()))
430 }
431 }
432
433 func (p *Plugin) admitNode(nodeName string, a admission.Attributes) error {
434 requestedName := a.GetName()
435
436 if requestedName != nodeName {
437 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to modify node %q", nodeName, requestedName))
438 }
439
440 if a.GetOperation() == admission.Create {
441 node, ok := a.GetObject().(*api.Node)
442 if !ok {
443 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
444 }
445
446
447
448 if node.Spec.ConfigSource != nil {
449 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to create pods with a non-nil configSource", nodeName))
450 }
451
452
453
454 modifiedLabels := getModifiedLabels(node.Labels, nil)
455 if forbiddenLabels := p.getForbiddenLabels(modifiedLabels); len(forbiddenLabels) > 0 {
456 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to set the following labels: %s", nodeName, strings.Join(forbiddenLabels.List(), ", ")))
457 }
458 }
459
460 if a.GetOperation() == admission.Update {
461 node, ok := a.GetObject().(*api.Node)
462 if !ok {
463 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
464 }
465 oldNode, ok := a.GetOldObject().(*api.Node)
466 if !ok {
467 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
468 }
469
470
471
472
473 if node.Spec.ConfigSource != nil && !apiequality.Semantic.DeepEqual(node.Spec.ConfigSource, oldNode.Spec.ConfigSource) {
474 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to update configSource to a new non-nil configSource", nodeName))
475 }
476
477
478
479 if !apiequality.Semantic.DeepEqual(node.Spec.Taints, oldNode.Spec.Taints) {
480 return admission.NewForbidden(a, fmt.Errorf("node %q is not allowed to modify taints", nodeName))
481 }
482
483
484
485 modifiedLabels := getModifiedLabels(node.Labels, oldNode.Labels)
486 if forbiddenUpdateLabels := p.getForbiddenLabels(modifiedLabels); len(forbiddenUpdateLabels) > 0 {
487 return admission.NewForbidden(a, fmt.Errorf("is not allowed to modify labels: %s", strings.Join(forbiddenUpdateLabels.List(), ", ")))
488 }
489 }
490
491 return nil
492 }
493
494
495 func getModifiedLabels(a, b map[string]string) sets.String {
496 modified := sets.NewString()
497 for k, v1 := range a {
498 if v2, ok := b[k]; !ok || v1 != v2 {
499 modified.Insert(k)
500 }
501 }
502 for k, v1 := range b {
503 if v2, ok := a[k]; !ok || v1 != v2 {
504 modified.Insert(k)
505 }
506 }
507 return modified
508 }
509
510 func isKubernetesLabel(key string) bool {
511 namespace := getLabelNamespace(key)
512 if namespace == "kubernetes.io" || strings.HasSuffix(namespace, ".kubernetes.io") {
513 return true
514 }
515 if namespace == "k8s.io" || strings.HasSuffix(namespace, ".k8s.io") {
516 return true
517 }
518 return false
519 }
520
// getLabelNamespace returns the part of key before its first "/". It returns
// the empty string when key contains no "/".
func getLabelNamespace(key string) string {
	prefix, _, hasSlash := strings.Cut(key, "/")
	if !hasSlash {
		return ""
	}
	return prefix
}
527
528
529 func (p *Plugin) getForbiddenLabels(modifiedLabels sets.String) sets.String {
530 if len(modifiedLabels) == 0 {
531 return nil
532 }
533
534 forbiddenLabels := sets.NewString()
535 for label := range modifiedLabels {
536 namespace := getLabelNamespace(label)
537
538 if namespace == v1.LabelNamespaceNodeRestriction || strings.HasSuffix(namespace, "."+v1.LabelNamespaceNodeRestriction) {
539 forbiddenLabels.Insert(label)
540 }
541
542 if isKubernetesLabel(label) && !kubeletapis.IsKubeletLabel(label) {
543
544 forbiddenLabels.Insert(label)
545 }
546 }
547 return forbiddenLabels
548 }
549
550 func (p *Plugin) admitServiceAccount(nodeName string, a admission.Attributes) error {
551 if a.GetOperation() != admission.Create {
552 return nil
553 }
554 if a.GetSubresource() != "token" {
555 return nil
556 }
557 tr, ok := a.GetObject().(*authenticationapi.TokenRequest)
558 if !ok {
559 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
560 }
561
562
563
564 ref := tr.Spec.BoundObjectRef
565 if ref == nil ||
566 ref.APIVersion != "v1" ||
567 ref.Kind != "Pod" ||
568 ref.Name == "" {
569 return admission.NewForbidden(a, fmt.Errorf("node requested token not bound to a pod"))
570 }
571 if ref.UID == "" {
572 return admission.NewForbidden(a, fmt.Errorf("node requested token with a pod binding without a uid"))
573 }
574 pod, err := p.podsGetter.Pods(a.GetNamespace()).Get(ref.Name)
575 if apierrors.IsNotFound(err) {
576 return err
577 }
578 if err != nil {
579 return admission.NewForbidden(a, err)
580 }
581 if ref.UID != pod.UID {
582 return admission.NewForbidden(a, fmt.Errorf("the UID in the bound object reference (%s) does not match the UID in record. The object might have been deleted and then recreated", ref.UID))
583 }
584 if pod.Spec.NodeName != nodeName {
585 return admission.NewForbidden(a, fmt.Errorf("node requested token bound to a pod scheduled on a different node"))
586 }
587
588
589
590
591
592
593
594 return nil
595 }
596
597 func (p *Plugin) admitLease(nodeName string, a admission.Attributes) error {
598
599 if a.GetNamespace() != api.NamespaceNodeLease {
600 return admission.NewForbidden(a, fmt.Errorf("can only access leases in the %q system namespace", api.NamespaceNodeLease))
601 }
602
603
604 if a.GetOperation() == admission.Create {
605
606 lease, ok := a.GetObject().(*coordapi.Lease)
607 if !ok {
608 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
609 }
610 if lease.Name != nodeName {
611 return admission.NewForbidden(a, fmt.Errorf("can only access node lease with the same name as the requesting node"))
612 }
613 } else {
614 if a.GetName() != nodeName {
615 return admission.NewForbidden(a, fmt.Errorf("can only access node lease with the same name as the requesting node"))
616 }
617 }
618
619 return nil
620 }
621
622 func (p *Plugin) admitCSINode(nodeName string, a admission.Attributes) error {
623
624 if a.GetOperation() == admission.Create {
625
626 accessor, err := meta.Accessor(a.GetObject())
627 if err != nil {
628 return admission.NewForbidden(a, fmt.Errorf("unable to access the object name"))
629 }
630 if accessor.GetName() != nodeName {
631 return admission.NewForbidden(a, fmt.Errorf("can only access CSINode with the same name as the requesting node"))
632 }
633 } else {
634 if a.GetName() != nodeName {
635 return admission.NewForbidden(a, fmt.Errorf("can only access CSINode with the same name as the requesting node"))
636 }
637 }
638
639 return nil
640 }
641
642 func (p *Plugin) admitResourceSlice(nodeName string, a admission.Attributes) error {
643
644
645 if a.GetOperation() == admission.Create {
646 slice, ok := a.GetObject().(*resource.ResourceSlice)
647 if !ok {
648 return admission.NewForbidden(a, fmt.Errorf("unexpected type %T", a.GetObject()))
649 }
650
651 if slice.NodeName != nodeName {
652 return admission.NewForbidden(a, errors.New("can only create ResourceSlice with the same NodeName as the requesting node"))
653 }
654 }
655
656 return nil
657 }
658
View as plain text