...

Source file src/k8s.io/kubernetes/pkg/volume/util/subpath/subpath_linux.go

Documentation: k8s.io/kubernetes/pkg/volume/util/subpath

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2014 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package subpath
    21  
import (
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"
	"k8s.io/klog/v2"
	"k8s.io/mount-utils"
)
    35  
    36  const (
    37  	// place for subpath mounts
    38  	// TODO: pass in directory using kubelet_getters instead
    39  	containerSubPathDirectoryName = "volume-subpaths"
    40  	// syscall.Openat flags used to traverse directories not following symlinks
    41  	nofollowFlags = unix.O_RDONLY | unix.O_NOFOLLOW
    42  	// flags for getting file descriptor without following the symlink
    43  	openFDFlags = unix.O_NOFOLLOW | unix.O_PATH
    44  )
    45  
    46  type subpath struct {
    47  	mounter mount.Interface
    48  }
    49  
    50  // New returns a subpath.Interface for the current system
    51  func New(mounter mount.Interface) Interface {
    52  	return &subpath{
    53  		mounter: mounter,
    54  	}
    55  }
    56  
    57  func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error {
    58  	return doCleanSubPaths(sp.mounter, podDir, volumeName)
    59  }
    60  
    61  func (sp *subpath) SafeMakeDir(subdir string, base string, perm os.FileMode) error {
    62  	realBase, err := filepath.EvalSymlinks(base)
    63  	if err != nil {
    64  		return fmt.Errorf("error resolving symlinks in %s: %s", base, err)
    65  	}
    66  
    67  	realFullPath := filepath.Join(realBase, subdir)
    68  
    69  	return doSafeMakeDir(realFullPath, realBase, perm)
    70  }
    71  
    72  func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
    73  	newHostPath, err = doBindSubPath(sp.mounter, subPath)
    74  
    75  	// There is no action when the container starts. Bind-mount will be cleaned
    76  	// when container stops by CleanSubPaths.
    77  	cleanupAction = nil
    78  	return newHostPath, cleanupAction, err
    79  }
    80  
    81  // This implementation is shared between Linux and NsEnter
    82  func safeOpenSubPath(mounter mount.Interface, subpath Subpath) (int, error) {
    83  	if !mount.PathWithinBase(subpath.Path, subpath.VolumePath) {
    84  		return -1, fmt.Errorf("subpath %q not within volume path %q", subpath.Path, subpath.VolumePath)
    85  	}
    86  	fd, err := doSafeOpen(subpath.Path, subpath.VolumePath)
    87  	if err != nil {
    88  		return -1, fmt.Errorf("error opening subpath %v: %v", subpath.Path, err)
    89  	}
    90  	return fd, nil
    91  }
    92  
    93  // prepareSubpathTarget creates target for bind-mount of subpath. It returns
    94  // "true" when the target already exists and something is mounted there.
    95  // Given Subpath must have all paths with already resolved symlinks and with
    96  // paths relevant to kubelet (when it runs in a container).
    97  // This function is called also by NsEnterMounter. It works because
    98  // /var/lib/kubelet is mounted from the host into the container with Kubelet as
    99  // /var/lib/kubelet too.
   100  func prepareSubpathTarget(mounter mount.Interface, subpath Subpath) (bool, string, error) {
   101  	// Early check for already bind-mounted subpath.
   102  	bindPathTarget := getSubpathBindTarget(subpath)
   103  	notMount, err := mount.IsNotMountPoint(mounter, bindPathTarget)
   104  	if err != nil {
   105  		if !os.IsNotExist(err) {
   106  			return false, "", fmt.Errorf("error checking path %s for mount: %s", bindPathTarget, err)
   107  		}
   108  		// Ignore ErrorNotExist: the file/directory will be created below if it does not exist yet.
   109  		notMount = true
   110  	}
   111  	if !notMount {
   112  		// It's already mounted, so check if it's bind-mounted to the same path
   113  		samePath, err := checkSubPathFileEqual(subpath, bindPathTarget)
   114  		if err != nil {
   115  			return false, "", fmt.Errorf("error checking subpath mount info for %s: %s", bindPathTarget, err)
   116  		}
   117  		if !samePath {
   118  			// It's already mounted but not what we want, unmount it
   119  			if err = mounter.Unmount(bindPathTarget); err != nil {
   120  				return false, "", fmt.Errorf("error ummounting %s: %s", bindPathTarget, err)
   121  			}
   122  		} else {
   123  			// It's already mounted
   124  			klog.V(5).Infof("Skipping bind-mounting subpath %s: already mounted", bindPathTarget)
   125  			return true, bindPathTarget, nil
   126  		}
   127  	}
   128  
   129  	// bindPathTarget is in /var/lib/kubelet and thus reachable without any
   130  	// translation even to containerized kubelet.
   131  	bindParent := filepath.Dir(bindPathTarget)
   132  	err = os.MkdirAll(bindParent, 0750)
   133  	if err != nil && !os.IsExist(err) {
   134  		return false, "", fmt.Errorf("error creating directory %s: %s", bindParent, err)
   135  	}
   136  
   137  	t, err := os.Lstat(subpath.Path)
   138  	if err != nil {
   139  		return false, "", fmt.Errorf("lstat %s failed: %s", subpath.Path, err)
   140  	}
   141  
   142  	if t.Mode()&os.ModeDir > 0 {
   143  		if err = os.Mkdir(bindPathTarget, 0750); err != nil && !os.IsExist(err) {
   144  			return false, "", fmt.Errorf("error creating directory %s: %s", bindPathTarget, err)
   145  		}
   146  	} else {
   147  		// "/bin/touch <bindPathTarget>".
   148  		// A file is enough for all possible targets (symlink, device, pipe,
   149  		// socket, ...), bind-mounting them into a file correctly changes type
   150  		// of the target file.
   151  		if err = ioutil.WriteFile(bindPathTarget, []byte{}, 0640); err != nil {
   152  			return false, "", fmt.Errorf("error creating file %s: %s", bindPathTarget, err)
   153  		}
   154  	}
   155  	return false, bindPathTarget, nil
   156  }
   157  
   158  func checkSubPathFileEqual(subpath Subpath, bindMountTarget string) (bool, error) {
   159  	s, err := os.Lstat(subpath.Path)
   160  	if err != nil {
   161  		return false, fmt.Errorf("stat %s failed: %s", subpath.Path, err)
   162  	}
   163  
   164  	t, err := os.Lstat(bindMountTarget)
   165  	if err != nil {
   166  		return false, fmt.Errorf("lstat %s failed: %s", bindMountTarget, err)
   167  	}
   168  
   169  	if !os.SameFile(s, t) {
   170  		return false, nil
   171  	}
   172  	return true, nil
   173  }
   174  
   175  func getSubpathBindTarget(subpath Subpath) string {
   176  	// containerName is DNS label, i.e. safe as a directory name.
   177  	return filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName, strconv.Itoa(subpath.VolumeMountIndex))
   178  }
   179  
   180  func doBindSubPath(mounter mount.Interface, subpath Subpath) (hostPath string, err error) {
   181  	// Linux, kubelet runs on the host:
   182  	// - safely open the subpath
   183  	// - bind-mount /proc/<pid of kubelet>/fd/<fd> to subpath target
   184  	// User can't change /proc/<pid of kubelet>/fd/<fd> to point to a bad place.
   185  
   186  	// Evaluate all symlinks here once for all subsequent functions.
   187  	newVolumePath, err := filepath.EvalSymlinks(subpath.VolumePath)
   188  	if err != nil {
   189  		return "", fmt.Errorf("error resolving symlinks in %q: %v", subpath.VolumePath, err)
   190  	}
   191  	newPath, err := filepath.EvalSymlinks(subpath.Path)
   192  	if err != nil {
   193  		return "", fmt.Errorf("error resolving symlinks in %q: %v", subpath.Path, err)
   194  	}
   195  	klog.V(5).Infof("doBindSubPath %q (%q) for volumepath %q", subpath.Path, newPath, subpath.VolumePath)
   196  	subpath.VolumePath = newVolumePath
   197  	subpath.Path = newPath
   198  
   199  	fd, err := safeOpenSubPath(mounter, subpath)
   200  	if err != nil {
   201  		return "", err
   202  	}
   203  	defer syscall.Close(fd)
   204  
   205  	alreadyMounted, bindPathTarget, err := prepareSubpathTarget(mounter, subpath)
   206  	if err != nil {
   207  		return "", err
   208  	}
   209  	if alreadyMounted {
   210  		return bindPathTarget, nil
   211  	}
   212  
   213  	success := false
   214  	defer func() {
   215  		// Cleanup subpath on error
   216  		if !success {
   217  			klog.V(4).Infof("doBindSubPath() failed for %q, cleaning up subpath", bindPathTarget)
   218  			if cleanErr := cleanSubPath(mounter, subpath); cleanErr != nil {
   219  				klog.Errorf("Failed to clean subpath %q: %v", bindPathTarget, cleanErr)
   220  			}
   221  		}
   222  	}()
   223  
   224  	kubeletPid := os.Getpid()
   225  	mountSource := fmt.Sprintf("/proc/%d/fd/%v", kubeletPid, fd)
   226  
   227  	// Do the bind mount
   228  	options := []string{"bind"}
   229  	mountFlags := []string{"--no-canonicalize"}
   230  	klog.V(5).Infof("bind mounting %q at %q", mountSource, bindPathTarget)
   231  	if err = mounter.MountSensitiveWithoutSystemdWithMountFlags(mountSource, bindPathTarget, "" /*fstype*/, options, nil /* sensitiveOptions */, mountFlags); err != nil {
   232  		return "", fmt.Errorf("error mounting %s: %s", subpath.Path, err)
   233  	}
   234  	success = true
   235  
   236  	klog.V(3).Infof("Bound SubPath %s into %s", subpath.Path, bindPathTarget)
   237  	return bindPathTarget, nil
   238  }
   239  
   240  // This implementation is shared between Linux and NsEnter
   241  func doCleanSubPaths(mounter mount.Interface, podDir string, volumeName string) error {
   242  	// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/*
   243  	subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName)
   244  	klog.V(4).Infof("Cleaning up subpath mounts for %s", subPathDir)
   245  
   246  	containerDirs, err := ioutil.ReadDir(subPathDir)
   247  	if err != nil {
   248  		if os.IsNotExist(err) {
   249  			return nil
   250  		}
   251  		return fmt.Errorf("error reading %s: %s", subPathDir, err)
   252  	}
   253  
   254  	for _, containerDir := range containerDirs {
   255  		if !containerDir.IsDir() {
   256  			klog.V(4).Infof("Container file is not a directory: %s", containerDir.Name())
   257  			continue
   258  		}
   259  		klog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name())
   260  
   261  		// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/*
   262  		fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name())
   263  		// The original traversal method here was ReadDir, which was not so robust to handle some error such as "stale NFS file handle",
   264  		// so it was replaced with filepath.Walk in a later patch, which can pass through error and handled by the callback WalkFunc.
   265  		// After go 1.16, WalkDir was introduced, it's more effective than Walk because the callback WalkDirFunc is called before
   266  		// reading a directory, making it save some time when a container's subPath contains lots of dirs.
   267  		// See https://github.com/kubernetes/kubernetes/pull/71804 and https://github.com/kubernetes/kubernetes/issues/107667 for more details.
   268  		err = filepath.WalkDir(fullContainerDirPath, func(path string, info os.DirEntry, _ error) error {
   269  			if path == fullContainerDirPath {
   270  				// Skip top level directory
   271  				return nil
   272  			}
   273  
   274  			// pass through errors and let doCleanSubPath handle them
   275  			if err = doCleanSubPath(mounter, fullContainerDirPath, filepath.Base(path)); err != nil {
   276  				return err
   277  			}
   278  
   279  			// We need to check that info is not nil. This may happen when the incoming err is not nil due to stale mounts or permission errors.
   280  			if info != nil && info.IsDir() {
   281  				// skip subdirs of the volume: it only matters the first level to unmount, otherwise it would try to unmount subdir of the volume
   282  				return filepath.SkipDir
   283  			}
   284  
   285  			return nil
   286  		})
   287  		if err != nil {
   288  			return fmt.Errorf("error processing %s: %s", fullContainerDirPath, err)
   289  		}
   290  
   291  		// Whole container has been processed, remove its directory.
   292  		if err := os.Remove(fullContainerDirPath); err != nil {
   293  			return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err)
   294  		}
   295  		klog.V(5).Infof("Removed %s", fullContainerDirPath)
   296  	}
   297  	// Whole pod volume subpaths have been cleaned up, remove its subpath directory.
   298  	if err := os.Remove(subPathDir); err != nil {
   299  		return fmt.Errorf("error deleting %s: %s", subPathDir, err)
   300  	}
   301  	klog.V(5).Infof("Removed %s", subPathDir)
   302  
   303  	// Remove entire subpath directory if it's the last one
   304  	podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName)
   305  	if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) {
   306  		return fmt.Errorf("error deleting %s: %s", podSubPathDir, err)
   307  	}
   308  	klog.V(5).Infof("Removed %s", podSubPathDir)
   309  	return nil
   310  }
   311  
   312  // doCleanSubPath tears down the single subpath bind mount
   313  func doCleanSubPath(mounter mount.Interface, fullContainerDirPath, subPathIndex string) error {
   314  	// process /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/<subPathName>
   315  	klog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex)
   316  	fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex)
   317  
   318  	if err := mount.CleanupMountPoint(fullSubPath, mounter, true); err != nil {
   319  		return fmt.Errorf("error cleaning subpath mount %s: %s", fullSubPath, err)
   320  	}
   321  
   322  	klog.V(4).Infof("Successfully cleaned subpath directory %s", fullSubPath)
   323  	return nil
   324  }
   325  
   326  // cleanSubPath will teardown the subpath bind mount and any remove any directories if empty
   327  func cleanSubPath(mounter mount.Interface, subpath Subpath) error {
   328  	containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName)
   329  
   330  	// Clean subdir bindmount
   331  	if err := doCleanSubPath(mounter, containerDir, strconv.Itoa(subpath.VolumeMountIndex)); err != nil && !os.IsNotExist(err) {
   332  		return err
   333  	}
   334  
   335  	// Recusively remove directories if empty
   336  	if err := removeEmptyDirs(subpath.PodDir, containerDir); err != nil {
   337  		return err
   338  	}
   339  
   340  	return nil
   341  }
   342  
   343  // removeEmptyDirs works backwards from endDir to baseDir and removes each directory
   344  // if it is empty.  It stops once it encounters a directory that has content
   345  func removeEmptyDirs(baseDir, endDir string) error {
   346  	if !mount.PathWithinBase(endDir, baseDir) {
   347  		return fmt.Errorf("endDir %q is not within baseDir %q", endDir, baseDir)
   348  	}
   349  
   350  	for curDir := endDir; curDir != baseDir; curDir = filepath.Dir(curDir) {
   351  		s, err := os.Stat(curDir)
   352  		if err != nil {
   353  			if os.IsNotExist(err) {
   354  				klog.V(5).Infof("curDir %q doesn't exist, skipping", curDir)
   355  				continue
   356  			}
   357  			return fmt.Errorf("error stat %q: %v", curDir, err)
   358  		}
   359  		if !s.IsDir() {
   360  			return fmt.Errorf("path %q not a directory", curDir)
   361  		}
   362  
   363  		err = os.Remove(curDir)
   364  		if os.IsExist(err) {
   365  			klog.V(5).Infof("Directory %q not empty, not removing", curDir)
   366  			break
   367  		} else if err != nil {
   368  			return fmt.Errorf("error removing directory %q: %v", curDir, err)
   369  		}
   370  		klog.V(5).Infof("Removed directory %q", curDir)
   371  	}
   372  	return nil
   373  }
   374  
   375  // This implementation is shared between Linux and NsEnterMounter. Both pathname
   376  // and base must be either already resolved symlinks or thet will be resolved in
   377  // kubelet's mount namespace (in case it runs containerized).
   378  func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
   379  	klog.V(4).Infof("Creating directory %q within base %q", pathname, base)
   380  
   381  	if !mount.PathWithinBase(pathname, base) {
   382  		return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
   383  	}
   384  
   385  	// Quick check if the directory already exists
   386  	s, err := os.Stat(pathname)
   387  	if err == nil {
   388  		// Path exists
   389  		if s.IsDir() {
   390  			// The directory already exists. It can be outside of the parent,
   391  			// but there is no race-proof check.
   392  			klog.V(4).Infof("Directory %s already exists", pathname)
   393  			return nil
   394  		}
   395  		return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
   396  	}
   397  
   398  	// Find all existing directories
   399  	existingPath, toCreate, err := findExistingPrefix(base, pathname)
   400  	if err != nil {
   401  		return fmt.Errorf("error opening directory %s: %s", pathname, err)
   402  	}
   403  	// Ensure the existing directory is inside allowed base
   404  	fullExistingPath, err := filepath.EvalSymlinks(existingPath)
   405  	if err != nil {
   406  		return fmt.Errorf("error opening directory %s: %s", existingPath, err)
   407  	}
   408  	if !mount.PathWithinBase(fullExistingPath, base) {
   409  		return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err)
   410  	}
   411  
   412  	klog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
   413  	parentFD, err := doSafeOpen(fullExistingPath, base)
   414  	if err != nil {
   415  		return fmt.Errorf("cannot open directory %s: %s", existingPath, err)
   416  	}
   417  	childFD := -1
   418  	defer func() {
   419  		if parentFD != -1 {
   420  			if err = syscall.Close(parentFD); err != nil {
   421  				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
   422  			}
   423  		}
   424  		if childFD != -1 {
   425  			if err = syscall.Close(childFD); err != nil {
   426  				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err)
   427  			}
   428  		}
   429  	}()
   430  
   431  	currentPath := fullExistingPath
   432  	// create the directories one by one, making sure nobody can change
   433  	// created directory into symlink.
   434  	for _, dir := range toCreate {
   435  		currentPath = filepath.Join(currentPath, dir)
   436  		klog.V(4).Infof("Creating %s", dir)
   437  		err = syscall.Mkdirat(parentFD, currentPath, uint32(perm))
   438  		if err != nil {
   439  			return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
   440  		}
   441  		// Dive into the created directory
   442  		childFD, err = syscall.Openat(parentFD, dir, nofollowFlags|unix.O_CLOEXEC, 0)
   443  		if err != nil {
   444  			return fmt.Errorf("cannot open %s: %s", currentPath, err)
   445  		}
   446  		// We can be sure that childFD is safe to use. It could be changed
   447  		// by user after Mkdirat() and before Openat(), however:
   448  		// - it could not be changed to symlink - we use nofollowFlags
   449  		// - it could be changed to a file (or device, pipe, socket, ...)
   450  		//   but either subsequent Mkdirat() fails or we mount this file
   451  		//   to user's container. Security is no violated in both cases
   452  		//   and user either gets error or the file that it can already access.
   453  
   454  		if err = syscall.Close(parentFD); err != nil {
   455  			klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
   456  		}
   457  		parentFD = childFD
   458  		childFD = -1
   459  
   460  		// Everything was created. mkdirat(..., perm) above was affected by current
   461  		// umask and we must apply the right permissions to the all created directory.
   462  		// (that's the one that will be available to the container as subpath)
   463  		// so user can read/write it.
   464  		// parentFD is the last created directory.
   465  
   466  		// Translate perm (os.FileMode) to uint32 that fchmod() expects
   467  		kernelPerm := uint32(perm & os.ModePerm)
   468  		if perm&os.ModeSetgid > 0 {
   469  			kernelPerm |= syscall.S_ISGID
   470  		}
   471  		if perm&os.ModeSetuid > 0 {
   472  			kernelPerm |= syscall.S_ISUID
   473  		}
   474  		if perm&os.ModeSticky > 0 {
   475  			kernelPerm |= syscall.S_ISVTX
   476  		}
   477  		if err = syscall.Fchmod(parentFD, kernelPerm); err != nil {
   478  			return fmt.Errorf("chmod %q failed: %s", currentPath, err)
   479  		}
   480  	}
   481  
   482  	return nil
   483  }
   484  
   485  // findExistingPrefix finds prefix of pathname that exists. In addition, it
   486  // returns list of remaining directories that don't exist yet.
   487  func findExistingPrefix(base, pathname string) (string, []string, error) {
   488  	rel, err := filepath.Rel(base, pathname)
   489  	if err != nil {
   490  		return base, nil, err
   491  	}
   492  	dirs := strings.Split(rel, string(filepath.Separator))
   493  
   494  	// Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks.
   495  	// This should be faster than looping through all dirs and calling os.Stat()
   496  	// on each of them, as the symlinks are resolved only once with OpenAt().
   497  	currentPath := base
   498  	fd, err := syscall.Open(currentPath, syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
   499  	if err != nil {
   500  		return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err)
   501  	}
   502  	defer func() {
   503  		if err = syscall.Close(fd); err != nil {
   504  			klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
   505  		}
   506  	}()
   507  	for i, dir := range dirs {
   508  		// Using O_PATH here will prevent hangs in case user replaces directory with
   509  		// fifo
   510  		childFD, err := syscall.Openat(fd, dir, unix.O_PATH|unix.O_CLOEXEC, 0)
   511  		if err != nil {
   512  			if os.IsNotExist(err) {
   513  				return currentPath, dirs[i:], nil
   514  			}
   515  			return base, nil, err
   516  		}
   517  		if err = syscall.Close(fd); err != nil {
   518  			klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
   519  		}
   520  		fd = childFD
   521  		currentPath = filepath.Join(currentPath, dir)
   522  	}
   523  	return pathname, []string{}, nil
   524  }
   525  
   526  // This implementation is shared between Linux and NsEnterMounter
   527  // Open path and return its fd.
   528  // Symlinks are disallowed (pathname must already resolve symlinks),
   529  // and the path must be within the base directory.
   530  func doSafeOpen(pathname string, base string) (int, error) {
   531  	pathname = filepath.Clean(pathname)
   532  	base = filepath.Clean(base)
   533  
   534  	// Calculate segments to follow
   535  	subpath, err := filepath.Rel(base, pathname)
   536  	if err != nil {
   537  		return -1, err
   538  	}
   539  	segments := strings.Split(subpath, string(filepath.Separator))
   540  
   541  	// Assumption: base is the only directory that we have under control.
   542  	// Base dir is not allowed to be a symlink.
   543  	parentFD, err := syscall.Open(base, nofollowFlags|unix.O_CLOEXEC, 0)
   544  	if err != nil {
   545  		return -1, fmt.Errorf("cannot open directory %s: %s", base, err)
   546  	}
   547  	defer func() {
   548  		if parentFD != -1 {
   549  			if err = syscall.Close(parentFD); err != nil {
   550  				klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err)
   551  			}
   552  		}
   553  	}()
   554  
   555  	childFD := -1
   556  	defer func() {
   557  		if childFD != -1 {
   558  			if err = syscall.Close(childFD); err != nil {
   559  				klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err)
   560  			}
   561  		}
   562  	}()
   563  
   564  	currentPath := base
   565  
   566  	// Follow the segments one by one using openat() to make
   567  	// sure the user cannot change already existing directories into symlinks.
   568  	for _, seg := range segments {
   569  		var deviceStat unix.Stat_t
   570  
   571  		currentPath = filepath.Join(currentPath, seg)
   572  		if !mount.PathWithinBase(currentPath, base) {
   573  			return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base)
   574  		}
   575  
   576  		// Trigger auto mount if it's an auto-mounted directory, ignore error if not a directory.
   577  		// Notice the trailing slash is mandatory, see "automount" in openat(2) and open_by_handle_at(2).
   578  		unix.Fstatat(parentFD, seg+"/", &deviceStat, unix.AT_SYMLINK_NOFOLLOW)
   579  
   580  		klog.V(5).Infof("Opening path %s", currentPath)
   581  		childFD, err = syscall.Openat(parentFD, seg, openFDFlags|unix.O_CLOEXEC, 0)
   582  		if err != nil {
   583  			return -1, fmt.Errorf("cannot open %s: %s", currentPath, err)
   584  		}
   585  
   586  		err := unix.Fstat(childFD, &deviceStat)
   587  		if err != nil {
   588  			return -1, fmt.Errorf("error running fstat on %s with %v", currentPath, err)
   589  		}
   590  		fileFmt := deviceStat.Mode & syscall.S_IFMT
   591  		if fileFmt == syscall.S_IFLNK {
   592  			return -1, fmt.Errorf("unexpected symlink found %s", currentPath)
   593  		}
   594  
   595  		// Close parentFD
   596  		if err = syscall.Close(parentFD); err != nil {
   597  			return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err)
   598  		}
   599  		// Set child to new parent
   600  		parentFD = childFD
   601  		childFD = -1
   602  	}
   603  
   604  	// We made it to the end, return this fd, don't close it
   605  	finalFD := parentFD
   606  	parentFD = -1
   607  
   608  	return finalFD, nil
   609  }
   610  

View as plain text