...

Source file src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go

Documentation: github.com/opencontainers/runc/libcontainer

     1  package libcontainer
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"strconv"
     9  
    10  	"github.com/opencontainers/selinux/go-selinux"
    11  	"github.com/sirupsen/logrus"
    12  	"golang.org/x/sys/unix"
    13  
    14  	"github.com/opencontainers/runc/libcontainer/apparmor"
    15  	"github.com/opencontainers/runc/libcontainer/keys"
    16  	"github.com/opencontainers/runc/libcontainer/seccomp"
    17  	"github.com/opencontainers/runc/libcontainer/system"
    18  	"github.com/opencontainers/runc/libcontainer/utils"
    19  )
    20  
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
//
// Fields, as used by Init:
//   - pipe: handed to syncParentSeccomp together with the seccomp fd
//     (presumably the sync pipe to the parent process; confirm in the caller).
//   - consoleSocket: handed to setupConsole when config.CreateConsole is set.
//   - config: the init configuration (process args, labels, seccomp
//     settings, ...) that drives every step of Init.
//   - logFd: raw file descriptor of the log pipe; Init closes it right before
//     exec so that the parent's log forwarding can finish.
type linuxSetnsInit struct {
	pipe          *os.File
	consoleSocket *os.File
	config        *initConfig
	logFd         int
}
    29  
    30  func (l *linuxSetnsInit) getSessionRingName() string {
    31  	return "_ses." + l.config.ContainerId
    32  }
    33  
// Init prepares the calling process according to l.config and then replaces
// it with the requested program via system.Exec.
//
// The steps run in a fixed order: session keyring (unless NoNewKeyring is
// set), console setup, no_new_privs, SELinux exec label, seccomp (early when
// NoNewPrivileges is unset, otherwise just before exec), finalizeNamespace,
// AppArmor profile, executable lookup, closing the log pipe and every fd that
// is not passed on, and finally the exec itself.
//
// The first failing step aborts the sequence and its error is returned. The
// result of system.Exec is returned as-is; presumably that call only returns
// on failure.
//
// NOTE(review): l.config.Args[0] is indexed without a length check, so this
// assumes Args is non-empty — confirm that callers validate it.
func (l *linuxSetnsInit) Init() error {
	if !l.config.Config.NoNewKeyring {
		// Label the keyring that is about to be created with the process label.
		if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
			return err
		}
		// Clear the key label again once Init returns.
		defer selinux.SetKeyLabel("") //nolint: errcheck
		// Do not inherit the parent's session keyring.
		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
			// Same justification as in standard_init_linux.go as to why we
			// don't bail on ENOSYS.
			//
			// TODO(cyphar): And we should have logging here too.
			if !errors.Is(err, unix.ENOSYS) {
				return fmt.Errorf("unable to join session keyring: %w", err)
			}
		}
	}
	// When a console was requested, set it up through the console socket and
	// then call system.Setctty (presumably to make it the controlling
	// terminal of this process).
	if l.config.CreateConsole {
		if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
			return err
		}
		if err := system.Setctty(); err != nil {
			return err
		}
	}
	// Set the no_new_privs attribute if the config asks for it.
	if l.config.NoNewPrivileges {
		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
			return err
		}
	}
	// Set the SELinux label for the upcoming exec; it is cleared again if
	// Init returns (i.e. when the exec did not happen).
	if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
		return err
	}
	defer selinux.SetExecLabel("") //nolint: errcheck
	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
	// do this before dropping capabilities; otherwise do it as late as possible
	// just before execve so as few syscalls take place after it as possible.
	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
		if err != nil {
			return err
		}

		// Hand the fd returned by InitSeccomp to the parent over the pipe.
		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
			return err
		}
	}
	// finalizeNamespace is defined elsewhere; judging by the comment above it
	// is presumably where capabilities are dropped — confirm.
	if err := finalizeNamespace(l.config); err != nil {
		return err
	}
	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
		return err
	}

	// Resolve the executable up front so that a missing binary is reported as
	// a regular error from Init.
	name, err := exec.LookPath(l.config.Args[0])
	if err != nil {
		return err
	}
	// exec.LookPath in Go < 1.20 might return no error for an executable
	// residing on a file system mounted with noexec flag, so perform this
	// extra check now while we can still return a proper error.
	// TODO: remove this once go < 1.20 is not supported.
	if err := eaccess(name); err != nil {
		return &os.PathError{Op: "eaccess", Path: name, Err: err}
	}

	// Set seccomp as close to execve as possible, so as few syscalls take
	// place afterward (reducing the amount of syscalls that users need to
	// enable in their seccomp profiles).
	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
		seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp)
		if err != nil {
			return fmt.Errorf("unable to init seccomp: %w", err)
		}

		// Hand the fd returned by InitSeccomp to the parent over the pipe.
		if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
			return err
		}
	}
	logrus.Debugf("setns_init: about to exec")
	// Close the log pipe fd so the parent's ForwardLogs can exit.
	if err := unix.Close(l.logFd); err != nil {
		return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
	}

	// Close all file descriptors we are not passing to the container. This is
	// necessary because the execve target could use internal runc fds as the
	// execve path, potentially giving access to binary files from the host
	// (which can then be opened by container processes, leading to container
	// escapes). Note that because this operation will close any open file
	// descriptors that are referenced by (*os.File) handles from underneath
	// the Go runtime, we must not do any file operations after this point
	// (otherwise the (*os.File) finaliser could close the wrong file). See
	// CVE-2024-21626 for more information as to why this protection is
	// necessary.
	//
	// This is not needed for runc-dmz, because the extra execve(2) step means
	// that all O_CLOEXEC file descriptors have already been closed and thus
	// the second execve(2) from runc-dmz cannot access internal file
	// descriptors from runc.
	//
	// The "+ 3" presumably accounts for stdin, stdout and stderr, with the
	// passed files occupying the descriptors right after them — confirm.
	if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
		return err
	}
	// Args[0:] is the complete argument slice, including argv[0]; the
	// environment is taken from the current process.
	return system.Exec(name, l.config.Args[0:], os.Environ())
}
   141  

View as plain text