...

Source file src/github.com/opencontainers/runc/libcontainer/factory_linux.go

Documentation: github.com/opencontainers/runc/libcontainer

     1  package libcontainer
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"regexp"
    10  	"runtime/debug"
    11  	"strconv"
    12  
    13  	securejoin "github.com/cyphar/filepath-securejoin"
    14  	"github.com/moby/sys/mountinfo"
    15  	"golang.org/x/sys/unix"
    16  
    17  	"github.com/opencontainers/runc/libcontainer/cgroups/manager"
    18  	"github.com/opencontainers/runc/libcontainer/configs"
    19  	"github.com/opencontainers/runc/libcontainer/configs/validate"
    20  	"github.com/opencontainers/runc/libcontainer/intelrdt"
    21  	"github.com/opencontainers/runc/libcontainer/utils"
    22  	"github.com/sirupsen/logrus"
    23  )
    24  
const (
	// stateFilename is the name of the JSON file, stored directly under a
	// container's root directory, that loadState decodes into a State.
	stateFilename    = "state.json"
	// execFifoFilename is presumably the name of the exec FIFO kept in a
	// container's root directory; it is not referenced in this part of the
	// file — confirm against its users.
	execFifoFilename = "exec.fifo"
)
    29  
    30  var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
    31  
    32  // InitArgs returns an options func to configure a LinuxFactory with the
    33  // provided init binary path and arguments.
    34  func InitArgs(args ...string) func(*LinuxFactory) error {
    35  	return func(l *LinuxFactory) (err error) {
    36  		if len(args) > 0 {
    37  			// Resolve relative paths to ensure that its available
    38  			// after directory changes.
    39  			if args[0], err = filepath.Abs(args[0]); err != nil {
    40  				// The only error returned from filepath.Abs is
    41  				// the one from os.Getwd, i.e. a system error.
    42  				return err
    43  			}
    44  		}
    45  
    46  		l.InitArgs = args
    47  		return nil
    48  	}
    49  }
    50  
    51  // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
    52  func TmpfsRoot(l *LinuxFactory) error {
    53  	mounted, err := mountinfo.Mounted(l.Root)
    54  	if err != nil {
    55  		return err
    56  	}
    57  	if !mounted {
    58  		if err := mount("tmpfs", l.Root, "", "tmpfs", 0, ""); err != nil {
    59  			return err
    60  		}
    61  	}
    62  	return nil
    63  }
    64  
    65  // CriuPath returns an option func to configure a LinuxFactory with the
    66  // provided criupath
    67  func CriuPath(criupath string) func(*LinuxFactory) error {
    68  	return func(l *LinuxFactory) error {
    69  		l.CriuPath = criupath
    70  		return nil
    71  	}
    72  }
    73  
    74  // New returns a linux based container factory based in the root directory and
    75  // configures the factory with the provided option funcs.
    76  func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
    77  	if root != "" {
    78  		if err := os.MkdirAll(root, 0o700); err != nil {
    79  			return nil, err
    80  		}
    81  	}
    82  	l := &LinuxFactory{
    83  		Root:      root,
    84  		InitPath:  "/proc/self/exe",
    85  		InitArgs:  []string{os.Args[0], "init"},
    86  		Validator: validate.New(),
    87  		CriuPath:  "criu",
    88  	}
    89  
    90  	for _, opt := range options {
    91  		if opt == nil {
    92  			continue
    93  		}
    94  		if err := opt(l); err != nil {
    95  			return nil, err
    96  		}
    97  	}
    98  	return l, nil
    99  }
   100  
// LinuxFactory implements the default factory interface for linux based systems.
// Instances are normally built with New, which fills in the defaults noted on
// the fields below before applying option funcs.
type LinuxFactory struct {
	// Root directory for the factory to store state. Each container gets a
	// directory named after its id under Root. Create and Load fail with
	// "root not set" when it is empty.
	Root string

	// InitPath is the path for calling the init responsibilities for spawning
	// a container. New sets it to "/proc/self/exe".
	InitPath string

	// InitArgs are arguments for calling the init responsibilities for spawning
	// a container. New sets it to {os.Args[0], "init"}; the InitArgs option
	// replaces it.
	InitArgs []string

	// CriuPath is the path to the criu binary used for checkpoint and restore of
	// containers. New sets it to "criu"; the CriuPath option replaces it.
	CriuPath string

	// New{u,g}idmapPath is the path to the binaries used for mapping with
	// rootless containers. Both are empty unless set through the
	// NewuidmapPath / NewgidmapPath options.
	NewuidmapPath string
	NewgidmapPath string

	// Validator provides validation to container configurations. Create runs
	// it on the config before anything is created. New sets it to
	// validate.New().
	Validator validate.Validator
}
   126  
   127  func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
   128  	if l.Root == "" {
   129  		return nil, errors.New("root not set")
   130  	}
   131  	if err := l.validateID(id); err != nil {
   132  		return nil, err
   133  	}
   134  	if err := l.Validator.Validate(config); err != nil {
   135  		return nil, err
   136  	}
   137  	containerRoot, err := securejoin.SecureJoin(l.Root, id)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	if _, err := os.Stat(containerRoot); err == nil {
   142  		return nil, ErrExist
   143  	} else if !os.IsNotExist(err) {
   144  		return nil, err
   145  	}
   146  
   147  	cm, err := manager.New(config.Cgroups)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	// Check that cgroup does not exist or empty (no processes).
   153  	// Note for cgroup v1 this check is not thorough, as there are multiple
   154  	// separate hierarchies, while both Exists() and GetAllPids() only use
   155  	// one for "devices" controller (assuming others are the same, which is
   156  	// probably true in almost all scenarios). Checking all the hierarchies
   157  	// would be too expensive.
   158  	if cm.Exists() {
   159  		pids, err := cm.GetAllPids()
   160  		// Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV.
   161  		if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) {
   162  			return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err)
   163  		}
   164  		if len(pids) != 0 {
   165  			if config.Cgroups.Systemd {
   166  				// systemd cgroup driver can't add a pid to an
   167  				// existing systemd unit and will return an
   168  				// error anyway, so let's error out early.
   169  				return nil, fmt.Errorf("container's cgroup is not empty: %d process(es) found", len(pids))
   170  			}
   171  			// TODO: return an error.
   172  			logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids))
   173  			logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132")
   174  		}
   175  	}
   176  
   177  	// Check that cgroup is not frozen. Do not use Exists() here
   178  	// since in cgroup v1 it only checks "devices" controller.
   179  	st, err := cm.GetFreezerState()
   180  	if err != nil {
   181  		return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err)
   182  	}
   183  	if st == configs.Frozen {
   184  		return nil, errors.New("container's cgroup unexpectedly frozen")
   185  	}
   186  
   187  	if err := os.MkdirAll(containerRoot, 0o711); err != nil {
   188  		return nil, err
   189  	}
   190  	if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
   191  		return nil, err
   192  	}
   193  	c := &linuxContainer{
   194  		id:              id,
   195  		root:            containerRoot,
   196  		config:          config,
   197  		initPath:        l.InitPath,
   198  		initArgs:        l.InitArgs,
   199  		criuPath:        l.CriuPath,
   200  		newuidmapPath:   l.NewuidmapPath,
   201  		newgidmapPath:   l.NewgidmapPath,
   202  		cgroupManager:   cm,
   203  		intelRdtManager: intelrdt.NewManager(config, id, ""),
   204  	}
   205  	c.state = &stoppedState{c: c}
   206  	return c, nil
   207  }
   208  
   209  func (l *LinuxFactory) Load(id string) (Container, error) {
   210  	if l.Root == "" {
   211  		return nil, errors.New("root not set")
   212  	}
   213  	// when load, we need to check id is valid or not.
   214  	if err := l.validateID(id); err != nil {
   215  		return nil, err
   216  	}
   217  	containerRoot, err := securejoin.SecureJoin(l.Root, id)
   218  	if err != nil {
   219  		return nil, err
   220  	}
   221  	state, err := l.loadState(containerRoot)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  	r := &nonChildProcess{
   226  		processPid:       state.InitProcessPid,
   227  		processStartTime: state.InitProcessStartTime,
   228  		fds:              state.ExternalDescriptors,
   229  	}
   230  	cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths)
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  	c := &linuxContainer{
   235  		initProcess:          r,
   236  		initProcessStartTime: state.InitProcessStartTime,
   237  		id:                   id,
   238  		config:               &state.Config,
   239  		initPath:             l.InitPath,
   240  		initArgs:             l.InitArgs,
   241  		criuPath:             l.CriuPath,
   242  		newuidmapPath:        l.NewuidmapPath,
   243  		newgidmapPath:        l.NewgidmapPath,
   244  		cgroupManager:        cm,
   245  		intelRdtManager:      intelrdt.NewManager(&state.Config, id, state.IntelRdtPath),
   246  		root:                 containerRoot,
   247  		created:              state.Created,
   248  	}
   249  	c.state = &loadedState{c: c}
   250  	if err := c.refreshState(); err != nil {
   251  		return nil, err
   252  	}
   253  	return c, nil
   254  }
   255  
   256  func (l *LinuxFactory) Type() string {
   257  	return "libcontainer"
   258  }
   259  
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state.
// This is a low level implementation detail of the reexec and should not be consumed externally.
//
// All inputs are file descriptor numbers (and the init type) passed through
// _LIBCONTAINER_* environment variables, which are read and then wiped with
// os.Clearenv before the container init runs. Any error, including a
// recovered panic, is reported to the parent over the init pipe. On success
// the function does not return (see the note above the final return).
func (l *LinuxFactory) StartInitialization() (err error) {
	// Get the INITPIPE.
	envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE")
	pipefd, err := strconv.Atoi(envInitPipe)
	if err != nil {
		// Without a usable pipe fd the error cannot be sent to the parent
		// (the reporting defer below is not registered yet), so it is logged.
		err = fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err)
		logrus.Error(err)
		return err
	}
	pipe := os.NewFile(uintptr(pipefd), "pipe")
	defer pipe.Close()

	// Registered right after the pipe is available, so every return below
	// goes through it. Deferred calls run in reverse order, so this runs
	// after the recover handler registered further down has had a chance
	// to turn a panic into err.
	defer func() {
		// We have an error during the initialization of the container's init,
		// send it back to the parent process in the form of an initError.
		// If writing to the pipe fails, the initialization error (err, not
		// the write error werr) is printed to stderr instead.
		// NOTE(review): err.Error() below would panic on a nil err; this
		// relies on the function never returning nil — confirm that Init
		// cannot return nil.
		if werr := writeSync(pipe, procError); werr != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
		if werr := utils.WriteJSON(pipe, &initError{Message: err.Error()}); werr != nil {
			fmt.Fprintln(os.Stderr, err)
			return
		}
	}()

	// Only init processes have FIFOFD.
	// For any other init type fifofd stays -1.
	fifofd := -1
	envInitType := os.Getenv("_LIBCONTAINER_INITTYPE")
	it := initType(envInitType)
	if it == initStandard {
		envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD")
		if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err)
		}
	}

	// The console socket is optional: consoleSocket stays nil when
	// _LIBCONTAINER_CONSOLE is unset or empty.
	var consoleSocket *os.File
	if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" {
		console, err := strconv.Atoi(envConsole)
		if err != nil {
			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err)
		}
		consoleSocket = os.NewFile(uintptr(console), "console-socket")
		defer consoleSocket.Close()
	}

	// The log pipe fd is mandatory: a missing or malformed
	// _LIBCONTAINER_LOGPIPE is an error.
	logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE")
	logPipeFd, err := strconv.Atoi(logPipeFdStr)
	if err != nil {
		return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
	}

	// Get mount files (O_PATH).
	mountFds, err := parseMountFds()
	if err != nil {
		return err
	}

	// clear the current process's environment to clean any libcontainer
	// specific env vars. This must stay after every os.Getenv above.
	os.Clearenv()

	// Convert a panic raised by newContainerInit or Init into the named
	// result err, so that the reporting defer above forwards it to the
	// parent together with a stack trace.
	defer func() {
		if e := recover(); e != nil {
			if ee, ok := e.(error); ok {
				err = fmt.Errorf("panic from initialization: %w, %s", ee, debug.Stack())
			} else {
				err = fmt.Errorf("panic from initialization: %v, %s", e, debug.Stack())
			}
		}
	}()

	i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
	if err != nil {
		return err
	}

	// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
	return i.Init()
}
   342  
   343  func (l *LinuxFactory) loadState(root string) (*State, error) {
   344  	stateFilePath, err := securejoin.SecureJoin(root, stateFilename)
   345  	if err != nil {
   346  		return nil, err
   347  	}
   348  	f, err := os.Open(stateFilePath)
   349  	if err != nil {
   350  		if os.IsNotExist(err) {
   351  			return nil, ErrNotExist
   352  		}
   353  		return nil, err
   354  	}
   355  	defer f.Close()
   356  	var state *State
   357  	if err := json.NewDecoder(f).Decode(&state); err != nil {
   358  		return nil, err
   359  	}
   360  	return state, nil
   361  }
   362  
   363  func (l *LinuxFactory) validateID(id string) error {
   364  	if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) {
   365  		return ErrInvalidID
   366  	}
   367  
   368  	return nil
   369  }
   370  
   371  // NewuidmapPath returns an option func to configure a LinuxFactory with the
   372  // provided ..
   373  func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
   374  	return func(l *LinuxFactory) error {
   375  		l.NewuidmapPath = newuidmapPath
   376  		return nil
   377  	}
   378  }
   379  
   380  // NewgidmapPath returns an option func to configure a LinuxFactory with the
   381  // provided ..
   382  func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
   383  	return func(l *LinuxFactory) error {
   384  		l.NewgidmapPath = newgidmapPath
   385  		return nil
   386  	}
   387  }
   388  
   389  func parseMountFds() ([]int, error) {
   390  	fdsJson := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
   391  	if fdsJson == "" {
   392  		// Always return the nil slice if no fd is present.
   393  		return nil, nil
   394  	}
   395  
   396  	var mountFds []int
   397  	if err := json.Unmarshal([]byte(fdsJson), &mountFds); err != nil {
   398  		return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
   399  	}
   400  
   401  	return mountFds, nil
   402  }
   403  

View as plain text