...

Source file src/github.com/opencontainers/runc/libcontainer/process_linux.go

Documentation: github.com/opencontainers/runc/libcontainer

     1  package libcontainer
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"net"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"strconv"
    13  	"time"
    14  
    15  	"github.com/opencontainers/runc/libcontainer/cgroups"
    16  	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
    17  	"github.com/opencontainers/runc/libcontainer/configs"
    18  	"github.com/opencontainers/runc/libcontainer/intelrdt"
    19  	"github.com/opencontainers/runc/libcontainer/logs"
    20  	"github.com/opencontainers/runc/libcontainer/system"
    21  	"github.com/opencontainers/runc/libcontainer/utils"
    22  	"github.com/opencontainers/runtime-spec/specs-go"
    23  	"github.com/sirupsen/logrus"
    24  	"golang.org/x/sys/unix"
    25  )
    26  
// parentProcess is the set of operations the parent side performs on a
// process it has launched. In this file it is implemented by setnsProcess
// and initProcess.
type parentProcess interface {
	// pid returns the pid for the running process.
	pid() int

	// start starts the process execution.
	start() error

	// terminate sends a SIGKILL to the process and waits for it to exit.
	terminate() error

	// wait waits on the process returning the process state.
	wait() (*os.ProcessState, error)

	// startTime returns the process start time.
	startTime() (uint64, error)

	// signal delivers the given signal to the process.
	signal(os.Signal) error

	// externalDescriptors returns the descriptor names previously stored
	// with setExternalDescriptors.
	externalDescriptors() []string

	// setExternalDescriptors stores the given descriptor names.
	setExternalDescriptors(fds []string)

	// forwardChildLogs starts forwarding the child's log output and returns
	// the channel produced by the log forwarder.
	forwardChildLogs() chan error
}
    47  
// filePair holds the two ends of a parent/child communication channel.
type filePair struct {
	// parent is the end kept and used by this (parent) process.
	parent *os.File
	// child is the end handed to the child; the parent closes its copy
	// right after the child command has been started.
	child *os.File
}
    52  
// setnsProcess is the parentProcess implementation used for a process that
// is started through the setns helper (see start and execSetns).
type setnsProcess struct {
	// cmd is the helper command; after execSetns its Process field refers
	// to the final child rather than the helper.
	cmd *exec.Cmd
	// messageSockPair carries bootstrap data, the config and sync messages.
	messageSockPair filePair
	// logFilePair carries the child's log output (see forwardChildLogs).
	logFilePair filePair
	// cgroupPaths are the cgroup directories the new pid is written to.
	cgroupPaths map[string]string
	// rootlessCgroups, when true, makes failures to join cgroupPaths non-fatal.
	rootlessCgroups bool
	// manager is queried for the OOM kill count around start.
	manager cgroups.Manager
	// intelRdtPath, when non-empty and existing, is the Intel RDT directory
	// the new pid is added to.
	intelRdtPath string
	// config is sent to the child as JSON over messageSockPair.
	config *initConfig
	// fds holds the external descriptor names (see externalDescriptors).
	fds []string
	// process is the Process whose ops field is pointed at this value.
	process *Process
	// bootstrapData, when non-nil, is copied to the child before syncing.
	bootstrapData io.Reader
	// initProcessPid is used as a fallback cgroup source on cgroup v2 and is
	// reported as the container state pid to the seccomp agent.
	initProcessPid int
}
    67  
    68  func (p *setnsProcess) startTime() (uint64, error) {
    69  	stat, err := system.Stat(p.pid())
    70  	return stat.StartTime, err
    71  }
    72  
    73  func (p *setnsProcess) signal(sig os.Signal) error {
    74  	s, ok := sig.(unix.Signal)
    75  	if !ok {
    76  		return errors.New("os: unsupported signal type")
    77  	}
    78  	return unix.Kill(p.pid(), s)
    79  }
    80  
    81  func (p *setnsProcess) start() (retErr error) {
    82  	defer p.messageSockPair.parent.Close()
    83  	// get the "before" value of oom kill count
    84  	oom, _ := p.manager.OOMKillCount()
    85  	err := p.cmd.Start()
    86  	// close the write-side of the pipes (controlled by child)
    87  	p.messageSockPair.child.Close()
    88  	p.logFilePair.child.Close()
    89  	if err != nil {
    90  		return fmt.Errorf("error starting setns process: %w", err)
    91  	}
    92  
    93  	waitInit := initWaiter(p.messageSockPair.parent)
    94  	defer func() {
    95  		if retErr != nil {
    96  			if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom {
    97  				// Someone in this cgroup was killed, this _might_ be us.
    98  				retErr = fmt.Errorf("%w (possibly OOM-killed)", retErr)
    99  			}
   100  			werr := <-waitInit
   101  			if werr != nil {
   102  				logrus.WithError(werr).Warn()
   103  			}
   104  			err := ignoreTerminateErrors(p.terminate())
   105  			if err != nil {
   106  				logrus.WithError(err).Warn("unable to terminate setnsProcess")
   107  			}
   108  		}
   109  	}()
   110  
   111  	if p.bootstrapData != nil {
   112  		if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
   113  			return fmt.Errorf("error copying bootstrap data to pipe: %w", err)
   114  		}
   115  	}
   116  	err = <-waitInit
   117  	if err != nil {
   118  		return err
   119  	}
   120  	if err := p.execSetns(); err != nil {
   121  		return fmt.Errorf("error executing setns process: %w", err)
   122  	}
   123  	for _, path := range p.cgroupPaths {
   124  		if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups {
   125  			// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
   126  			// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
   127  			// Try to join the cgroup of InitProcessPid.
   128  			if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
   129  				initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
   130  				initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
   131  				if initCgErr == nil {
   132  					if initCgPath, ok := initCg[""]; ok {
   133  						initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
   134  						logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
   135  							p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
   136  						// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
   137  						err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
   138  					}
   139  				}
   140  			}
   141  			if err != nil {
   142  				return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err)
   143  			}
   144  		}
   145  	}
   146  	if p.intelRdtPath != "" {
   147  		// if Intel RDT "resource control" filesystem path exists
   148  		_, err := os.Stat(p.intelRdtPath)
   149  		if err == nil {
   150  			if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
   151  				return fmt.Errorf("error adding pid %d to Intel RDT: %w", p.pid(), err)
   152  			}
   153  		}
   154  	}
   155  	// set rlimits, this has to be done here because we lose permissions
   156  	// to raise the limits once we enter a user-namespace
   157  	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
   158  		return fmt.Errorf("error setting rlimits for process: %w", err)
   159  	}
   160  	if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
   161  		return fmt.Errorf("error writing config to pipe: %w", err)
   162  	}
   163  
   164  	ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
   165  		switch sync.Type {
   166  		case procReady:
   167  			// This shouldn't happen.
   168  			panic("unexpected procReady in setns")
   169  		case procHooks:
   170  			// This shouldn't happen.
   171  			panic("unexpected procHooks in setns")
   172  		case procSeccomp:
   173  			if p.config.Config.Seccomp.ListenerPath == "" {
   174  				return errors.New("listenerPath is not set")
   175  			}
   176  
   177  			seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
   178  			if err != nil {
   179  				return err
   180  			}
   181  			defer unix.Close(seccompFd)
   182  
   183  			bundle, annotations := utils.Annotations(p.config.Config.Labels)
   184  			containerProcessState := &specs.ContainerProcessState{
   185  				Version:  specs.Version,
   186  				Fds:      []string{specs.SeccompFdName},
   187  				Pid:      p.cmd.Process.Pid,
   188  				Metadata: p.config.Config.Seccomp.ListenerMetadata,
   189  				State: specs.State{
   190  					Version:     specs.Version,
   191  					ID:          p.config.ContainerId,
   192  					Status:      specs.StateRunning,
   193  					Pid:         p.initProcessPid,
   194  					Bundle:      bundle,
   195  					Annotations: annotations,
   196  				},
   197  			}
   198  			if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
   199  				containerProcessState, seccompFd); err != nil {
   200  				return err
   201  			}
   202  
   203  			// Sync with child.
   204  			if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
   205  				return err
   206  			}
   207  			return nil
   208  		default:
   209  			return errors.New("invalid JSON payload from child")
   210  		}
   211  	})
   212  
   213  	if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
   214  		return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
   215  	}
   216  	// Must be done after Shutdown so the child will exit and we can wait for it.
   217  	if ierr != nil {
   218  		_, _ = p.wait()
   219  		return ierr
   220  	}
   221  	return nil
   222  }
   223  
   224  // execSetns runs the process that executes C code to perform the setns calls
   225  // because setns support requires the C process to fork off a child and perform the setns
   226  // before the go runtime boots, we wait on the process to die and receive the child's pid
   227  // over the provided pipe.
   228  func (p *setnsProcess) execSetns() error {
   229  	status, err := p.cmd.Process.Wait()
   230  	if err != nil {
   231  		_ = p.cmd.Wait()
   232  		return fmt.Errorf("error waiting on setns process to finish: %w", err)
   233  	}
   234  	if !status.Success() {
   235  		_ = p.cmd.Wait()
   236  		return &exec.ExitError{ProcessState: status}
   237  	}
   238  	var pid *pid
   239  	if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
   240  		_ = p.cmd.Wait()
   241  		return fmt.Errorf("error reading pid from init pipe: %w", err)
   242  	}
   243  
   244  	// Clean up the zombie parent process
   245  	// On Unix systems FindProcess always succeeds.
   246  	firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
   247  
   248  	// Ignore the error in case the child has already been reaped for any reason
   249  	_, _ = firstChildProcess.Wait()
   250  
   251  	process, err := os.FindProcess(pid.Pid)
   252  	if err != nil {
   253  		return err
   254  	}
   255  	p.cmd.Process = process
   256  	p.process.ops = p
   257  	return nil
   258  }
   259  
   260  // terminate sends a SIGKILL to the forked process for the setns routine then waits to
   261  // avoid the process becoming a zombie.
   262  func (p *setnsProcess) terminate() error {
   263  	if p.cmd.Process == nil {
   264  		return nil
   265  	}
   266  	err := p.cmd.Process.Kill()
   267  	if _, werr := p.wait(); err == nil {
   268  		err = werr
   269  	}
   270  	return err
   271  }
   272  
   273  func (p *setnsProcess) wait() (*os.ProcessState, error) {
   274  	err := p.cmd.Wait()
   275  
   276  	// Return actual ProcessState even on Wait error
   277  	return p.cmd.ProcessState, err
   278  }
   279  
   280  func (p *setnsProcess) pid() int {
   281  	return p.cmd.Process.Pid
   282  }
   283  
   284  func (p *setnsProcess) externalDescriptors() []string {
   285  	return p.fds
   286  }
   287  
   288  func (p *setnsProcess) setExternalDescriptors(newFds []string) {
   289  	p.fds = newFds
   290  }
   291  
   292  func (p *setnsProcess) forwardChildLogs() chan error {
   293  	return logs.ForwardLogs(p.logFilePair.parent)
   294  }
   295  
// initProcess is the parentProcess implementation used for a container's
// init process (see start).
type initProcess struct {
	// cmd is the command that is started; after waitForChildExit its Process
	// field refers to the final child.
	cmd *exec.Cmd
	// messageSockPair carries bootstrap data, the config and sync messages.
	messageSockPair filePair
	// logFilePair carries the child's log output (see forwardChildLogs).
	logFilePair filePair
	// config is sent to the child as JSON (see sendConfig).
	config *initConfig
	// manager applies and sets cgroup configuration and is destroyed when
	// start fails.
	manager cgroups.Manager
	// intelRdtManager is optional; when non-nil it is applied, set and
	// destroyed alongside manager.
	intelRdtManager *intelrdt.Manager
	// container is the container whose state is read and updated during start.
	container *linuxContainer
	// fds holds the external descriptor names (see externalDescriptors).
	fds []string
	// process is the Process whose ops field is pointed at this value.
	process *Process
	// bootstrapData is copied to the child before syncing.
	bootstrapData io.Reader
	// sharePidns, when true, makes wait send SIGKILL to all processes via
	// signalAllProcesses once the command has been waited on.
	sharePidns bool
}
   309  
   310  func (p *initProcess) pid() int {
   311  	return p.cmd.Process.Pid
   312  }
   313  
   314  func (p *initProcess) externalDescriptors() []string {
   315  	return p.fds
   316  }
   317  
   318  // getChildPid receives the final child's pid over the provided pipe.
   319  func (p *initProcess) getChildPid() (int, error) {
   320  	var pid pid
   321  	if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
   322  		_ = p.cmd.Wait()
   323  		return -1, err
   324  	}
   325  
   326  	// Clean up the zombie parent process
   327  	// On Unix systems FindProcess always succeeds.
   328  	firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
   329  
   330  	// Ignore the error in case the child has already been reaped for any reason
   331  	_, _ = firstChildProcess.Wait()
   332  
   333  	return pid.Pid, nil
   334  }
   335  
   336  func (p *initProcess) waitForChildExit(childPid int) error {
   337  	status, err := p.cmd.Process.Wait()
   338  	if err != nil {
   339  		_ = p.cmd.Wait()
   340  		return err
   341  	}
   342  	if !status.Success() {
   343  		_ = p.cmd.Wait()
   344  		return &exec.ExitError{ProcessState: status}
   345  	}
   346  
   347  	process, err := os.FindProcess(childPid)
   348  	if err != nil {
   349  		return err
   350  	}
   351  	p.cmd.Process = process
   352  	p.process.ops = p
   353  	return nil
   354  }
   355  
   356  func (p *initProcess) start() (retErr error) {
   357  	defer p.messageSockPair.parent.Close() //nolint: errcheck
   358  	err := p.cmd.Start()
   359  	p.process.ops = p
   360  	// close the write-side of the pipes (controlled by child)
   361  	_ = p.messageSockPair.child.Close()
   362  	_ = p.logFilePair.child.Close()
   363  	if err != nil {
   364  		p.process.ops = nil
   365  		return fmt.Errorf("unable to start init: %w", err)
   366  	}
   367  
   368  	waitInit := initWaiter(p.messageSockPair.parent)
   369  	defer func() {
   370  		if retErr != nil {
   371  			// Find out if init is killed by the kernel's OOM killer.
   372  			// Get the count before killing init as otherwise cgroup
   373  			// might be removed by systemd.
   374  			oom, err := p.manager.OOMKillCount()
   375  			if err != nil {
   376  				logrus.WithError(err).Warn("unable to get oom kill count")
   377  			} else if oom > 0 {
   378  				// Does not matter what the particular error was,
   379  				// its cause is most probably OOM, so report that.
   380  				const oomError = "container init was OOM-killed (memory limit too low?)"
   381  
   382  				if logrus.GetLevel() >= logrus.DebugLevel {
   383  					// Only show the original error if debug is set,
   384  					// as it is not generally very useful.
   385  					retErr = fmt.Errorf(oomError+": %w", retErr)
   386  				} else {
   387  					retErr = errors.New(oomError)
   388  				}
   389  			}
   390  
   391  			werr := <-waitInit
   392  			if werr != nil {
   393  				logrus.WithError(werr).Warn()
   394  			}
   395  
   396  			// Terminate the process to ensure we can remove cgroups.
   397  			if err := ignoreTerminateErrors(p.terminate()); err != nil {
   398  				logrus.WithError(err).Warn("unable to terminate initProcess")
   399  			}
   400  
   401  			_ = p.manager.Destroy()
   402  			if p.intelRdtManager != nil {
   403  				_ = p.intelRdtManager.Destroy()
   404  			}
   405  		}
   406  	}()
   407  
   408  	// Do this before syncing with child so that no children can escape the
   409  	// cgroup. We don't need to worry about not doing this and not being root
   410  	// because we'd be using the rootless cgroup manager in that case.
   411  	if err := p.manager.Apply(p.pid()); err != nil {
   412  		return fmt.Errorf("unable to apply cgroup configuration: %w", err)
   413  	}
   414  	if p.intelRdtManager != nil {
   415  		if err := p.intelRdtManager.Apply(p.pid()); err != nil {
   416  			return fmt.Errorf("unable to apply Intel RDT configuration: %w", err)
   417  		}
   418  	}
   419  	if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
   420  		return fmt.Errorf("can't copy bootstrap data to pipe: %w", err)
   421  	}
   422  	err = <-waitInit
   423  	if err != nil {
   424  		return err
   425  	}
   426  
   427  	childPid, err := p.getChildPid()
   428  	if err != nil {
   429  		return fmt.Errorf("can't get final child's PID from pipe: %w", err)
   430  	}
   431  
   432  	// Save the standard descriptor names before the container process
   433  	// can potentially move them (e.g., via dup2()).  If we don't do this now,
   434  	// we won't know at checkpoint time which file descriptor to look up.
   435  	fds, err := getPipeFds(childPid)
   436  	if err != nil {
   437  		return fmt.Errorf("error getting pipe fds for pid %d: %w", childPid, err)
   438  	}
   439  	p.setExternalDescriptors(fds)
   440  
   441  	// Wait for our first child to exit
   442  	if err := p.waitForChildExit(childPid); err != nil {
   443  		return fmt.Errorf("error waiting for our first child to exit: %w", err)
   444  	}
   445  
   446  	if err := p.createNetworkInterfaces(); err != nil {
   447  		return fmt.Errorf("error creating network interfaces: %w", err)
   448  	}
   449  	if err := p.updateSpecState(); err != nil {
   450  		return fmt.Errorf("error updating spec state: %w", err)
   451  	}
   452  	if err := p.sendConfig(); err != nil {
   453  		return fmt.Errorf("error sending config to init process: %w", err)
   454  	}
   455  	var (
   456  		sentRun    bool
   457  		sentResume bool
   458  	)
   459  
   460  	ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
   461  		switch sync.Type {
   462  		case procSeccomp:
   463  			if p.config.Config.Seccomp.ListenerPath == "" {
   464  				return errors.New("listenerPath is not set")
   465  			}
   466  
   467  			seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
   468  			if err != nil {
   469  				return err
   470  			}
   471  			defer unix.Close(seccompFd)
   472  
   473  			s, err := p.container.currentOCIState()
   474  			if err != nil {
   475  				return err
   476  			}
   477  
   478  			// initProcessStartTime hasn't been set yet.
   479  			s.Pid = p.cmd.Process.Pid
   480  			s.Status = specs.StateCreating
   481  			containerProcessState := &specs.ContainerProcessState{
   482  				Version:  specs.Version,
   483  				Fds:      []string{specs.SeccompFdName},
   484  				Pid:      s.Pid,
   485  				Metadata: p.config.Config.Seccomp.ListenerMetadata,
   486  				State:    *s,
   487  			}
   488  			if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
   489  				containerProcessState, seccompFd); err != nil {
   490  				return err
   491  			}
   492  
   493  			// Sync with child.
   494  			if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
   495  				return err
   496  			}
   497  		case procReady:
   498  			// set rlimits, this has to be done here because we lose permissions
   499  			// to raise the limits once we enter a user-namespace
   500  			if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
   501  				return fmt.Errorf("error setting rlimits for ready process: %w", err)
   502  			}
   503  			// call prestart and CreateRuntime hooks
   504  			if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
   505  				// Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
   506  				if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
   507  					return fmt.Errorf("error setting cgroup config for ready process: %w", err)
   508  				}
   509  				if p.intelRdtManager != nil {
   510  					if err := p.intelRdtManager.Set(p.config.Config); err != nil {
   511  						return fmt.Errorf("error setting Intel RDT config for ready process: %w", err)
   512  					}
   513  				}
   514  
   515  				if len(p.config.Config.Hooks) != 0 {
   516  					s, err := p.container.currentOCIState()
   517  					if err != nil {
   518  						return err
   519  					}
   520  					// initProcessStartTime hasn't been set yet.
   521  					s.Pid = p.cmd.Process.Pid
   522  					s.Status = specs.StateCreating
   523  					hooks := p.config.Config.Hooks
   524  
   525  					if err := hooks[configs.Prestart].RunHooks(s); err != nil {
   526  						return err
   527  					}
   528  					if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
   529  						return err
   530  					}
   531  				}
   532  			}
   533  
   534  			// generate a timestamp indicating when the container was started
   535  			p.container.created = time.Now().UTC()
   536  			p.container.state = &createdState{
   537  				c: p.container,
   538  			}
   539  
   540  			// NOTE: If the procRun state has been synced and the
   541  			// runc-create process has been killed for some reason,
   542  			// the runc-init[2:stage] process will be leaky. And
   543  			// the runc command also fails to parse root directory
   544  			// because the container doesn't have state.json.
   545  			//
   546  			// In order to cleanup the runc-init[2:stage] by
   547  			// runc-delete/stop, we should store the status before
   548  			// procRun sync.
   549  			state, uerr := p.container.updateState(p)
   550  			if uerr != nil {
   551  				return fmt.Errorf("unable to store init state: %w", err)
   552  			}
   553  			p.container.initProcessStartTime = state.InitProcessStartTime
   554  
   555  			// Sync with child.
   556  			if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
   557  				return err
   558  			}
   559  			sentRun = true
   560  		case procHooks:
   561  			// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
   562  			if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil {
   563  				return fmt.Errorf("error setting cgroup config for procHooks process: %w", err)
   564  			}
   565  			if p.intelRdtManager != nil {
   566  				if err := p.intelRdtManager.Set(p.config.Config); err != nil {
   567  					return fmt.Errorf("error setting Intel RDT config for procHooks process: %w", err)
   568  				}
   569  			}
   570  			if len(p.config.Config.Hooks) != 0 {
   571  				s, err := p.container.currentOCIState()
   572  				if err != nil {
   573  					return err
   574  				}
   575  				// initProcessStartTime hasn't been set yet.
   576  				s.Pid = p.cmd.Process.Pid
   577  				s.Status = specs.StateCreating
   578  				hooks := p.config.Config.Hooks
   579  
   580  				if err := hooks[configs.Prestart].RunHooks(s); err != nil {
   581  					return err
   582  				}
   583  				if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
   584  					return err
   585  				}
   586  			}
   587  			// Sync with child.
   588  			if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
   589  				return err
   590  			}
   591  			sentResume = true
   592  		default:
   593  			return errors.New("invalid JSON payload from child")
   594  		}
   595  
   596  		return nil
   597  	})
   598  
   599  	if !sentRun {
   600  		return fmt.Errorf("error during container init: %w", ierr)
   601  	}
   602  	if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
   603  		return errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process")
   604  	}
   605  	if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
   606  		return &os.PathError{Op: "shutdown", Path: "(init pipe)", Err: err}
   607  	}
   608  
   609  	// Must be done after Shutdown so the child will exit and we can wait for it.
   610  	if ierr != nil {
   611  		_, _ = p.wait()
   612  		return ierr
   613  	}
   614  	return nil
   615  }
   616  
   617  func (p *initProcess) wait() (*os.ProcessState, error) {
   618  	err := p.cmd.Wait()
   619  	// we should kill all processes in cgroup when init is died if we use host PID namespace
   620  	if p.sharePidns {
   621  		_ = signalAllProcesses(p.manager, unix.SIGKILL)
   622  	}
   623  	return p.cmd.ProcessState, err
   624  }
   625  
   626  func (p *initProcess) terminate() error {
   627  	if p.cmd.Process == nil {
   628  		return nil
   629  	}
   630  	err := p.cmd.Process.Kill()
   631  	if _, werr := p.wait(); err == nil {
   632  		err = werr
   633  	}
   634  	return err
   635  }
   636  
   637  func (p *initProcess) startTime() (uint64, error) {
   638  	stat, err := system.Stat(p.pid())
   639  	return stat.StartTime, err
   640  }
   641  
   642  func (p *initProcess) updateSpecState() error {
   643  	s, err := p.container.currentOCIState()
   644  	if err != nil {
   645  		return err
   646  	}
   647  
   648  	p.config.SpecState = s
   649  	return nil
   650  }
   651  
   652  func (p *initProcess) sendConfig() error {
   653  	// send the config to the container's init process, we don't use JSON Encode
   654  	// here because there might be a problem in JSON decoder in some cases, see:
   655  	// https://github.com/docker/docker/issues/14203#issuecomment-174177790
   656  	return utils.WriteJSON(p.messageSockPair.parent, p.config)
   657  }
   658  
   659  func (p *initProcess) createNetworkInterfaces() error {
   660  	for _, config := range p.config.Config.Networks {
   661  		strategy, err := getStrategy(config.Type)
   662  		if err != nil {
   663  			return err
   664  		}
   665  		n := &network{
   666  			Network: *config,
   667  		}
   668  		if err := strategy.create(n, p.pid()); err != nil {
   669  			return err
   670  		}
   671  		p.config.Networks = append(p.config.Networks, n)
   672  	}
   673  	return nil
   674  }
   675  
   676  func (p *initProcess) signal(sig os.Signal) error {
   677  	s, ok := sig.(unix.Signal)
   678  	if !ok {
   679  		return errors.New("os: unsupported signal type")
   680  	}
   681  	return unix.Kill(p.pid(), s)
   682  }
   683  
   684  func (p *initProcess) setExternalDescriptors(newFds []string) {
   685  	p.fds = newFds
   686  }
   687  
   688  func (p *initProcess) forwardChildLogs() chan error {
   689  	return logs.ForwardLogs(p.logFilePair.parent)
   690  }
   691  
   692  func recvSeccompFd(childPid, childFd uintptr) (int, error) {
   693  	pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
   694  	if errno != 0 {
   695  		return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
   696  	}
   697  	defer unix.Close(int(pidfd))
   698  
   699  	seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
   700  	if errno != 0 {
   701  		return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
   702  	}
   703  
   704  	return int(seccompFd), nil
   705  }
   706  
   707  func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
   708  	conn, err := net.Dial("unix", listenerPath)
   709  	if err != nil {
   710  		return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
   711  	}
   712  
   713  	socket, err := conn.(*net.UnixConn).File()
   714  	if err != nil {
   715  		return fmt.Errorf("cannot get seccomp socket: %w", err)
   716  	}
   717  	defer socket.Close()
   718  
   719  	b, err := json.Marshal(state)
   720  	if err != nil {
   721  		return fmt.Errorf("cannot marshall seccomp state: %w", err)
   722  	}
   723  
   724  	err = utils.SendFds(socket, b, fd)
   725  	if err != nil {
   726  		return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
   727  	}
   728  
   729  	return nil
   730  }
   731  
   732  func getPipeFds(pid int) ([]string, error) {
   733  	fds := make([]string, 3)
   734  
   735  	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
   736  	for i := 0; i < 3; i++ {
   737  		// XXX: This breaks if the path is not a valid symlink (which can
   738  		//      happen in certain particularly unlucky mount namespace setups).
   739  		f := filepath.Join(dirPath, strconv.Itoa(i))
   740  		target, err := os.Readlink(f)
   741  		if err != nil {
   742  			// Ignore permission errors, for rootless containers and other
   743  			// non-dumpable processes. if we can't get the fd for a particular
   744  			// file, there's not much we can do.
   745  			if os.IsPermission(err) {
   746  				continue
   747  			}
   748  			return fds, err
   749  		}
   750  		fds[i] = target
   751  	}
   752  	return fds, nil
   753  }
   754  
   755  // InitializeIO creates pipes for use with the process's stdio and returns the
   756  // opposite side for each. Do not use this if you want to have a pseudoterminal
   757  // set up for you by libcontainer (TODO: fix that too).
   758  // TODO: This is mostly unnecessary, and should be handled by clients.
   759  func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
   760  	var fds []uintptr
   761  	i = &IO{}
   762  	// cleanup in case of an error
   763  	defer func() {
   764  		if err != nil {
   765  			for _, fd := range fds {
   766  				_ = unix.Close(int(fd))
   767  			}
   768  		}
   769  	}()
   770  	// STDIN
   771  	r, w, err := os.Pipe()
   772  	if err != nil {
   773  		return nil, err
   774  	}
   775  	fds = append(fds, r.Fd(), w.Fd())
   776  	p.Stdin, i.Stdin = r, w
   777  	// STDOUT
   778  	if r, w, err = os.Pipe(); err != nil {
   779  		return nil, err
   780  	}
   781  	fds = append(fds, r.Fd(), w.Fd())
   782  	p.Stdout, i.Stdout = w, r
   783  	// STDERR
   784  	if r, w, err = os.Pipe(); err != nil {
   785  		return nil, err
   786  	}
   787  	fds = append(fds, r.Fd(), w.Fd())
   788  	p.Stderr, i.Stderr = w, r
   789  	// change ownership of the pipes in case we are in a user namespace
   790  	for _, fd := range fds {
   791  		if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
   792  			return nil, &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
   793  		}
   794  	}
   795  	return i, nil
   796  }
   797  
   798  // initWaiter returns a channel to wait on for making sure
   799  // runc init has finished the initial setup.
   800  func initWaiter(r io.Reader) chan error {
   801  	ch := make(chan error, 1)
   802  	go func() {
   803  		defer close(ch)
   804  
   805  		inited := make([]byte, 1)
   806  		n, err := r.Read(inited)
   807  		if err == nil {
   808  			if n < 1 {
   809  				err = errors.New("short read")
   810  			} else if inited[0] != 0 {
   811  				err = fmt.Errorf("unexpected %d != 0", inited[0])
   812  			} else {
   813  				ch <- nil
   814  				return
   815  			}
   816  		}
   817  		ch <- fmt.Errorf("waiting for init preliminary setup: %w", err)
   818  	}()
   819  
   820  	return ch
   821  }
   822  

View as plain text