...

Source file src/github.com/opencontainers/runc/libcontainer/configs/config.go

Documentation: github.com/opencontainers/runc/libcontainer/configs

     1  package configs
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"os/exec"
     8  	"time"
     9  
    10  	"github.com/sirupsen/logrus"
    11  
    12  	"github.com/opencontainers/runc/libcontainer/devices"
    13  	"github.com/opencontainers/runtime-spec/specs-go"
    14  )
    15  
// Rlimit describes a single resource limit to set in the container.
//
// Type selects the resource being limited (presumably one of the platform's
// RLIMIT_* numbers — TODO confirm against the code that applies it), while
// Hard and Soft carry the two values of that limit.
type Rlimit struct {
	Type int    `json:"type"`
	Hard uint64 `json:"hard"`
	Soft uint64 `json:"soft"`
}
    21  
// IDMap represents UID/GID Mappings for User Namespaces.
//
// NOTE(review): an entry presumably maps the Size consecutive IDs starting at
// ContainerID inside the namespace onto the IDs starting at HostID outside of
// it, mirroring the kernel's uid_map/gid_map format — confirm against the
// code that consumes these mappings.
type IDMap struct {
	ContainerID int64 `json:"container_id"`
	HostID      int64 `json:"host_id"`
	Size        int64 `json:"size"`
}
    28  
// Seccomp represents syscall restrictions.
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
//
// Syscalls holds the individual rules and DefaultAction is the action taken
// when none of them matches. DefaultErrnoRet is a pointer so that "not set"
// (nil) can be told apart from an explicit value; it is presumably the errno
// returned by the default action — TODO confirm. ListenerPath and
// ListenerMetadata are left out of the JSON output when empty; they are
// presumably only relevant to the Notify action — TODO confirm.
type Seccomp struct {
	DefaultAction    Action     `json:"default_action"`
	Architectures    []string   `json:"architectures"`
	Syscalls         []*Syscall `json:"syscalls"`
	DefaultErrnoRet  *uint      `json:"default_errno_ret"`
	ListenerPath     string     `json:"listener_path,omitempty"`
	ListenerMetadata string     `json:"listener_metadata,omitempty"`
}
    41  
    42  // Action is taken upon rule match in Seccomp
    43  type Action int
    44  
    45  const (
    46  	Kill Action = iota + 1
    47  	Errno
    48  	Trap
    49  	Allow
    50  	Trace
    51  	Log
    52  	Notify
    53  	KillThread
    54  	KillProcess
    55  )
    56  
    57  // Operator is a comparison operator to be used when matching syscall arguments in Seccomp
    58  type Operator int
    59  
    60  const (
    61  	EqualTo Operator = iota + 1
    62  	NotEqualTo
    63  	GreaterThan
    64  	GreaterThanOrEqualTo
    65  	LessThan
    66  	LessThanOrEqualTo
    67  	MaskEqualTo
    68  )
    69  
// Arg is a rule to match a specific syscall argument in Seccomp.
//
// Index selects which argument of the syscall is inspected and Op is the
// comparison applied to it, using Value as the operand.
// NOTE(review): ValueTwo is presumably only consulted by operators that need a
// second operand (such as MaskEqualTo) — confirm against the seccomp code.
type Arg struct {
	Index    uint     `json:"index"`
	Value    uint64   `json:"value"`
	ValueTwo uint64   `json:"value_two"`
	Op       Operator `json:"op"`
}
    77  
// Syscall is a rule to match a syscall in Seccomp.
//
// Name identifies the syscall, Action is what happens when the rule matches,
// and Args optionally narrows the match to particular argument values.
// ErrnoRet is a pointer so that "not set" (nil) can be told apart from an
// explicit value. Note that its JSON key is the camelCase "errnoRet", unlike
// the snake_case keys used by the neighbouring types.
type Syscall struct {
	Name     string `json:"name"`
	Action   Action `json:"action"`
	ErrnoRet *uint  `json:"errnoRet"`
	Args     []*Arg `json:"args"`
}
    85  
    86  // TODO Windows. Many of these fields should be factored out into those parts
    87  // which are common across platforms, and those which are platform specific.
    88  
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs.
	// This is a common option when the container is running in a ramdisk.
	NoPivotRoot bool `json:"no_pivot_root"`

	// ParentDeathSignal specifies the signal that is sent to the container's process in the case
	// that the parent process dies.
	ParentDeathSignal int `json:"parent_death_signal"`

	// Rootfs is the path to a directory containing the container's root filesystem.
	Rootfs string `json:"rootfs"`

	// Umask is the umask to use inside of the container.
	Umask *uint32 `json:"umask"`

	// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
	// bind mounts are writable.
	Readonlyfs bool `json:"readonlyfs"`

	// RootPropagation specifies the mount propagation flags to be applied to /.
	RootPropagation int `json:"rootPropagation"`

	// Mounts specify additional source and destination paths that will be mounted inside the container's
	// rootfs and mount namespace if specified.
	Mounts []*Mount `json:"mounts"`

	// Devices lists the device nodes that should be automatically created within the container
	// upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
	Devices []*devices.Device `json:"devices"`

	// MountLabel is presumably the security (e.g. SELinux) label applied to the
	// container's mounts — TODO confirm against the code that consumes it.
	MountLabel string `json:"mount_label"`

	// Hostname optionally sets the container's hostname if provided.
	Hostname string `json:"hostname"`

	// Namespaces specifies the container's namespaces that it should set up when cloning the init process.
	// If a namespace is not provided, that namespace is shared from the container's parent process.
	Namespaces Namespaces `json:"namespaces"`

	// Capabilities specify the capabilities to keep when executing the process inside the container.
	// All capabilities not specified will be dropped from the process's capability mask.
	Capabilities *Capabilities `json:"capabilities"`

	// Networks specifies the container's network setup to be created.
	Networks []*Network `json:"networks"`

	// Routes can be specified to create entries in the route table as the container is started.
	Routes []*Route `json:"routes"`

	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
	// placed into to limit the resources the container has available.
	Cgroups *Cgroup `json:"cgroups"`

	// AppArmorProfile specifies the profile to apply to the process running in the container and is
	// changed at the time the process is execed.
	AppArmorProfile string `json:"apparmor_profile,omitempty"`

	// ProcessLabel specifies the label to apply to the process running in the container.  It is
	// commonly used by selinux.
	ProcessLabel string `json:"process_label,omitempty"`

	// Rlimits specifies the resource limits, such as max open files, to set in the container.
	// If Rlimits are not set, the container will inherit rlimits from the parent process.
	Rlimits []Rlimit `json:"rlimits,omitempty"`

	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
	// for a process. Valid values are in the range [-1000, 1000], where processes with
	// higher scores are preferred for being killed. If it is unset then we don't touch the current
	// value.
	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
	OomScoreAdj *int `json:"oom_score_adj,omitempty"`

	// UidMappings is an array of User ID mappings for User Namespaces.
	UidMappings []IDMap `json:"uid_mappings"`

	// GidMappings is an array of Group ID mappings for User Namespaces.
	GidMappings []IDMap `json:"gid_mappings"`

	// MaskPaths specifies paths within the container's rootfs to mask over with a bind
	// mount pointing to /dev/null so as to prevent reads of the file.
	MaskPaths []string `json:"mask_paths"`

	// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
	// so that any writes to these files are prevented.
	ReadonlyPaths []string `json:"readonly_paths"`

	// Sysctl is a map of properties and their values. It is the equivalent of using
	// sysctl -w my.property.name value in Linux.
	Sysctl map[string]string `json:"sysctl"`

	// Seccomp allows actions to be taken whenever a syscall is made within the container.
	// A number of rules are given, each having an action to be taken if a syscall matches it.
	// A default action to be taken if no rules match is also given.
	Seccomp *Seccomp `json:"seccomp"`

	// NoNewPrivileges controls whether processes in the container can gain additional privileges.
	NoNewPrivileges bool `json:"no_new_privileges,omitempty"`

	// Hooks are a collection of actions to perform at various container lifecycle events.
	// CommandHooks are serialized to JSON, but other hooks are not.
	// The field carries no json tag, so it is stored under the key "Hooks".
	Hooks Hooks

	// Version is the version of opencontainer specification that is supported.
	Version string `json:"version"`

	// Labels are user defined metadata that is stored in the config and populated on the state.
	Labels []string `json:"labels"`

	// NoNewKeyring will not allocate a new session keyring for the container.  It will use the
	// caller's keyring in this case.
	NoNewKeyring bool `json:"no_new_keyring"`

	// IntelRdt specifies settings for Intel RDT group that the container is placed into
	// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available.
	IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`

	// RootlessEUID is set when the runc was launched with non-zero EUID.
	// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
	// When RootlessEUID is set, runc creates a new userns for the container.
	// (config.json needs to contain userns settings)
	RootlessEUID bool `json:"rootless_euid,omitempty"`

	// RootlessCgroups is set when unlikely to have the full access to cgroups.
	// When RootlessCgroups is set, cgroups errors are ignored.
	RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
   215  
   216  type (
   217  	HookName string
   218  	HookList []Hook
   219  	Hooks    map[HookName]HookList
   220  )
   221  
// Names of the supported hook points. The string values are the keys used
// when Hooks is converted to and from JSON.
const (
	// Prestart commands are executed after the container namespaces are created,
	// but before the user supplied command is executed from init.
	// Prestart commands are called in the Runtime namespace.
	// Note: this hook is now deprecated.
	Prestart HookName = "prestart"

	// CreateRuntime commands MUST be called as part of the create operation after
	// the runtime environment has been created but before the pivot_root has been executed.
	// CreateRuntime is called immediately after the deprecated Prestart hook.
	// CreateRuntime commands are called in the Runtime namespace.
	CreateRuntime HookName = "createRuntime"

	// CreateContainer commands MUST be called as part of the create operation after
	// the runtime environment has been created but before the pivot_root has been executed.
	// CreateContainer commands are called in the Container namespace.
	CreateContainer HookName = "createContainer"

	// StartContainer commands MUST be called as part of the start operation and before
	// the container process is started.
	// StartContainer commands are called in the Container namespace.
	StartContainer HookName = "startContainer"

	// Poststart commands are executed after the container init process starts.
	// Poststart commands are called in the Runtime namespace.
	Poststart HookName = "poststart"

	// Poststop commands are executed after the container init process exits.
	// Poststop commands are called in the Runtime namespace.
	Poststop HookName = "poststop"
)
   253  
   254  // KnownHookNames returns the known hook names.
   255  // Used by `runc features`.
   256  func KnownHookNames() []string {
   257  	return []string{
   258  		string(Prestart), // deprecated
   259  		string(CreateRuntime),
   260  		string(CreateContainer),
   261  		string(StartContainer),
   262  		string(Poststart),
   263  		string(Poststop),
   264  	}
   265  }
   266  
// Capabilities holds the capability sets for the process inside the
// container; each field lists capability names for one of the Linux
// capability sets described in capabilities(7).
//
// The fields carry no json tags, so they are serialized under their Go field
// names ("Bounding", "Effective", ...).
type Capabilities struct {
	// Bounding is the set that limits which capabilities can be gained during execve.
	Bounding []string
	// Effective is the set of capabilities checked by the kernel.
	Effective []string
	// Inheritable is the capabilities preserved across execve.
	Inheritable []string
	// Permitted is the limiting superset for effective capabilities.
	Permitted []string
	// Ambient is the ambient set of capabilities that are kept.
	Ambient []string
}
   279  
   280  func (hooks HookList) RunHooks(state *specs.State) error {
   281  	for i, h := range hooks {
   282  		if err := h.Run(state); err != nil {
   283  			return fmt.Errorf("error running hook #%d: %w", i, err)
   284  		}
   285  	}
   286  
   287  	return nil
   288  }
   289  
   290  func (hooks *Hooks) UnmarshalJSON(b []byte) error {
   291  	var state map[HookName][]CommandHook
   292  
   293  	if err := json.Unmarshal(b, &state); err != nil {
   294  		return err
   295  	}
   296  
   297  	*hooks = Hooks{}
   298  	for n, commandHooks := range state {
   299  		if len(commandHooks) == 0 {
   300  			continue
   301  		}
   302  
   303  		(*hooks)[n] = HookList{}
   304  		for _, h := range commandHooks {
   305  			(*hooks)[n] = append((*hooks)[n], h)
   306  		}
   307  	}
   308  
   309  	return nil
   310  }
   311  
   312  func (hooks *Hooks) MarshalJSON() ([]byte, error) {
   313  	serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
   314  		for _, hook := range hooks {
   315  			switch chook := hook.(type) {
   316  			case CommandHook:
   317  				serializableHooks = append(serializableHooks, chook)
   318  			default:
   319  				logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
   320  			}
   321  		}
   322  
   323  		return serializableHooks
   324  	}
   325  
   326  	return json.Marshal(map[string]interface{}{
   327  		"prestart":        serialize((*hooks)[Prestart]),
   328  		"createRuntime":   serialize((*hooks)[CreateRuntime]),
   329  		"createContainer": serialize((*hooks)[CreateContainer]),
   330  		"startContainer":  serialize((*hooks)[StartContainer]),
   331  		"poststart":       serialize((*hooks)[Poststart]),
   332  		"poststop":        serialize((*hooks)[Poststop]),
   333  	})
   334  }
   335  
// Hook is a single action that can be run at a container lifecycle event.
// This file provides two implementations: FuncHook, which calls a Go
// function, and CommandHook, which executes an external command.
type Hook interface {
	// Run executes the hook with the provided state.
	Run(*specs.State) error
}
   340  
   341  // NewFunctionHook will call the provided function when the hook is run.
   342  func NewFunctionHook(f func(*specs.State) error) FuncHook {
   343  	return FuncHook{
   344  		run: f,
   345  	}
   346  }
   347  
// FuncHook is a Hook backed by a Go function. Its only field is unexported,
// so values are meant to be created with NewFunctionHook.
type FuncHook struct {
	run func(*specs.State) error
}
   351  
   352  func (f FuncHook) Run(s *specs.State) error {
   353  	return f.run(s)
   354  }
   355  
// Command describes an external program to be executed as a hook.
//
// Path, Args and Env are handed unchanged to the exec.Cmd that Run builds,
// so Args is expected to include the program name as its first element, as
// exec.Cmd requires. Timeout, when non-nil, bounds how long Run waits for the
// program to finish; nil means no limit. Dir is presumably the working
// directory intended for the program — see Run for how it is applied.
type Command struct {
	Path    string         `json:"path"`
	Args    []string       `json:"args"`
	Env     []string       `json:"env"`
	Dir     string         `json:"dir"`
	Timeout *time.Duration `json:"timeout"`
}
   363  
   364  // NewCommandHook will execute the provided command when the hook is run.
   365  func NewCommandHook(cmd Command) CommandHook {
   366  	return CommandHook{
   367  		Command: cmd,
   368  	}
   369  }
   370  
// CommandHook is a Hook that executes an external command. It embeds Command
// and thereby inherits both its fields (which is also how it is laid out in
// JSON) and its Run method. It is the only kind of hook that
// Hooks.MarshalJSON serializes.
type CommandHook struct {
	Command
}
   374  
   375  func (c Command) Run(s *specs.State) error {
   376  	b, err := json.Marshal(s)
   377  	if err != nil {
   378  		return err
   379  	}
   380  	var stdout, stderr bytes.Buffer
   381  	cmd := exec.Cmd{
   382  		Path:   c.Path,
   383  		Args:   c.Args,
   384  		Env:    c.Env,
   385  		Stdin:  bytes.NewReader(b),
   386  		Stdout: &stdout,
   387  		Stderr: &stderr,
   388  	}
   389  	if err := cmd.Start(); err != nil {
   390  		return err
   391  	}
   392  	errC := make(chan error, 1)
   393  	go func() {
   394  		err := cmd.Wait()
   395  		if err != nil {
   396  			err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
   397  		}
   398  		errC <- err
   399  	}()
   400  	var timerCh <-chan time.Time
   401  	if c.Timeout != nil {
   402  		timer := time.NewTimer(*c.Timeout)
   403  		defer timer.Stop()
   404  		timerCh = timer.C
   405  	}
   406  	select {
   407  	case err := <-errC:
   408  		return err
   409  	case <-timerCh:
   410  		_ = cmd.Process.Kill()
   411  		<-errC
   412  		return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
   413  	}
   414  }
   415  

View as plain text