...

Source file src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go

Documentation: github.com/opencontainers/runc/libcontainer/seccomp

     1  //go:build cgo && seccomp
     2  // +build cgo,seccomp
     3  
     4  package seccomp
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  
    10  	libseccomp "github.com/seccomp/libseccomp-golang"
    11  	"github.com/sirupsen/logrus"
    12  	"golang.org/x/sys/unix"
    13  
    14  	"github.com/opencontainers/runc/libcontainer/configs"
    15  	"github.com/opencontainers/runc/libcontainer/seccomp/patchbpf"
    16  )
    17  
var (
	// actTrace is the action getAction returns for configs.Trace when the
	// profile supplies no explicit errno value; its return code is EPERM.
	actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
	// actErrno is the action getAction returns for configs.Errno when the
	// profile supplies no explicit errno value; its return code is EPERM.
	actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
)
    22  
const (
	// syscallMaxArguments is the maximum number of arguments a Linux system
	// call can have (6). matchCall uses it to size its per-argument
	// condition counters.
	syscallMaxArguments int = 6
)
    27  
    28  // InitSeccomp installs the seccomp filters to be used in the container as
    29  // specified in config.
    30  // Returns the seccomp file descriptor if any of the filters include a
    31  // SCMP_ACT_NOTIFY action, otherwise returns -1.
    32  func InitSeccomp(config *configs.Seccomp) (int, error) {
    33  	if config == nil {
    34  		return -1, errors.New("cannot initialize Seccomp - nil config passed")
    35  	}
    36  
    37  	defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet)
    38  	if err != nil {
    39  		return -1, errors.New("error initializing seccomp - invalid default action")
    40  	}
    41  
    42  	// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
    43  	apiLevel, _ := libseccomp.GetAPI()
    44  	for _, call := range config.Syscalls {
    45  		if call.Action == configs.Notify {
    46  			if apiLevel < 6 {
    47  				return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel)
    48  			}
    49  
    50  			// We can't allow the write syscall to notify to the seccomp agent.
    51  			// After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain
    52  			// number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we
    53  			// never can write the seccomp fd to the parent and therefore the seccomp agent never receives
    54  			// the seccomp fd and runc is hang during initialization.
    55  			//
    56  			// Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY.
    57  			// Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and
    58  			// send the seccomp fd to the agent (it is another process and not subject to the seccomp
    59  			// filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp
    60  			// agent allows those syscalls to proceed, initialization works just fine and the agent can
    61  			// handle future read()/close() syscalls as it wanted.
    62  			if call.Name == "write" {
    63  				return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall")
    64  			}
    65  		}
    66  	}
    67  
    68  	// See comment on why write is not allowed. The same reason applies, as this can mean handling write too.
    69  	if defaultAction == libseccomp.ActNotify {
    70  		return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action")
    71  	}
    72  
    73  	filter, err := libseccomp.NewFilter(defaultAction)
    74  	if err != nil {
    75  		return -1, fmt.Errorf("error creating filter: %w", err)
    76  	}
    77  
    78  	// Add extra architectures
    79  	for _, arch := range config.Architectures {
    80  		scmpArch, err := libseccomp.GetArchFromString(arch)
    81  		if err != nil {
    82  			return -1, fmt.Errorf("error validating Seccomp architecture: %w", err)
    83  		}
    84  		if err := filter.AddArch(scmpArch); err != nil {
    85  			return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err)
    86  		}
    87  	}
    88  
    89  	// Unset no new privs bit
    90  	if err := filter.SetNoNewPrivsBit(false); err != nil {
    91  		return -1, fmt.Errorf("error setting no new privileges: %w", err)
    92  	}
    93  
    94  	// Add a rule for each syscall
    95  	for _, call := range config.Syscalls {
    96  		if call == nil {
    97  			return -1, errors.New("encountered nil syscall while initializing Seccomp")
    98  		}
    99  
   100  		if err := matchCall(filter, call, defaultAction); err != nil {
   101  			return -1, err
   102  		}
   103  	}
   104  
   105  	seccompFd, err := patchbpf.PatchAndLoad(config, filter)
   106  	if err != nil {
   107  		return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err)
   108  	}
   109  
   110  	return seccompFd, nil
   111  }
   112  
   113  // Convert Libcontainer Action to Libseccomp ScmpAction
   114  func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
   115  	switch act {
   116  	case configs.Kill, configs.KillThread:
   117  		return libseccomp.ActKillThread, nil
   118  	case configs.Errno:
   119  		if errnoRet != nil {
   120  			return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
   121  		}
   122  		return actErrno, nil
   123  	case configs.Trap:
   124  		return libseccomp.ActTrap, nil
   125  	case configs.Allow:
   126  		return libseccomp.ActAllow, nil
   127  	case configs.Trace:
   128  		if errnoRet != nil {
   129  			return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
   130  		}
   131  		return actTrace, nil
   132  	case configs.Log:
   133  		return libseccomp.ActLog, nil
   134  	case configs.Notify:
   135  		return libseccomp.ActNotify, nil
   136  	case configs.KillProcess:
   137  		return libseccomp.ActKillProcess, nil
   138  	default:
   139  		return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
   140  	}
   141  }
   142  
   143  // Convert Libcontainer Operator to Libseccomp ScmpCompareOp
   144  func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
   145  	switch op {
   146  	case configs.EqualTo:
   147  		return libseccomp.CompareEqual, nil
   148  	case configs.NotEqualTo:
   149  		return libseccomp.CompareNotEqual, nil
   150  	case configs.GreaterThan:
   151  		return libseccomp.CompareGreater, nil
   152  	case configs.GreaterThanOrEqualTo:
   153  		return libseccomp.CompareGreaterEqual, nil
   154  	case configs.LessThan:
   155  		return libseccomp.CompareLess, nil
   156  	case configs.LessThanOrEqualTo:
   157  		return libseccomp.CompareLessOrEqual, nil
   158  	case configs.MaskEqualTo:
   159  		return libseccomp.CompareMaskedEqual, nil
   160  	default:
   161  		return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule")
   162  	}
   163  }
   164  
   165  // Convert Libcontainer Arg to Libseccomp ScmpCondition
   166  func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
   167  	cond := libseccomp.ScmpCondition{}
   168  
   169  	if arg == nil {
   170  		return cond, errors.New("cannot convert nil to syscall condition")
   171  	}
   172  
   173  	op, err := getOperator(arg.Op)
   174  	if err != nil {
   175  		return cond, err
   176  	}
   177  
   178  	return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
   179  }
   180  
   181  // Add a rule to match a single syscall
   182  func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error {
   183  	if call == nil || filter == nil {
   184  		return errors.New("cannot use nil as syscall to block")
   185  	}
   186  
   187  	if len(call.Name) == 0 {
   188  		return errors.New("empty string is not a valid syscall")
   189  	}
   190  
   191  	// Convert the call's action to the libseccomp equivalent
   192  	callAct, err := getAction(call.Action, call.ErrnoRet)
   193  	if err != nil {
   194  		return fmt.Errorf("action in seccomp profile is invalid: %w", err)
   195  	}
   196  	if callAct == defAct {
   197  		// This rule is redundant, silently skip it
   198  		// to avoid error from AddRule.
   199  		return nil
   200  	}
   201  
   202  	// If we can't resolve the syscall, assume it is not supported
   203  	// by this kernel. Warn about it, don't error out.
   204  	callNum, err := libseccomp.GetSyscallFromName(call.Name)
   205  	if err != nil {
   206  		logrus.Debugf("unknown seccomp syscall %q ignored", call.Name)
   207  		return nil
   208  	}
   209  
   210  	// Unconditional match - just add the rule
   211  	if len(call.Args) == 0 {
   212  		if err := filter.AddRule(callNum, callAct); err != nil {
   213  			return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err)
   214  		}
   215  	} else {
   216  		// If two or more arguments have the same condition,
   217  		// Revert to old behavior, adding each condition as a separate rule
   218  		argCounts := make([]uint, syscallMaxArguments)
   219  		conditions := []libseccomp.ScmpCondition{}
   220  
   221  		for _, cond := range call.Args {
   222  			newCond, err := getCondition(cond)
   223  			if err != nil {
   224  				return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err)
   225  			}
   226  
   227  			argCounts[cond.Index] += 1
   228  
   229  			conditions = append(conditions, newCond)
   230  		}
   231  
   232  		hasMultipleArgs := false
   233  		for _, count := range argCounts {
   234  			if count > 1 {
   235  				hasMultipleArgs = true
   236  				break
   237  			}
   238  		}
   239  
   240  		if hasMultipleArgs {
   241  			// Revert to old behavior
   242  			// Add each condition attached to a separate rule
   243  			for _, cond := range conditions {
   244  				condArr := []libseccomp.ScmpCondition{cond}
   245  
   246  				if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
   247  					return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
   248  				}
   249  			}
   250  		} else {
   251  			// No conditions share same argument
   252  			// Use new, proper behavior
   253  			if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
   254  				return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err)
   255  			}
   256  		}
   257  	}
   258  
   259  	return nil
   260  }
   261  
   262  // Version returns major, minor, and micro.
   263  func Version() (uint, uint, uint) {
   264  	return libseccomp.GetLibraryVersion()
   265  }
   266  
// Enabled is true if seccomp support is compiled in. This file is only built
// when both the cgo and seccomp build constraints are satisfied.
const Enabled = true
   269  

View as plain text