//go:build linux package cgroups import ( "context" "maps" "path/filepath" "slices" "strconv" "github.com/opencontainers/runc/libcontainer/devices" edgedevices "edge-infra.dev/pkg/lib/kernel/devices" "edge-infra.dev/pkg/sds/devices/logger" "edge-infra.dev/pkg/lib/kernel/cgroup" ) var ( sysFsCgroupPath = "/sys/fs/cgroup" // kubernetesSlice is the kubernetes pod slice for cgroups kubernetesSlice = "kubepods.slice" ) // default virtual allowed devices (virtual tun, mem, block) var defaultDeviceRules = []*devices.Rule{ {Type: devices.CharDevice, Major: 136, Minor: -1, Permissions: "rwm", Allow: true}, {Type: devices.CharDevice, Major: -1, Minor: -1, Permissions: "m", Allow: true}, {Type: devices.BlockDevice, Major: -1, Minor: -1, Permissions: "m", Allow: true}, } type CgroupRequest interface { Apply(ctx context.Context) } type request struct { // name of container containerName string // container id containerID string // namespace of containers pod namespace string // device classes that applies to request devices map[string]edgedevices.Device // list of container cgroups paths to update cgroupPath string // isContainerizedVM indicates if container is vm compute container isContainerizedVM bool } func NewCgroupRequest(name, containerID, namespace, cgroupPath string, devices map[string]edgedevices.Device, isContainerizedVM bool) CgroupRequest { return request{ containerName: name, containerID: containerID, namespace: namespace, cgroupPath: cgroupPath, devices: devices, isContainerizedVM: isContainerizedVM, } } // Apply takes the cgroup request and attempts to update the cgroups // rules using containerds cgroups client func (req request) Apply(ctx context.Context) { log := logger.FromContext(ctx) rules := slices.Clone(defaultDeviceRules) rules = append(rules, CgroupRules(ctx, req.devices)...) if req.isContainerizedVM { rules = addVMDeviceRules(rules) } log.Debug("applying rules to container", "isVirtualMachine", req.isContainerizedVM, "rules", rules, "devices", slices.Collect(maps.Keys(req.devices))) path := filepath.Join(sysFsCgroupPath, kubernetesSlice, req.cgroupPath) if err := cgroup.ApplyCgroups(path, rules); err != nil { log.Error("failed to apply cgroups", "error", err) return } log.Info("applied cgroups to container") } // CgroupRules converts list of devices to unix device cgroup rules func CgroupRules(ctx context.Context, deviceMap map[string]edgedevices.Device) []*devices.Rule { log := logger.FromContext(ctx) rules := []*devices.Rule{} for _, dev := range deviceMap { node, err := dev.Node() if err != nil { log.Log(ctx, logger.LevelTrace, "failed to fetch device node", "sys path", dev.Path()) continue } devType, err := node.Type() if err != nil || devType == "" { log.Log(ctx, logger.LevelTrace, "failed to fetch device node type", "node path", node.Path()) continue } major, exists, err := dev.Property("MAJOR") if err != nil || !exists { log.Log(ctx, logger.LevelTrace, "failed to fetch device node major number", "node path", node.Path()) continue } minor, exists, err := dev.Property("MINOR") if err != nil || !exists { log.Log(ctx, logger.LevelTrace, "failed to fetch device node minor number", "node path", node.Path()) continue } majorInt, err := strconv.ParseInt(major, 10, 64) if err != nil { log.Log(ctx, logger.LevelTrace, "failed to fetch device node major number", "node path", node.Path(), "number", major) continue } minorInt, err := strconv.ParseInt(minor, 10, 64) if err != nil { log.Log(ctx, logger.LevelTrace, "failed to fetch device node minor number", "node path", node.Path(), "number", major) continue } rules = append(rules, &devices.Rule{ Type: convertDeviceType(devType), Major: majorInt, Minor: minorInt, Permissions: "rwm", Allow: true, }) } return rules } // converts device type i.e. "c" (char) to devices.Type func convertDeviceType(devType string) devices.Type { switch devType { case string(devices.BlockDevice): return devices.BlockDevice case string(devices.FifoDevice): return devices.FifoDevice default: return devices.CharDevice } } // addVMDeviceRules adds access to pty and vfio devices func addVMDeviceRules(rules []*devices.Rule) []*devices.Rule { // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/devices.txt?h=v5.14#n2084 const ptyFirstMajor int64 = 136 const ptyMajors int64 = 16 for i := int64(0); i < ptyMajors; i++ { rules = append(rules, &devices.Rule{ Type: devices.CharDevice, Major: ptyFirstMajor + i, Minor: -1, Permissions: "rwm", Allow: true, }) } // /dev/vfio/vfio device rules = append(rules, &devices.Rule{ Type: devices.CharDevice, Major: 10, Minor: 196, Permissions: "rwm", Allow: true, }) return rules }