package fs import ( "bufio" "errors" "fmt" "math" "os" "path/filepath" "strconv" "strings" "golang.org/x/sys/unix" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" ) const ( cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" cgroupMemoryLimit = "memory.limit_in_bytes" cgroupMemoryUsage = "memory.usage_in_bytes" cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" ) type MemoryGroup struct{} func (s *MemoryGroup) Name() string { return "memory" } func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error { return apply(path, pid) } func setMemory(path string, val int64) error { if val == 0 { return nil } err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) if !errors.Is(err, unix.EBUSY) { return err } // EBUSY means the kernel can't set new limit as it's too low // (lower than the current usage). Return more specific error. usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage) if err != nil { return err } max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage) if err != nil { return err } return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max) } func setSwap(path string, val int64) error { if val == 0 { return nil } return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) } func setMemoryAndSwap(path string, r *configs.Resources) error { // If the memory update is set to -1 and the swap is not explicitly // set, we should also set swap to -1, it means unlimited memory. if r.Memory == -1 && r.MemorySwap == 0 { // Only set swap if it's enabled in kernel if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { r.MemorySwap = -1 } } // When memory and swap memory are both set, we need to handle the cases // for updating container. if r.Memory != 0 && r.MemorySwap != 0 { curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit) if err != nil { return err } // When update memory limit, we should adapt the write sequence // for memory and swap memory, so it won't fail because the new // value and the old value don't fit kernel's validation. if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) { if err := setSwap(path, r.MemorySwap); err != nil { return err } if err := setMemory(path, r.Memory); err != nil { return err } return nil } } if err := setMemory(path, r.Memory); err != nil { return err } if err := setSwap(path, r.MemorySwap); err != nil { return err } return nil } func (s *MemoryGroup) Set(path string, r *configs.Resources) error { if err := setMemoryAndSwap(path, r); err != nil { return err } // ignore KernelMemory and KernelMemoryTCP if r.MemoryReservation != 0 { if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil { return err } } if r.OomKillDisable { if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil { return err } } if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 { return nil } else if *r.MemorySwappiness <= 100 { if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil { return err } } else { return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness) } return nil } func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { const file = "memory.stat" statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY) if err != nil { if os.IsNotExist(err) { return nil } return err } defer statsFile.Close() sc := bufio.NewScanner(statsFile) for sc.Scan() { t, v, err := fscommon.ParseKeyValue(sc.Text()) if err != nil { return &parseError{Path: path, File: file, Err: err} } stats.MemoryStats.Stats[t] = v } stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] memoryUsage, err := getMemoryData(path, "") if err != nil { return err } stats.MemoryStats.Usage = memoryUsage swapUsage, err := getMemoryData(path, "memsw") if err != nil { return err } stats.MemoryStats.SwapUsage = swapUsage stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{ Usage: swapUsage.Usage - memoryUsage.Usage, Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt, } kernelUsage, err := getMemoryData(path, "kmem") if err != nil { return err } stats.MemoryStats.KernelUsage = kernelUsage kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") if err != nil { return err } stats.MemoryStats.KernelTCPUsage = kernelTCPUsage value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy") if err != nil { return err } if value == 1 { stats.MemoryStats.UseHierarchy = true } pagesByNUMA, err := getPageUsageByNUMA(path) if err != nil { return err } stats.MemoryStats.PageUsageByNUMA = pagesByNUMA return nil } func getMemoryData(path, name string) (cgroups.MemoryData, error) { memoryData := cgroups.MemoryData{} moduleName := "memory" if name != "" { moduleName = "memory." + name } var ( usage = moduleName + ".usage_in_bytes" maxUsage = moduleName + ".max_usage_in_bytes" failcnt = moduleName + ".failcnt" limit = moduleName + ".limit_in_bytes" ) value, err := fscommon.GetCgroupParamUint(path, usage) if err != nil { if name != "" && os.IsNotExist(err) { // Ignore ENOENT as swap and kmem controllers // are optional in the kernel. return cgroups.MemoryData{}, nil } return cgroups.MemoryData{}, err } memoryData.Usage = value value, err = fscommon.GetCgroupParamUint(path, maxUsage) if err != nil { return cgroups.MemoryData{}, err } memoryData.MaxUsage = value value, err = fscommon.GetCgroupParamUint(path, failcnt) if err != nil { return cgroups.MemoryData{}, err } memoryData.Failcnt = value value, err = fscommon.GetCgroupParamUint(path, limit) if err != nil { if name == "kmem" && os.IsNotExist(err) { // Ignore ENOENT as kmem.limit_in_bytes has // been removed in newer kernels. return memoryData, nil } return cgroups.MemoryData{}, err } memoryData.Limit = value return memoryData, nil } func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) { const ( maxColumns = math.MaxUint8 + 1 file = "memory.numa_stat" ) stats := cgroups.PageUsageByNUMA{} fd, err := cgroups.OpenFile(path, file, os.O_RDONLY) if os.IsNotExist(err) { return stats, nil } else if err != nil { return stats, err } defer fd.Close() // File format is documented in linux/Documentation/cgroup-v1/memory.txt // and it looks like this: // // total= N0= N1= ... // file= N0= N1= ... // anon= N0= N1= ... // unevictable= N0= N1= ... // hierarchical_= N0= N1= ... scanner := bufio.NewScanner(fd) for scanner.Scan() { var field *cgroups.PageStats line := scanner.Text() columns := strings.SplitN(line, " ", maxColumns) for i, column := range columns { byNode := strings.SplitN(column, "=", 2) // Some custom kernels have non-standard fields, like // numa_locality 0 0 0 0 0 0 0 0 0 0 // numa_exectime 0 if len(byNode) < 2 { if i == 0 { // Ignore/skip those. break } else { // The first column was already validated, // so be strict to the rest. return stats, malformedLine(path, file, line) } } key, val := byNode[0], byNode[1] if i == 0 { // First column: key is name, val is total. field = getNUMAField(&stats, key) if field == nil { // unknown field (new kernel?) break } field.Total, err = strconv.ParseUint(val, 0, 64) if err != nil { return stats, &parseError{Path: path, File: file, Err: err} } field.Nodes = map[uint8]uint64{} } else { // Subsequent columns: key is N, val is usage. if len(key) < 2 || key[0] != 'N' { // This is definitely an error. return stats, malformedLine(path, file, line) } n, err := strconv.ParseUint(key[1:], 10, 8) if err != nil { return stats, &parseError{Path: path, File: file, Err: err} } usage, err := strconv.ParseUint(val, 10, 64) if err != nil { return stats, &parseError{Path: path, File: file, Err: err} } field.Nodes[uint8(n)] = usage } } } if err := scanner.Err(); err != nil { return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err} } return stats, nil } func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats { switch name { case "total": return &stats.Total case "file": return &stats.File case "anon": return &stats.Anon case "unevictable": return &stats.Unevictable case "hierarchical_total": return &stats.Hierarchical.Total case "hierarchical_file": return &stats.Hierarchical.File case "hierarchical_anon": return &stats.Hierarchical.Anon case "hierarchical_unevictable": return &stats.Hierarchical.Unevictable } return nil }