...

Source file src/github.com/containerd/cgroups/utils.go

Documentation: github.com/containerd/cgroups

     1  /*
     2     Copyright The containerd Authors.
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package cgroups
    18  
    19  import (
    20  	"bufio"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"strconv"
    27  	"strings"
    28  	"sync"
    29  	"syscall"
    30  	"time"
    31  
    32  	units "github.com/docker/go-units"
    33  	specs "github.com/opencontainers/runtime-spec/specs-go"
    34  	"golang.org/x/sys/unix"
    35  )
    36  
    37  var (
    38  	nsOnce    sync.Once
    39  	inUserNS  bool
    40  	checkMode sync.Once
    41  	cgMode    CGMode
    42  )
    43  
    44  const unifiedMountpoint = "/sys/fs/cgroup"
    45  
    46  // CGMode is the cgroups mode of the host system
    47  type CGMode int
    48  
    49  const (
    50  	// Unavailable cgroup mountpoint
    51  	Unavailable CGMode = iota
    52  	// Legacy cgroups v1
    53  	Legacy
    54  	// Hybrid with cgroups v1 and v2 controllers mounted
    55  	Hybrid
    56  	// Unified with only cgroups v2 mounted
    57  	Unified
    58  )
    59  
    60  // Mode returns the cgroups mode running on the host
    61  func Mode() CGMode {
    62  	checkMode.Do(func() {
    63  		var st unix.Statfs_t
    64  		if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
    65  			cgMode = Unavailable
    66  			return
    67  		}
    68  		switch st.Type {
    69  		case unix.CGROUP2_SUPER_MAGIC:
    70  			cgMode = Unified
    71  		default:
    72  			cgMode = Legacy
    73  			if err := unix.Statfs(filepath.Join(unifiedMountpoint, "unified"), &st); err != nil {
    74  				return
    75  			}
    76  			if st.Type == unix.CGROUP2_SUPER_MAGIC {
    77  				cgMode = Hybrid
    78  			}
    79  		}
    80  	})
    81  	return cgMode
    82  }
    83  
    84  // RunningInUserNS detects whether we are currently running in a user namespace.
    85  // Copied from github.com/lxc/lxd/shared/util.go
    86  func RunningInUserNS() bool {
    87  	nsOnce.Do(func() {
    88  		file, err := os.Open("/proc/self/uid_map")
    89  		if err != nil {
    90  			// This kernel-provided file only exists if user namespaces are supported
    91  			return
    92  		}
    93  		defer file.Close()
    94  
    95  		buf := bufio.NewReader(file)
    96  		l, _, err := buf.ReadLine()
    97  		if err != nil {
    98  			return
    99  		}
   100  
   101  		line := string(l)
   102  		var a, b, c int64
   103  		fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
   104  
   105  		/*
   106  		 * We assume we are in the initial user namespace if we have a full
   107  		 * range - 4294967295 uids starting at uid 0.
   108  		 */
   109  		if a == 0 && b == 0 && c == 4294967295 {
   110  			return
   111  		}
   112  		inUserNS = true
   113  	})
   114  	return inUserNS
   115  }
   116  
   117  // defaults returns all known groups
   118  func defaults(root string) ([]Subsystem, error) {
   119  	h, err := NewHugetlb(root)
   120  	if err != nil && !os.IsNotExist(err) {
   121  		return nil, err
   122  	}
   123  	s := []Subsystem{
   124  		NewNamed(root, "systemd"),
   125  		NewFreezer(root),
   126  		NewPids(root),
   127  		NewNetCls(root),
   128  		NewNetPrio(root),
   129  		NewPerfEvent(root),
   130  		NewCpuset(root),
   131  		NewCpu(root),
   132  		NewCpuacct(root),
   133  		NewMemory(root),
   134  		NewBlkio(root),
   135  		NewRdma(root),
   136  	}
   137  	// only add the devices cgroup if we are not in a user namespace
   138  	// because modifications are not allowed
   139  	if !RunningInUserNS() {
   140  		s = append(s, NewDevices(root))
   141  	}
   142  	// add the hugetlb cgroup if error wasn't due to missing hugetlb
   143  	// cgroup support on the host
   144  	if err == nil {
   145  		s = append(s, h)
   146  	}
   147  	return s, nil
   148  }
   149  
   150  // remove will remove a cgroup path handling EAGAIN and EBUSY errors and
   151  // retrying the remove after a exp timeout
   152  func remove(path string) error {
   153  	delay := 10 * time.Millisecond
   154  	for i := 0; i < 5; i++ {
   155  		if i != 0 {
   156  			time.Sleep(delay)
   157  			delay *= 2
   158  		}
   159  		if err := os.RemoveAll(path); err == nil {
   160  			return nil
   161  		}
   162  	}
   163  	return fmt.Errorf("cgroups: unable to remove path %q", path)
   164  }
   165  
   166  // readPids will read all the pids of processes or tasks in a cgroup by the provided path
   167  func readPids(path string, subsystem Name, pType procType) ([]Process, error) {
   168  	f, err := os.Open(filepath.Join(path, pType))
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	defer f.Close()
   173  	var (
   174  		out []Process
   175  		s   = bufio.NewScanner(f)
   176  	)
   177  	for s.Scan() {
   178  		if t := s.Text(); t != "" {
   179  			pid, err := strconv.Atoi(t)
   180  			if err != nil {
   181  				return nil, err
   182  			}
   183  			out = append(out, Process{
   184  				Pid:       pid,
   185  				Subsystem: subsystem,
   186  				Path:      path,
   187  			})
   188  		}
   189  	}
   190  	if err := s.Err(); err != nil {
   191  		// failed to read all pids?
   192  		return nil, err
   193  	}
   194  	return out, nil
   195  }
   196  
   197  func hugePageSizes() ([]string, error) {
   198  	var (
   199  		pageSizes []string
   200  		sizeList  = []string{"B", "KB", "MB", "GB", "TB", "PB"}
   201  	)
   202  	files, err := os.ReadDir("/sys/kernel/mm/hugepages")
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	for _, st := range files {
   207  		nameArray := strings.Split(st.Name(), "-")
   208  		pageSize, err := units.RAMInBytes(nameArray[1])
   209  		if err != nil {
   210  			return nil, err
   211  		}
   212  		pageSizes = append(pageSizes, units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList))
   213  	}
   214  	return pageSizes, nil
   215  }
   216  
   217  func readUint(path string) (uint64, error) {
   218  	v, err := os.ReadFile(path)
   219  	if err != nil {
   220  		return 0, err
   221  	}
   222  	return parseUint(strings.TrimSpace(string(v)), 10, 64)
   223  }
   224  
   225  func parseUint(s string, base, bitSize int) (uint64, error) {
   226  	v, err := strconv.ParseUint(s, base, bitSize)
   227  	if err != nil {
   228  		intValue, intErr := strconv.ParseInt(s, base, bitSize)
   229  		// 1. Handle negative values greater than MinInt64 (and)
   230  		// 2. Handle negative values lesser than MinInt64
   231  		if intErr == nil && intValue < 0 {
   232  			return 0, nil
   233  		} else if intErr != nil &&
   234  			intErr.(*strconv.NumError).Err == strconv.ErrRange &&
   235  			intValue < 0 {
   236  			return 0, nil
   237  		}
   238  		return 0, err
   239  	}
   240  	return v, nil
   241  }
   242  
   243  func parseKV(raw string) (string, uint64, error) {
   244  	parts := strings.Fields(raw)
   245  	switch len(parts) {
   246  	case 2:
   247  		v, err := parseUint(parts[1], 10, 64)
   248  		if err != nil {
   249  			return "", 0, err
   250  		}
   251  		return parts[0], v, nil
   252  	default:
   253  		return "", 0, ErrInvalidFormat
   254  	}
   255  }
   256  
   257  // ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
   258  // or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
   259  //   "cpu": "/user.slice/user-1000.slice"
   260  //   "pids": "/user.slice/user-1000.slice"
   261  // etc.
   262  //
   263  // The resulting map does not have an element for cgroup v2 unified hierarchy.
   264  // Use ParseCgroupFileUnified to get the unified path.
   265  func ParseCgroupFile(path string) (map[string]string, error) {
   266  	x, _, err := ParseCgroupFileUnified(path)
   267  	return x, err
   268  }
   269  
   270  // ParseCgroupFileUnified returns legacy subsystem paths as the first value,
   271  // and returns the unified path as the second value.
   272  func ParseCgroupFileUnified(path string) (map[string]string, string, error) {
   273  	f, err := os.Open(path)
   274  	if err != nil {
   275  		return nil, "", err
   276  	}
   277  	defer f.Close()
   278  	return parseCgroupFromReaderUnified(f)
   279  }
   280  
   281  func parseCgroupFromReaderUnified(r io.Reader) (map[string]string, string, error) {
   282  	var (
   283  		cgroups = make(map[string]string)
   284  		unified = ""
   285  		s       = bufio.NewScanner(r)
   286  	)
   287  	for s.Scan() {
   288  		var (
   289  			text  = s.Text()
   290  			parts = strings.SplitN(text, ":", 3)
   291  		)
   292  		if len(parts) < 3 {
   293  			return nil, unified, fmt.Errorf("invalid cgroup entry: %q", text)
   294  		}
   295  		for _, subs := range strings.Split(parts[1], ",") {
   296  			if subs == "" {
   297  				unified = parts[2]
   298  			} else {
   299  				cgroups[subs] = parts[2]
   300  			}
   301  		}
   302  	}
   303  	if err := s.Err(); err != nil {
   304  		return nil, unified, err
   305  	}
   306  	return cgroups, unified, nil
   307  }
   308  
   309  func getCgroupDestination(subsystem string) (string, error) {
   310  	f, err := os.Open("/proc/self/mountinfo")
   311  	if err != nil {
   312  		return "", err
   313  	}
   314  	defer f.Close()
   315  	s := bufio.NewScanner(f)
   316  	for s.Scan() {
   317  		fields := strings.Split(s.Text(), " ")
   318  		if len(fields) < 10 {
   319  			// broken mountinfo?
   320  			continue
   321  		}
   322  		if fields[len(fields)-3] != "cgroup" {
   323  			continue
   324  		}
   325  		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
   326  			if opt == subsystem {
   327  				return fields[3], nil
   328  			}
   329  		}
   330  	}
   331  	if err := s.Err(); err != nil {
   332  		return "", err
   333  	}
   334  	return "", ErrNoCgroupMountDestination
   335  }
   336  
   337  func pathers(subystems []Subsystem) []pather {
   338  	var out []pather
   339  	for _, s := range subystems {
   340  		if p, ok := s.(pather); ok {
   341  			out = append(out, p)
   342  		}
   343  	}
   344  	return out
   345  }
   346  
   347  func initializeSubsystem(s Subsystem, path Path, resources *specs.LinuxResources) error {
   348  	if c, ok := s.(creator); ok {
   349  		p, err := path(s.Name())
   350  		if err != nil {
   351  			return err
   352  		}
   353  		if err := c.Create(p, resources); err != nil {
   354  			return err
   355  		}
   356  	} else if c, ok := s.(pather); ok {
   357  		p, err := path(s.Name())
   358  		if err != nil {
   359  			return err
   360  		}
   361  		// do the default create if the group does not have a custom one
   362  		if err := os.MkdirAll(c.Path(p), defaultDirPerm); err != nil {
   363  			return err
   364  		}
   365  	}
   366  	return nil
   367  }
   368  
   369  func cleanPath(path string) string {
   370  	if path == "" {
   371  		return ""
   372  	}
   373  	path = filepath.Clean(path)
   374  	if !filepath.IsAbs(path) {
   375  		path, _ = filepath.Rel(string(os.PathSeparator), filepath.Clean(string(os.PathSeparator)+path))
   376  	}
   377  	return path
   378  }
   379  
   380  func retryingWriteFile(path string, data []byte, mode os.FileMode) error {
   381  	// Retry writes on EINTR; see:
   382  	//    https://github.com/golang/go/issues/38033
   383  	for {
   384  		err := os.WriteFile(path, data, mode)
   385  		if err == nil {
   386  			return nil
   387  		} else if !errors.Is(err, syscall.EINTR) {
   388  			return err
   389  		}
   390  	}
   391  }
   392  

View as plain text