...

Source file src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go

Documentation: github.com/opencontainers/runc/libcontainer/cgroups/fs

     1  package fs
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/opencontainers/runc/libcontainer/cgroups"
    11  	"github.com/opencontainers/runc/libcontainer/configs"
    12  	"github.com/sirupsen/logrus"
    13  	"golang.org/x/sys/unix"
    14  )
    15  
    16  type FreezerGroup struct{}
    17  
    18  func (s *FreezerGroup) Name() string {
    19  	return "freezer"
    20  }
    21  
    22  func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
    23  	return apply(path, pid)
    24  }
    25  
    26  func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
    27  	switch r.Freezer {
    28  	case configs.Frozen:
    29  		defer func() {
    30  			if Err != nil {
    31  				// Freezing failed, and it is bad and dangerous
    32  				// to leave the cgroup in FROZEN or FREEZING
    33  				// state, so (try to) thaw it back.
    34  				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
    35  			}
    36  		}()
    37  
    38  		// As per older kernel docs (freezer-subsystem.txt before
    39  		// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
    40  		// userspace should either retry or thaw. While current
    41  		// kernel cgroup v1 docs no longer mention a need to retry,
    42  		// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
    43  		// freeze a cgroup v1 while new processes keep appearing in it
    44  		// (either via fork/clone or by writing new PIDs to
    45  		// cgroup.procs).
    46  		//
    47  		// The numbers below are empirically chosen to have a decent
    48  		// chance to succeed in various scenarios ("runc pause/unpause
    49  		// with parallel runc exec" and "bare freeze/unfreeze on a very
    50  		// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
    51  		//
    52  		// Adding any amount of sleep in between retries did not
    53  		// increase the chances of successful freeze in "pause/unpause
    54  		// with parallel exec" reproducer. OTOH, adding an occasional
    55  		// sleep helped for the case where the system is extremely slow
    56  		// (CentOS 7 VM on GHA CI).
    57  		//
    58  		// Alas, this is still a game of chances, since the real fix
    59  		// belong to the kernel (cgroup v2 do not have this bug).
    60  
    61  		for i := 0; i < 1000; i++ {
    62  			if i%50 == 49 {
    63  				// Occasional thaw and sleep improves
    64  				// the chances to succeed in freezing
    65  				// in case new processes keep appearing
    66  				// in the cgroup.
    67  				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
    68  				time.Sleep(10 * time.Millisecond)
    69  			}
    70  
    71  			if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
    72  				return err
    73  			}
    74  
    75  			if i%25 == 24 {
    76  				// Occasional short sleep before reading
    77  				// the state back also improves the chances to
    78  				// succeed in freezing in case of a very slow
    79  				// system.
    80  				time.Sleep(10 * time.Microsecond)
    81  			}
    82  			state, err := cgroups.ReadFile(path, "freezer.state")
    83  			if err != nil {
    84  				return err
    85  			}
    86  			state = strings.TrimSpace(state)
    87  			switch state {
    88  			case "FREEZING":
    89  				continue
    90  			case string(configs.Frozen):
    91  				if i > 1 {
    92  					logrus.Debugf("frozen after %d retries", i)
    93  				}
    94  				return nil
    95  			default:
    96  				// should never happen
    97  				return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
    98  			}
    99  		}
   100  		// Despite our best efforts, it got stuck in FREEZING.
   101  		return errors.New("unable to freeze")
   102  	case configs.Thawed:
   103  		return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
   104  	case configs.Undefined:
   105  		return nil
   106  	default:
   107  		return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
   108  	}
   109  }
   110  
   111  func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
   112  	return nil
   113  }
   114  
   115  func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
   116  	for {
   117  		state, err := cgroups.ReadFile(path, "freezer.state")
   118  		if err != nil {
   119  			// If the kernel is too old, then we just treat the freezer as
   120  			// being in an "undefined" state.
   121  			if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
   122  				err = nil
   123  			}
   124  			return configs.Undefined, err
   125  		}
   126  		switch strings.TrimSpace(state) {
   127  		case "THAWED":
   128  			return configs.Thawed, nil
   129  		case "FROZEN":
   130  			// Find out whether the cgroup is frozen directly,
   131  			// or indirectly via an ancestor.
   132  			self, err := cgroups.ReadFile(path, "freezer.self_freezing")
   133  			if err != nil {
   134  				// If the kernel is too old, then we just treat
   135  				// it as being frozen.
   136  				if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
   137  					err = nil
   138  				}
   139  				return configs.Frozen, err
   140  			}
   141  			switch self {
   142  			case "0\n":
   143  				return configs.Thawed, nil
   144  			case "1\n":
   145  				return configs.Frozen, nil
   146  			default:
   147  				return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
   148  			}
   149  		case "FREEZING":
   150  			// Make sure we get a stable freezer state, so retry if the cgroup
   151  			// is still undergoing freezing. This should be a temporary delay.
   152  			time.Sleep(1 * time.Millisecond)
   153  			continue
   154  		default:
   155  			return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
   156  		}
   157  	}
   158  }
   159  

View as plain text