...

Source file src/github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2/workload_container.go

Documentation: github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2

//go:build linux
// +build linux

package hcsv2

import (
	"context"
	"os"
	"path/filepath"
	"strings"

	oci "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
	"go.opencensus.io/trace"
	"golang.org/x/sys/unix"

	specInternal "github.com/Microsoft/hcsshim/internal/guest/spec"
	"github.com/Microsoft/hcsshim/internal/guestpath"
	"github.com/Microsoft/hcsshim/internal/hooks"
	"github.com/Microsoft/hcsshim/internal/oc"
	"github.com/Microsoft/hcsshim/pkg/annotations"
)

// os.MkdirAll combines the given permissions with the running process's
// umask. By default this causes 0777 to become 0755.
// Temporarily set the umask of this process to 0 so that we can actually
// make all dirs with os.ModePerm permissions.
func mkdirAllModePerm(target string) error {
	savedUmask := unix.Umask(0)
	defer unix.Umask(savedUmask)
	return os.MkdirAll(target, os.ModePerm)
}
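As an illustration of the problem this helper solves (the paths below are placeholders, not part of this file): with the common default umask of 0022, a plain os.MkdirAll with os.ModePerm ends up creating 0755 directories, while mkdirAllModePerm resets the umask first and really produces 0777:

	_ = os.MkdirAll("/tmp/plain", os.ModePerm) // with umask 0022 these dirs end up 0755
	_ = mkdirAllModePerm("/tmp/permissive")    // umask is temporarily 0, so these dirs end up 0777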

func updateSandboxMounts(sbid string, spec *oci.Spec) error {
	for i, m := range spec.Mounts {
		if strings.HasPrefix(m.Source, guestpath.SandboxMountPrefix) {
			sandboxSource := specInternal.SandboxMountSource(sbid, m.Source)

			// filepath.Join cleans the resulting path before returning, so it would resolve the relative path if one was given.
			// Hence, we need to ensure that the resolved path is still under the correct directory
			if !strings.HasPrefix(sandboxSource, specInternal.SandboxMountsDir(sbid)) {
				return errors.Errorf("mount path %v for mount %v is not within sandbox's mounts dir", sandboxSource, m.Source)
			}

			spec.Mounts[i].Source = sandboxSource

			_, err := os.Stat(sandboxSource)
			if os.IsNotExist(err) {
				if err := mkdirAllModePerm(sandboxSource); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
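To make the path-traversal guard above concrete (the directory literal and relative path here are hypothetical stand-ins, not the actual guestpath/specInternal constants): filepath.Join cleans ".." components, so a sandbox mount source that tries to climb out of the per-sandbox mounts directory resolves to a path outside it and fails the strings.HasPrefix check:

	mountsDir := "/run/sandbox/mounts"               // stand-in for specInternal.SandboxMountsDir(sbid)
	escaped := filepath.Join(mountsDir, "../../etc") // resolves to "/run/etc"
	ok := strings.HasPrefix(escaped, mountsDir)      // false, so the mount is rejected
	_ = ok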

func updateHugePageMounts(sbid string, spec *oci.Spec) error {
	for i, m := range spec.Mounts {
		if strings.HasPrefix(m.Source, guestpath.HugePagesMountPrefix) {
			mountsDir := specInternal.HugePagesMountsDir(sbid)
			subPath := strings.TrimPrefix(m.Source, guestpath.HugePagesMountPrefix)
			pageSize := strings.Split(subPath, string(os.PathSeparator))[0]
			hugePageMountSource := filepath.Join(mountsDir, subPath)

			// filepath.Join cleans the resulting path before returning so it would resolve the relative path if one was given.
			// Hence, we need to ensure that the resolved path is still under the correct directory
			if !strings.HasPrefix(hugePageMountSource, mountsDir) {
				return errors.Errorf("mount path %v for mount %v is not within the hugepages mounts dir", hugePageMountSource, m.Source)
			}

			spec.Mounts[i].Source = hugePageMountSource

			_, err := os.Stat(hugePageMountSource)
			if os.IsNotExist(err) {
				if err := mkdirAllModePerm(hugePageMountSource); err != nil {
					return err
				}
				if err := unix.Mount("none", hugePageMountSource, "hugetlbfs", 0, "pagesize="+pageSize); err != nil {
					return errors.Errorf("mount operation for %v failed with error %v", hugePageMountSource, err)
				}
			}
		}
	}
	return nil
}
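To illustrate how the page size is derived (the prefix and directory literals below are assumptions for the sketch, not the real guestpath/specInternal values): for a mount source of the form <HugePagesMountPrefix><pagesize>/<name>, the first path element after the prefix is the page size, which becomes the hugetlbfs "pagesize=" option:

	const prefix = "hugepages://"                                   // assumed shape of guestpath.HugePagesMountPrefix
	src := prefix + "2M/shm"                                        // hypothetical mount source
	subPath := strings.TrimPrefix(src, prefix)                      // "2M/shm"
	pageSize := strings.Split(subPath, string(os.PathSeparator))[0] // "2M" on Linux
	target := filepath.Join("/run/hugepages/mounts", subPath)       // assumed mounts dir; "/run/hugepages/mounts/2M/shm"
	opts := "pagesize=" + pageSize                                  // "pagesize=2M", passed to unix.Mount with fstype "hugetlbfs"
	_, _ = target, opts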

func specHasGPUDevice(spec *oci.Spec) bool {
	for _, d := range spec.Windows.Devices {
		if d.IDType == "gpu" {
			return true
		}
	}
	return false
}

func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.Spec) (err error) {
	ctx, span := oc.StartSpan(ctx, "hcsv2::setupWorkloadContainerSpec")
	defer span.End()
	defer func() { oc.SetSpanStatus(span, err) }()
	span.AddAttributes(
		trace.StringAttribute("sandboxID", sbid),
		trace.StringAttribute("cid", id))

	// Verify no hostname
	if spec.Hostname != "" {
		return errors.Errorf("workload container must not change hostname: %s", spec.Hostname)
	}

	// update any sandbox mounts with the sandboxMounts directory path and create files
	if err = updateSandboxMounts(sbid, spec); err != nil {
		return errors.Wrapf(err, "failed to update sandbox mounts for container %v in sandbox %v", id, sbid)
	}

	if err = updateHugePageMounts(sbid, spec); err != nil {
		return errors.Wrapf(err, "failed to update hugepages mounts for container %v in sandbox %v", id, sbid)
	}

	// Add default mounts for container networking (e.g. /etc/hostname, /etc/hosts),
	// if spec didn't override them explicitly.
	networkingMounts := specInternal.GenerateWorkloadContainerNetworkMounts(sbid, spec)
	spec.Mounts = append(spec.Mounts, networkingMounts...)

	// TODO: JTERRY75 /dev/shm is not properly setup for LCOW I believe. CRI
	// also has a concept of a sandbox/shm file when the IPC NamespaceMode !=
	// NODE.

	if err := applyAnnotationsToSpec(ctx, spec); err != nil {
		return err
	}

	if rlimCore := spec.Annotations[annotations.RLimitCore]; rlimCore != "" {
		if err := setCoreRLimit(spec, rlimCore); err != nil {
			return err
		}
	}

	// User.Username is generally only used on Windows, but as there's no (easy/fast at least) way to grab
	// a uid:gid pairing for a username string on the host, we need to defer this work until we're here in the
	// guest. The username field is used as a temporary holding place until we can perform this work here when
	// we actually have the rootfs to inspect.
	if spec.Process.User.Username != "" {
		if err := setUserStr(spec, spec.Process.User.Username); err != nil {
			return err
		}
	}

	// Force the parent cgroup into our /containers root
	spec.Linux.CgroupsPath = "/containers/" + id

	if spec.Windows != nil {
		// we only support NVIDIA GPUs right now
		if specHasGPUDevice(spec) {
			// run ldconfig as a `CreateRuntime` hook so that it's executed in the runtime namespace. This is
			// necessary so that when we execute the nvidia hook to assign the device, the runtime can
			// find the nvidia library files.
			ldconfigHook := hooks.NewOCIHook("/sbin/ldconfig", []string{getNvidiaDriversUsrLibPath()}, os.Environ())
			if err := hooks.AddOCIHook(spec, hooks.CreateRuntime, ldconfigHook); err != nil {
				return err
			}
			if err := addNvidiaDeviceHook(ctx, spec); err != nil {
				return err
			}
		}
		// add other assigned devices to the spec
		if err := addAssignedDevice(ctx, spec); err != nil {
			return errors.Wrap(err, "failed to add assigned device(s) to the container spec")
		}
	}

	// Clear the Windows section as we don't want to forward it to runc
	spec.Windows = nil

	return nil
}
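For context on the ldconfig hook added in setupWorkloadContainerSpec: in OCI runtime-spec terms it amounts to an entry in spec.Hooks.CreateRuntime. A minimal sketch of the equivalent direct spec edit, assuming hooks.NewOCIHook and hooks.AddOCIHook simply copy the path, args, and env into that hook list (not verified here), using only runtime-spec types:

	// Assumption: this mirrors what AddOCIHook(spec, hooks.CreateRuntime, ...) does.
	if spec.Hooks == nil {
		spec.Hooks = &oci.Hooks{}
	}
	spec.Hooks.CreateRuntime = append(spec.Hooks.CreateRuntime, oci.Hook{
		Path: "/sbin/ldconfig",
		Args: []string{getNvidiaDriversUsrLibPath()},
		Env:  os.Environ(),
	})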
