...

Source file src/github.com/Microsoft/hcsshim/internal/hcsoci/create.go

Documentation: github.com/Microsoft/hcsshim/internal/hcsoci

     1  //go:build windows
     2  // +build windows
     3  
     4  package hcsoci
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  
    14  	"github.com/Microsoft/go-winio/pkg/guid"
    15  	"github.com/Microsoft/hcsshim/internal/cow"
    16  	"github.com/Microsoft/hcsshim/internal/guestpath"
    17  	"github.com/Microsoft/hcsshim/internal/hcs"
    18  	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
    19  	"github.com/Microsoft/hcsshim/internal/log"
    20  	"github.com/Microsoft/hcsshim/internal/oci"
    21  	"github.com/Microsoft/hcsshim/internal/resources"
    22  	"github.com/Microsoft/hcsshim/internal/schemaversion"
    23  	"github.com/Microsoft/hcsshim/internal/uvm"
    24  	specs "github.com/opencontainers/runtime-spec/specs-go"
    25  	"github.com/sirupsen/logrus"
    26  )
    27  
var (
	// lcowRootInUVM is a fmt format string for the root path of a Linux
	// container inside the utility VM. CreateContainer fills the single %s
	// verb with the container ID.
	lcowRootInUVM = guestpath.LCOWRootPrefixInUVM + "/%s"
	// wcowRootInUVM is a fmt format string for the root path of a Windows
	// container inside the utility VM. CreateContainer fills the single %s
	// verb with the hosting system's container counter formatted as hex.
	wcowRootInUVM = guestpath.WCOWRootPrefixInUVM + "/%s"
)
    32  
// CreateOptions are the set of fields used to call CreateContainer().
//
// Note: In the spec, the LayerFolders must be arranged in the same way in which
// moby configures them: layern, layern-1,...,layer2,layer1,scratch
// where layer1 is the base read-only layer, layern is the top-most read-only
// layer, and scratch is the RW layer. This is for historical reasons only.
type CreateOptions struct {
	// Common parameters
	ID               string             // Identifier for the container. A new GUID is generated if empty.
	Owner            string             // Specifies the owner. Defaults to executable name.
	Spec             *specs.Spec        // Definition of the container or utility VM being created. Required.
	SchemaVersion    *hcsschema.Version // Requested Schema Version. Defaults to v2 for RS5, v1 for RS1..RS4. Not consulted when HostingSystem is set.
	HostingSystem    *uvm.UtilityVM     // Utility or service VM in which the container is to be created.
	NetworkNamespace string             // Host network namespace to use (overrides anything in the spec)

	// This is an advanced debugging parameter. It allows for diagnosability by leaving a container's
	// resources allocated in case of a failure. Thus you would be able to use tools such as hcsdiag
	// to look at the state of a utility VM to see what resources were allocated. Obviously the caller
	// must a) not tear down the utility VM on failure (or pause in some way) and b) is responsible for
	// performing the ReleaseResources() call themselves.
	DoNotReleaseResourcesOnFailure bool

	// ScaleCPULimitsToSandbox indicates that the container CPU limits should be adjusted to account
	// for the difference in CPU count between the host and the UVM.
	ScaleCPULimitsToSandbox bool
}
    58  
// createOptionsInternal wraps the caller-supplied CreateOptions and adds the
// internal fields used while processing the request once the user-supplied
// values have been validated and defaulted. It is built by
// initializeCreateOptions.
type createOptionsInternal struct {
	*CreateOptions

	actualSchemaVersion *hcsschema.Version // Calculated based on Windows build and optional caller-supplied override
	actualID            string             // Identifier for the container
	actualOwner         string             // Owner for the container
	// actualNetworkNamespace is the namespace recorded by
	// configureSandboxNetwork from the container resources' NetNS.
	actualNetworkNamespace string
	ccgState               *hcsschema.ContainerCredentialGuardState // Container Credential Guard information to be attached to HCS container document

	windowsAdditionalMounts []hcsschema.MappedDirectory // Holds additional mounts based on added devices (such as SCSI). Only used for Windows v2 schema containers.
}
    72  
    73  func validateContainerConfig(ctx context.Context, coi *createOptionsInternal) error {
    74  	// check if gMSA is disabled
    75  	if coi.Spec.Windows != nil {
    76  		disableGMSA := oci.ParseAnnotationsDisableGMSA(ctx, coi.Spec)
    77  		if _, ok := coi.Spec.Windows.CredentialSpec.(string); ok && disableGMSA {
    78  			return fmt.Errorf("gMSA credentials are disabled: %w", hcs.ErrOperationDenied)
    79  		}
    80  	}
    81  
    82  	return nil
    83  }
    84  
    85  func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) (*createOptionsInternal, error) {
    86  	coi := &createOptionsInternal{
    87  		CreateOptions: createOptions,
    88  		actualID:      createOptions.ID,
    89  		actualOwner:   createOptions.Owner,
    90  	}
    91  
    92  	if coi.Spec == nil {
    93  		return nil, fmt.Errorf("spec must be supplied")
    94  	}
    95  
    96  	// Defaults if omitted by caller.
    97  	if coi.actualID == "" {
    98  		g, err := guid.NewV4()
    99  		if err != nil {
   100  			return nil, err
   101  		}
   102  		coi.actualID = g.String()
   103  	}
   104  	if coi.actualOwner == "" {
   105  		coi.actualOwner = filepath.Base(os.Args[0])
   106  	}
   107  
   108  	if coi.HostingSystem != nil {
   109  		// By definition, a hosting system can only be supplied for a v2 Xenon.
   110  		coi.actualSchemaVersion = schemaversion.SchemaV21()
   111  	} else {
   112  		coi.actualSchemaVersion = schemaversion.DetermineSchemaVersion(coi.SchemaVersion)
   113  	}
   114  
   115  	log.G(ctx).WithFields(logrus.Fields{
   116  		"options": fmt.Sprintf("%+v", createOptions),
   117  		"schema":  coi.actualSchemaVersion,
   118  	}).Debug("hcsshim::initializeCreateOptions")
   119  
   120  	return coi, nil
   121  }
   122  
   123  // configureSandboxNetwork creates a new network namespace for the pod (sandbox)
   124  // if required and then adds that namespace to the pod.
   125  func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r *resources.Resources, ct oci.KubernetesContainerType) error {
   126  	if coi.NetworkNamespace != "" {
   127  		r.SetNetNS(coi.NetworkNamespace)
   128  	} else {
   129  		err := createNetworkNamespace(ctx, coi, r)
   130  		if err != nil {
   131  			return err
   132  		}
   133  	}
   134  	coi.actualNetworkNamespace = r.NetNS()
   135  
   136  	if coi.HostingSystem != nil {
   137  		// Only add the network namespace to a standalone or sandbox
   138  		// container but not a workload container in a sandbox that inherits
   139  		// the namespace.
   140  		if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox {
   141  			if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil {
   142  				// No network setup type was specified for this UVM. Create and assign one here unless
   143  				// we received a different error.
   144  				if err == uvm.ErrNoNetworkSetup {
   145  					if err := coi.HostingSystem.CreateAndAssignNetworkSetup(ctx, "", ""); err != nil {
   146  						return err
   147  					}
   148  					if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil {
   149  						return err
   150  					}
   151  				} else {
   152  					return err
   153  				}
   154  			}
   155  			r.SetAddedNetNSToVM(true)
   156  		}
   157  	}
   158  
   159  	return nil
   160  }
   161  
   162  // CreateContainer creates a container. It can cope with a  wide variety of
   163  // scenarios, including v1 HCS schema calls, as well as more complex v2 HCS schema
   164  // calls. Note we always return the resources that have been allocated, even in the
   165  // case of an error. This provides support for the debugging option not to
   166  // release the resources on failure, so that the client can make the necessary
   167  // call to release resources that have been allocated as part of calling this function.
   168  func CreateContainer(ctx context.Context, createOptions *CreateOptions) (_ cow.Container, _ *resources.Resources, err error) {
   169  	coi, err := initializeCreateOptions(ctx, createOptions)
   170  	if err != nil {
   171  		return nil, nil, err
   172  	}
   173  
   174  	if err := validateContainerConfig(ctx, coi); err != nil {
   175  		return nil, nil, fmt.Errorf("container config validation failed: %s", err)
   176  	}
   177  
   178  	r := resources.NewContainerResources(coi.ID)
   179  	defer func() {
   180  		if err != nil {
   181  			if !coi.DoNotReleaseResourcesOnFailure {
   182  				_ = resources.ReleaseResources(ctx, r, coi.HostingSystem, true)
   183  			}
   184  		}
   185  	}()
   186  
   187  	if coi.HostingSystem != nil {
   188  		if coi.Spec.Linux != nil {
   189  			r.SetContainerRootInUVM(fmt.Sprintf(lcowRootInUVM, coi.ID))
   190  		} else {
   191  			n := coi.HostingSystem.ContainerCounter()
   192  			r.SetContainerRootInUVM(fmt.Sprintf(wcowRootInUVM, strconv.FormatUint(n, 16)))
   193  		}
   194  		// install kernel drivers if necessary.
   195  		// do this before network setup in case any of the drivers requested are
   196  		// network drivers
   197  		driverClosers, err := addSpecGuestDrivers(ctx, coi.HostingSystem, coi.Spec.Annotations)
   198  		if err != nil {
   199  			return nil, r, err
   200  		}
   201  		r.Add(driverClosers...)
   202  	}
   203  
   204  	ct, _, err := oci.GetSandboxTypeAndID(coi.Spec.Annotations)
   205  	if err != nil {
   206  		return nil, r, err
   207  	}
   208  	isSandbox := ct == oci.KubernetesContainerTypeSandbox
   209  
   210  	// Create a network namespace if necessary.
   211  	if coi.Spec.Windows != nil &&
   212  		coi.Spec.Windows.Network != nil &&
   213  		schemaversion.IsV21(coi.actualSchemaVersion) {
   214  		err = configureSandboxNetwork(ctx, coi, r, ct)
   215  		if err != nil {
   216  			return nil, r, fmt.Errorf("failure while creating namespace for container: %s", err)
   217  		}
   218  	}
   219  
   220  	var hcsDocument, gcsDocument interface{}
   221  	log.G(ctx).Debug("hcsshim::CreateContainer allocating resources")
   222  	if coi.Spec.Linux != nil {
   223  		if schemaversion.IsV10(coi.actualSchemaVersion) {
   224  			return nil, r, errors.New("LCOW v1 not supported")
   225  		}
   226  		log.G(ctx).Debug("hcsshim::CreateContainer allocateLinuxResources")
   227  		err = allocateLinuxResources(ctx, coi, r, isSandbox)
   228  		if err != nil {
   229  			log.G(ctx).WithError(err).Debug("failed to allocateLinuxResources")
   230  			return nil, r, err
   231  		}
   232  		gcsDocument, err = createLinuxContainerDocument(ctx, coi, r.ContainerRootInUVM(), r.LcowScratchPath())
   233  		if err != nil {
   234  			log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument")
   235  			return nil, r, err
   236  		}
   237  	} else {
   238  		err = allocateWindowsResources(ctx, coi, r, isSandbox)
   239  		if err != nil {
   240  			log.G(ctx).WithError(err).Debug("failed to allocateWindowsResources")
   241  			return nil, r, err
   242  		}
   243  		log.G(ctx).Debug("hcsshim::CreateContainer creating container document")
   244  		v1, v2, err := createWindowsContainerDocument(ctx, coi)
   245  		if err != nil {
   246  			log.G(ctx).WithError(err).Debug("failed createHCSContainerDocument")
   247  			return nil, r, err
   248  		}
   249  
   250  		if schemaversion.IsV10(coi.actualSchemaVersion) {
   251  			// v1 Argon or Xenon. Pass the document directly to HCS.
   252  			hcsDocument = v1
   253  		} else if coi.HostingSystem != nil {
   254  			// v2 Xenon. Pass the container object to the UVM.
   255  			gcsDocument = &hcsschema.HostedSystem{
   256  				SchemaVersion: schemaversion.SchemaV21(),
   257  				Container:     v2,
   258  			}
   259  		} else {
   260  			// v2 Argon. Pass the container object to the HCS.
   261  			hcsDocument = &hcsschema.ComputeSystem{
   262  				Owner:                             coi.actualOwner,
   263  				SchemaVersion:                     schemaversion.SchemaV21(),
   264  				ShouldTerminateOnLastHandleClosed: true,
   265  				Container:                         v2,
   266  			}
   267  		}
   268  	}
   269  
   270  	log.G(ctx).Debug("hcsshim::CreateContainer creating compute system")
   271  	if gcsDocument != nil {
   272  		c, err := coi.HostingSystem.CreateContainer(ctx, coi.actualID, gcsDocument)
   273  		if err != nil {
   274  			return nil, r, err
   275  		}
   276  		return c, r, nil
   277  	}
   278  
   279  	system, err := hcs.CreateComputeSystem(ctx, coi.actualID, hcsDocument)
   280  	if err != nil {
   281  		return nil, r, err
   282  	}
   283  	return system, r, nil
   284  }
   285  
   286  // isV2Xenon returns true if the create options are for a HCS schema V2 xenon container
   287  // with a hosting VM
   288  func (coi *createOptionsInternal) isV2Xenon() bool {
   289  	return schemaversion.IsV21(coi.actualSchemaVersion) && coi.HostingSystem != nil
   290  }
   291  
   292  // isV1Xenon returns true if the create options are for a HCS schema V1 xenon container
   293  // with a hosting VM
   294  func (coi *createOptionsInternal) isV1Xenon() bool {
   295  	return schemaversion.IsV10(coi.actualSchemaVersion) && coi.HostingSystem != nil
   296  }
   297  
   298  // isV2Argon returns true if the create options are for a HCS schema V2 argon container
   299  // which should have no hosting VM
   300  func (coi *createOptionsInternal) isV2Argon() bool {
   301  	return schemaversion.IsV21(coi.actualSchemaVersion) && coi.HostingSystem == nil
   302  }
   303  
   304  // isV1Argon returns true if the create options are for a HCS schema V1 argon container
   305  // which should have no hyperv settings
   306  func (coi *createOptionsInternal) isV1Argon() bool {
   307  	return schemaversion.IsV10(coi.actualSchemaVersion) && coi.Spec.Windows.HyperV == nil
   308  }
   309  
   310  func (coi *createOptionsInternal) hasWindowsAssignedDevices() bool {
   311  	return (coi.Spec.Windows != nil) && (coi.Spec.Windows.Devices != nil) &&
   312  		(len(coi.Spec.Windows.Devices) > 0)
   313  }
   314  

View as plain text