...

Source file src/github.com/Microsoft/hcsshim/internal/uvm/create.go

Documentation: github.com/Microsoft/hcsshim/internal/uvm

     1  //go:build windows
     2  
     3  package uvm
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  
    13  	"github.com/sirupsen/logrus"
    14  	"go.opencensus.io/trace"
    15  	"golang.org/x/sys/windows"
    16  
    17  	"github.com/Microsoft/hcsshim/internal/cow"
    18  	"github.com/Microsoft/hcsshim/internal/hcs"
    19  	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
    20  	"github.com/Microsoft/hcsshim/internal/log"
    21  	"github.com/Microsoft/hcsshim/internal/logfields"
    22  	"github.com/Microsoft/hcsshim/internal/oc"
    23  	"github.com/Microsoft/hcsshim/internal/schemaversion"
    24  	"github.com/Microsoft/hcsshim/osversion"
    25  )
    26  
// Options are the set of options passed to Create() to create a utility vm.
//
// These are the base options shared by WCOW and LCOW; newDefaultOptions
// returns an instance populated with the defaults.
type Options struct {
	ID    string // Identifier for the uvm. Defaults to generated GUID.
	Owner string // Specifies the owner. Defaults to executable name.

	// MemorySizeInMB sets the UVM memory. If `0` will default to platform
	// default. newDefaultOptions sets this to 1024.
	MemorySizeInMB uint64

	// MMIO layout settings for the UVM, expressed in MB per the field names.
	// NOTE(review): the exact semantics are not visible in this file — confirm
	// against the code that consumes these fields.
	LowMMIOGapInMB   uint64
	HighMMIOBaseInMB uint64
	HighMMIOGapInMB  uint64

	// AllowOvercommit selects the memory backing for the UVM. Defaults to
	// true. For physical backed memory, set to false.
	AllowOvercommit bool

	// FullyPhysicallyBacked describes if a uvm should be entirely physically
	// backed, including in any additional devices.
	FullyPhysicallyBacked bool

	// EnableDeferredCommit selects virtual memory with deferred commit for
	// the UVM when true. Defaults to false. verifyOptions rejects it when
	// AllowOvercommit is false.
	EnableDeferredCommit bool

	// ProcessorCount sets the number of vCPU's. If `0` will default to platform
	// default.
	ProcessorCount int32

	// ProcessorLimit sets the maximum percentage of each vCPU's the UVM can
	// consume. If `0` will default to platform default.
	ProcessorLimit int32

	// ProcessorWeight sets the relative weight of these vCPU's vs another UVM's
	// when scheduling. If `0` will default to platform default.
	ProcessorWeight int32

	// StorageQoSIopsMaximum sets the maximum number of Iops. If `0` will
	// default to the platform default.
	StorageQoSIopsMaximum int32

	// StorageQoSBandwidthMaximum sets the maximum number of bytes per second.
	// If `0` will default to the platform default.
	StorageQoSBandwidthMaximum int32

	// DisableCompartmentNamespace sets whether to disable namespacing the network compartment in the UVM
	// for WCOW. Namespacing makes it so the compartment created for a container is essentially no longer
	// aware or able to see any of the other compartments on the host (in this case the UVM).
	// The compartment that the container is added to now behaves as the default compartment as
	// far as the container is concerned and it is only able to view the NICs in the compartment it's assigned to.
	// This is the compartment setup (and behavior) that is followed for V1 HCS schema containers (docker) so
	// this change brings parity as well. This behavior is gated behind a registry key currently to avoid any
	// unnecessary behavior and once this restriction is removed then we can remove the need for this variable
	// and the associated annotation as well.
	DisableCompartmentNamespace bool

	// CPUGroupID sets the ID of a CPUGroup on the host that the UVM should be added to on start.
	// Defaults to an empty string which indicates the UVM should not be added to any CPUGroup.
	CPUGroupID string
	// NetworkConfigProxy holds the address of the network config proxy service.
	// Whether this is non-empty determines whether to start the ComputeAgent
	// TTRPC service that receives the UVM's set of NICs from this proxy instead
	// of enumerating the endpoints locally.
	NetworkConfigProxy string

	// ProcessDumpLocation sets the location for process dumps to be placed in. On Linux this is a kernel
	// setting so it will be applied to all containers. On Windows it's configurable per container, but we
	// can mimic this for Windows by just applying the location specified here per container.
	ProcessDumpLocation string

	// NoWritableFileShares disables adding any writable vSMB and Plan9 shares to the UVM.
	NoWritableFileShares bool

	// SCSIControllerCount is the number of SCSI controllers. Defaults to 1 for
	// WCOW and 4 for LCOW.
	SCSIControllerCount uint32

	// DumpDirectoryPath is the path of the directory inside which all debug dumps etc are stored.
	DumpDirectoryPath string
}
   106  
   107  // Verifies that the final UVM options are correct and supported.
   108  func verifyOptions(ctx context.Context, options interface{}) error {
   109  	switch opts := options.(type) {
   110  	case *OptionsLCOW:
   111  		if opts.EnableDeferredCommit && !opts.AllowOvercommit {
   112  			return errors.New("EnableDeferredCommit is not supported on physically backed VMs")
   113  		}
   114  		if opts.SCSIControllerCount > MaxSCSIControllers {
   115  			return fmt.Errorf("SCSI controller count can't be more than %d", MaxSCSIControllers)
   116  		}
   117  		if opts.VPMemDeviceCount > MaxVPMEMCount {
   118  			return fmt.Errorf("VPMem device count cannot be greater than %d", MaxVPMEMCount)
   119  		}
   120  		if opts.VPMemDeviceCount > 0 {
   121  			if opts.VPMemSizeBytes%4096 != 0 {
   122  				return errors.New("VPMemSizeBytes must be a multiple of 4096")
   123  			}
   124  		}
   125  		if opts.KernelDirect && osversion.Build() < 18286 {
   126  			return errors.New("KernelDirectBoot is not supported on builds older than 18286")
   127  		}
   128  
   129  		if opts.EnableColdDiscardHint && osversion.Build() < 18967 {
   130  			return errors.New("EnableColdDiscardHint is not supported on builds older than 18967")
   131  		}
   132  	case *OptionsWCOW:
   133  		if opts.EnableDeferredCommit && !opts.AllowOvercommit {
   134  			return errors.New("EnableDeferredCommit is not supported on physically backed VMs")
   135  		}
   136  		if len(opts.LayerFolders) < 2 {
   137  			return errors.New("at least 2 LayerFolders must be supplied")
   138  		}
   139  		if opts.SCSIControllerCount != 1 {
   140  			return errors.New("exactly 1 SCSI controller is required for WCOW")
   141  		}
   142  	}
   143  	return nil
   144  }
   145  
   146  // newDefaultOptions returns the default base options for WCOW and LCOW.
   147  //
   148  // If `id` is empty it will be generated.
   149  //
   150  // If `owner` is empty it will be set to the calling executables name.
   151  func newDefaultOptions(id, owner string) *Options {
   152  	opts := &Options{
   153  		ID:                    id,
   154  		Owner:                 owner,
   155  		MemorySizeInMB:        1024,
   156  		AllowOvercommit:       true,
   157  		EnableDeferredCommit:  false,
   158  		ProcessorCount:        defaultProcessorCount(),
   159  		FullyPhysicallyBacked: false,
   160  		NoWritableFileShares:  false,
   161  		SCSIControllerCount:   1,
   162  	}
   163  
   164  	if opts.Owner == "" {
   165  		opts.Owner = filepath.Base(os.Args[0])
   166  	}
   167  
   168  	return opts
   169  }
   170  
// ID returns the ID of the VM's compute system, as reported by
// uvm.hcsSystem.
//
// NOTE(review): uvm.hcsSystem is only assigned in create, and Close guards
// against it being nil; this method does not — confirm callers only use it
// after a successful create.
func (uvm *UtilityVM) ID() string {
	return uvm.hcsSystem.ID()
}
   175  
// OS returns the operating system of the utility VM, as recorded in
// uvm.operatingSystem.
func (uvm *UtilityVM) OS() string {
	return uvm.operatingSystem
}
   180  
   181  func (uvm *UtilityVM) create(ctx context.Context, doc interface{}) error {
   182  	uvm.exitCh = make(chan struct{})
   183  	system, err := hcs.CreateComputeSystem(ctx, uvm.id, doc)
   184  	if err != nil {
   185  		return err
   186  	}
   187  	defer func() {
   188  		if system != nil {
   189  			_ = system.Terminate(ctx)
   190  			_ = system.Wait()
   191  		}
   192  	}()
   193  
   194  	// Cache the VM ID of the utility VM.
   195  	properties, err := system.Properties(ctx)
   196  	if err != nil {
   197  		return err
   198  	}
   199  	uvm.runtimeID = properties.RuntimeID
   200  	uvm.hcsSystem = system
   201  	system = nil
   202  
   203  	log.G(ctx).WithFields(logrus.Fields{
   204  		logfields.UVMID: uvm.id,
   205  		"runtime-id":    uvm.runtimeID.String(),
   206  	}).Debug("created utility VM")
   207  	return nil
   208  }
   209  
   210  // Close terminates and releases resources associated with the utility VM.
   211  func (uvm *UtilityVM) Close() (err error) {
   212  	ctx, span := oc.StartSpan(context.Background(), "uvm::Close")
   213  	defer span.End()
   214  	defer func() { oc.SetSpanStatus(span, err) }()
   215  	span.AddAttributes(trace.StringAttribute(logfields.UVMID, uvm.id))
   216  
   217  	windows.Close(uvm.vmmemProcess)
   218  
   219  	if uvm.hcsSystem != nil {
   220  		_ = uvm.hcsSystem.Terminate(ctx)
   221  		_ = uvm.Wait()
   222  	}
   223  
   224  	if err := uvm.CloseGCSConnection(); err != nil {
   225  		log.G(ctx).Errorf("close GCS connection failed: %s", err)
   226  	}
   227  
   228  	// outputListener will only be nil for a Create -> Stop without a Start. In
   229  	// this case we have no goroutine processing output so its safe to close the
   230  	// channel here.
   231  	if uvm.outputListener != nil {
   232  		close(uvm.outputProcessingDone)
   233  		uvm.outputListener.Close()
   234  		uvm.outputListener = nil
   235  	}
   236  
   237  	if uvm.confidentialUVMOptions != nil && uvm.confidentialUVMOptions.GuestStateFile != "" {
   238  		vmgsFullPath := filepath.Join(uvm.confidentialUVMOptions.BundleDirectory, uvm.confidentialUVMOptions.GuestStateFile)
   239  		log.G(context.Background()).WithField("VMGS file", vmgsFullPath).Debug("removing VMGS file")
   240  		if err := os.Remove(vmgsFullPath); err != nil {
   241  			log.G(ctx).WithError(err).Error("failed to remove VMGS file")
   242  		}
   243  	}
   244  
   245  	if uvm.hcsSystem != nil {
   246  		return uvm.hcsSystem.Close()
   247  	}
   248  
   249  	return nil
   250  }
   251  
   252  // CreateContainer creates a container in the utility VM.
   253  func (uvm *UtilityVM) CreateContainer(ctx context.Context, id string, settings interface{}) (cow.Container, error) {
   254  	if uvm.gc != nil {
   255  		c, err := uvm.gc.CreateContainer(ctx, id, settings)
   256  		if err != nil {
   257  			return nil, fmt.Errorf("failed to create container %s: %s", id, err)
   258  		}
   259  		return c, nil
   260  	}
   261  	doc := hcsschema.ComputeSystem{
   262  		HostingSystemId:                   uvm.id,
   263  		Owner:                             uvm.owner,
   264  		SchemaVersion:                     schemaversion.SchemaV21(),
   265  		ShouldTerminateOnLastHandleClosed: true,
   266  		HostedSystem:                      settings,
   267  	}
   268  	c, err := hcs.CreateComputeSystem(ctx, id, &doc)
   269  	if err != nil {
   270  		return nil, err
   271  	}
   272  	return c, err
   273  }
   274  
   275  // CreateProcess creates a process in the utility VM.
   276  func (uvm *UtilityVM) CreateProcess(ctx context.Context, settings interface{}) (cow.Process, error) {
   277  	if uvm.gc != nil {
   278  		return uvm.gc.CreateProcess(ctx, settings)
   279  	}
   280  	return uvm.hcsSystem.CreateProcess(ctx, settings)
   281  }
   282  
// IsOCI always returns false, indicating the parameters to CreateProcess
// should not include an OCI spec.
func (uvm *UtilityVM) IsOCI() bool {
	return false
}
   288  
// Terminate requests that the utility VM be terminated.
//
// The request is forwarded to the UVM's compute system and its error is
// returned unchanged. This method does not itself wait for the VM to exit
// (Close, by contrast, terminates and then waits).
func (uvm *UtilityVM) Terminate(ctx context.Context) error {
	return uvm.hcsSystem.Terminate(ctx)
}
   293  
// ExitError returns an error if the utility VM has terminated unexpectedly.
//
// The value is whatever the UVM's compute system reports from its own
// ExitError method.
func (uvm *UtilityVM) ExitError() error {
	return uvm.hcsSystem.ExitError()
}
   298  
   299  func defaultProcessorCount() int32 {
   300  	if runtime.NumCPU() == 1 {
   301  		return 1
   302  	}
   303  	return 2
   304  }
   305  
   306  // normalizeProcessorCount sets `uvm.processorCount` to `Min(requested,
   307  // logical CPU count)`.
   308  func (uvm *UtilityVM) normalizeProcessorCount(ctx context.Context, requested int32, processorTopology *hcsschema.ProcessorTopology) int32 {
   309  	// Use host processor information retrieved from HCS instead of runtime.NumCPU,
   310  	// GetMaximumProcessorCount or other OS level calls for two reasons.
   311  	// 1. Go uses GetProcessAffinityMask and falls back to GetSystemInfo both of
   312  	// which will not return LPs in another processor group.
   313  	// 2. GetMaximumProcessorCount will return all processors on the system
   314  	// but in configurations where the host partition doesn't see the full LP count
   315  	// i.e "Minroot" scenarios this won't be sufficient.
   316  	// (https://docs.microsoft.com/en-us/windows-server/virtualization/hyper-v/manage/manage-hyper-v-minroot-2016)
   317  	hostCount := int32(processorTopology.LogicalProcessorCount)
   318  	if requested > hostCount {
   319  		log.G(ctx).WithFields(logrus.Fields{
   320  			logfields.UVMID: uvm.id,
   321  			"requested":     requested,
   322  			"assigned":      hostCount,
   323  		}).Warn("Changing user requested CPUCount to current number of processors")
   324  		return hostCount
   325  	} else {
   326  		return requested
   327  	}
   328  }
   329  
// ProcessorCount returns the number of processors actually assigned to the
// UVM, as recorded in uvm.processorCount.
func (uvm *UtilityVM) ProcessorCount() int32 {
	return uvm.processorCount
}
   334  
// PhysicallyBacked reports whether the UVM is backed by physical memory
// (over commit and deferred commit both false).
//
// The value is read from uvm.physicallyBacked; where that field is computed
// is not visible in this file.
func (uvm *UtilityVM) PhysicallyBacked() bool {
	return uvm.physicallyBacked
}
   340  
// ProcessDumpLocation returns the location that process dumps will get
// written to for containers running in the UVM, as recorded in
// uvm.processDumpLocation.
func (uvm *UtilityVM) ProcessDumpLocation() string {
	return uvm.processDumpLocation
}
   346  
   347  func (uvm *UtilityVM) normalizeMemorySize(ctx context.Context, requested uint64) uint64 {
   348  	actual := (requested + 1) &^ 1 // align up to an even number
   349  	if requested != actual {
   350  		log.G(ctx).WithFields(logrus.Fields{
   351  			logfields.UVMID: uvm.id,
   352  			"requested":     requested,
   353  			"assigned":      actual,
   354  		}).Warn("Changing user requested MemorySizeInMB to align to 2MB")
   355  	}
   356  	return actual
   357  }
   358  
// DevicesPhysicallyBacked reports whether additional devices added to the UVM
// should be physically backed, as recorded in uvm.devicesPhysicallyBacked.
func (uvm *UtilityVM) DevicesPhysicallyBacked() bool {
	return uvm.devicesPhysicallyBacked
}
   364  
// VSMBNoDirectMap reports whether VSMB devices should be mounted with
// `NoDirectMap` set to true, as recorded in uvm.vsmbNoDirectMap.
func (uvm *UtilityVM) VSMBNoDirectMap() bool {
	return uvm.vsmbNoDirectMap
}
   369  
// NoWritableFileShares returns the value of uvm.noWritableFileShares.
//
// NOTE(review): presumably this mirrors Options.NoWritableFileShares, which
// disables adding any writable vSMB and Plan9 shares to the UVM — confirm
// where the field is populated.
func (uvm *UtilityVM) NoWritableFileShares() bool {
	return uvm.noWritableFileShares
}
   373  
   374  // Closes the external GCS connection if it is being used and also closes the
   375  // listener for GCS connection.
   376  func (uvm *UtilityVM) CloseGCSConnection() (err error) {
   377  	if uvm.gc != nil {
   378  		err = uvm.gc.Close()
   379  	}
   380  	if uvm.gcListener != nil {
   381  		err = uvm.gcListener.Close()
   382  	}
   383  	return
   384  }
   385  

View as plain text