...

Source file src/github.com/Microsoft/hcsshim/internal/layers/layers.go

Documentation: github.com/Microsoft/hcsshim/internal/layers

     1  //go:build windows
     2  // +build windows
     3  
     4  package layers
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"time"
    12  
    13  	"github.com/Microsoft/go-winio/pkg/fs"
    14  	"github.com/pkg/errors"
    15  	"github.com/sirupsen/logrus"
    16  	"golang.org/x/sys/windows"
    17  
    18  	"github.com/Microsoft/hcsshim/internal/guestpath"
    19  	hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
    20  	"github.com/Microsoft/hcsshim/internal/hcserror"
    21  	"github.com/Microsoft/hcsshim/internal/log"
    22  	"github.com/Microsoft/hcsshim/internal/ospath"
    23  	"github.com/Microsoft/hcsshim/internal/resources"
    24  	"github.com/Microsoft/hcsshim/internal/uvm"
    25  	"github.com/Microsoft/hcsshim/internal/wclayer"
    26  )
    27  
// lcowLayersCloser records everything MountLCOWLayers set up for a container so
// that it can all be torn down again through a single Release call.
type lcowLayersCloser struct {
	// uvm is the utility VM on which RemoveCombinedLayersLCOW is invoked during Release.
	uvm                     *uvm.UtilityVM
	// guestCombinedLayersPath is the path handed to RemoveCombinedLayersLCOW during Release.
	guestCombinedLayersPath string
	// scratchMount releases the container scratch mount.
	scratchMount            resources.ResourceCloser
	// layerClosers holds one closer per layer that was added; each is released in slice order.
	layerClosers            []resources.ResourceCloser
}
    34  
    35  func (lc *lcowLayersCloser) Release(ctx context.Context) (retErr error) {
    36  	if err := lc.uvm.RemoveCombinedLayersLCOW(ctx, lc.guestCombinedLayersPath); err != nil {
    37  		log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersLCOW")
    38  		if retErr == nil {
    39  			retErr = fmt.Errorf("first error: %w", err)
    40  		}
    41  	}
    42  	if err := lc.scratchMount.Release(ctx); err != nil {
    43  		log.G(ctx).WithError(err).Error("failed LCOW scratch mount release")
    44  		if retErr == nil {
    45  			retErr = fmt.Errorf("first error: %w", err)
    46  		}
    47  	}
    48  	for i, closer := range lc.layerClosers {
    49  		if err := closer.Release(ctx); err != nil {
    50  			log.G(ctx).WithFields(logrus.Fields{
    51  				logrus.ErrorKey: err,
    52  				"layerIndex":    i,
    53  			}).Error("failed releasing LCOW layer")
    54  			if retErr == nil {
    55  				retErr = fmt.Errorf("first error: %w", err)
    56  			}
    57  		}
    58  	}
    59  	return
    60  }
    61  
    62  // MountLCOWLayers is a helper for clients to hide all the complexity of layer mounting for LCOW
    63  // Layer folder are in order: base, [rolayer1..rolayern,] scratch
    64  // Returns the path at which the `rootfs` of the container can be accessed. Also, returns the path inside the
    65  // UVM at which container scratch directory is located. Usually, this path is the path at which the container
    66  // scratch VHD is mounted. However, in case of scratch sharing this is a directory under the UVM scratch.
    67  func MountLCOWLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot string, vm *uvm.UtilityVM) (_, _ string, _ resources.ResourceCloser, err error) {
    68  	if vm == nil {
    69  		return "", "", nil, errors.New("MountLCOWLayers cannot be called for process-isolated containers")
    70  	}
    71  
    72  	if vm.OS() != "linux" {
    73  		return "", "", nil, errors.New("MountLCOWLayers should only be called for LCOW")
    74  	}
    75  
    76  	// V2 UVM
    77  	log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountLCOWLayers V2 UVM")
    78  
    79  	var (
    80  		layerClosers      []resources.ResourceCloser
    81  		lcowUvmLayerPaths []string
    82  	)
    83  	defer func() {
    84  		if err != nil {
    85  			for _, closer := range layerClosers {
    86  				if err := closer.Release(ctx); err != nil {
    87  					log.G(ctx).WithError(err).Warn("failed to remove lcow layer on cleanup")
    88  				}
    89  			}
    90  		}
    91  	}()
    92  
    93  	for _, layerPath := range layerFolders[:len(layerFolders)-1] {
    94  		log.G(ctx).WithField("layerPath", layerPath).Debug("mounting layer")
    95  		var (
    96  			layerPath = filepath.Join(layerPath, "layer.vhd")
    97  			uvmPath   string
    98  		)
    99  		uvmPath, closer, err := addLCOWLayer(ctx, vm, layerPath)
   100  		if err != nil {
   101  			return "", "", nil, fmt.Errorf("failed to add LCOW layer: %s", err)
   102  		}
   103  		layerClosers = append(layerClosers, closer)
   104  		lcowUvmLayerPaths = append(lcowUvmLayerPaths, uvmPath)
   105  	}
   106  
   107  	containerScratchPathInUVM := ospath.Join(vm.OS(), guestRoot)
   108  	hostPath, err := getScratchVHDPath(layerFolders)
   109  	if err != nil {
   110  		return "", "", nil, fmt.Errorf("failed to eval symlinks on scratch path: %w", err)
   111  	}
   112  	log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD")
   113  
   114  	var options []string
   115  	scsiMount, err := vm.AddSCSI(
   116  		ctx,
   117  		hostPath,
   118  		containerScratchPathInUVM,
   119  		false,
   120  		vm.ScratchEncryptionEnabled(),
   121  		options,
   122  		uvm.VMAccessTypeIndividual,
   123  	)
   124  	if err != nil {
   125  		return "", "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %s", err)
   126  	}
   127  
   128  	// handles the case where we want to share a scratch disk for multiple containers instead
   129  	// of mounting a new one. Pass a unique value for `ScratchPath` to avoid container upper and
   130  	// work directories colliding in the UVM.
   131  	containerScratchPathInUVM = ospath.Join("linux", scsiMount.UVMPath, "scratch", containerID)
   132  
   133  	defer func() {
   134  		if err != nil {
   135  			if err := scsiMount.Release(ctx); err != nil {
   136  				log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup")
   137  			}
   138  		}
   139  	}()
   140  
   141  	rootfs := ospath.Join(vm.OS(), guestRoot, guestpath.RootfsPath)
   142  	err = vm.CombineLayersLCOW(ctx, containerID, lcowUvmLayerPaths, containerScratchPathInUVM, rootfs)
   143  	if err != nil {
   144  		return "", "", nil, err
   145  	}
   146  	log.G(ctx).Debug("hcsshim::MountLCOWLayers Succeeded")
   147  	closer := &lcowLayersCloser{
   148  		uvm:                     vm,
   149  		guestCombinedLayersPath: rootfs,
   150  		scratchMount:            scsiMount,
   151  		layerClosers:            layerClosers,
   152  	}
   153  	return rootfs, containerScratchPathInUVM, closer, nil
   154  }
   155  
   156  // MountWCOWLayers is a helper for clients to hide all the complexity of layer mounting for WCOW.
   157  // Layer folder are in order: base, [rolayer1..rolayern,] scratch
   158  //
   159  //	v1/v2: Argon WCOW: Returns the mount path on the host as a volume GUID.
   160  //	v1:    Xenon WCOW: Done internally in HCS, so no point calling doing anything here.
   161  //	v2:    Xenon WCOW: Returns a CombinedLayersV2 structure where ContainerRootPath is a folder
   162  //	inside the utility VM which is a GUID mapping of the scratch folder. Each of the layers are
   163  //	the VSMB locations where the read-only layers are mounted.
   164  //
   165  //	Job container: Returns the mount path on the host as a volume guid, with the volume mounted on
   166  //	the host at `volumeMountPath`.
   167  func MountWCOWLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) {
   168  	if vm == nil {
   169  		return mountWCOWHostLayers(ctx, layerFolders, volumeMountPath)
   170  	}
   171  
   172  	if vm.OS() != "windows" {
   173  		return "", nil, errors.New("MountWCOWLayers should only be called for WCOW")
   174  	}
   175  
   176  	return mountWCOWIsolatedLayers(ctx, containerID, layerFolders, guestRoot, volumeMountPath, vm)
   177  }
   178  
// wcowHostLayersCloser records what mountWCOWHostLayers set up on the host so
// that it can be undone through Release.
type wcowHostLayersCloser struct {
	// volumeMountPath is the host directory the volume was mounted at; empty when
	// no directory mount was requested, in which case Release skips removing it.
	volumeMountPath        string
	// scratchLayerFolderPath is the layer folder that Release unprepares and deactivates.
	scratchLayerFolderPath string
}
   183  
   184  func (lc *wcowHostLayersCloser) Release(ctx context.Context) error {
   185  	if lc.volumeMountPath != "" {
   186  		if err := RemoveSandboxMountPoint(ctx, lc.volumeMountPath); err != nil {
   187  			return err
   188  		}
   189  	}
   190  	if err := wclayer.UnprepareLayer(ctx, lc.scratchLayerFolderPath); err != nil {
   191  		return err
   192  	}
   193  	return wclayer.DeactivateLayer(ctx, lc.scratchLayerFolderPath)
   194  }
   195  
   196  func mountWCOWHostLayers(ctx context.Context, layerFolders []string, volumeMountPath string) (_ string, _ resources.ResourceCloser, err error) {
   197  	if len(layerFolders) < 2 {
   198  		return "", nil, errors.New("need at least two layers - base and scratch")
   199  	}
   200  	path := layerFolders[len(layerFolders)-1]
   201  	rest := layerFolders[:len(layerFolders)-1]
   202  	// Simple retry loop to handle some behavior on RS5. Loopback VHDs used to be mounted in a different manner on RS5 (ws2019) which led to some
   203  	// very odd cases where things would succeed when they shouldn't have, or we'd simply timeout if an operation took too long. Many
   204  	// parallel invocations of this code path and stressing the machine seem to bring out the issues, but all of the possible failure paths
   205  	// that bring about the errors we have observed aren't known.
   206  	//
   207  	// On 19h1+ this *shouldn't* be needed, but the logic is to break if everything succeeded so this is harmless and shouldn't need a version check.
   208  	var lErr error
   209  	for i := 0; i < 5; i++ {
   210  		lErr = func() (err error) {
   211  			if err := wclayer.ActivateLayer(ctx, path); err != nil {
   212  				return err
   213  			}
   214  
   215  			defer func() {
   216  				if err != nil {
   217  					_ = wclayer.DeactivateLayer(ctx, path)
   218  				}
   219  			}()
   220  
   221  			return wclayer.PrepareLayer(ctx, path, rest)
   222  		}()
   223  
   224  		if lErr != nil {
   225  			// Common errors seen from the RS5 behavior mentioned above is ERROR_NOT_READY and ERROR_DEVICE_NOT_CONNECTED. The former occurs when HCS
   226  			// tries to grab the volume path of the disk but it doesn't succeed, usually because the disk isn't actually mounted. DEVICE_NOT_CONNECTED
   227  			// has been observed after launching multiple containers in parallel on a machine under high load. This has also been observed to be a trigger
   228  			// for ERROR_NOT_READY as well.
   229  			if hcserr, ok := lErr.(*hcserror.HcsError); ok {
   230  				if hcserr.Err == windows.ERROR_NOT_READY || hcserr.Err == windows.ERROR_DEVICE_NOT_CONNECTED {
   231  					log.G(ctx).WithField("path", path).WithError(hcserr.Err).Warning("retrying layer operations after failure")
   232  
   233  					// Sleep for a little before a re-attempt. A probable cause for these issues in the first place is events not getting
   234  					// reported in time so might be good to give some time for things to "cool down" or get back to a known state.
   235  					time.Sleep(time.Millisecond * 100)
   236  					continue
   237  				}
   238  			}
   239  			// This was a failure case outside of the commonly known error conditions, don't retry here.
   240  			return "", nil, lErr
   241  		}
   242  
   243  		// No errors in layer setup, we can leave the loop
   244  		break
   245  	}
   246  	// If we got unlucky and ran into one of the two errors mentioned five times in a row and left the loop, we need to check
   247  	// the loop error here and fail also.
   248  	if lErr != nil {
   249  		return "", nil, errors.Wrap(lErr, "layer retry loop failed")
   250  	}
   251  
   252  	// If any of the below fails, we want to detach the filter and unmount the disk.
   253  	defer func() {
   254  		if err != nil {
   255  			_ = wclayer.UnprepareLayer(ctx, path)
   256  			_ = wclayer.DeactivateLayer(ctx, path)
   257  		}
   258  	}()
   259  
   260  	mountPath, err := wclayer.GetLayerMountPath(ctx, path)
   261  	if err != nil {
   262  		return "", nil, err
   263  	}
   264  
   265  	// Mount the volume to a directory on the host if requested. This is the case for job containers.
   266  	if volumeMountPath != "" {
   267  		if err := MountSandboxVolume(ctx, volumeMountPath, mountPath); err != nil {
   268  			return "", nil, err
   269  		}
   270  	}
   271  
   272  	closer := &wcowHostLayersCloser{
   273  		volumeMountPath:        volumeMountPath,
   274  		scratchLayerFolderPath: path,
   275  	}
   276  	return mountPath, closer, nil
   277  }
   278  
// wcowIsolatedLayersCloser records everything mountWCOWIsolatedLayers set up
// inside a utility VM so that it can all be torn down through Release.
type wcowIsolatedLayersCloser struct {
	// uvm is the utility VM on which RemoveCombinedLayersWCOW is invoked during Release.
	uvm                     *uvm.UtilityVM
	// guestCombinedLayersPath is the path handed to RemoveCombinedLayersWCOW during Release.
	guestCombinedLayersPath string
	// scratchMount releases the container scratch mount.
	scratchMount            resources.ResourceCloser
	// layerClosers holds one closer per layer that was added; each is released in slice order.
	layerClosers            []resources.ResourceCloser
}
   285  
   286  func (lc *wcowIsolatedLayersCloser) Release(ctx context.Context) (retErr error) {
   287  	if err := lc.uvm.RemoveCombinedLayersWCOW(ctx, lc.guestCombinedLayersPath); err != nil {
   288  		log.G(ctx).WithError(err).Error("failed RemoveCombinedLayersWCOW")
   289  		if retErr == nil {
   290  			retErr = fmt.Errorf("first error: %w", err)
   291  		}
   292  	}
   293  	if err := lc.scratchMount.Release(ctx); err != nil {
   294  		log.G(ctx).WithError(err).Error("failed WCOW scratch mount release")
   295  		if retErr == nil {
   296  			retErr = fmt.Errorf("first error: %w", err)
   297  		}
   298  	}
   299  	for i, closer := range lc.layerClosers {
   300  		if err := closer.Release(ctx); err != nil {
   301  			log.G(ctx).WithFields(logrus.Fields{
   302  				logrus.ErrorKey: err,
   303  				"layerIndex":    i,
   304  			}).Error("failed releasing WCOW layer")
   305  			if retErr == nil {
   306  				retErr = fmt.Errorf("first error: %w", err)
   307  			}
   308  		}
   309  	}
   310  	return
   311  }
   312  
   313  func mountWCOWIsolatedLayers(ctx context.Context, containerID string, layerFolders []string, guestRoot, volumeMountPath string, vm *uvm.UtilityVM) (_ string, _ resources.ResourceCloser, err error) {
   314  	log.G(ctx).WithField("os", vm.OS()).Debug("hcsshim::MountWCOWLayers V2 UVM")
   315  
   316  	var (
   317  		layersAdded  []string
   318  		layerClosers []resources.ResourceCloser
   319  	)
   320  	defer func() {
   321  		if err != nil {
   322  			for _, l := range layerClosers {
   323  				if err := l.Release(ctx); err != nil {
   324  					log.G(ctx).WithError(err).Warn("failed to remove wcow layer on cleanup")
   325  				}
   326  			}
   327  		}
   328  	}()
   329  
   330  	for _, layerPath := range layerFolders[:len(layerFolders)-1] {
   331  		log.G(ctx).WithField("layerPath", layerPath).Debug("mounting layer")
   332  		options := vm.DefaultVSMBOptions(true)
   333  		options.TakeBackupPrivilege = true
   334  		mount, err := vm.AddVSMB(ctx, layerPath, options)
   335  		if err != nil {
   336  			return "", nil, fmt.Errorf("failed to add VSMB layer: %s", err)
   337  		}
   338  		layersAdded = append(layersAdded, layerPath)
   339  		layerClosers = append(layerClosers, mount)
   340  	}
   341  
   342  	containerScratchPathInUVM := ospath.Join(vm.OS(), guestRoot)
   343  	hostPath, err := getScratchVHDPath(layerFolders)
   344  	if err != nil {
   345  		return "", nil, fmt.Errorf("failed to get scratch VHD path in layer folders: %s", err)
   346  	}
   347  	log.G(ctx).WithField("hostPath", hostPath).Debug("mounting scratch VHD")
   348  
   349  	var options []string
   350  	scsiMount, err := vm.AddSCSI(
   351  		ctx,
   352  		hostPath,
   353  		containerScratchPathInUVM,
   354  		false,
   355  		vm.ScratchEncryptionEnabled(),
   356  		options,
   357  		uvm.VMAccessTypeIndividual,
   358  	)
   359  	if err != nil {
   360  		return "", nil, fmt.Errorf("failed to add SCSI scratch VHD: %s", err)
   361  	}
   362  	containerScratchPathInUVM = scsiMount.UVMPath
   363  
   364  	defer func() {
   365  		if err != nil {
   366  			if err := scsiMount.Release(ctx); err != nil {
   367  				log.G(ctx).WithError(err).Warn("failed to remove scratch on cleanup")
   368  			}
   369  		}
   370  	}()
   371  
   372  	// Load the filter at the C:\s<ID> location calculated above. We pass into this
   373  	// request each of the read-only layer folders.
   374  	var layers []hcsschema.Layer
   375  	layers, err = GetHCSLayers(ctx, vm, layersAdded)
   376  	if err != nil {
   377  		return "", nil, err
   378  	}
   379  	err = vm.CombineLayersWCOW(ctx, layers, containerScratchPathInUVM)
   380  	if err != nil {
   381  		return "", nil, err
   382  	}
   383  	log.G(ctx).Debug("hcsshim::MountWCOWLayers Succeeded")
   384  	closer := &wcowIsolatedLayersCloser{
   385  		uvm:                     vm,
   386  		guestCombinedLayersPath: containerScratchPathInUVM,
   387  		scratchMount:            scsiMount,
   388  		layerClosers:            layerClosers,
   389  	}
   390  	return containerScratchPathInUVM, closer, nil
   391  }
   392  
   393  func addLCOWLayer(ctx context.Context, vm *uvm.UtilityVM, layerPath string) (uvmPath string, _ resources.ResourceCloser, err error) {
   394  	// don't try to add as vpmem when we want additional devices on the uvm to be fully physically backed
   395  	if !vm.DevicesPhysicallyBacked() {
   396  		// We first try vPMEM and if it is full or the file is too large we
   397  		// fall back to SCSI.
   398  		mount, err := vm.AddVPMem(ctx, layerPath)
   399  		if err == nil {
   400  			log.G(ctx).WithFields(logrus.Fields{
   401  				"layerPath": layerPath,
   402  				"layerType": "vpmem",
   403  			}).Debug("Added LCOW layer")
   404  			return mount.GuestPath, mount, nil
   405  		} else if err != uvm.ErrNoAvailableLocation && err != uvm.ErrMaxVPMemLayerSize {
   406  			return "", nil, fmt.Errorf("failed to add VPMEM layer: %s", err)
   407  		}
   408  	}
   409  
   410  	options := []string{"ro"}
   411  	uvmPath = fmt.Sprintf(guestpath.LCOWGlobalMountPrefixFmt, vm.UVMMountCounter())
   412  	sm, err := vm.AddSCSI(ctx, layerPath, uvmPath, true, false, options, uvm.VMAccessTypeNoop)
   413  	if err != nil {
   414  		return "", nil, fmt.Errorf("failed to add SCSI layer: %s", err)
   415  	}
   416  	log.G(ctx).WithFields(logrus.Fields{
   417  		"layerPath": layerPath,
   418  		"layerType": "scsi",
   419  	}).Debug("Added LCOW layer")
   420  	return sm.UVMPath, sm, nil
   421  }
   422  
   423  // GetHCSLayers converts host paths corresponding to container layers into HCS schema V2 layers
   424  func GetHCSLayers(ctx context.Context, vm *uvm.UtilityVM, paths []string) (layers []hcsschema.Layer, err error) {
   425  	for _, path := range paths {
   426  		uvmPath, err := vm.GetVSMBUvmPath(ctx, path, true)
   427  		if err != nil {
   428  			return nil, err
   429  		}
   430  		layerID, err := wclayer.LayerID(ctx, path)
   431  		if err != nil {
   432  			return nil, err
   433  		}
   434  		layers = append(layers, hcsschema.Layer{Id: layerID.String(), Path: uvmPath})
   435  	}
   436  	return layers, nil
   437  }
   438  
   439  func getScratchVHDPath(layerFolders []string) (string, error) {
   440  	hostPath := filepath.Join(layerFolders[len(layerFolders)-1], "sandbox.vhdx")
   441  	// For LCOW, we can reuse another container's scratch space (usually the sandbox container's).
   442  	//
   443  	// When sharing a scratch space, the `hostPath` will be a symlink to the sandbox.vhdx location to use.
   444  	// When not sharing a scratch space, `hostPath` will be the path to the sandbox.vhdx to use.
   445  	//
   446  	// Evaluate the symlink here (if there is one).
   447  	hostPath, err := fs.ResolvePath(hostPath)
   448  	if err != nil {
   449  		return "", errors.Wrap(err, "failed to resolve path")
   450  	}
   451  	return hostPath, nil
   452  }
   453  
   454  // Mount the sandbox vhd to a user friendly path.
   455  func MountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err error) {
   456  	log.G(ctx).WithFields(logrus.Fields{
   457  		"hostpath":   hostPath,
   458  		"volumeName": volumeName,
   459  	}).Debug("mounting volume for container")
   460  
   461  	if _, err := os.Stat(hostPath); os.IsNotExist(err) {
   462  		if err := os.MkdirAll(hostPath, 0777); err != nil {
   463  			return err
   464  		}
   465  	}
   466  
   467  	defer func() {
   468  		if err != nil {
   469  			os.RemoveAll(hostPath)
   470  		}
   471  	}()
   472  
   473  	// Make sure volumeName ends with a trailing slash as required.
   474  	if volumeName[len(volumeName)-1] != '\\' {
   475  		volumeName += `\` // Be nice to clients and make sure well-formed for back-compat
   476  	}
   477  
   478  	if err = windows.SetVolumeMountPoint(windows.StringToUTF16Ptr(hostPath), windows.StringToUTF16Ptr(volumeName)); err != nil {
   479  		return errors.Wrapf(err, "failed to mount sandbox volume to %s on host", hostPath)
   480  	}
   481  	return nil
   482  }
   483  
   484  // Remove volume mount point. And remove folder afterwards.
   485  func RemoveSandboxMountPoint(ctx context.Context, hostPath string) error {
   486  	log.G(ctx).WithFields(logrus.Fields{
   487  		"hostpath": hostPath,
   488  	}).Debug("removing volume mount point for container")
   489  
   490  	if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil {
   491  		return errors.Wrap(err, "failed to delete sandbox volume mount point")
   492  	}
   493  	if err := os.Remove(hostPath); err != nil {
   494  		return errors.Wrapf(err, "failed to remove sandbox mounted folder path %q", hostPath)
   495  	}
   496  	return nil
   497  }
   498  

View as plain text