...

Source file src/github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2/nvidia_utils.go

Documentation: github.com/Microsoft/hcsshim/internal/guest/runtime/hcsv2

     1  //go:build linux
     2  // +build linux
     3  
     4  package hcsv2
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"os/exec"
    11  	"strings"
    12  
    13  	oci "github.com/opencontainers/runtime-spec/specs-go"
    14  	"github.com/pkg/errors"
    15  
    16  	"github.com/Microsoft/hcsshim/cmd/gcstools/generichook"
    17  	"github.com/Microsoft/hcsshim/internal/guest/storage/pci"
    18  	"github.com/Microsoft/hcsshim/internal/guestpath"
    19  	"github.com/Microsoft/hcsshim/internal/hooks"
    20  	"github.com/Microsoft/hcsshim/pkg/annotations"
    21  )
    22  
    23  const nvidiaDebugFilePath = "/nvidia-container.log"
    24  
    25  const nvidiaToolBinary = "nvidia-container-cli"
    26  
    27  // described here: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks
    28  // addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the prestart hook
    29  func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec) error {
    30  	genericHookBinary := "generichook"
    31  	genericHookPath, err := exec.LookPath(genericHookBinary)
    32  	if err != nil {
    33  		return errors.Wrapf(err, "failed to find %s for container device support", genericHookBinary)
    34  	}
    35  
    36  	debugOption := fmt.Sprintf("--debug=%s", nvidiaDebugFilePath)
    37  
    38  	// TODO katiewasnothere: right now both host and container ldconfig do not work as expected for nvidia-container-cli
    39  	// ldconfig needs to be run in the container to setup the correct symlinks to the library files nvidia-container-cli
    40  	// maps into the container
    41  	args := []string{
    42  		genericHookPath,
    43  		nvidiaToolBinary,
    44  		debugOption,
    45  		"--load-kmods",
    46  		"--no-pivot",
    47  		"configure",
    48  		"--ldconfig=@/sbin/ldconfig",
    49  	}
    50  	if capabilities, ok := spec.Annotations[annotations.ContainerGPUCapabilities]; ok {
    51  		caps := strings.Split(capabilities, ",")
    52  		for _, c := range caps {
    53  			args = append(args, fmt.Sprintf("--%s", c))
    54  		}
    55  	}
    56  
    57  	for _, d := range spec.Windows.Devices {
    58  		switch d.IDType {
    59  		case "gpu":
    60  			busLocation, err := pci.FindDeviceBusLocationFromVMBusGUID(ctx, d.ID)
    61  			if err != nil {
    62  				return errors.Wrapf(err, "failed to find nvidia gpu bus location")
    63  			}
    64  			args = append(args, fmt.Sprintf("--device=%s", busLocation))
    65  		}
    66  	}
    67  
    68  	// add template for pid argument to be injected later by the generic hook binary
    69  	args = append(args, "--no-cgroups", "--pid={{pid}}", spec.Root.Path)
    70  
    71  	hookLogDebugFileEnvOpt := fmt.Sprintf("%s=%s", generichook.LogDebugFileEnvKey, nvidiaDebugFilePath)
    72  	hookEnv := append(updateEnvWithNvidiaVariables(), hookLogDebugFileEnvOpt)
    73  	nvidiaHook := hooks.NewOCIHook(genericHookPath, args, hookEnv)
    74  	return hooks.AddOCIHook(spec, hooks.CreateRuntime, nvidiaHook)
    75  }
    76  
    77  // Helper function to find the usr/lib path for the installed nvidia library files.
    78  // This function assumes that the drivers have been installed using
    79  // gcstool's `install-drivers` binary.
    80  func getNvidiaDriversUsrLibPath() string {
    81  	return fmt.Sprintf("%s/content/usr/lib", guestpath.LCOWNvidiaMountPath)
    82  }
    83  
    84  // Helper function to find the usr/bin path for the installed nvidia tools.
    85  // This function assumes that the drivers have been installed using
    86  // gcstool's `install-drivers` binary.
    87  func getNvidiaDriverUsrBinPath() string {
    88  	return fmt.Sprintf("%s/content/usr/bin", guestpath.LCOWNvidiaMountPath)
    89  }
    90  
    91  // updateEnvWithNvidiaVariables creates an env with the nvidia gpu vhd in PATH and insecure mode set
    92  func updateEnvWithNvidiaVariables() []string {
    93  	env := updatePathEnv(getNvidiaDriverUsrBinPath())
    94  	// NVC_INSECURE_MODE allows us to run nvidia-container-cli without seccomp
    95  	// we don't currently use seccomp in the uvm, so avoid using it here for now as well
    96  	env = append(env, "NVC_INSECURE_MODE=1")
    97  	return env
    98  }
    99  
   100  // updatePathEnv adds specified `dirs` to PATH variable and returns the result environment variables.
   101  func updatePathEnv(dirs ...string) []string {
   102  	pathPrefix := "PATH="
   103  	additionalDirs := strings.Join(dirs, ":")
   104  	env := os.Environ()
   105  	for i, v := range env {
   106  		if strings.HasPrefix(v, pathPrefix) {
   107  			newPath := fmt.Sprintf("%s:%s", v, additionalDirs)
   108  			env[i] = newPath
   109  			return env
   110  		}
   111  	}
   112  	return append(env, fmt.Sprintf("PATH=%s", additionalDirs))
   113  }
   114  

View as plain text