1
2
3
4 package hcsv2
5
6 import (
7 "bufio"
8 "context"
9 "crypto/sha256"
10 "encoding/base64"
11 "encoding/json"
12 "fmt"
13 "io"
14 "os"
15 "os/exec"
16 "path"
17 "path/filepath"
18 "strings"
19 "sync"
20 "syscall"
21 "time"
22
23 "github.com/Microsoft/hcsshim/internal/cosesign1"
24 "github.com/Microsoft/hcsshim/internal/debug"
25 didx509resolver "github.com/Microsoft/hcsshim/internal/did-x509-resolver"
26 "github.com/Microsoft/hcsshim/internal/guest/gcserr"
27 "github.com/Microsoft/hcsshim/internal/guest/policy"
28 "github.com/Microsoft/hcsshim/internal/guest/prot"
29 "github.com/Microsoft/hcsshim/internal/guest/runtime"
30 "github.com/Microsoft/hcsshim/internal/guest/spec"
31 "github.com/Microsoft/hcsshim/internal/guest/stdio"
32 "github.com/Microsoft/hcsshim/internal/guest/storage"
33 "github.com/Microsoft/hcsshim/internal/guest/storage/overlay"
34 "github.com/Microsoft/hcsshim/internal/guest/storage/pci"
35 "github.com/Microsoft/hcsshim/internal/guest/storage/plan9"
36 "github.com/Microsoft/hcsshim/internal/guest/storage/pmem"
37 "github.com/Microsoft/hcsshim/internal/guest/storage/scsi"
38 "github.com/Microsoft/hcsshim/internal/guest/transport"
39 "github.com/Microsoft/hcsshim/internal/log"
40 "github.com/Microsoft/hcsshim/internal/oci"
41 "github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
42 "github.com/Microsoft/hcsshim/internal/protocol/guestresource"
43 "github.com/Microsoft/hcsshim/pkg/annotations"
44 "github.com/Microsoft/hcsshim/pkg/securitypolicy"
45 "github.com/mattn/go-shellwords"
46 "github.com/opencontainers/runtime-spec/specs-go"
47 "github.com/pkg/errors"
48 "github.com/sirupsen/logrus"
49 "golang.org/x/sys/unix"
50 )
51
52
53
// UVMContainerID is the container ID that addresses the utility VM itself
// rather than a container running inside it: ModifySettings routes requests
// carrying this ID to the host-level handler, and ExecProcess treats them as
// external (host) process launches.
const UVMContainerID = "00000000-0000-0000-0000-000000000000"
55
56
57
// Host tracks the guest-side state of the utility VM: the containers and
// external processes it knows about, the runtime and transports it uses, and
// the security policy state.
type Host struct {
	// containersMutex is held while containers is read or modified.
	containersMutex sync.Mutex
	// containers maps a container ID to its Container.
	containers map[string]*Container

	// externalProcessesMutex is held while externalProcesses is read or modified.
	externalProcessesMutex sync.Mutex
	// externalProcesses maps a pid to a process started by runExternalProcess.
	externalProcesses map[int]*externalProcess

	// rtime is the runtime used to create containers.
	rtime runtime.Runtime
	// vsock is the transport handed to containers and external processes for
	// stdio, and to plan9 mounts.
	vsock transport.Transport
	// devNullTransport replaces vsock when policy denies stdio access.
	devNullTransport transport.Transport

	// policyMutex is held by SetConfidentialUVMOptions while the policy state
	// below is being set.
	policyMutex sync.Mutex
	// securityPolicyEnforcer is consulted by the policy-gated operations.
	securityPolicyEnforcer securitypolicy.SecurityPolicyEnforcer
	// securityPolicyEnforcerSet becomes true once SetConfidentialUVMOptions
	// succeeded; a second attempt is rejected.
	securityPolicyEnforcerSet bool
	// uvmReferenceInfo is the encoded UVM reference received with the
	// confidential options; CreateContainer writes it into the container's
	// security context directory.
	uvmReferenceInfo string

	// logWriter is where logrus output goes when the policy allows runtime
	// logging.
	logWriter io.Writer

	// hostMounts records the read-write devices added through
	// modifyHostSettings and whether they are encrypted.
	hostMounts *hostMounts
}
82
83 func NewHost(rtime runtime.Runtime, vsock transport.Transport, initialEnforcer securitypolicy.SecurityPolicyEnforcer, logWriter io.Writer) *Host {
84 return &Host{
85 containers: make(map[string]*Container),
86 externalProcesses: make(map[int]*externalProcess),
87 rtime: rtime,
88 vsock: vsock,
89 devNullTransport: &transport.DevNullTransport{},
90 securityPolicyEnforcerSet: false,
91 securityPolicyEnforcer: initialEnforcer,
92 logWriter: logWriter,
93 hostMounts: newHostMounts(),
94 }
95 }
96
97
98
99
100
101
102
103 func (h *Host) SetConfidentialUVMOptions(ctx context.Context, r *guestresource.LCOWConfidentialOptions) error {
104 h.policyMutex.Lock()
105 defer h.policyMutex.Unlock()
106 if h.securityPolicyEnforcerSet {
107 return errors.New("security policy has already been set")
108 }
109
110
111
112 maxErrorMessageLength := 3 * 1024
113
114
115
116 p, err := securitypolicy.CreateSecurityPolicyEnforcer(
117 r.EnforcerType,
118 r.EncodedSecurityPolicy,
119 policy.DefaultCRIMounts(),
120 policy.DefaultCRIPrivilegedMounts(),
121 maxErrorMessageLength,
122 )
123 if err != nil {
124 return err
125 }
126
127
128
129
130
131
132
133 if err = p.EnforceRuntimeLoggingPolicy(ctx); err == nil {
134 logrus.SetOutput(h.logWriter)
135 } else {
136 logrus.SetOutput(io.Discard)
137 }
138
139 hostData, err := securitypolicy.NewSecurityPolicyDigest(r.EncodedSecurityPolicy)
140 if err != nil {
141 return err
142 }
143
144 if err := validateHostData(hostData[:]); err != nil {
145 return err
146 }
147
148 h.securityPolicyEnforcer = p
149 h.securityPolicyEnforcerSet = true
150 h.uvmReferenceInfo = r.EncodedUVMReference
151
152 return nil
153 }
154
155
156
157
158
159
160
161
162
163
164
165 func (h *Host) InjectFragment(ctx context.Context, fragment *guestresource.LCOWSecurityPolicyFragment) (err error) {
166 log.G(ctx).WithField("fragment", fmt.Sprintf("%+v", fragment)).Debug("GCS Host.InjectFragment")
167
168 raw, err := base64.StdEncoding.DecodeString(fragment.Fragment)
169 if err != nil {
170 return err
171 }
172 blob := []byte(fragment.Fragment)
173
174
175
176 sha := sha256.New()
177 sha.Write(blob)
178 timestamp := time.Now()
179 fragmentPath := fmt.Sprintf("fragment-%x-%d.blob", sha.Sum(nil), timestamp.UnixMilli())
180 _ = os.WriteFile(filepath.Join("/tmp", fragmentPath), blob, 0644)
181
182 unpacked, err := cosesign1.UnpackAndValidateCOSE1CertChain(raw)
183 if err != nil {
184 return fmt.Errorf("InjectFragment failed COSE validation: %s", err.Error())
185 }
186
187 payloadString := string(unpacked.Payload[:])
188 issuer := unpacked.Issuer
189 feed := unpacked.Feed
190 chainPem := unpacked.ChainPem
191
192 log.G(ctx).WithFields(logrus.Fields{
193 "issuer": issuer,
194 "feed": feed,
195 "cty": unpacked.ContentType,
196 "chainPem": chainPem,
197 }).Debugf("unpacked COSE1 cert chain")
198
199 log.G(ctx).WithFields(logrus.Fields{
200 "payload": payloadString,
201 }).Tracef("unpacked COSE1 payload")
202
203 if len(issuer) == 0 || len(feed) == 0 {
204 return fmt.Errorf("either issuer and feed must both be provided in the COSE_Sign1 protected header")
205 }
206
207
208
209 _, err = didx509resolver.Resolve(unpacked.ChainPem, issuer, true)
210 if err != nil {
211 log.G(ctx).Printf("Badly formed fragment - did resolver failed to match fragment did:x509 from chain with purported issuer %s, feed %s - err %s", issuer, feed, err.Error())
212 return err
213 }
214
215
216 err = h.securityPolicyEnforcer.LoadFragment(ctx, issuer, feed, payloadString)
217 if err != nil {
218 return fmt.Errorf("InjectFragment failed policy load: %w", err)
219 }
220 log.G(ctx).Printf("passed fragment into the enforcer.")
221
222 return nil
223 }
224
// SecurityPolicyEnforcer returns the enforcer currently stored on the host.
// NOTE(review): the field is read without holding policyMutex — confirm that
// callers cannot race with SetConfidentialUVMOptions.
func (h *Host) SecurityPolicyEnforcer() securitypolicy.SecurityPolicyEnforcer {
	return h.securityPolicyEnforcer
}
228
// Transport returns the vsock transport the host was created with.
func (h *Host) Transport() transport.Transport {
	return h.vsock
}
232
233 func (h *Host) RemoveContainer(id string) {
234 h.containersMutex.Lock()
235 defer h.containersMutex.Unlock()
236
237 c, ok := h.containers[id]
238 if !ok {
239 return
240 }
241
242
243 criType, isCRI := c.spec.Annotations[annotations.KubernetesContainerType]
244 if !isCRI || criType == "sandbox" {
245 _ = RemoveNetworkNamespace(context.Background(), id)
246 }
247
248 delete(h.containers, id)
249 }
250
251 func (h *Host) GetCreatedContainer(id string) (*Container, error) {
252 h.containersMutex.Lock()
253 defer h.containersMutex.Unlock()
254
255 c, ok := h.containers[id]
256 if !ok {
257 return nil, gcserr.NewHresultError(gcserr.HrVmcomputeSystemNotFound)
258 }
259 if c.getStatus() != containerCreated {
260 return nil, fmt.Errorf("container is not in state \"created\": %w",
261 gcserr.NewHresultError(gcserr.HrVmcomputeInvalidState))
262 }
263 return c, nil
264 }
265
266 func (h *Host) AddContainer(id string, c *Container) error {
267 h.containersMutex.Lock()
268 defer h.containersMutex.Unlock()
269
270 if _, ok := h.containers[id]; ok {
271 return gcserr.NewHresultError(gcserr.HrVmcomputeSystemAlreadyExists)
272 }
273 h.containers[id] = c
274 return nil
275 }
276
277 func setupSandboxMountsPath(id string) (err error) {
278 mountPath := spec.SandboxMountsDir(id)
279 if err := os.MkdirAll(mountPath, 0755); err != nil {
280 return errors.Wrapf(err, "failed to create sandboxMounts dir in sandbox %v", id)
281 }
282 defer func() {
283 if err != nil {
284 _ = os.RemoveAll(mountPath)
285 }
286 }()
287
288 return storage.MountRShared(mountPath)
289 }
290
291 func setupSandboxHugePageMountsPath(id string) error {
292 mountPath := spec.HugePagesMountsDir(id)
293 if err := os.MkdirAll(mountPath, 0755); err != nil {
294 return errors.Wrapf(err, "failed to create hugepage Mounts dir in sandbox %v", id)
295 }
296
297 return storage.MountRShared(mountPath)
298 }
299
// CreateContainer registers a new container under id, prepares its OCI spec
// according to its CRI container type, enforces the security policy for the
// creation request, writes config.json into the OCI bundle and asks the
// runtime to create the container.
//
// The deferred handlers read the named result err: on any failure the
// container is unregistered from the host again and, once the spec setup for
// the container type has succeeded, the OCI bundle directory is deleted.
// On success the container is returned with status containerCreated.
func (h *Host) CreateContainer(ctx context.Context, id string, settings *prot.VMHostedContainerSettingsV2) (_ *Container, err error) {
	criType, isCRI := settings.OCISpecification.Annotations[annotations.KubernetesContainerType]
	c := &Container{
		id:             id,
		vsock:          h.vsock,
		spec:           settings.OCISpecification,
		ociBundlePath:  settings.OCIBundlePath,
		isSandbox:      criType == "sandbox",
		exitType:       prot.NtUnexpectedExit,
		processes:      make(map[uint32]*containerProcess),
		status:         containerCreating,
		scratchDirPath: settings.ScratchDirPath,
	}

	// Register first so that a concurrent request for the same ID is rejected;
	// the registration is undone if anything below fails.
	if err := h.AddContainer(id, c); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			h.RemoveContainer(id)
		}
	}()

	// Policy enforcement happens further down because it needs the sandbox ID,
	// which is only known after the container type has been examined.
	var namespaceID string
	// A sandbox (or standalone) container is its own sandbox; workload
	// containers replace this with their sandbox-id annotation.
	sandboxID := id
	if isCRI {
		switch criType {
		case "sandbox":
			// NOTE(review): the namespace ID is read before the spec setup call,
			// presumably because that call rewrites the spec — confirm.
			namespaceID = getNetworkNamespaceID(settings.OCISpecification)
			err = setupSandboxContainerSpec(ctx, id, settings.OCISpecification)
			if err != nil {
				return nil, err
			}
			defer func() {
				if err != nil {
					_ = os.RemoveAll(settings.OCIBundlePath)
				}
			}()

			if err = setupSandboxMountsPath(id); err != nil {
				return nil, err
			}

			if err = setupSandboxHugePageMountsPath(id); err != nil {
				return nil, err
			}

			if err := policy.ExtendPolicyWithNetworkingMounts(id, h.securityPolicyEnforcer, settings.OCISpecification); err != nil {
				return nil, err
			}
		case "container":
			sid, ok := settings.OCISpecification.Annotations[annotations.KubernetesSandboxID]
			sandboxID = sid
			if !ok || sid == "" {
				return nil, errors.Errorf("unsupported 'io.kubernetes.cri.sandbox-id': '%s'", sid)
			}
			if err := setupWorkloadContainerSpec(ctx, sid, id, settings.OCISpecification); err != nil {
				return nil, err
			}

			// Add the SEV device when a security policy is present and the
			// container is not annotated as privileged. A failure is only
			// logged and does not abort the creation.
			if len(h.securityPolicyEnforcer.EncodedSecurityPolicy()) > 0 && !oci.ParseAnnotationsBool(ctx,
				settings.OCISpecification.Annotations, annotations.LCOWPrivileged, false) {
				if err := addDevSev(ctx, settings.OCISpecification); err != nil {
					log.G(ctx).WithError(err).Debug("failed to add SEV device")
				}
			}

			defer func() {
				if err != nil {
					_ = os.RemoveAll(settings.OCIBundlePath)
				}
			}()
			if err := policy.ExtendPolicyWithNetworkingMounts(sandboxID, h.securityPolicyEnforcer, settings.OCISpecification); err != nil {
				return nil, err
			}
		default:
			return nil, errors.Errorf("unsupported 'io.kubernetes.cri.container-type': '%s'", criType)
		}
	} else {
		// Standalone (non-CRI) container.
		namespaceID = getNetworkNamespaceID(settings.OCISpecification)
		if err := setupStandaloneContainerSpec(ctx, id, settings.OCISpecification); err != nil {
			return nil, err
		}
		defer func() {
			if err != nil {
				_ = os.RemoveAll(settings.OCIBundlePath)
			}
		}()
	}

	user, groups, umask, err := h.securityPolicyEnforcer.GetUserInfo(id, settings.OCISpecification.Process)
	if err != nil {
		return nil, err
	}

	seccomp, err := securitypolicy.MeasureSeccompProfile(settings.OCISpecification.Linux.Seccomp)
	if err != nil {
		return nil, err
	}

	envToKeep, capsToKeep, allowStdio, err := h.securityPolicyEnforcer.EnforceCreateContainerPolicy(
		ctx,
		sandboxID,
		id,
		settings.OCISpecification.Process.Args,
		settings.OCISpecification.Process.Env,
		settings.OCISpecification.Process.Cwd,
		settings.OCISpecification.Mounts,
		isPrivilegedContainerCreationRequest(ctx, settings.OCISpecification),
		settings.OCISpecification.Process.NoNewPrivileges,
		user,
		groups,
		umask,
		settings.OCISpecification.Process.Capabilities,
		seccomp,
	)
	if err != nil {
		return nil, errors.Wrapf(err, "container creation denied due to policy")
	}

	if !allowStdio {
		// Policy denies stdio access: give the container the dev-null
		// transport instead of the real vsock transport.
		c.vsock = h.devNullTransport
	}

	// The enforcer may narrow the environment and capability sets; a nil
	// result leaves the requested values untouched.
	if envToKeep != nil {
		settings.OCISpecification.Process.Env = []string(envToKeep)
	}

	if capsToKeep != nil {
		settings.OCISpecification.Process.Capabilities = capsToKeep
	}

	// Unless switched off through the UVMSecurityPolicyEnv annotation (default
	// true), expose the encoded policy, the UVM reference info and the host AMD
	// certificate as files in a temporary directory below the container root
	// and advertise that directory via UVM_SECURITY_CONTEXT_DIR.
	if oci.ParseAnnotationsBool(ctx, settings.OCISpecification.Annotations, annotations.UVMSecurityPolicyEnv, true) {
		encodedPolicy := h.securityPolicyEnforcer.EncodedSecurityPolicy()
		hostAMDCert := settings.OCISpecification.Annotations[annotations.HostAMDCertificate]
		if len(encodedPolicy) > 0 || len(hostAMDCert) > 0 || len(h.uvmReferenceInfo) > 0 {
			// Use os.MkdirTemp to get a unique directory name.
			securityContextDir, err := os.MkdirTemp(settings.OCISpecification.Root.Path, securitypolicy.SecurityContextDirTemplate)
			if err != nil {
				return nil, fmt.Errorf("failed to create security context directory: %w", err)
			}
			// Set 0755 explicitly rather than relying on the mode chosen by
			// os.MkdirTemp.
			if err := os.Chmod(securityContextDir, 0755); err != nil {
				return nil, fmt.Errorf("failed to chmod security context directory: %w", err)
			}

			if len(encodedPolicy) > 0 {
				if err := writeFileInDir(securityContextDir, securitypolicy.PolicyFilename, []byte(encodedPolicy), 0744); err != nil {
					return nil, fmt.Errorf("failed to write security policy: %w", err)
				}
			}
			if len(h.uvmReferenceInfo) > 0 {
				if err := writeFileInDir(securityContextDir, securitypolicy.ReferenceInfoFilename, []byte(h.uvmReferenceInfo), 0744); err != nil {
					return nil, fmt.Errorf("failed to write UVM reference info: %w", err)
				}
			}

			if len(hostAMDCert) > 0 {
				if err := writeFileInDir(securityContextDir, securitypolicy.HostAMDCertFilename, []byte(hostAMDCert), 0744); err != nil {
					return nil, fmt.Errorf("failed to write host AMD certificate: %w", err)
				}
			}

			// The directory sits directly below the container root, so inside
			// the container it is "/" plus its base name.
			containerCtxDir := fmt.Sprintf("/%s", filepath.Base(securityContextDir))
			secCtxEnv := fmt.Sprintf("UVM_SECURITY_CONTEXT_DIR=%s", containerCtxDir)
			settings.OCISpecification.Process.Env = append(settings.OCISpecification.Process.Env, secCtxEnv)
		}
	}

	// Write the (possibly modified) spec as config.json into the bundle.
	if err := os.MkdirAll(settings.OCIBundlePath, 0700); err != nil {
		return nil, errors.Wrapf(err, "failed to create OCIBundlePath: '%s'", settings.OCIBundlePath)
	}
	configFile := path.Join(settings.OCIBundlePath, "config.json")
	f, err := os.Create(configFile)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to create config.json at: '%s'", configFile)
	}
	defer f.Close()
	writer := bufio.NewWriter(f)
	if err := json.NewEncoder(writer).Encode(settings.OCISpecification); err != nil {
		return nil, errors.Wrapf(err, "failed to write OCISpecification to config.json at: '%s'", configFile)
	}
	if err := writer.Flush(); err != nil {
		return nil, errors.Wrapf(err, "failed to flush writer for config.json at: '%s'", configFile)
	}

	con, err := h.rtime.CreateContainer(id, settings.OCIBundlePath, nil)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to create container")
	}
	init, err := con.GetInitProcess()
	if err != nil {
		return nil, errors.Wrapf(err, "failed to get container init process")
	}

	c.container = con
	c.initProcess = newProcess(c, settings.OCISpecification.Process, init, uint32(c.container.Pid()), true)

	// Only sandbox and standalone containers get their pid assigned to a
	// network namespace. A failed namespace lookup is an error for CRI
	// sandboxes but is tolerated for standalone containers.
	if criType == "sandbox" || !isCRI {
		ns, err := getNetworkNamespace(namespaceID)
		if isCRI && err != nil {
			return nil, err
		}
		// Standalone containers may not have a namespace at all.
		if ns != nil {
			if err := ns.AssignContainerPid(ctx, c.container.Pid()); err != nil {
				return nil, err
			}
			if err := ns.Sync(ctx); err != nil {
				return nil, err
			}
		}
	}

	c.setStatus(containerCreated)
	return c, nil
}
540
// modifyHostSettings applies a modification request that targets the UVM
// itself, dispatching on req.ResourceType to the matching modify* helper.
//
// For read-write mapped virtual disks the hostMounts bookkeeping is updated
// before the actual mount or unmount; the deferred handlers read the named
// result err and undo that bookkeeping when the operation fails. Unsupported
// resource types yield an error.
func (h *Host) modifyHostSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) (err error) {
	switch req.ResourceType {
	case guestresource.ResourceTypeSCSIDevice:
		return modifySCSIDevice(ctx, req.RequestType, req.Settings.(*guestresource.SCSIDevice))
	case guestresource.ResourceTypeMappedVirtualDisk:
		mvd := req.Settings.(*guestresource.LCOWMappedVirtualDisk)
		// Replace the controller number from the request with the actual
		// controller number reported by the scsi package.
		var cNum uint8
		cNum, err = scsi.ActualControllerNumber(ctx, mvd.Controller)
		if err != nil {
			return err
		}
		mvd.Controller = cNum
		// Update the internal read-write mount state first.
		if !mvd.ReadOnly {
			localCtx, cancel := context.WithTimeout(ctx, time.Second*5)
			defer cancel()
			var source string
			source, err = scsi.ControllerLunToName(localCtx, mvd.Controller, mvd.Lun)
			if err != nil {
				return err
			}
			if req.RequestType == guestrequest.RequestTypeAdd {
				if err := h.hostMounts.AddRWDevice(mvd.MountPath, source, mvd.Encrypted); err != nil {
					return err
				}
				// Forget the device again if the mount below fails.
				defer func() {
					if err != nil {
						_ = h.hostMounts.RemoveRWDevice(mvd.MountPath, source)
					}
				}()
			} else if req.RequestType == guestrequest.RequestTypeRemove {
				if err := h.hostMounts.RemoveRWDevice(mvd.MountPath, source); err != nil {
					return err
				}
				// Restore the record if the unmount below fails.
				defer func() {
					if err != nil {
						_ = h.hostMounts.AddRWDevice(mvd.MountPath, source, mvd.Encrypted)
					}
				}()
			}
		}
		return modifyMappedVirtualDisk(ctx, req.RequestType, mvd, h.securityPolicyEnforcer)
	case guestresource.ResourceTypeMappedDirectory:
		return modifyMappedDirectory(ctx, h.vsock, req.RequestType, req.Settings.(*guestresource.LCOWMappedDirectory), h.securityPolicyEnforcer)
	case guestresource.ResourceTypeVPMemDevice:
		return modifyMappedVPMemDevice(ctx, req.RequestType, req.Settings.(*guestresource.LCOWMappedVPMemDevice), h.securityPolicyEnforcer)
	case guestresource.ResourceTypeCombinedLayers:
		cl := req.Settings.(*guestresource.LCOWCombinedLayers)
		// The scratch counts as encrypted only when a scratch path is given and
		// hostMounts reports it as encrypted; the result is passed on to the
		// scratch mount policy check.
		encryptedScratch := cl.ScratchPath != "" && h.hostMounts.IsEncrypted(cl.ScratchPath)
		return modifyCombinedLayers(ctx, req.RequestType, req.Settings.(*guestresource.LCOWCombinedLayers), encryptedScratch, h.securityPolicyEnforcer)
	case guestresource.ResourceTypeNetwork:
		return modifyNetwork(ctx, req.RequestType, req.Settings.(*guestresource.LCOWNetworkAdapter))
	case guestresource.ResourceTypeVPCIDevice:
		return modifyMappedVPCIDevice(ctx, req.RequestType, req.Settings.(*guestresource.LCOWMappedVPCIDevice))
	case guestresource.ResourceTypeContainerConstraints:
		c, err := h.GetCreatedContainer(containerID)
		if err != nil {
			return err
		}
		return c.modifyContainerConstraints(ctx, req.RequestType, req.Settings.(*guestresource.LCOWContainerConstraints))
	case guestresource.ResourceTypeSecurityPolicy:
		r, ok := req.Settings.(*guestresource.LCOWConfidentialOptions)
		if !ok {
			return errors.New("the request's settings are not of type LCOWConfidentialOptions")
		}
		return h.SetConfidentialUVMOptions(ctx, r)
	case guestresource.ResourceTypePolicyFragment:
		r, ok := req.Settings.(*guestresource.LCOWSecurityPolicyFragment)
		if !ok {
			return errors.New("the request settings are not of type LCOWSecurityPolicyFragment")
		}
		return h.InjectFragment(ctx, r)
	default:
		return errors.Errorf("the ResourceType %q is not supported for UVM", req.ResourceType)
	}
}
621
622 func (h *Host) modifyContainerSettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) error {
623 c, err := h.GetCreatedContainer(containerID)
624 if err != nil {
625 return err
626 }
627
628 switch req.ResourceType {
629 case guestresource.ResourceTypeContainerConstraints:
630 return c.modifyContainerConstraints(ctx, req.RequestType, req.Settings.(*guestresource.LCOWContainerConstraints))
631 default:
632 return errors.Errorf("the ResourceType \"%s\" is not supported for containers", req.ResourceType)
633 }
634 }
635
636 func (h *Host) ModifySettings(ctx context.Context, containerID string, req *guestrequest.ModificationRequest) error {
637 if containerID == UVMContainerID {
638 return h.modifyHostSettings(ctx, containerID, req)
639 }
640 return h.modifyContainerSettings(ctx, containerID, req)
641 }
642
643
644
// Shutdown powers the machine off by issuing the reboot syscall with
// LINUX_REBOOT_CMD_POWER_OFF. The syscall's error, if any, is ignored.
func (*Host) Shutdown() {
	_ = syscall.Reboot(syscall.LINUX_REBOOT_CMD_POWER_OFF)
}
648
649
650 func (h *Host) ShutdownContainer(ctx context.Context, containerID string, graceful bool) error {
651 c, err := h.GetCreatedContainer(containerID)
652 if err != nil {
653 return err
654 }
655
656 err = h.securityPolicyEnforcer.EnforceShutdownContainerPolicy(ctx, containerID)
657 if err != nil {
658 return err
659 }
660
661 signal := unix.SIGTERM
662 if !graceful {
663 signal = unix.SIGKILL
664 }
665
666 return c.Kill(ctx, signal)
667 }
668
669 func (h *Host) SignalContainerProcess(ctx context.Context, containerID string, processID uint32, signal syscall.Signal) error {
670 c, err := h.GetCreatedContainer(containerID)
671 if err != nil {
672 return err
673 }
674
675 p, err := c.GetProcess(processID)
676 if err != nil {
677 return err
678 }
679
680 signalingInitProcess := (processID == c.initProcess.pid)
681
682
683
684
685 if signalingInitProcess {
686 if (signal == unix.SIGTERM) || (signal == unix.SIGKILL) {
687 graceful := (signal == unix.SIGTERM)
688 return h.ShutdownContainer(ctx, containerID, graceful)
689 }
690 }
691
692 startupArgList := p.(*containerProcess).spec.Args
693 err = h.securityPolicyEnforcer.EnforceSignalContainerProcessPolicy(ctx, containerID, signal, signalingInitProcess, startupArgList)
694 if err != nil {
695 return err
696 }
697
698 return p.Kill(ctx, signal)
699 }
700
701 func (h *Host) ExecProcess(ctx context.Context, containerID string, params prot.ProcessParameters, conSettings stdio.ConnectionSettings) (_ int, err error) {
702 var pid int
703 var c *Container
704
705 if params.IsExternal || containerID == UVMContainerID {
706 var envToKeep securitypolicy.EnvList
707 var allowStdioAccess bool
708 envToKeep, allowStdioAccess, err = h.securityPolicyEnforcer.EnforceExecExternalProcessPolicy(
709 ctx,
710 params.CommandArgs,
711 processParamEnvToOCIEnv(params.Environment),
712 params.WorkingDirectory,
713 )
714 if err != nil {
715 return pid, errors.Wrapf(err, "exec is denied due to policy")
716 }
717
718
719
720 if params.EmulateConsole && !allowStdioAccess {
721 return pid, errors.New("exec of process that requires terminal access denied due to policy not allowing stdio access")
722 }
723
724 if envToKeep != nil {
725 params.Environment = processOCIEnvToParam(envToKeep)
726 }
727
728 var tport = h.vsock
729 if !allowStdioAccess {
730 tport = h.devNullTransport
731 }
732 pid, err = h.runExternalProcess(ctx, params, conSettings, tport)
733 } else if c, err = h.GetCreatedContainer(containerID); err == nil {
734
735 if params.OCIProcess == nil {
736
737
738 pid, err = c.Start(ctx, conSettings)
739 } else {
740
741
742
743 var envToKeep securitypolicy.EnvList
744 var capsToKeep *specs.LinuxCapabilities
745 var user securitypolicy.IDName
746 var groups []securitypolicy.IDName
747 var umask string
748 var allowStdioAccess bool
749
750 user, groups, umask, err = h.securityPolicyEnforcer.GetUserInfo(containerID, params.OCIProcess)
751 if err != nil {
752 return 0, err
753 }
754
755 envToKeep, capsToKeep, allowStdioAccess, err = h.securityPolicyEnforcer.EnforceExecInContainerPolicy(
756 ctx,
757 containerID,
758 params.OCIProcess.Args,
759 params.OCIProcess.Env,
760 params.OCIProcess.Cwd,
761 params.OCIProcess.NoNewPrivileges,
762 user,
763 groups,
764 umask,
765 params.OCIProcess.Capabilities,
766 )
767 if err != nil {
768 return pid, errors.Wrapf(err, "exec in container denied due to policy")
769 }
770
771
772
773 if params.OCIProcess.Terminal && !allowStdioAccess {
774 return pid, errors.New("exec in container of process that requires terminal access denied due to policy not allowing stdio access")
775 }
776
777 if envToKeep != nil {
778 params.OCIProcess.Env = envToKeep
779 }
780
781 if capsToKeep != nil {
782 params.OCIProcess.Capabilities = capsToKeep
783 }
784
785 pid, err = c.ExecProcess(ctx, params.OCIProcess, conSettings)
786 }
787 }
788
789 return pid, err
790 }
791
792 func (h *Host) GetExternalProcess(pid int) (Process, error) {
793 h.externalProcessesMutex.Lock()
794 defer h.externalProcessesMutex.Unlock()
795
796 p, ok := h.externalProcesses[pid]
797 if !ok {
798 return nil, gcserr.NewHresultError(gcserr.HrErrNotFound)
799 }
800 return p, nil
801 }
802
803 func (h *Host) GetProperties(ctx context.Context, containerID string, query prot.PropertyQuery) (*prot.PropertiesV2, error) {
804 err := h.securityPolicyEnforcer.EnforceGetPropertiesPolicy(ctx)
805 if err != nil {
806 return nil, errors.Wrapf(err, "get properties denied due to policy")
807 }
808
809 c, err := h.GetCreatedContainer(containerID)
810 if err != nil {
811 return nil, err
812 }
813
814 properties := &prot.PropertiesV2{}
815 for _, requestedProperty := range query.PropertyTypes {
816 if requestedProperty == prot.PtProcessList {
817 pids, err := c.GetAllProcessPids(ctx)
818 if err != nil {
819 return nil, err
820 }
821 properties.ProcessList = make([]prot.ProcessDetails, len(pids))
822 for i, pid := range pids {
823 properties.ProcessList[i].ProcessID = uint32(pid)
824 }
825 } else if requestedProperty == prot.PtStatistics {
826 cgroupMetrics, err := c.GetStats(ctx)
827 if err != nil {
828 return nil, err
829 }
830 properties.Metrics = cgroupMetrics
831 }
832 }
833
834 return properties, nil
835 }
836
837 func (h *Host) GetStacks(ctx context.Context) (string, error) {
838 err := h.securityPolicyEnforcer.EnforceDumpStacksPolicy(ctx)
839 if err != nil {
840 return "", errors.Wrapf(err, "dump stacks denied due to policy")
841 }
842
843 return debug.DumpStacks(), nil
844 }
845
846
847 func (h *Host) runExternalProcess(
848 ctx context.Context,
849 params prot.ProcessParameters,
850 conSettings stdio.ConnectionSettings,
851 tport transport.Transport,
852 ) (_ int, err error) {
853 var stdioSet *stdio.ConnectionSet
854 stdioSet, err = stdio.Connect(tport, conSettings)
855 if err != nil {
856 return -1, err
857 }
858 defer func() {
859 if err != nil {
860 stdioSet.Close()
861 }
862 }()
863
864 args := params.CommandArgs
865 if len(args) == 0 {
866 args, err = processParamCommandLineToOCIArgs(params.CommandLine)
867 if err != nil {
868 return -1, err
869 }
870 }
871
872 cmd := exec.Command(args[0], args[1:]...)
873 cmd.Dir = params.WorkingDirectory
874 cmd.Env = processParamEnvToOCIEnv(params.Environment)
875
876 var relay *stdio.TtyRelay
877 if params.EmulateConsole {
878
879 var (
880 master *os.File
881 consolePath string
882 )
883 master, consolePath, err = stdio.NewConsole()
884 if err != nil {
885 return -1, errors.Wrap(err, "failed to create console for external process")
886 }
887 defer func() {
888 if err != nil {
889 master.Close()
890 }
891 }()
892
893 var console *os.File
894 console, err = os.OpenFile(consolePath, os.O_RDWR|syscall.O_NOCTTY, 0777)
895 if err != nil {
896 return -1, errors.Wrap(err, "failed to open console file for external process")
897 }
898 defer console.Close()
899
900 relay = stdio.NewTtyRelay(stdioSet, master)
901 cmd.Stdin = console
902 cmd.Stdout = console
903 cmd.Stderr = console
904
905
906 cmd.SysProcAttr = &syscall.SysProcAttr{
907 Setsid: true,
908 Setctty: true,
909 Ctty: syscall.Stdin,
910 }
911 } else {
912 var fileSet *stdio.FileSet
913 fileSet, err = stdioSet.Files()
914 if err != nil {
915 return -1, errors.Wrap(err, "failed to set cmd stdio")
916 }
917 defer fileSet.Close()
918 defer stdioSet.Close()
919 cmd.Stdin = fileSet.In
920 cmd.Stdout = fileSet.Out
921 cmd.Stderr = fileSet.Err
922 }
923
924 onRemove := func(pid int) {
925 h.externalProcessesMutex.Lock()
926 delete(h.externalProcesses, pid)
927 h.externalProcessesMutex.Unlock()
928 }
929 p, err := newExternalProcess(ctx, cmd, relay, onRemove)
930 if err != nil {
931 return -1, err
932 }
933
934 h.externalProcessesMutex.Lock()
935 h.externalProcesses[p.Pid()] = p
936 h.externalProcessesMutex.Unlock()
937 return p.Pid(), nil
938 }
939
// newInvalidRequestTypeError builds the error the modify* helpers return for
// a request type they do not handle; the quoted request type is part of the
// message.
func newInvalidRequestTypeError(rt guestrequest.RequestType) error {
	return errors.Errorf("the RequestType %q is not supported", rt)
}
943
944 func modifySCSIDevice(
945 ctx context.Context,
946 rt guestrequest.RequestType,
947 msd *guestresource.SCSIDevice,
948 ) error {
949 switch rt {
950 case guestrequest.RequestTypeRemove:
951 cNum, err := scsi.ActualControllerNumber(ctx, msd.Controller)
952 if err != nil {
953 return err
954 }
955 return scsi.UnplugDevice(ctx, cNum, msd.Lun)
956 default:
957 return newInvalidRequestTypeError(rt)
958 }
959 }
960
961 func modifyMappedVirtualDisk(
962 ctx context.Context,
963 rt guestrequest.RequestType,
964 mvd *guestresource.LCOWMappedVirtualDisk,
965 securityPolicy securitypolicy.SecurityPolicyEnforcer,
966 ) (err error) {
967 switch rt {
968 case guestrequest.RequestTypeAdd:
969 mountCtx, cancel := context.WithTimeout(ctx, time.Second*5)
970 defer cancel()
971 if mvd.MountPath != "" {
972 if mvd.ReadOnly {
973
974 var deviceHash string
975 if mvd.VerityInfo != nil {
976 deviceHash = mvd.VerityInfo.RootDigest
977 }
978
979 err = securityPolicy.EnforceDeviceMountPolicy(ctx, mvd.MountPath, deviceHash)
980 if err != nil {
981 return errors.Wrapf(err, "mounting scsi device controller %d lun %d onto %s denied by policy", mvd.Controller, mvd.Lun, mvd.MountPath)
982 }
983 }
984
985 return scsi.Mount(mountCtx, mvd.Controller, mvd.Lun, mvd.MountPath,
986 mvd.ReadOnly, mvd.Encrypted, mvd.Options, mvd.VerityInfo)
987 }
988 return nil
989 case guestrequest.RequestTypeRemove:
990 if mvd.MountPath != "" {
991 if mvd.ReadOnly {
992 if err := securityPolicy.EnforceDeviceUnmountPolicy(ctx, mvd.MountPath); err != nil {
993 return fmt.Errorf("unmounting scsi device at %s denied by policy: %w", mvd.MountPath, err)
994 }
995 }
996
997 if err := scsi.Unmount(ctx, mvd.Controller, mvd.Lun, mvd.MountPath, mvd.Encrypted, mvd.VerityInfo); err != nil {
998 return err
999 }
1000 }
1001 return nil
1002 default:
1003 return newInvalidRequestTypeError(rt)
1004 }
1005 }
1006
1007 func modifyMappedDirectory(
1008 ctx context.Context,
1009 vsock transport.Transport,
1010 rt guestrequest.RequestType,
1011 md *guestresource.LCOWMappedDirectory,
1012 securityPolicy securitypolicy.SecurityPolicyEnforcer,
1013 ) (err error) {
1014 switch rt {
1015 case guestrequest.RequestTypeAdd:
1016 err = securityPolicy.EnforcePlan9MountPolicy(ctx, md.MountPath)
1017 if err != nil {
1018 return errors.Wrapf(err, "mounting plan9 device at %s denied by policy", md.MountPath)
1019 }
1020
1021 return plan9.Mount(ctx, vsock, md.MountPath, md.ShareName, uint32(md.Port), md.ReadOnly)
1022 case guestrequest.RequestTypeRemove:
1023 err = securityPolicy.EnforcePlan9UnmountPolicy(ctx, md.MountPath)
1024 if err != nil {
1025 return errors.Wrapf(err, "unmounting plan9 device at %s denied by policy", md.MountPath)
1026 }
1027
1028 return storage.UnmountPath(ctx, md.MountPath, true)
1029 default:
1030 return newInvalidRequestTypeError(rt)
1031 }
1032 }
1033
1034 func modifyMappedVPMemDevice(ctx context.Context,
1035 rt guestrequest.RequestType,
1036 vpd *guestresource.LCOWMappedVPMemDevice,
1037 securityPolicy securitypolicy.SecurityPolicyEnforcer,
1038 ) (err error) {
1039 switch rt {
1040 case guestrequest.RequestTypeAdd:
1041 var deviceHash string
1042 if vpd.VerityInfo != nil {
1043 deviceHash = vpd.VerityInfo.RootDigest
1044 }
1045 err = securityPolicy.EnforceDeviceMountPolicy(ctx, vpd.MountPath, deviceHash)
1046 if err != nil {
1047 return errors.Wrapf(err, "mounting pmem device %d onto %s denied by policy", vpd.DeviceNumber, vpd.MountPath)
1048 }
1049
1050 return pmem.Mount(ctx, vpd.DeviceNumber, vpd.MountPath, vpd.MappingInfo, vpd.VerityInfo)
1051 case guestrequest.RequestTypeRemove:
1052 if err := securityPolicy.EnforceDeviceUnmountPolicy(ctx, vpd.MountPath); err != nil {
1053 return errors.Wrapf(err, "unmounting pmem device from %s denied by policy", vpd.MountPath)
1054 }
1055
1056 return pmem.Unmount(ctx, vpd.DeviceNumber, vpd.MountPath, vpd.MappingInfo, vpd.VerityInfo)
1057 default:
1058 return newInvalidRequestTypeError(rt)
1059 }
1060 }
1061
1062 func modifyMappedVPCIDevice(ctx context.Context, rt guestrequest.RequestType, vpciDev *guestresource.LCOWMappedVPCIDevice) error {
1063 switch rt {
1064 case guestrequest.RequestTypeAdd:
1065 return pci.WaitForPCIDeviceFromVMBusGUID(ctx, vpciDev.VMBusGUID)
1066 default:
1067 return newInvalidRequestTypeError(rt)
1068 }
1069 }
1070
1071 func modifyCombinedLayers(
1072 ctx context.Context,
1073 rt guestrequest.RequestType,
1074 cl *guestresource.LCOWCombinedLayers,
1075 scratchEncrypted bool,
1076 securityPolicy securitypolicy.SecurityPolicyEnforcer,
1077 ) (err error) {
1078 switch rt {
1079 case guestrequest.RequestTypeAdd:
1080 layerPaths := make([]string, len(cl.Layers))
1081 for i, layer := range cl.Layers {
1082 layerPaths[i] = layer.Path
1083 }
1084
1085 var upperdirPath string
1086 var workdirPath string
1087 readonly := false
1088 if cl.ScratchPath == "" {
1089
1090 readonly = true
1091 } else {
1092 upperdirPath = filepath.Join(cl.ScratchPath, "upper")
1093 workdirPath = filepath.Join(cl.ScratchPath, "work")
1094
1095 if err := securityPolicy.EnforceScratchMountPolicy(ctx, cl.ScratchPath, scratchEncrypted); err != nil {
1096 return fmt.Errorf("scratch mounting denied by policy: %w", err)
1097 }
1098 }
1099
1100 if err := securityPolicy.EnforceOverlayMountPolicy(ctx, cl.ContainerID, layerPaths, cl.ContainerRootPath); err != nil {
1101 return fmt.Errorf("overlay creation denied by policy: %w", err)
1102 }
1103
1104 return overlay.MountLayer(ctx, layerPaths, upperdirPath, workdirPath, cl.ContainerRootPath, readonly)
1105 case guestrequest.RequestTypeRemove:
1106 if err := securityPolicy.EnforceOverlayUnmountPolicy(ctx, cl.ContainerRootPath); err != nil {
1107 return errors.Wrap(err, "overlay removal denied by policy")
1108 }
1109
1110 return storage.UnmountPath(ctx, cl.ContainerRootPath, true)
1111 default:
1112 return newInvalidRequestTypeError(rt)
1113 }
1114 }
1115
1116 func modifyNetwork(ctx context.Context, rt guestrequest.RequestType, na *guestresource.LCOWNetworkAdapter) (err error) {
1117 switch rt {
1118 case guestrequest.RequestTypeAdd:
1119 ns := GetOrAddNetworkNamespace(na.NamespaceID)
1120 if err := ns.AddAdapter(ctx, na); err != nil {
1121 return err
1122 }
1123
1124
1125 return ns.Sync(ctx)
1126 case guestrequest.RequestTypeRemove:
1127 ns := GetOrAddNetworkNamespace(na.ID)
1128 if err := ns.RemoveAdapter(ctx, na.ID); err != nil {
1129 return err
1130 }
1131 return nil
1132 default:
1133 return newInvalidRequestTypeError(rt)
1134 }
1135 }
1136
1137
1138
1139
1140 func processParamCommandLineToOCIArgs(commandLine string) ([]string, error) {
1141 args, err := shellwords.Parse(commandLine)
1142 if err != nil {
1143 return nil, errors.Wrapf(err, "failed to parse command line string \"%s\"", commandLine)
1144 }
1145 return args, nil
1146 }
1147
1148
1149
1150
1151
// processParamEnvToOCIEnv converts an environment map into a list of
// "KEY=VALUE" strings. The order of the result follows map iteration and is
// therefore unspecified; the result is never nil.
func processParamEnvToOCIEnv(environment map[string]string) []string {
	entries := make([]string, len(environment))
	next := 0
	for name, value := range environment {
		entries[next] = fmt.Sprintf("%s=%s", name, value)
		next++
	}
	return entries
}
1161
1162
// processOCIEnvToParam converts a list of "KEY=VALUE" strings into an
// environment map. Each entry is split at its first "=", so values may
// themselves contain "=". An entry without any "=" is stored with an empty
// value instead of causing an out-of-range panic. When a key occurs more than
// once, the last occurrence wins.
func processOCIEnvToParam(envs []string) map[string]string {
	paramEnv := make(map[string]string, len(envs))
	for _, env := range envs {
		parts := strings.SplitN(env, "=", 2)
		value := ""
		if len(parts) == 2 {
			value = parts[1]
		}
		paramEnv[parts[0]] = value
	}

	return paramEnv
}
1172
1173
1174
// isPrivilegedContainerCreationRequest reports the boolean value of the
// LCOWPrivileged annotation on spec, using false as the default passed to
// the annotation parser.
func isPrivilegedContainerCreationRequest(ctx context.Context, spec *specs.Spec) bool {
	return oci.ParseAnnotationsBool(ctx, spec.Annotations, annotations.LCOWPrivileged, false)
}
1178
// writeFileInDir writes data to the file named filename inside dir, using perm
// for a newly created file. dir must already exist and be a directory;
// otherwise the stat error or a "not a directory" error is returned and
// nothing is written.
func writeFileInDir(dir string, filename string, data []byte, perm os.FileMode) error {
	info, statErr := os.Stat(dir)
	switch {
	case statErr != nil:
		return statErr
	case !info.IsDir():
		return fmt.Errorf("not a directory %q", dir)
	}

	return os.WriteFile(filepath.Join(dir, filename), data, perm)
}
1192
View as plain text