1
2
3 package main
4
5 import (
6 "context"
7 "encoding/json"
8 "errors"
9 "fmt"
10 "os"
11 "path/filepath"
12 "strconv"
13 "strings"
14 "time"
15
16 winio "github.com/Microsoft/go-winio"
17 "github.com/Microsoft/go-winio/pkg/guid"
18 "github.com/Microsoft/hcsshim/internal/cni"
19 "github.com/Microsoft/hcsshim/internal/hcs"
20 "github.com/Microsoft/hcsshim/internal/hcsoci"
21 "github.com/Microsoft/hcsshim/internal/logfields"
22 "github.com/Microsoft/hcsshim/internal/oci"
23 "github.com/Microsoft/hcsshim/internal/regstate"
24 "github.com/Microsoft/hcsshim/internal/resources"
25 "github.com/Microsoft/hcsshim/internal/runhcs"
26 "github.com/Microsoft/hcsshim/internal/uvm"
27 "github.com/Microsoft/hcsshim/osversion"
28 "github.com/Microsoft/hcsshim/pkg/annotations"
29 specs "github.com/opencontainers/runtime-spec/specs-go"
30 "github.com/sirupsen/logrus"
31 "golang.org/x/sys/windows"
32 )
33
// errContainerStopped is the sentinel error returned by getContainer when the
// caller asked for a container that is not stopped, but either the recorded
// shim pid is zero or the compute system no longer exists.
var errContainerStopped = errors.New("container is stopped")
35
36 type persistedState struct {
37
38 ID string `json:",omitempty"`
39
40 Owner string `json:",omitempty"`
41
42
43
44 SandboxID string `json:",omitempty"`
45
46
47 HostID string `json:",omitempty"`
48
49
50 Bundle string `json:",omitempty"`
51 Created time.Time `json:",omitempty"`
52 Rootfs string `json:",omitempty"`
53
54 Spec *specs.Spec `json:",omitempty"`
55 RequestedNetNS string `json:",omitempty"`
56
57 IsHost bool `json:",omitempty"`
58
59 UniqueID guid.GUID `json:",omitempty"`
60
61
62 HostUniqueID guid.GUID `json:",omitempty"`
63 }
64
// containerStatus is the textual lifecycle state reported by
// (*container).Status.
type containerStatus string

// Lifecycle states a container can be reported in.
const (
	containerRunning containerStatus = "running"
	containerStopped containerStatus = "stopped"
	containerCreated containerStatus = "created"
	containerPaused  containerStatus = "paused"
	containerUnknown containerStatus = "unknown"
)

// Names of the per-container entries stored through stateKey.
const (
	// keyState holds the serialized persistedState.
	keyState = "state"
	// keyResources holds the resources allocated for the container.
	keyResources = "resources"
	// keyShimPid holds the pid of the container shim process.
	keyShimPid = "shim"
	// keyInitPid is not used in this part of the file; presumably it
	// holds the pid of the container's init process.
	keyInitPid = "pid"
	// keyNetNS holds the network namespace the container was created with.
	keyNetNS = "netns"

	// keyPidMapFmt is a format string taking one integer; presumably it
	// names per-process entries (usage is not visible in this part of the
	// file).
	keyPidMapFmt = "pid-%d"
)
83
84 type container struct {
85 persistedState
86 ShimPid int
87 hc *hcs.System
88 }
89
90 func startProcessShim(id, pidFile, logFile string, spec *specs.Process) (_ *os.Process, err error) {
91
92
93
94 for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
95 err = windows.SetHandleInformation(windows.Handle(f.Fd()), windows.HANDLE_FLAG_INHERIT, windows.HANDLE_FLAG_INHERIT)
96 if err != nil {
97 return nil, err
98 }
99 }
100
101 args := []string{
102 "--stdin", strconv.Itoa(int(os.Stdin.Fd())),
103 "--stdout", strconv.Itoa(int(os.Stdout.Fd())),
104 "--stderr", strconv.Itoa(int(os.Stderr.Fd())),
105 }
106 if spec != nil {
107 args = append(args, "--exec")
108 }
109 if strings.HasPrefix(logFile, runhcs.SafePipePrefix) {
110 args = append(args, "--log-pipe", logFile)
111 }
112 args = append(args, id)
113 return launchShim("shim", pidFile, logFile, args, spec)
114 }
115
116 func launchShim(cmd, pidFile, logFile string, args []string, data interface{}) (_ *os.Process, err error) {
117 executable, err := os.Executable()
118 if err != nil {
119 return nil, err
120 }
121
122
123
124
125 rp, wp, err := os.Pipe()
126 if err != nil {
127 return nil, err
128 }
129 defer rp.Close()
130 defer wp.Close()
131
132
133 var rdatap, wdatap *os.File
134 if data != nil {
135 rdatap, wdatap, err = os.Pipe()
136 if err != nil {
137 return nil, err
138 }
139 defer rdatap.Close()
140 defer wdatap.Close()
141 }
142
143 var log *os.File
144 fullargs := []string{os.Args[0]}
145 if logFile != "" {
146 if !strings.HasPrefix(logFile, runhcs.SafePipePrefix) {
147 log, err = os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0666)
148 if err != nil {
149 return nil, err
150 }
151 defer log.Close()
152 }
153
154 fullargs = append(fullargs, "--log-format", logFormat)
155 if logrus.GetLevel() == logrus.DebugLevel {
156 fullargs = append(fullargs, "--debug")
157 }
158 }
159 fullargs = append(fullargs, cmd)
160 fullargs = append(fullargs, args...)
161 attr := &os.ProcAttr{
162 Files: []*os.File{rdatap, wp, log},
163 }
164 p, err := os.StartProcess(executable, fullargs, attr)
165 if err != nil {
166 return nil, err
167 }
168 defer func() {
169 if err != nil {
170 _ = p.Kill()
171 }
172 }()
173
174 wp.Close()
175
176
177 if data != nil {
178 rdatap.Close()
179 dataj, err := json.Marshal(data)
180 if err != nil {
181 return nil, err
182 }
183 _, err = wdatap.Write(dataj)
184 if err != nil {
185 return nil, err
186 }
187 wdatap.Close()
188 }
189
190 err = runhcs.GetErrorFromPipe(rp, p)
191 if err != nil {
192 return nil, err
193 }
194
195 if pidFile != "" {
196 if err = createPidFile(pidFile, p.Pid); err != nil {
197 return nil, err
198 }
199 }
200
201 return p, nil
202 }
203
204
205
206
207
208
209
210
211 func parseSandboxAnnotations(a map[string]string) (string, bool) {
212 var t, id string
213 if t = a[annotations.KubernetesContainerType]; t != "" {
214 id = a[annotations.KubernetesSandboxID]
215 } else if t = a["io.kubernetes.cri-o.ContainerType"]; t != "" {
216 id = a["io.kubernetes.cri-o.SandboxID"]
217 } else if t = a["io.kubernetes.docker.type"]; t != "" {
218 id = a["io.kubernetes.sandbox.id"]
219 if t == "podsandbox" {
220 t = "sandbox"
221 }
222 }
223 if t == "container" {
224 return id, false
225 }
226 if t == "sandbox" {
227 return id, true
228 }
229 return "", false
230 }
231
232
233 func (c *container) startVMShim(logFile string, opts interface{}) (*os.Process, error) {
234 var os string
235 if _, ok := opts.(*uvm.OptionsLCOW); ok {
236 os = "linux"
237 } else {
238 os = "windows"
239 }
240 args := []string{"--os", os}
241 if strings.HasPrefix(logFile, runhcs.SafePipePrefix) {
242 args = append(args, "--log-pipe", logFile)
243 }
244 args = append(args, c.VMPipePath())
245 return launchShim("vmshim", "", logFile, args, opts)
246 }
247
248 type containerConfig struct {
249 ID string
250 Owner string
251 HostID string
252 PidFile string
253 ShimLogFile, VMLogFile string
254 Spec *specs.Spec
255 VMConsolePipe string
256 }
257
258 func createContainer(cfg *containerConfig) (_ *container, err error) {
259
260 cwd, err := os.Getwd()
261 if err != nil {
262 return nil, err
263 }
264
265 vmisolated := cfg.Spec.Linux != nil || (cfg.Spec.Windows != nil && cfg.Spec.Windows.HyperV != nil)
266
267 sandboxID, isSandbox := parseSandboxAnnotations(cfg.Spec.Annotations)
268 hostID := cfg.HostID
269 if isSandbox {
270 if sandboxID != cfg.ID {
271 return nil, errors.New("sandbox ID must match ID")
272 }
273 } else if sandboxID != "" {
274
275 sandbox, err := getContainer(sandboxID, false)
276 if err != nil {
277 return nil, err
278 }
279 defer sandbox.Close()
280 if sandbox.SandboxID != sandboxID {
281 return nil, fmt.Errorf("container %s is not a sandbox", sandboxID)
282 }
283 if hostID == "" {
284
285 hostID = sandbox.HostID
286 } else if sandbox.HostID == "" {
287 return nil, fmt.Errorf("sandbox container %s is not running in a VM host, but host %s was specified", sandboxID, hostID)
288 } else if hostID != sandbox.HostID {
289 return nil, fmt.Errorf("sandbox container %s has a different host %s from the requested host %s", sandboxID, sandbox.HostID, hostID)
290 }
291 if vmisolated && hostID == "" {
292 return nil, fmt.Errorf("container %s is not a VM isolated sandbox", sandboxID)
293 }
294 }
295
296 uniqueID, err := guid.NewV4()
297 if err != nil {
298 return nil, err
299 }
300
301 newvm := false
302 var hostUniqueID guid.GUID
303 if hostID != "" {
304 host, err := getContainer(hostID, false)
305 if err != nil {
306 return nil, err
307 }
308 defer host.Close()
309 if !host.IsHost {
310 return nil, fmt.Errorf("host container %s is not a VM host", hostID)
311 }
312 hostUniqueID = host.UniqueID
313 } else if vmisolated && (isSandbox || cfg.Spec.Linux != nil || osversion.Build() >= osversion.RS5) {
314
315 hostID = cfg.ID
316 newvm = true
317 hostUniqueID = uniqueID
318 }
319
320
321 rootfs := ""
322 if cfg.Spec.Root != nil {
323 rootfs = cfg.Spec.Root.Path
324 if rootfs != "" && !filepath.IsAbs(rootfs) && !strings.HasPrefix(rootfs, `\\?\`) {
325 rootfs = filepath.Join(cwd, rootfs)
326 cfg.Spec.Root.Path = rootfs
327 }
328 }
329
330 netNS := ""
331 if cfg.Spec.Windows != nil {
332 for i, f := range cfg.Spec.Windows.LayerFolders {
333 if !filepath.IsAbs(f) && !strings.HasPrefix(rootfs, `\\?\`) {
334 cfg.Spec.Windows.LayerFolders[i] = filepath.Join(cwd, f)
335 }
336 }
337
338
339 if cfg.Spec.Windows.Network != nil {
340 if cfg.Spec.Windows.Network.NetworkSharedContainerName != "" {
341
342 err = stateKey.Get(cfg.Spec.Windows.Network.NetworkSharedContainerName, keyNetNS, &netNS)
343 if err != nil {
344 if _, ok := err.(*regstate.NoStateError); !ok {
345 return nil, err
346 }
347 }
348 } else if cfg.Spec.Windows.Network.NetworkNamespace != "" {
349
350 netNS = cfg.Spec.Windows.Network.NetworkNamespace
351 }
352 }
353 }
354
355
356
357 c := &container{
358 persistedState: persistedState{
359 ID: cfg.ID,
360 Owner: cfg.Owner,
361 Bundle: cwd,
362 Rootfs: rootfs,
363 Created: time.Now(),
364 Spec: cfg.Spec,
365 SandboxID: sandboxID,
366 HostID: hostID,
367 IsHost: newvm,
368 RequestedNetNS: netNS,
369 UniqueID: uniqueID,
370 HostUniqueID: hostUniqueID,
371 },
372 }
373 err = stateKey.Create(cfg.ID, keyState, &c.persistedState)
374 if err != nil {
375 return nil, err
376 }
377 defer func() {
378 if err != nil {
379 _ = c.Remove()
380 }
381 }()
382 if isSandbox && vmisolated {
383 cnicfg := cni.NewPersistedNamespaceConfig(netNS, cfg.ID, hostUniqueID)
384 err = cnicfg.Store()
385 if err != nil {
386 return nil, err
387 }
388 defer func() {
389 if err != nil {
390 _ = cnicfg.Remove()
391 }
392 }()
393 }
394
395
396 if newvm {
397 opts, err := oci.SpecToUVMCreateOpts(context.Background(), cfg.Spec, vmID(c.ID), cfg.Owner)
398 if err != nil {
399 return nil, err
400 }
401 switch opts := opts.(type) {
402 case *uvm.OptionsLCOW:
403 opts.ConsolePipe = cfg.VMConsolePipe
404 case *uvm.OptionsWCOW:
405
406
407
408 layersLen := len(cfg.Spec.Windows.LayerFolders)
409 layers := make([]string, layersLen)
410 copy(layers, cfg.Spec.Windows.LayerFolders)
411
412 vmPath := filepath.Join(layers[layersLen-1], "vm")
413 err := os.MkdirAll(vmPath, 0)
414 if err != nil {
415 return nil, err
416 }
417 layers[layersLen-1] = vmPath
418
419 opts.LayerFolders = layers
420 }
421
422 shim, err := c.startVMShim(cfg.VMLogFile, opts)
423 if err != nil {
424 return nil, err
425 }
426 _ = shim.Release()
427 }
428
429 if c.HostID != "" {
430
431
432
433 err = c.issueVMRequest(runhcs.OpCreateContainer)
434 if err != nil {
435 return nil, err
436 }
437 c.hc, err = hcs.OpenComputeSystem(context.Background(), cfg.ID)
438 if err != nil {
439 return nil, err
440 }
441 } else {
442
443 err = createContainerInHost(c, nil)
444 if err != nil {
445 return nil, err
446 }
447 }
448
449
450 err = startContainerShim(c, cfg.PidFile, cfg.ShimLogFile)
451 if err != nil {
452 if e := c.Kill(); e == nil {
453 _ = c.Remove()
454 }
455 return nil, err
456 }
457
458 return c, nil
459 }
460
461 func (c *container) ShimPipePath() string {
462 return runhcs.SafePipePath("runhcs-shim-" + c.UniqueID.String())
463 }
464
465 func (c *container) VMPipePath() string {
466 return runhcs.VMPipePath(c.HostUniqueID)
467 }
468
469 func (c *container) VMIsolated() bool {
470 return c.HostID != ""
471 }
472
473 func (c *container) unmountInHost(vm *uvm.UtilityVM, all bool) error {
474 r := &resources.Resources{}
475 err := stateKey.Get(c.ID, keyResources, r)
476 if _, ok := err.(*regstate.NoStateError); ok {
477 return nil
478 }
479 if err != nil {
480 return err
481 }
482 err = resources.ReleaseResources(context.Background(), r, vm, all)
483 if err != nil {
484 _ = stateKey.Set(c.ID, keyResources, r)
485 return err
486 }
487
488 err = stateKey.Clear(c.ID, keyResources)
489 if err != nil {
490 return err
491 }
492 return nil
493 }
494
495 func (c *container) Unmount(all bool) error {
496 if c.VMIsolated() {
497 op := runhcs.OpUnmountContainerDiskOnly
498 if all {
499 op = runhcs.OpUnmountContainer
500 }
501 err := c.issueVMRequest(op)
502 if _, ok := err.(*noVMError); ok {
503 logrus.WithFields(logrus.Fields{
504 logfields.ContainerID: c.ID,
505 logfields.UVMID: c.HostID,
506 logrus.ErrorKey: errors.New("failed to unmount container resources"),
507 }).Warning("VM shim could not be contacted")
508 } else {
509 return err
510 }
511 } else {
512 _ = c.unmountInHost(nil, false)
513 }
514 return nil
515 }
516
517 func createContainerInHost(c *container, vm *uvm.UtilityVM) (err error) {
518 if c.hc != nil {
519 return errors.New("container already created")
520 }
521
522
523 opts := &hcsoci.CreateOptions{
524 ID: c.ID,
525 Owner: c.Owner,
526 Spec: c.Spec,
527 HostingSystem: vm,
528 NetworkNamespace: c.RequestedNetNS,
529 }
530 vmid := ""
531 if vm != nil {
532 vmid = vm.ID()
533 }
534 logrus.WithFields(logrus.Fields{
535 logfields.ContainerID: c.ID,
536 logfields.UVMID: vmid,
537 }).Info("creating container in UVM")
538 hc, r, err := hcsoci.CreateContainer(context.Background(), opts)
539 if err != nil {
540 return err
541 }
542 defer func() {
543 if err != nil {
544 _ = hc.Terminate(context.Background())
545 _ = hc.Wait()
546 _ = resources.ReleaseResources(context.Background(), r, vm, true)
547 }
548 }()
549
550
551 if r.NetNS() != "" {
552 err = stateKey.Set(c.ID, keyNetNS, r.NetNS())
553 if err != nil {
554 return err
555 }
556 }
557
558 err = stateKey.Set(c.ID, keyResources, r)
559 if err != nil {
560 return err
561 }
562 c.hc = hc.(*hcs.System)
563 return nil
564 }
565
566 func startContainerShim(c *container, pidFile, logFile string) error {
567
568 shim, err := startProcessShim(c.ID, pidFile, logFile, nil)
569 if err != nil {
570 return err
571 }
572 defer func() {
573 _ = shim.Release()
574 }()
575 defer func() {
576 if err != nil {
577 _ = shim.Kill()
578 }
579 }()
580
581 c.ShimPid = shim.Pid
582 err = stateKey.Set(c.ID, keyShimPid, shim.Pid)
583 if err != nil {
584 return err
585 }
586
587 if pidFile != "" {
588 if err = createPidFile(pidFile, shim.Pid); err != nil {
589 return err
590 }
591 }
592
593 return nil
594 }
595
596 func (c *container) Close() error {
597 if c.hc == nil {
598 return nil
599 }
600 return c.hc.Close()
601 }
602
603 func (c *container) Exec() error {
604 err := c.hc.Start(context.Background())
605 if err != nil {
606 return err
607 }
608
609 if c.Spec.Process == nil {
610 return nil
611 }
612
613
614 pipe, err := winio.DialPipe(c.ShimPipePath(), nil)
615 if err != nil {
616 return err
617 }
618 defer pipe.Close()
619
620 shim, err := os.FindProcess(c.ShimPid)
621 if err != nil {
622 return err
623 }
624 defer func() {
625 _ = shim.Release()
626 }()
627
628 err = runhcs.GetErrorFromPipe(pipe, shim)
629 if err != nil {
630 return err
631 }
632
633 return nil
634 }
635
636 func getContainer(id string, notStopped bool) (*container, error) {
637 var c container
638 err := stateKey.Get(id, keyState, &c.persistedState)
639 if err != nil {
640 return nil, err
641 }
642 err = stateKey.Get(id, keyShimPid, &c.ShimPid)
643 if err != nil {
644 if _, ok := err.(*regstate.NoStateError); !ok {
645 return nil, err
646 }
647 c.ShimPid = -1
648 }
649 if notStopped && c.ShimPid == 0 {
650 return nil, errContainerStopped
651 }
652
653 hc, err := hcs.OpenComputeSystem(context.Background(), c.ID)
654 if err == nil {
655 c.hc = hc
656 } else if !hcs.IsNotExist(err) {
657 return nil, err
658 } else if notStopped {
659 return nil, errContainerStopped
660 }
661
662 return &c, nil
663 }
664
665 func (c *container) Remove() error {
666
667 err := c.Unmount(!c.IsHost)
668 if err != nil {
669 return err
670 }
671
672
673
674 if c.IsHost {
675 vm, err := hcs.OpenComputeSystem(context.Background(), vmID(c.ID))
676 if err == nil {
677 _ = vm.Terminate(context.Background())
678 _ = vm.Wait()
679 }
680 }
681 return stateKey.Remove(c.ID)
682 }
683
684 func (c *container) Kill() error {
685 if c.hc == nil {
686 return nil
687 }
688 _ = c.hc.Terminate(context.Background())
689 return c.hc.Wait()
690 }
691
692 func (c *container) Status() (containerStatus, error) {
693 if c.hc == nil || c.ShimPid == 0 {
694 return containerStopped, nil
695 }
696 props, err := c.hc.Properties(context.Background())
697 if err != nil {
698 if !strings.Contains(err.Error(), "operation is not valid in the current state") {
699 return "", err
700 }
701 return containerUnknown, nil
702 }
703 state := containerUnknown
704 switch props.State {
705 case "", "Created":
706 state = containerCreated
707 case "Running":
708 state = containerRunning
709 case "Paused":
710 state = containerPaused
711 case "Stopped":
712 state = containerStopped
713 }
714 return state, nil
715 }
716
View as plain text