1 package libcontainer
2
3 import (
4 "errors"
5 "fmt"
6 "io"
7 "os"
8 "os/exec"
9 "path"
10 "path/filepath"
11 "strconv"
12 "strings"
13 "time"
14
15 securejoin "github.com/cyphar/filepath-securejoin"
16 "github.com/moby/sys/mountinfo"
17 "github.com/mrunalp/fileutils"
18 "github.com/opencontainers/runc/libcontainer/cgroups"
19 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
20 "github.com/opencontainers/runc/libcontainer/configs"
21 "github.com/opencontainers/runc/libcontainer/devices"
22 "github.com/opencontainers/runc/libcontainer/userns"
23 "github.com/opencontainers/runc/libcontainer/utils"
24 "github.com/opencontainers/runtime-spec/specs-go"
25 "github.com/opencontainers/selinux/go-selinux/label"
26 "github.com/sirupsen/logrus"
27 "golang.org/x/sys/unix"
28 )
29
// defaultMountFlags is the baseline flag set (noexec, nosuid, nodev) used in
// this file for the cgroup v1 tmpfs and for the per-subsystem cgroup mounts.
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
31
// mountConfig bundles the per-container settings that mountToRootfs and its
// helpers need while populating the rootfs.
type mountConfig struct {
	// root is the path of the container rootfs (filled from config.Rootfs).
	root string
	// label is the mount label (filled from config.MountLabel).
	label string
	// cgroup2Path replaces the bind source of the cgroup2 fallback mount
	// when cgroupns is set and this path is non-empty.
	cgroup2Path string
	// rootlessCgroups enables masking of the cgroup mountpoint when the
	// cgroup2 bind-mount fallback fails with ENOENT.
	rootlessCgroups bool
	// cgroupns records whether the config contains a cgroup namespace.
	cgroupns bool
	// fd, when non-nil, points at a descriptor that is used as the mount
	// source (through /proc/self/fd/<fd>) instead of the mount's Source.
	fd *int
}
40
41
42 func needsSetupDev(config *configs.Config) bool {
43 for _, m := range config.Mounts {
44 if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" {
45 return false
46 }
47 }
48 return true
49 }
50
51
52
53
// prepareRootfs sets up the container's root filesystem. In order, it:
// prepares the root mount, performs every configured mount (running its
// pre- and post-mount commands), populates /dev when needed, synchronises
// with the parent over pipe, runs the CreateContainer hooks, jails the
// process inside the rootfs, and creates the configured working directory.
//
// mountFds, when non-nil, must have exactly one entry per config.Mounts
// element; an entry of -1 means that mount has no associated descriptor.
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds []int) (err error) {
	config := iConfig.Config
	if err := prepareRoot(config); err != nil {
		return fmt.Errorf("error preparing rootfs: %w", err)
	}

	// A non-nil mountFds is indexed in lockstep with config.Mounts below, so
	// reject a length mismatch up front.
	if mountFds != nil && len(mountFds) != len(config.Mounts) {
		return fmt.Errorf("malformed mountFds slice. Expected size: %v, got: %v. Slice: %v", len(config.Mounts), len(mountFds), mountFds)
	}

	mountConfig := &mountConfig{
		root:            config.Rootfs,
		label:           config.MountLabel,
		cgroup2Path:     iConfig.Cgroup2Path,
		rootlessCgroups: iConfig.RootlessCgroups,
		cgroupns:        config.Namespaces.Contains(configs.NEWCGROUP),
	}
	setupDev := needsSetupDev(config)
	for i, m := range config.Mounts {
		for _, precmd := range m.PremountCmds {
			if err := mountCmd(precmd); err != nil {
				return fmt.Errorf("error running premount command: %w", err)
			}
		}

		// The length check above guarantees mountFds[i] is in range whenever
		// mountFds is non-nil. The fd is reset on every iteration so a
		// descriptor never carries over to the next mount.
		if mountFds != nil && mountFds[i] != -1 {
			mountConfig.fd = &mountFds[i]
		} else {
			mountConfig.fd = nil
		}

		if err := mountToRootfs(m, mountConfig); err != nil {
			return fmt.Errorf("error mounting %q to rootfs at %q: %w", m.Source, m.Destination, err)
		}

		for _, postcmd := range m.PostmountCmds {
			if err := mountCmd(postcmd); err != nil {
				return fmt.Errorf("error running postmount command: %w", err)
			}
		}
	}

	// Only populate /dev ourselves when the config does not bind-mount it.
	if setupDev {
		if err := createDevices(config); err != nil {
			return fmt.Errorf("error creating device nodes: %w", err)
		}
		if err := setupPtmx(config); err != nil {
			return fmt.Errorf("error setting up ptmx: %w", err)
		}
		if err := setupDevSymlinks(config.Rootfs); err != nil {
			return fmt.Errorf("error setting up /dev symlinks: %w", err)
		}
	}

	// Synchronise with the parent process over pipe before going further
	// (the details live in syncParentHooks).
	if err := syncParentHooks(pipe); err != nil {
		return err
	}

	// Enter the rootfs directory: the CreateContainer hooks below run with it
	// as the working directory, and chroot() acts on ".".
	if err := unix.Chdir(config.Rootfs); err != nil {
		return &os.PathError{Op: "chdir", Path: config.Rootfs, Err: err}
	}

	// Run the CreateContainer hooks with this process's pid and the
	// "creating" status recorded in the spec state.
	s := iConfig.SpecState
	s.Pid = unix.Getpid()
	s.Status = specs.StateCreating
	if err := iConfig.Config.Hooks[configs.CreateContainer].RunHooks(s); err != nil {
		return err
	}

	// Jail the process: MS_MOVE when pivot_root is disabled, pivot_root when
	// we have our own mount namespace, plain chroot otherwise.
	if config.NoPivotRoot {
		err = msMoveRoot(config.Rootfs)
	} else if config.Namespaces.Contains(configs.NEWNS) {
		err = pivotRoot(config.Rootfs)
	} else {
		err = chroot()
	}
	if err != nil {
		return fmt.Errorf("error jailing process inside rootfs: %w", err)
	}

	if setupDev {
		if err := reOpenDevNull(); err != nil {
			return fmt.Errorf("error reopening /dev/null inside container: %w", err)
		}
	}

	if cwd := iConfig.Cwd; cwd != "" {
		// This runs after the process has been jailed above, so cwd is
		// created relative to the new root.
		if err := os.MkdirAll(cwd, 0o755); err != nil {
			return err
		}
	}

	return nil
}
165
166
167
168 func finalizeRootfs(config *configs.Config) (err error) {
169
170
171 for _, m := range config.Mounts {
172 if m.Flags&unix.MS_RDONLY != unix.MS_RDONLY {
173 continue
174 }
175 if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" {
176 if err := remountReadonly(m); err != nil {
177 return err
178 }
179 }
180 }
181
182
183 if config.Readonlyfs {
184 if err := setReadonly(); err != nil {
185 return fmt.Errorf("error setting rootfs as readonly: %w", err)
186 }
187 }
188
189 if config.Umask != nil {
190 unix.Umask(int(*config.Umask))
191 } else {
192 unix.Umask(0o022)
193 }
194 return nil
195 }
196
197
198 func prepareTmp(topTmpDir string) (string, error) {
199 tmpdir, err := os.MkdirTemp(topTmpDir, "runctop")
200 if err != nil {
201 return "", err
202 }
203 if err := mount(tmpdir, tmpdir, "", "bind", unix.MS_BIND, ""); err != nil {
204 return "", err
205 }
206 if err := mount("", tmpdir, "", "", uintptr(unix.MS_PRIVATE), ""); err != nil {
207 return "", err
208 }
209 return tmpdir, nil
210 }
211
// cleanupTmp unmounts tmpdir and then removes it recursively. Errors from
// both steps are deliberately discarded, so the cleanup is best-effort.
func cleanupTmp(tmpdir string) {
	_ = unix.Unmount(tmpdir, 0)
	_ = os.RemoveAll(tmpdir)
}
216
217 func mountCmd(cmd configs.Command) error {
218 command := exec.Command(cmd.Path, cmd.Args[:]...)
219 command.Env = cmd.Env
220 command.Dir = cmd.Dir
221 if out, err := command.CombinedOutput(); err != nil {
222 return fmt.Errorf("%#v failed: %s: %w", cmd, string(out), err)
223 }
224 return nil
225 }
226
227 func prepareBindMount(m *configs.Mount, rootfs string, mountFd *int) error {
228 source := m.Source
229 if mountFd != nil {
230 source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
231 }
232
233 stat, err := os.Stat(source)
234 if err != nil {
235
236
237 return err
238 }
239
240
241
242
243 var dest string
244 if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
245 return err
246 }
247 if err := checkProcMount(rootfs, dest, source); err != nil {
248 return err
249 }
250 if err := createIfNotExists(dest, stat.IsDir()); err != nil {
251 return err
252 }
253
254 return nil
255 }
256
// mountCgroupV1 builds a cgroup v1 hierarchy at m.Destination inside the
// container: a tmpfs at the destination, one mount per cgroup subsystem below
// it, and a symlink per controller for co-mounted ("a,b") subsystems.
func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
	binds, err := getCgroupMounts(m)
	if err != nil {
		return err
	}
	// Remember every subsystem directory whose name is a comma-separated
	// list of controllers; those get per-controller symlinks at the end.
	var merged []string
	for _, b := range binds {
		ss := filepath.Base(b.Destination)
		if strings.Contains(ss, ",") {
			merged = append(merged, ss)
		}
	}
	// The tmpfs that hosts the per-subsystem mountpoints.
	tmpfs := &configs.Mount{
		Source:           "tmpfs",
		Device:           "tmpfs",
		Destination:      m.Destination,
		Flags:            defaultMountFlags,
		Data:             "mode=755",
		PropagationFlags: m.PropagationFlags,
	}

	if err := mountToRootfs(tmpfs, c); err != nil {
		return err
	}

	for _, b := range binds {
		if c.cgroupns {
			// With a cgroup namespace, mount a fresh cgroup filesystem for
			// the subsystem instead of bind-mounting the host directory.
			subsystemPath := filepath.Join(c.root, b.Destination)
			if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
				return err
			}
			if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
				flags := defaultMountFlags
				if m.Flags&unix.MS_RDONLY != 0 {
					flags = flags | unix.MS_RDONLY
				}
				// The mount data is the subsystem directory name; the
				// "systemd" hierarchy gets cgroups.CgroupNamePrefix
				// prepended and "systemd" as its source.
				var (
					source = "cgroup"
					data   = filepath.Base(subsystemPath)
				)
				if data == "systemd" {
					data = cgroups.CgroupNamePrefix + data
					source = "systemd"
				}
				return mount(source, b.Destination, procfd, "cgroup", uintptr(flags), data)
			}); err != nil {
				return err
			}
		} else {
			if err := mountToRootfs(b, c); err != nil {
				return err
			}
		}
	}
	for _, mc := range merged {
		for _, ss := range strings.Split(mc, ",") {
			// The link target is the bare merged directory name, i.e. a
			// path relative to the directory holding the link. An already
			// existing entry is not treated as an error.
			if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) {
				return err
			}
		}
	}
	return nil
}
323
// mountCgroupV2 mounts a cgroup2 filesystem at m.Destination inside the
// container. If the direct mount fails with EPERM or EBUSY it falls back to
// a bind mount, and for rootless cgroups it finally falls back to masking the
// destination when the bind source does not exist.
func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
	dest, err := securejoin.SecureJoin(c.root, m.Destination)
	if err != nil {
		return err
	}
	if err := os.MkdirAll(dest, 0o755); err != nil {
		return err
	}
	err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
		return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data)
	})
	// Done on success, and on any failure other than EPERM or EBUSY.
	if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) {
		return err
	}

	// Fallback: bind-mount the unified cgroup mountpoint instead.
	bindM := &configs.Mount{
		Device:           "bind",
		Source:           fs2.UnifiedMountpoint,
		Destination:      m.Destination,
		Flags:            unix.MS_BIND | m.Flags,
		PropagationFlags: m.PropagationFlags,
	}
	if c.cgroupns && c.cgroup2Path != "" {
		// With a cgroup namespace, bind only the container's own cgroup
		// path rather than the whole unified hierarchy.
		bindM.Source = c.cgroup2Path
	}

	// mountToRootfs handles the "bind" device, including the remount that
	// applies flags such as MS_RDONLY.
	err = mountToRootfs(bindM, c)
	if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
		// The bind source does not exist. For rootless cgroups this is not
		// fatal: the destination is masked instead.
		// NOTE(review): presumably this happens when rootless runc runs
		// outside of its userns/mountns - confirm.
		err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
			return maskPath(procfd, c.label)
		})
	}
	return err
}
368
// doTmpfsCopyUp mounts the tmpfs described by m on a scratch directory,
// copies the current content of the container's m.Destination into it, and
// then moves the populated mount onto m.Destination inside rootfs.
//
// The named result Err is inspected by a deferred function so that the
// scratch mount is detached when any later step fails.
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
	// Scratch area: a private bind mount under /tmp, removed on return.
	tmpdir, err := prepareTmp("/tmp")
	if err != nil {
		return fmt.Errorf("tmpcopyup: failed to setup tmpdir: %w", err)
	}
	defer cleanupTmp(tmpdir)
	tmpDir, err := os.MkdirTemp(tmpdir, "runctmpdir")
	if err != nil {
		return fmt.Errorf("tmpcopyup: failed to create tmpdir: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	// Temporarily point m.Destination at the scratch directory and mount
	// relative to "/" so the tmpfs lands on tmpDir, not in the container.
	// The destination is restored whether or not the mount succeeded.
	oldDest := m.Destination
	m.Destination = tmpDir
	err = mountPropagate(m, "/", mountLabel, nil)
	m.Destination = oldDest
	if err != nil {
		return err
	}
	defer func() {
		// On failure the tmpfs is still mounted on tmpDir; detach it. On
		// success it has been moved away and there is nothing to unmount.
		if Err != nil {
			if err := unmount(tmpDir, unix.MNT_DETACH); err != nil {
				logrus.Warnf("tmpcopyup: %v", err)
			}
		}
	}()

	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
		// Copy what is currently at the destination into the scratch tmpfs.
		// NOTE(review): the trailing "/" presumably makes CopyDirectory
		// follow the procfd symlink instead of copying the link - confirm.
		if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
			return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err)
		}

		// Move the populated tmpfs onto the destination in the container.
		if err := mount(tmpDir, m.Destination, procfd, "", unix.MS_MOVE, ""); err != nil {
			return fmt.Errorf("tmpcopyup: failed to move mount: %w", err)
		}
		return nil
	})
}
413
// mountToRootfs performs the mount described by m inside the rootfs given by
// c, dispatching on m.Device:
//
//   - "proc", "sysfs": destination must be absent or a real directory;
//     mounted without a label.
//   - "mqueue": mounted without a label, then the destination is labelled.
//   - "tmpfs": keeps the mode of an existing destination, with optional
//     copy-up of its current content.
//   - "bind": validated, mounted, remounted when extra flags are set,
//     optionally relabelled, and given recursive attributes via setRecAttr.
//   - "cgroup": delegated to the cgroup v1 or v2 helper.
//   - anything else: checked against the /proc rules and mounted as-is.
func mountToRootfs(m *configs.Mount, c *mountConfig) error {
	rootfs := c.root

	// proc and sysfs are handled first, before the destination is resolved
	// with SecureJoin (which is what the remaining devices use).
	switch m.Device {
	case "proc", "sysfs":
		// Build the destination lexically (Clean/Join only) and inspect it
		// with Lstat, so a symlink at the destination is detected rather
		// than followed.
		dest := filepath.Clean(m.Destination)
		if !strings.HasPrefix(dest, rootfs) {
			// Destination is not already prefixed by rootfs: join it on.
			dest = filepath.Join(rootfs, dest)
		}
		if fi, err := os.Lstat(dest); err != nil {
			if !os.IsNotExist(err) {
				return err
			}
		} else if !fi.IsDir() {
			return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
		}
		if err := os.MkdirAll(dest, 0o755); err != nil {
			return err
		}

		// No mount label is applied to proc or sysfs.
		return mountPropagate(m, rootfs, "", nil)
	}

	mountLabel := c.label
	mountFd := c.fd
	dest, err := securejoin.SecureJoin(rootfs, m.Destination)
	if err != nil {
		return err
	}

	switch m.Device {
	case "mqueue":
		if err := os.MkdirAll(dest, 0o755); err != nil {
			return err
		}
		// Mounted without a label; the label is set on the mountpoint
		// afterwards instead.
		if err := mountPropagate(m, rootfs, "", nil); err != nil {
			return err
		}
		return label.SetFileLabel(dest, mountLabel)
	case "tmpfs":
		if stat, err := os.Stat(dest); err != nil {
			if err := os.MkdirAll(dest, 0o755); err != nil {
				return err
			}
		} else {
			// Destination exists: put its current mode first in the mount
			// data, followed by any data already configured on m.
			dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode()))
			if m.Data != "" {
				dt = dt + "," + m.Data
			}
			m.Data = dt
		}

		if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
			err = doTmpfsCopyUp(m, rootfs, mountLabel)
		} else {
			err = mountPropagate(m, rootfs, mountLabel, nil)
		}

		return err
	case "bind":
		if err := prepareBindMount(m, rootfs, mountFd); err != nil {
			return err
		}
		if err := mountPropagate(m, rootfs, mountLabel, mountFd); err != nil {
			return err
		}

		// Remount only when flags beyond MS_REC, MS_REMOUNT and MS_BIND are
		// requested; the remount is what applies those extra options.
		if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {

			if err := remount(m, rootfs, mountFd); err != nil {
				return err
			}
		}

		if m.Relabel != "" {
			if err := label.Validate(m.Relabel); err != nil {
				return err
			}
			shared := label.IsShared(m.Relabel)
			if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
				return err
			}
		}
	case "cgroup":
		if cgroups.IsCgroup2UnifiedMode() {
			return mountCgroupV2(m, c)
		}
		return mountCgroupV1(m, c)
	default:
		if err := checkProcMount(rootfs, dest, m.Source); err != nil {
			return err
		}
		if err := os.MkdirAll(dest, 0o755); err != nil {
			return err
		}
		return mountPropagate(m, rootfs, mountLabel, mountFd)
	}
	// Only the "bind" case falls through to here; every other case returns.
	if err := setRecAttr(m, rootfs); err != nil {
		return err
	}
	return nil
}
525
526 func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
527 mounts, err := cgroups.GetCgroupMounts(false)
528 if err != nil {
529 return nil, err
530 }
531
532 cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
533 if err != nil {
534 return nil, err
535 }
536
537 var binds []*configs.Mount
538
539 for _, mm := range mounts {
540 dir, err := mm.GetOwnCgroup(cgroupPaths)
541 if err != nil {
542 return nil, err
543 }
544 relDir, err := filepath.Rel(mm.Root, dir)
545 if err != nil {
546 return nil, err
547 }
548 binds = append(binds, &configs.Mount{
549 Device: "bind",
550 Source: filepath.Join(mm.Mountpoint, relDir),
551 Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
552 Flags: unix.MS_BIND | unix.MS_REC | m.Flags,
553 PropagationFlags: m.PropagationFlags,
554 })
555 }
556
557 return binds, nil
558 }
559
560
561
562
563
564 func checkProcMount(rootfs, dest, source string) error {
565 const procPath = "/proc"
566 path, err := filepath.Rel(filepath.Join(rootfs, procPath), dest)
567 if err != nil {
568 return err
569 }
570
571 if strings.HasPrefix(path, "..") {
572 return nil
573 }
574 if path == "." {
575
576 if source == "" {
577 return nil
578 }
579
580 isproc, err := isProc(source)
581 if err != nil {
582 return err
583 }
584
585
586 if isproc {
587 return nil
588 }
589 return fmt.Errorf("%q cannot be mounted because it is not of type proc", dest)
590 }
591
592
593
594 validProcMounts := []string{
595 "/proc/cpuinfo",
596 "/proc/diskstats",
597 "/proc/meminfo",
598 "/proc/stat",
599 "/proc/swaps",
600 "/proc/uptime",
601 "/proc/loadavg",
602 "/proc/slabinfo",
603 "/proc/net/dev",
604 "/proc/sys/kernel/ns_last_pid",
605 }
606 for _, valid := range validProcMounts {
607 path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
608 if err != nil {
609 return err
610 }
611 if path == "." {
612 return nil
613 }
614 }
615
616 return fmt.Errorf("%q cannot be mounted because it is inside /proc", dest)
617 }
618
619 func isProc(path string) (bool, error) {
620 var s unix.Statfs_t
621 if err := unix.Statfs(path, &s); err != nil {
622 return false, &os.PathError{Op: "statfs", Path: path, Err: err}
623 }
624 return s.Type == unix.PROC_SUPER_MAGIC, nil
625 }
626
// setupDevSymlinks creates the conventional /dev symlinks inside rootfs:
// fd, stdin, stdout and stderr pointing into /proc/self/fd, plus core
// pointing at /proc/kcore when that file exists on the current system.
// Links that already exist are left alone; any other error is returned.
func setupDevSymlinks(rootfs string) error {
	type devLink struct {
		target string // what the link points at
		name   string // link location, relative to rootfs
	}
	links := []devLink{
		{"/proc/self/fd", "/dev/fd"},
		{"/proc/self/fd/0", "/dev/stdin"},
		{"/proc/self/fd/1", "/dev/stdout"},
		{"/proc/self/fd/2", "/dev/stderr"},
	}

	// /proc/kcore is not always present; only link to it when it is.
	if _, err := os.Stat("/proc/kcore"); err == nil {
		links = append(links, devLink{"/proc/kcore", "/dev/core"})
	}

	for _, l := range links {
		linkPath := filepath.Join(rootfs, l.name)
		err := os.Symlink(l.target, linkPath)
		if err == nil || os.IsExist(err) {
			continue
		}
		return err
	}
	return nil
}
650
651
652
653
654
655 func reOpenDevNull() error {
656 var stat, devNullStat unix.Stat_t
657 file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
658 if err != nil {
659 return err
660 }
661 defer file.Close()
662 if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
663 return &os.PathError{Op: "fstat", Path: file.Name(), Err: err}
664 }
665 for fd := 0; fd < 3; fd++ {
666 if err := unix.Fstat(fd, &stat); err != nil {
667 return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err}
668 }
669 if stat.Rdev == devNullStat.Rdev {
670
671 if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
672 return &os.PathError{
673 Op: "dup3",
674 Path: "fd " + strconv.Itoa(int(file.Fd())),
675 Err: err,
676 }
677 }
678 }
679 }
680 return nil
681 }
682
683
684 func createDevices(config *configs.Config) error {
685 useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
686 oldMask := unix.Umask(0o000)
687 for _, node := range config.Devices {
688
689
690 if utils.CleanPath(node.Path) == "/dev/ptmx" {
691 continue
692 }
693
694
695
696 if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
697 unix.Umask(oldMask)
698 return err
699 }
700 }
701 unix.Umask(oldMask)
702 return nil
703 }
704
705 func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error {
706 f, err := os.Create(dest)
707 if err != nil && !os.IsExist(err) {
708 return err
709 }
710 if f != nil {
711 _ = f.Close()
712 }
713 return utils.WithProcfd(rootfs, dest, func(procfd string) error {
714 return mount(node.Path, dest, procfd, "bind", unix.MS_BIND, "")
715 })
716 }
717
718
719 func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
720 if node.Path == "" {
721
722 return nil
723 }
724 dest, err := securejoin.SecureJoin(rootfs, node.Path)
725 if err != nil {
726 return err
727 }
728 if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
729 return err
730 }
731 if bind {
732 return bindMountDeviceNode(rootfs, dest, node)
733 }
734 if err := mknodDevice(dest, node); err != nil {
735 if errors.Is(err, os.ErrExist) {
736 return nil
737 } else if errors.Is(err, os.ErrPermission) {
738 return bindMountDeviceNode(rootfs, dest, node)
739 }
740 return err
741 }
742 return nil
743 }
744
745 func mknodDevice(dest string, node *devices.Device) error {
746 fileMode := node.FileMode
747 switch node.Type {
748 case devices.BlockDevice:
749 fileMode |= unix.S_IFBLK
750 case devices.CharDevice:
751 fileMode |= unix.S_IFCHR
752 case devices.FifoDevice:
753 fileMode |= unix.S_IFIFO
754 default:
755 return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
756 }
757 dev, err := node.Mkdev()
758 if err != nil {
759 return err
760 }
761 if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
762 return &os.PathError{Op: "mknod", Path: dest, Err: err}
763 }
764 return os.Chown(dest, int(node.Uid), int(node.Gid))
765 }
766
767
768
769 func getParentMount(rootfs string) (string, string, error) {
770 mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(rootfs))
771 if err != nil {
772 return "", "", err
773 }
774 if len(mi) < 1 {
775 return "", "", fmt.Errorf("could not find parent mount of %s", rootfs)
776 }
777
778
779 var idx, maxlen int
780 for i := range mi {
781 if len(mi[i].Mountpoint) > maxlen {
782 maxlen = len(mi[i].Mountpoint)
783 idx = i
784 }
785 }
786 return mi[idx].Mountpoint, mi[idx].Optional, nil
787 }
788
789
790 func rootfsParentMountPrivate(rootfs string) error {
791 sharedMount := false
792
793 parentMount, optionalOpts, err := getParentMount(rootfs)
794 if err != nil {
795 return err
796 }
797
798 optsSplit := strings.Split(optionalOpts, " ")
799 for _, opt := range optsSplit {
800 if strings.HasPrefix(opt, "shared:") {
801 sharedMount = true
802 break
803 }
804 }
805
806
807
808
809
810 if sharedMount {
811 return mount("", parentMount, "", "", unix.MS_PRIVATE, "")
812 }
813
814 return nil
815 }
816
817 func prepareRoot(config *configs.Config) error {
818 flag := unix.MS_SLAVE | unix.MS_REC
819 if config.RootPropagation != 0 {
820 flag = config.RootPropagation
821 }
822 if err := mount("", "/", "", "", uintptr(flag), ""); err != nil {
823 return err
824 }
825
826
827
828
829 if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
830 return err
831 }
832
833 return mount(config.Rootfs, config.Rootfs, "", "bind", unix.MS_BIND|unix.MS_REC, "")
834 }
835
836 func setReadonly() error {
837 flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY)
838
839 err := mount("", "/", "", "", flags, "")
840 if err == nil {
841 return nil
842 }
843 var s unix.Statfs_t
844 if err := unix.Statfs("/", &s); err != nil {
845 return &os.PathError{Op: "statfs", Path: "/", Err: err}
846 }
847 flags |= uintptr(s.Flags)
848 return mount("", "/", "", "", flags, "")
849 }
850
851 func setupPtmx(config *configs.Config) error {
852 ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
853 if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
854 return err
855 }
856 if err := os.Symlink("pts/ptmx", ptmx); err != nil {
857 return err
858 }
859 return nil
860 }
861
862
863
// pivotRoot makes rootfs the root of the current mount namespace with
// pivot_root(2) and then detaches the previous root.
//
// The sequence is: open descriptors for the old and the new root, fchdir into
// the new root, pivot_root(".", "."), fchdir back into the old root, mark it
// MS_SLAVE|MS_REC, lazily unmount it, and finally chdir to "/".
func pivotRoot(rootfs string) error {
	// Keep a handle on the current root so it can still be reached (and
	// unmounted) after the pivot.
	oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
	if err != nil {
		return &os.PathError{Op: "open", Path: "/", Err: err}
	}
	defer unix.Close(oldroot)

	newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
	if err != nil {
		return &os.PathError{Op: "open", Path: rootfs, Err: err}
	}
	defer unix.Close(newroot)

	// Enter the new root so that "." in the pivot_root call refers to it.
	if err := unix.Fchdir(newroot); err != nil {
		return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err}
	}

	// Both new_root and put_old are "."; no separate directory is created
	// to hold the old root.
	if err := unix.PivotRoot(".", "."); err != nil {
		return &os.PathError{Op: "pivot_root", Path: ".", Err: err}
	}

	// Go back to the old root through the descriptor opened above, so the
	// following mount/unmount calls on "." act on it.
	if err := unix.Fchdir(oldroot); err != nil {
		return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err}
	}

	// NOTE(review): presumably MS_SLAVE|MS_REC keeps the unmount below from
	// propagating to other mount namespaces - confirm.
	if err := mount("", ".", "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
		return err
	}

	// Lazily detach the old root; the current directory is still inside it.
	if err := unmount(".", unix.MNT_DETACH); err != nil {
		return err
	}

	// Move into the new root.
	if err := unix.Chdir("/"); err != nil {
		return &os.PathError{Op: "chdir", Path: "/", Err: err}
	}
	return nil
}
920
// msMoveRoot switches the root to rootfs without pivot_root(2): it first
// unmounts (or, failing that, covers with a tmpfs) every full proc and sysfs
// mount outside of rootfs, then moves rootfs onto "/" with MS_MOVE and
// finishes with chroot().
func msMoveRoot(rootfs string) error {
	mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) {
		// Keep only proc and sysfs mounts whose root is "/" and whose
		// mountpoint does not start with rootfs; skip everything else.
		if info.Root != "/" ||
			(info.FSType != "proc" && info.FSType != "sysfs") ||
			strings.HasPrefix(info.Mountpoint, rootfs) {
			skip = true
		}
		return
	})
	if err != nil {
		return err
	}
	for _, info := range mountinfos {
		p := info.Mountpoint

		// NOTE(review): presumably MS_SLAVE|MS_REC keeps the unmount below
		// from propagating out of this namespace - confirm.
		if err := mount("", p, "", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
			if errors.Is(err, unix.ENOENT) {
				// The mountpoint is already gone (for example because a
				// parent was unmounted in an earlier iteration); nothing
				// left to do for it.
				continue
			}
			return err
		}
		if err := unmount(p, unix.MNT_DETACH); err != nil {
			if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) {
				return err
			} else {
				// Unmounting is not possible (EINVAL or EPERM): hide the
				// mount by mounting a tmpfs on top of it instead.
				if err := mount("tmpfs", p, "", "tmpfs", 0, ""); err != nil {
					return err
				}
			}
		}
	}

	// Move the rootfs mount on top of "/".
	if err := mount(rootfs, "/", "", "", unix.MS_MOVE, ""); err != nil {
		return err
	}
	return chroot()
}
983
984 func chroot() error {
985 if err := unix.Chroot("."); err != nil {
986 return &os.PathError{Op: "chroot", Path: ".", Err: err}
987 }
988 if err := unix.Chdir("/"); err != nil {
989 return &os.PathError{Op: "chdir", Path: "/", Err: err}
990 }
991 return nil
992 }
993
994
// createIfNotExists creates path when it does not exist yet: as a directory
// (with parents) when isDir is true, otherwise as an empty file whose parent
// directories are created as needed.
//
// Nothing is done, and nil is returned, when path already exists or when
// stat fails for any reason other than "does not exist".
func createIfNotExists(path string, isDir bool) error {
	_, statErr := os.Stat(path)
	if !os.IsNotExist(statErr) {
		// Either the path exists or stat failed for an unrelated reason.
		return nil
	}

	if isDir {
		return os.MkdirAll(path, 0o755)
	}

	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return err
	}
	f, err := os.OpenFile(path, os.O_CREATE, 0o755)
	if err != nil {
		return err
	}
	_ = f.Close()
	return nil
}
1013
1014
1015 func readonlyPath(path string) error {
1016 if err := mount(path, path, "", "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
1017 if errors.Is(err, os.ErrNotExist) {
1018 return nil
1019 }
1020 return err
1021 }
1022
1023 var s unix.Statfs_t
1024 if err := unix.Statfs(path, &s); err != nil {
1025 return &os.PathError{Op: "statfs", Path: path, Err: err}
1026 }
1027 flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
1028
1029 if err := mount(path, path, "", "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
1030 return err
1031 }
1032
1033 return nil
1034 }
1035
1036
1037 func remountReadonly(m *configs.Mount) error {
1038 var (
1039 dest = m.Destination
1040 flags = m.Flags
1041 )
1042 for i := 0; i < 5; i++ {
1043
1044
1045
1046
1047
1048
1049 flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
1050 if err := mount("", dest, "", "", uintptr(flags), ""); err != nil {
1051 if errors.Is(err, unix.EBUSY) {
1052 time.Sleep(100 * time.Millisecond)
1053 continue
1054 }
1055 return err
1056 }
1057 return nil
1058 }
1059 return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
1060 }
1061
1062
1063
1064
1065
1066
1067 func maskPath(path string, mountLabel string) error {
1068 if err := mount("/dev/null", path, "", "", unix.MS_BIND, ""); err != nil && !errors.Is(err, os.ErrNotExist) {
1069 if errors.Is(err, unix.ENOTDIR) {
1070 return mount("tmpfs", path, "", "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
1071 }
1072 return err
1073 }
1074 return nil
1075 }
1076
1077
1078
// writeSystemProperty writes value to the /proc/sys file addressed by the
// dotted key: every "." in key is turned into a path separator, so
// "net.ipv4.ip_forward" maps to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
	relPath := strings.ReplaceAll(key, ".", "/")
	target := path.Join("/proc/sys", relPath)
	return os.WriteFile(target, []byte(value), 0o644)
}
1083
1084 func remount(m *configs.Mount, rootfs string, mountFd *int) error {
1085 source := m.Source
1086 if mountFd != nil {
1087 source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
1088 }
1089
1090 return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
1091 flags := uintptr(m.Flags | unix.MS_REMOUNT)
1092 err := mount(source, m.Destination, procfd, m.Device, flags, "")
1093 if err == nil {
1094 return nil
1095 }
1096
1097 var s unix.Statfs_t
1098 if err := unix.Statfs(source, &s); err != nil {
1099 return &os.PathError{Op: "statfs", Path: source, Err: err}
1100 }
1101 if s.Flags&unix.MS_RDONLY != unix.MS_RDONLY {
1102 return err
1103 }
1104
1105 flags |= unix.MS_RDONLY
1106 return mount(source, m.Destination, procfd, m.Device, flags, "")
1107 })
1108 }
1109
1110
1111
// mountPropagate mounts m at m.Destination inside rootfs and then applies
// each of m.PropagationFlags to the new mount.
//
// The mount data is m.Data combined with mountLabel. The source is m.Source,
// or /proc/self/fd/<fd> when mountFd is non-nil. Both steps go through
// utils.WithProcfd and mount onto the procfd path it provides.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string, mountFd *int) error {
	var (
		data  = label.FormatMountLabel(m.Data, mountLabel)
		flags = m.Flags
	)

	// tmpfs mounts and /dev are never mounted read-only here: MS_RDONLY is
	// stripped, and finalizeRootfs remounts them read-only later when the
	// flag was requested.
	if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" {
		flags &= ^unix.MS_RDONLY
	}

	source := m.Source
	if mountFd != nil {
		source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
	}

	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
		return mount(source, m.Destination, procfd, m.Device, uintptr(flags), data)
	}); err != nil {
		return err
	}

	// The propagation flags are applied in a second WithProcfd call.
	// NOTE(review): presumably the procfd from the first call refers to the
	// directory underneath the new mount, so the target has to be re-opened
	// to reach the mount itself - confirm.
	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
		for _, pflag := range m.PropagationFlags {
			if err := mount("", m.Destination, procfd, "", uintptr(pflag), ""); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return fmt.Errorf("change mount propagation through procfd: %w", err)
	}
	return nil
}
1154
1155 func setRecAttr(m *configs.Mount, rootfs string) error {
1156 if m.RecAttr == nil {
1157 return nil
1158 }
1159 return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
1160 return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr)
1161 })
1162 }
1163
View as plain text