1 package libcontainer
2
3 import (
4 "bytes"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "net"
10 "os"
11 "path/filepath"
12 "strings"
13 "unsafe"
14
15 "github.com/containerd/console"
16 "github.com/opencontainers/runtime-spec/specs-go"
17 "github.com/sirupsen/logrus"
18 "github.com/vishvananda/netlink"
19 "golang.org/x/sys/unix"
20
21 "github.com/opencontainers/runc/libcontainer/capabilities"
22 "github.com/opencontainers/runc/libcontainer/cgroups"
23 "github.com/opencontainers/runc/libcontainer/configs"
24 "github.com/opencontainers/runc/libcontainer/system"
25 "github.com/opencontainers/runc/libcontainer/user"
26 "github.com/opencontainers/runc/libcontainer/utils"
27 )
28
// initType names the kind of init process to construct; newContainerInit
// switches on it to pick the concrete initer implementation.
type initType string

const (
	// initSetns makes newContainerInit return a linuxSetnsInit. That path
	// rejects any non-nil mountFds ("runc exec" cannot mount).
	initSetns initType = "setns"
	// initStandard makes newContainerInit return a linuxStandardInit.
	initStandard initType = "standard"
)
35
// pid is a JSON-serialisable pair of process IDs. The JSON keys suggest the
// two values belong to successive bootstrap stages — NOTE(review): confirm
// against the code that produces this message.
type pid struct {
	// Pid is encoded under the "stage2_pid" key.
	Pid int `json:"stage2_pid"`
	// PidFirstChild is encoded under the "stage1_pid" key.
	PidFirstChild int `json:"stage1_pid"`
}
40
41
// network is the per-network entry carried in initConfig.Networks. It embeds
// configs.Network and adds one field that only the init process needs.
type network struct {
	configs.Network

	// TempVethPeerName is presumably the temporary name of the veth peer
	// before it is renamed inside the container — TODO confirm against the
	// network strategy implementation.
	TempVethPeerName string `json:"temp_veth_peer_name"`
}
49
50
// initConfig is the JSON document that newContainerInit decodes from the init
// pipe. It carries everything the init process needs to configure itself.
type initConfig struct {
	Args []string `json:"args"`
	// Env holds "NAME=value" pairs; newContainerInit exports each of them
	// into the process environment via populateProcessEnvironment.
	Env []string `json:"env"`
	// Cwd is the working directory finalizeNamespace changes into; an empty
	// value skips the chdir.
	Cwd string `json:"cwd"`
	// Capabilities, when non-nil, takes precedence over Config.Capabilities
	// in finalizeNamespace.
	Capabilities     *configs.Capabilities `json:"capabilities"`
	ProcessLabel     string                `json:"process_label"`
	AppArmorProfile  string                `json:"apparmor_profile"`
	NoNewPrivileges  bool                  `json:"no_new_privileges"`
	// User is the user specification resolved by setupUser.
	User string `json:"user"`
	// AdditionalGroups are extra groups resolved by setupUser; they are
	// rejected when RootlessEUID is set.
	AdditionalGroups []string        `json:"additional_groups"`
	Config           *configs.Config `json:"config"`
	// Networks is iterated by setupNetwork.
	Networks []*network `json:"network"`
	// PassedFilesCount is used by finalizeNamespace: fds starting at
	// PassedFilesCount+3 are handed to utils.CloseExecFrom.
	PassedFilesCount int              `json:"passed_files_count"`
	ContainerId      string           `json:"containerid"`
	Rlimits          []configs.Rlimit `json:"rlimits"`
	CreateConsole    bool             `json:"create_console"`
	// ConsoleWidth and ConsoleHeight are applied to the new pty by
	// setupConsole only when both are non-zero.
	ConsoleWidth  uint16 `json:"console_width"`
	ConsoleHeight uint16 `json:"console_height"`
	// RootlessEUID disables supplementary-group handling in setupUser.
	RootlessEUID    bool         `json:"rootless_euid,omitempty"`
	RootlessCgroups bool         `json:"rootless_cgroups,omitempty"`
	SpecState       *specs.State `json:"spec_state,omitempty"`
	Cgroup2Path     string       `json:"cgroup2_path,omitempty"`
}
74
// initer is the interface returned by newContainerInit; both the setns and
// the standard init implementations are returned through it.
type initer interface {
	// Init runs the initialisation and reports any failure.
	Init() error
}
78
79 func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
80 var config *initConfig
81 if err := json.NewDecoder(pipe).Decode(&config); err != nil {
82 return nil, err
83 }
84 if err := populateProcessEnvironment(config.Env); err != nil {
85 return nil, err
86 }
87 switch t {
88 case initSetns:
89
90 if mountFds != nil {
91 return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
92 }
93
94 return &linuxSetnsInit{
95 pipe: pipe,
96 consoleSocket: consoleSocket,
97 config: config,
98 logFd: logFd,
99 }, nil
100 case initStandard:
101 return &linuxStandardInit{
102 pipe: pipe,
103 consoleSocket: consoleSocket,
104 parentPid: unix.Getppid(),
105 config: config,
106 fifoFd: fifoFd,
107 logFd: logFd,
108 mountFds: mountFds,
109 }, nil
110 }
111 return nil, fmt.Errorf("unknown init type %q", t)
112 }
113
114
115
// populateProcessEnvironment exports every "NAME=value" entry of env into the
// current process environment, in order.
//
// Processing stops at the first invalid entry: one without '=', one with an
// empty name, or one whose name or value contains a NUL byte. Entries before
// the offending one have already been exported at that point.
func populateProcessEnvironment(env []string) error {
	for _, entry := range env {
		// Only the first '=' separates name from value; later ones are
		// part of the value.
		sep := strings.IndexByte(entry, '=')
		if sep < 0 {
			return errors.New("invalid environment variable: missing '='")
		}
		key, value := entry[:sep], entry[sep+1:]

		switch {
		case key == "":
			return errors.New("invalid environment variable: name cannot be empty")
		case strings.IndexByte(key, 0) >= 0:
			return fmt.Errorf("invalid environment variable %q: name contains nul byte (\\x00)", key)
		case strings.IndexByte(value, 0) >= 0:
			return fmt.Errorf("invalid environment variable %q: value contains nul byte (\\x00)", key)
		}

		if err := os.Setenv(key, value); err != nil {
			return err
		}
	}
	return nil
}
138
139
140
141 func verifyCwd() error {
142
143
144
145
146
147
148
149
150
151
152
153
154 if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) {
155 return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
156 } else if err != nil {
157 return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
158 } else if !filepath.IsAbs(wd) {
159
160 return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
161 }
162 return nil
163 }
164
165
166
167
// finalizeNamespace performs the last privileged steps of init, in this
// order: restrict inherited file descriptors, change to the configured
// working directory, apply the capability bounding set, switch user, verify
// the working directory and finally apply the remaining capability sets.
// The first failing step aborts the sequence and its error is returned.
func finalizeNamespace(config *initConfig) error {
	// Presumably marks every fd from PassedFilesCount+3 upward close-on-exec
	// (the 3 accounting for stdin, stdout and stderr) — confirm against
	// utils.CloseExecFrom.
	if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
		return fmt.Errorf("error closing exec fds: %w", err)
	}

	// A chdir is only performed when a working directory is configured.
	doChdir := config.Cwd != ""
	if doChdir {
		// First attempt, made before the user switch below.
		err := unix.Chdir(config.Cwd)
		switch {
		case err == nil:
			doChdir = false
		case os.IsPermission(err):
			// Permission denied with the current credentials: keep doChdir
			// set so the chdir is retried after setupUser has run.
		default:
			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
		}
	}

	// Capability source, by precedence: the per-process set, then the
	// container-wide set, otherwise an empty set.
	caps := &configs.Capabilities{}
	if config.Capabilities != nil {
		caps = config.Capabilities
	} else if config.Config.Capabilities != nil {
		caps = config.Config.Capabilities
	}
	w, err := capabilities.New(caps)
	if err != nil {
		return err
	}
	// The bounding set is applied before the user switch.
	if err := w.ApplyBoundingSet(); err != nil {
		return fmt.Errorf("unable to apply bounding set: %w", err)
	}
	// Keep-caps is enabled around the user switch and cleared again further
	// down, before the final capability sets are applied.
	if err := system.SetKeepCaps(); err != nil {
		return fmt.Errorf("unable to set keep caps: %w", err)
	}
	if err := setupUser(config); err != nil {
		return fmt.Errorf("unable to setup user: %w", err)
	}
	// Second chdir attempt, now with the target user's credentials; only
	// reached when the first attempt failed with a permission error.
	if doChdir {
		if err := unix.Chdir(config.Cwd); err != nil {
			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
		}
	}
	// Runs whether or not a chdir happened above.
	if err := verifyCwd(); err != nil {
		return err
	}
	if err := system.ClearKeepCaps(); err != nil {
		return fmt.Errorf("unable to clear keep caps: %w", err)
	}
	if err := w.ApplyCaps(); err != nil {
		return fmt.Errorf("unable to apply caps: %w", err)
	}
	return nil
}
235
236
237
238
239
240
241 func setupConsole(socket *os.File, config *initConfig, mount bool) error {
242 defer socket.Close()
243
244
245
246
247
248
249
250
251 pty, slavePath, err := console.NewPty()
252 if err != nil {
253 return err
254 }
255
256
257 defer pty.Close()
258
259 if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
260 err = pty.Resize(console.WinSize{
261 Height: config.ConsoleHeight,
262 Width: config.ConsoleWidth,
263 })
264
265 if err != nil {
266 return err
267 }
268 }
269
270
271 if mount {
272 if err := mountConsole(slavePath); err != nil {
273 return err
274 }
275 }
276
277 if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
278 return err
279 }
280
281 return dupStdio(slavePath)
282 }
283
284
285
286
287 func syncParentReady(pipe io.ReadWriter) error {
288
289 if err := writeSync(pipe, procReady); err != nil {
290 return err
291 }
292
293
294 return readSync(pipe, procRun)
295 }
296
297
298
299
300 func syncParentHooks(pipe io.ReadWriter) error {
301
302 if err := writeSync(pipe, procHooks); err != nil {
303 return err
304 }
305
306
307 return readSync(pipe, procResume)
308 }
309
310
311
312
313
314
315
316 func syncParentSeccomp(pipe io.ReadWriter, seccompFd int) error {
317 if seccompFd == -1 {
318 return nil
319 }
320
321
322 if err := writeSyncWithFd(pipe, procSeccomp, seccompFd); err != nil {
323 unix.Close(seccompFd)
324 return err
325 }
326
327
328 if err := readSync(pipe, procSeccompDone); err != nil {
329 unix.Close(seccompFd)
330 return fmt.Errorf("sync parent seccomp: %w", err)
331 }
332
333 if err := unix.Close(seccompFd); err != nil {
334 return fmt.Errorf("close seccomp fd: %w", err)
335 }
336
337 return nil
338 }
339
340
// setupUser resolves config.User and config.AdditionalGroups, then switches
// the process to the resulting gid and uid, setting supplementary groups
// first where that is permitted. If HOME is empty or unset afterwards it is
// set to the resolved user's home directory.
func setupUser(config *initConfig) error {
	// Defaults handed to the user lookup: root with "/" as home.
	defaultExecUser := user.ExecUser{
		Uid:  0,
		Gid:  0,
		Home: "/",
	}

	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return err
	}

	groupPath, err := user.GetGroupPath()
	if err != nil {
		return err
	}

	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
	if err != nil {
		return err
	}

	var addGroups []int
	if len(config.AdditionalGroups) > 0 {
		addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
		if err != nil {
			return err
		}
	}

	// Refuse to switch to an id for which HostUID/HostGID report an error;
	// the underlying error is replaced by a fixed message.
	if _, err := config.Config.HostUID(execUser.Uid); err != nil {
		return errors.New("cannot set uid to unmapped user in user namespace")
	}
	if _, err := config.Config.HostGID(execUser.Gid); err != nil {
		return errors.New("cannot set gid to unmapped user in user namespace")
	}

	if config.RootlessEUID {
		// Additional groups are rejected outright in this mode; setgroups
		// is skipped for it further down.
		if len(addGroups) > 0 {
			return errors.New("cannot set any additional groups in a rootless container")
		}
	}

	// Adjust stdio ownership while the process still has its original
	// credentials (this runs before Setgid/Setuid below).
	if err := fixStdioPermissions(execUser); err != nil {
		return err
	}

	// A missing /proc/self/setgroups is tolerated; setgroups is then empty
	// and never equals "deny".
	setgroups, err := os.ReadFile("/proc/self/setgroups")
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// Supplementary groups are set only outside rootless-EUID mode and only
	// when /proc/self/setgroups does not read "deny".
	allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny"

	if allowSupGroups {
		suppGroups := append(execUser.Sgids, addGroups...)
		if err := unix.Setgroups(suppGroups); err != nil {
			return &os.SyscallError{Syscall: "setgroups", Err: err}
		}
	}

	// Group id first, then user id.
	if err := system.Setgid(execUser.Gid); err != nil {
		return err
	}
	if err := system.Setuid(execUser.Uid); err != nil {
		return err
	}

	// Only fill in HOME when the environment does not already provide a
	// non-empty value.
	if envHome := os.Getenv("HOME"); envHome == "" {
		if err := os.Setenv("HOME", execUser.Home); err != nil {
			return err
		}
	}
	return nil
}
430
431
432
433
434 func fixStdioPermissions(u *user.ExecUser) error {
435 var null unix.Stat_t
436 if err := unix.Stat("/dev/null", &null); err != nil {
437 return &os.PathError{Op: "stat", Path: "/dev/null", Err: err}
438 }
439 for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
440 var s unix.Stat_t
441 if err := unix.Fstat(int(file.Fd()), &s); err != nil {
442 return &os.PathError{Op: "fstat", Path: file.Name(), Err: err}
443 }
444
445
446
447 if int(s.Uid) == u.Uid || s.Rdev == null.Rdev {
448 continue
449 }
450
451
452
453
454
455
456
457 if err := file.Chown(u.Uid, int(s.Gid)); err != nil {
458
459
460
461
462
463
464
465
466 if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) {
467 continue
468 }
469 return err
470 }
471 }
472 return nil
473 }
474
475
476 func setupNetwork(config *initConfig) error {
477 for _, config := range config.Networks {
478 strategy, err := getStrategy(config.Type)
479 if err != nil {
480 return err
481 }
482 if err := strategy.initialize(config); err != nil {
483 return err
484 }
485 }
486 return nil
487 }
488
489 func setupRoute(config *configs.Config) error {
490 for _, config := range config.Routes {
491 _, dst, err := net.ParseCIDR(config.Destination)
492 if err != nil {
493 return err
494 }
495 src := net.ParseIP(config.Source)
496 if src == nil {
497 return fmt.Errorf("Invalid source for route: %s", config.Source)
498 }
499 gw := net.ParseIP(config.Gateway)
500 if gw == nil {
501 return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
502 }
503 l, err := netlink.LinkByName(config.InterfaceName)
504 if err != nil {
505 return err
506 }
507 route := &netlink.Route{
508 Scope: netlink.SCOPE_UNIVERSE,
509 Dst: dst,
510 Src: src,
511 Gw: gw,
512 LinkIndex: l.Attrs().Index,
513 }
514 if err := netlink.RouteAdd(route); err != nil {
515 return err
516 }
517 }
518 return nil
519 }
520
521 func setupRlimits(limits []configs.Rlimit, pid int) error {
522 for _, rlimit := range limits {
523 if err := unix.Prlimit(pid, rlimit.Type, &unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}, nil); err != nil {
524 return fmt.Errorf("error setting rlimit type %v: %w", rlimit.Type, err)
525 }
526 }
527 return nil
528 }
529
// _P_PID is the waitid(2) idtype that selects a single process by its PID.
const _P_PID = 1

// siginfo mirrors the leading part of the kernel's siginfo_t, which is what
// waitid(2) fills in.
//
// In the kernel definition the three leading ints are followed by a union
// that contains pointers, so the union (and with it si_pid) starts at a
// pointer-aligned offset: 12 on 32-bit and 16 on 64-bit platforms. The
// zero-size [0]uintptr field reproduces that alignment; without it si_pid
// would overlay padding on 64-bit systems and always read as zero.
type siginfo struct {
	si_signo int32
	si_errno int32
	si_code  int32
	// Zero-size field: forces pointer alignment for the union that follows.
	_ [0]uintptr
	// First member of the union; the only union field read by this package.
	si_pid int32
	// Pads the struct to at least the 128 bytes of the kernel's siginfo_t
	// on both 32-bit and 64-bit platforms.
	pad [112]byte
}
542
543
544
545 func isWaitable(pid int) (bool, error) {
546 si := &siginfo{}
547 _, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
548 if e != 0 {
549 return false, &os.SyscallError{Syscall: "waitid", Err: e}
550 }
551
552 return si.si_pid != 0, nil
553 }
554
555
556
557
558
559
560 func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
561 var procs []*os.Process
562 if err := m.Freeze(configs.Frozen); err != nil {
563 logrus.Warn(err)
564 }
565 pids, err := m.GetAllPids()
566 if err != nil {
567 if err := m.Freeze(configs.Thawed); err != nil {
568 logrus.Warn(err)
569 }
570 return err
571 }
572 for _, pid := range pids {
573 p, err := os.FindProcess(pid)
574 if err != nil {
575 logrus.Warn(err)
576 continue
577 }
578 procs = append(procs, p)
579 if err := p.Signal(s); err != nil {
580 logrus.Warn(err)
581 }
582 }
583 if err := m.Freeze(configs.Thawed); err != nil {
584 logrus.Warn(err)
585 }
586
587 subreaper, err := system.GetSubreaper()
588 if err != nil {
589
590
591
592
593
594 subreaper = 0
595 }
596
597 for _, p := range procs {
598 if s != unix.SIGKILL {
599 if ok, err := isWaitable(p.Pid); err != nil {
600 if !errors.Is(err, unix.ECHILD) {
601 logrus.Warn("signalAllProcesses: ", p.Pid, err)
602 }
603 continue
604 } else if !ok {
605
606 continue
607 }
608 }
609
610
611
612
613
614
615 if subreaper == 0 {
616 if _, err := p.Wait(); err != nil {
617 if !errors.Is(err, unix.ECHILD) {
618 logrus.Warn("wait: ", err)
619 }
620 }
621 }
622 }
623 return nil
624 }
625
View as plain text