1 package systemd
2
3 import (
4 "bufio"
5 "context"
6 "errors"
7 "fmt"
8 "math"
9 "os"
10 "regexp"
11 "strconv"
12 "strings"
13 "sync"
14 "time"
15
16 systemdDbus "github.com/coreos/go-systemd/v22/dbus"
17 dbus "github.com/godbus/dbus/v5"
18 "github.com/sirupsen/logrus"
19
20 cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
21 "github.com/opencontainers/runc/libcontainer/configs"
22 "github.com/opencontainers/runc/libcontainer/devices"
23 )
24
// defCPUQuotaPeriod is the CPU quota period that addCpuQuota falls back to
// when the caller supplies a quota but no period.
// NOTE(review): presumably expressed in microseconds (i.e. 100ms), matching
// the *USec systemd properties it is used with -- confirm.
const defCPUQuotaPeriod uint64 = 100000
31
// Cached systemd version: versionOnce guards the one-time initialization of
// version (see systemdVersion).
var (
	versionOnce sync.Once
	version     int
)

// Cached result of the "is systemd running" probe: isRunningSystemdOnce
// guards the one-time initialization of isRunningSystemd (see
// IsRunningSystemd).
var (
	isRunningSystemdOnce sync.Once
	isRunningSystemd     bool
)
39
40
41
42
43
44
45
46
47 func IsRunningSystemd() bool {
48 isRunningSystemdOnce.Do(func() {
49 fi, err := os.Lstat("/run/systemd/system")
50 isRunningSystemd = err == nil && fi.IsDir()
51 })
52 return isRunningSystemd
53 }
54
55
56
57
// ExpandSlice turns a systemd slice unit name into the nested path of all
// its parent slices. For example "a-b-c.slice" becomes
// "/a.slice/a-b.slice/a-b-c.slice", and the root slice "-.slice" becomes "/".
//
// An error is returned when the name does not end in ".slice", contains a
// "/", or has an empty dash-separated component (which includes the bare
// ".slice" as well as names such as "a--b.slice" or "-a.slice").
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"

	// All rejection paths report the same message.
	invalid := func() (string, error) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	// The name must carry the suffix and must not already be a path.
	if !strings.HasSuffix(slice, suffix) || strings.Contains(slice, "/") {
		return invalid()
	}

	name := strings.TrimSuffix(slice, suffix)
	if name == "-" {
		// Root slice.
		return "/", nil
	}

	var (
		expanded strings.Builder
		end      int // length of the name prefix covering the components seen so far
	)
	for i, part := range strings.Split(name, "-") {
		if part == "" {
			return invalid()
		}
		if i > 0 {
			end++ // account for the "-" separating this component from the previous one
		}
		end += len(part)

		// Each level is named after the full dash-joined prefix up to and
		// including the current component.
		expanded.WriteString("/")
		expanded.WriteString(name[:end])
		expanded.WriteString(suffix)
	}
	return expanded.String(), nil
}
88
89 func groupPrefix(ruleType devices.Type) (string, error) {
90 switch ruleType {
91 case devices.BlockDevice:
92 return "block-", nil
93 case devices.CharDevice:
94 return "char-", nil
95 default:
96 return "", fmt.Errorf("device type %v has no group prefix", ruleType)
97 }
98 }
99
100
101
102
103
104 func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
105 fh, err := os.Open("/proc/devices")
106 if err != nil {
107 return "", err
108 }
109 defer fh.Close()
110
111 prefix, err := groupPrefix(ruleType)
112 if err != nil {
113 return "", err
114 }
115
116 scanner := bufio.NewScanner(fh)
117 var currentType devices.Type
118 for scanner.Scan() {
119
120 line := strings.TrimSpace(scanner.Text())
121
122
123 switch line {
124 case "Block devices:":
125 currentType = devices.BlockDevice
126 continue
127 case "Character devices:":
128 currentType = devices.CharDevice
129 continue
130 case "":
131 continue
132 }
133
134
135 if currentType != ruleType {
136 continue
137 }
138
139
140 var (
141 currMajor int64
142 currName string
143 )
144 if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
145 if err == nil {
146 err = errors.New("wrong number of fields")
147 }
148 return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
149 }
150
151 if currMajor == ruleMajor {
152 return prefix + currName, nil
153 }
154 }
155 if err := scanner.Err(); err != nil {
156 return "", fmt.Errorf("reading /proc/devices: %w", err)
157 }
158
159 return "", nil
160 }
161
162
163
// deviceAllowEntry is one element of the value passed for the "DeviceAllow"
// unit property: Path is the device node path or device-group specifier and
// Perms is the permission string taken from the device rule.
// NOTE(review): the field order (Path, then Perms) presumably determines the
// D-Bus encoding of the struct -- keep it as is.
type deviceAllowEntry struct {
	Path, Perms string
}
168
169 func allowAllDevices() []systemdDbus.Property {
170
171
172 return []systemdDbus.Property{
173 newProp("DevicePolicy", "auto"),
174 newProp("DeviceAllow", []deviceAllowEntry{}),
175 }
176 }
177
178
179
180 func generateDeviceProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) {
181 if r.SkipDevices {
182 return nil, nil
183 }
184
185 properties := []systemdDbus.Property{
186
187 newProp("DevicePolicy", "strict"),
188
189 newProp("DeviceAllow", []deviceAllowEntry{}),
190 }
191
192
193 configEmu := &cgroupdevices.Emulator{}
194 for _, rule := range r.Devices {
195 if err := configEmu.Apply(*rule); err != nil {
196 return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
197 }
198 }
199
200
201
202
203 if configEmu.IsBlacklist() {
204
205 if configEmu.IsAllowAll() {
206 return allowAllDevices(), nil
207 }
208 logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
209 return properties, nil
210 }
211
212
213
214
215 finalRules, err := configEmu.Rules()
216 if err != nil {
217 return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
218 }
219 var deviceAllowList []deviceAllowEntry
220 for _, rule := range finalRules {
221 if !rule.Allow {
222
223 return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
224 }
225 switch rule.Type {
226 case devices.BlockDevice, devices.CharDevice:
227 default:
228
229 return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
230 }
231
232 entry := deviceAllowEntry{
233 Perms: string(rule.Permissions),
234 }
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259 if rule.Major == devices.Wildcard {
260
261 if rule.Minor != devices.Wildcard {
262 logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
263 continue
264 }
265
266
267 prefix, err := groupPrefix(rule.Type)
268 if err != nil {
269 return nil, err
270 }
271 entry.Path = prefix + "*"
272 } else if rule.Minor == devices.Wildcard {
273
274 group, err := findDeviceGroup(rule.Type, rule.Major)
275 if err != nil {
276 return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
277 }
278 if group == "" {
279
280 logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
281 continue
282 }
283 entry.Path = group
284 } else {
285
286 switch rule.Type {
287 case devices.BlockDevice:
288 entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
289 case devices.CharDevice:
290 entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
291 }
292 if sdVer < 240 {
293
294
295
296
297
298
299 if _, err := os.Stat(entry.Path); err != nil {
300 continue
301 }
302 }
303 }
304 deviceAllowList = append(deviceAllowList, entry)
305 }
306
307 properties = append(properties, newProp("DeviceAllow", deviceAllowList))
308 return properties, nil
309 }
310
311 func newProp(name string, units interface{}) systemdDbus.Property {
312 return systemdDbus.Property{
313 Name: name,
314 Value: dbus.MakeVariant(units),
315 }
316 }
317
318 func getUnitName(c *configs.Cgroup) string {
319
320 if !strings.HasSuffix(c.Name, ".slice") {
321 return c.ScopePrefix + "-" + c.Name + ".scope"
322 }
323 return c.Name
324 }
325
326
// getUnitType returns the unit type matching unitName: "Slice" when the
// name ends in ".slice", "Scope" for everything else.
func getUnitType(unitName string) string {
	unitType := "Scope"
	if strings.HasSuffix(unitName, ".slice") {
		unitType = "Slice"
	}
	return unitType
}
333
334
335 func isDbusError(err error, name string) bool {
336 if err != nil {
337 var derr dbus.Error
338 if errors.As(err, &derr) {
339 return strings.Contains(derr.Name, name)
340 }
341 }
342 return false
343 }
344
345
346 func isUnitExists(err error) bool {
347 return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
348 }
349
350 func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
351 statusChan := make(chan string, 1)
352 retry := true
353
354 retry:
355 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
356 _, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
357 return err
358 })
359 if err != nil {
360 if !isUnitExists(err) {
361 return err
362 }
363 if ignoreExist {
364
365
366
367 return nil
368 }
369 if retry {
370
371
372
373 err = resetFailedUnit(cm, unitName)
374 if err != nil {
375 logrus.Warnf("unable to reset failed unit: %v", err)
376 }
377 retry = false
378 goto retry
379 }
380 return err
381 }
382
383 timeout := time.NewTimer(30 * time.Second)
384 defer timeout.Stop()
385
386 select {
387 case s := <-statusChan:
388 close(statusChan)
389
390 if s != "done" {
391 _ = resetFailedUnit(cm, unitName)
392 return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
393 }
394 case <-timeout.C:
395 _ = resetFailedUnit(cm, unitName)
396 return errors.New("Timeout waiting for systemd to create " + unitName)
397 }
398
399 return nil
400 }
401
402 func stopUnit(cm *dbusConnManager, unitName string) error {
403 statusChan := make(chan string, 1)
404 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
405 _, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
406 return err
407 })
408 if err == nil {
409 timeout := time.NewTimer(30 * time.Second)
410 defer timeout.Stop()
411
412 select {
413 case s := <-statusChan:
414 close(statusChan)
415
416 if s != "done" {
417 logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
418 }
419 case <-timeout.C:
420 return errors.New("Timed out while waiting for systemd to remove " + unitName)
421 }
422 }
423
424
425 _ = resetFailedUnit(cm, unitName)
426
427 return nil
428 }
429
430 func resetFailedUnit(cm *dbusConnManager, name string) error {
431 return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
432 return c.ResetFailedUnitContext(context.TODO(), name)
433 })
434 }
435
436 func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
437 var prop *systemdDbus.Property
438 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
439 prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
440 return Err
441 })
442 return prop, err
443 }
444
445 func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
446 return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
447 return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
448 })
449 }
450
451 func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
452 str := ""
453 err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
454 var err error
455 str, err = c.GetManagerProperty(name)
456 return err
457 })
458 if err != nil {
459 return "", err
460 }
461 return strconv.Unquote(str)
462 }
463
464 func systemdVersion(cm *dbusConnManager) int {
465 versionOnce.Do(func() {
466 version = -1
467 verStr, err := getManagerProperty(cm, "Version")
468 if err == nil {
469 version, err = systemdVersionAtoi(verStr)
470 }
471
472 if err != nil {
473 logrus.WithError(err).Error("unable to get systemd version")
474 }
475 })
476
477 return version
478 }
479
// systemdVersionAtoi extracts the leading version number from a systemd
// version string. The first run of decimal digits in verStr (optionally
// preceded by "v") is used, so "v245.4-1.fc32", "241-1" and "249" yield 245,
// 241 and 249 respectively.
//
// On failure (no digits found, or the number does not fit in an int) the
// returned value is always -1 together with a non-nil error.
func systemdVersionAtoi(verStr string) (int, error) {
	// Compiled on each call on purpose: the only caller in this file runs
	// once per process, so a package-level regexp would just add init cost.
	re := regexp.MustCompile(`v?([0-9]+)`)
	matches := re.FindStringSubmatch(verStr)
	if len(matches) < 2 {
		return -1, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
	}
	ver, err := strconv.Atoi(matches[1])
	if err != nil {
		return -1, fmt.Errorf("can't parse version: %w", err)
	}
	return ver, nil
}
497
498 func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
499 if period != 0 {
500
501 sdVer := systemdVersion(cm)
502 if sdVer >= 242 {
503 *properties = append(*properties,
504 newProp("CPUQuotaPeriodUSec", period))
505 } else {
506 logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
507 " (setting will still be applied to cgroupfs)", sdVer)
508 }
509 }
510 if quota != 0 || period != 0 {
511
512 cpuQuotaPerSecUSec := uint64(math.MaxUint64)
513 if quota > 0 {
514 if period == 0 {
515
516 period = defCPUQuotaPeriod
517 }
518
519
520
521
522 cpuQuotaPerSecUSec = uint64(quota*1000000) / period
523 if cpuQuotaPerSecUSec%10000 != 0 {
524 cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
525 }
526 }
527 *properties = append(*properties,
528 newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
529 }
530 }
531
532 func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
533 if cpus == "" && mems == "" {
534 return nil
535 }
536
537
538 sdVer := systemdVersion(cm)
539 if sdVer < 244 {
540 logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
541 " (settings will still be applied to cgroupfs)", sdVer)
542 return nil
543 }
544
545 if cpus != "" {
546 bits, err := RangeToBits(cpus)
547 if err != nil {
548 return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
549 cpus, err)
550 }
551 *props = append(*props,
552 newProp("AllowedCPUs", bits))
553 }
554 if mems != "" {
555 bits, err := RangeToBits(mems)
556 if err != nil {
557 return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
558 mems, err)
559 }
560 *props = append(*props,
561 newProp("AllowedMemoryNodes", bits))
562 }
563 return nil
564 }
565
View as plain text