From 923e7ff8638dc95f74b036587d6826fb9160074a5cc638ffd753d4bd87ce125e Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 13 May 2020 07:16:34 +0000 Subject: [PATCH] Accepting request 804873 from home:cyphar:docker - Backport https://github.com/opencontainers/runc/pull/2391 to help fix bsc#1168481. + bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch OBS-URL: https://build.opensuse.org/request/show/804873 OBS-URL: https://build.opensuse.org/package/show/Virtualization:containers/runc?expand=0&rev=94 --- ...ajor-cleanups-and-minimal-transition.patch | 3832 +++++++++++++++++ runc.changes | 7 + runc.spec | 9 +- 3 files changed, 3846 insertions(+), 2 deletions(-) create mode 100644 bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch diff --git a/bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch b/bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch new file mode 100644 index 0000000..85fea00 --- /dev/null +++ b/bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch @@ -0,0 +1,3832 @@ +From 607e6ef7bf4f32652f5346e72b712b92ace98f58 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Sun, 10 May 2020 02:40:25 +1000 +Subject: [PATCH] cgroup: devices: major cleanups and minimal transition rules + +This is a backport of [1], which resolves many issues with the +implementation of the runc devices cgroup code. Among other things, it +removes some of the disruptive aspects of "runc update" which customers +have run into. See [1] for more details. + +Rather than reproducing all of the commits, this is a squashed version +of the following upstream commits: + + * 19a3e9bf8053 ("contrib: recvtty: add --no-stdin flag") + * 11736767d2c5 ("cgroups: add GetFreezerState() helper to Manager") + * 4d363ccbd9cc ("cgroup: devices: eradicate the Allow/Deny lists") + * 5baa34c9134b ("specconv: remove default /dev/console access") + * 8ed0c4c03641 ("configs: use different types for .Devices and .Resources.Devices") + * 2ef5f417d923 ("cgroups: implement a devices cgroupv1 emulator") + * b70aacc28f43 ("cgroupv1: devices: use minimal transition rules with devices.Emulator") + * 410aef3ef858 ("cgroups: systemd: make use of Device*= properties") + +[1]: https://github.com/opencontainers/runc/pull/2391 + +SUSE-Bugs: bsc#1168481 +Signed-off-by: Aleksa Sarai +--- + contrib/cmd/recvtty/recvtty.go | 19 +- + libcontainer/README.md | 5 +- + libcontainer/cgroups/cgroups.go | 3 + + .../cgroups/devices/devices_emulator.go | 355 ++++++ + .../cgroups/devices/devices_emulator_test.go | 1096 +++++++++++++++++ + .../cgroups/ebpf/devicefilter/devicefilter.go | 4 +- + .../ebpf/devicefilter/devicefilter_test.go | 70 +- + libcontainer/cgroups/fs/apply_raw.go | 39 +- + libcontainer/cgroups/fs/devices.go | 87 +- + libcontainer/cgroups/fs/devices_test.go | 91 +- + libcontainer/cgroups/fs/freezer.go | 34 +- + libcontainer/cgroups/fs2/devices.go | 39 +- + libcontainer/cgroups/fs2/freezer.go | 62 +- + libcontainer/cgroups/fs2/fs2.go | 4 + + libcontainer/cgroups/systemd/apply_systemd.go | 229 +++- + .../cgroups/systemd/unified_hierarchy.go | 14 + + libcontainer/configs/cgroup_linux.go | 11 +- + libcontainer/configs/device.go | 169 ++- + libcontainer/configs/device_defaults.go | 111 -- + libcontainer/container_linux.go | 22 +- + libcontainer/container_linux_test.go | 5 + + libcontainer/devices/devices.go | 36 +- + libcontainer/integration/template_test.go | 11 +- + libcontainer/rootfs_linux.go | 19 +- + libcontainer/specconv/spec_linux.go | 556 +++++---- + 25 files changed, 2416 insertions(+), 675 deletions(-) + create mode 100644 libcontainer/cgroups/devices/devices_emulator.go + create mode 100644 libcontainer/cgroups/devices/devices_emulator_test.go + delete mode 100644 libcontainer/configs/device_defaults.go + +diff --git a/contrib/cmd/recvtty/recvtty.go b/contrib/cmd/recvtty/recvtty.go +index a658b8d20202..00b30e1c39cd 100644 +--- a/contrib/cmd/recvtty/recvtty.go ++++ b/contrib/cmd/recvtty/recvtty.go +@@ -65,7 +65,7 @@ func bail(err error) { + os.Exit(1) + } + +-func handleSingle(path string) error { ++func handleSingle(path string, noStdin bool) error { + // Open a socket. + ln, err := net.Listen("unix", path) + if err != nil { +@@ -113,10 +113,12 @@ func handleSingle(path string) error { + io.Copy(os.Stdout, c) + quitChan <- struct{}{} + }() +- go func() { +- io.Copy(c, os.Stdin) +- quitChan <- struct{}{} +- }() ++ if !noStdin { ++ go func() { ++ io.Copy(c, os.Stdin) ++ quitChan <- struct{}{} ++ }() ++ } + + // Only close the master fd once we've stopped copying. + <-quitChan +@@ -201,6 +203,10 @@ func main() { + Value: "", + Usage: "Path to write daemon process ID to", + }, ++ cli.BoolFlag{ ++ Name: "no-stdin", ++ Usage: "Disable stdin handling (no-op for null mode)", ++ }, + } + + app.Action = func(ctx *cli.Context) error { +@@ -218,9 +224,10 @@ func main() { + } + } + ++ noStdin := ctx.Bool("no-stdin") + switch ctx.String("mode") { + case "single": +- if err := handleSingle(path); err != nil { ++ if err := handleSingle(path, noStdin); err != nil { + return err + } + case "null": +diff --git a/libcontainer/README.md b/libcontainer/README.md +index a791ca2d2494..6803ef56c5ba 100644 +--- a/libcontainer/README.md ++++ b/libcontainer/README.md +@@ -155,8 +155,7 @@ config := &configs.Config{ + Parent: "system", + Resources: &configs.Resources{ + MemorySwappiness: nil, +- AllowAllDevices: nil, +- AllowedDevices: configs.DefaultAllowedDevices, ++ Devices: specconv.AllowedDevices, + }, + }, + MaskPaths: []string{ +@@ -166,7 +165,7 @@ config := &configs.Config{ + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, +- Devices: configs.DefaultAutoCreatedDevices, ++ Devices: specconv.AllowedDevices, + Hostname: "testing", + Mounts: []*configs.Mount{ + { +diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go +index c0a965923f9b..b4bc05c15f96 100644 +--- a/libcontainer/cgroups/cgroups.go ++++ b/libcontainer/cgroups/cgroups.go +@@ -49,6 +49,9 @@ type Manager interface { + + // Gets the cgroup as configured. + GetCgroups() (*configs.Cgroup, error) ++ ++ // GetFreezerState retrieves the current FreezerState of the cgroup. ++ GetFreezerState() (configs.FreezerState, error) + } + + type NotFoundError struct { +diff --git a/libcontainer/cgroups/devices/devices_emulator.go b/libcontainer/cgroups/devices/devices_emulator.go +new file mode 100644 +index 000000000000..cfb535bd4d8b +--- /dev/null ++++ b/libcontainer/cgroups/devices/devices_emulator.go +@@ -0,0 +1,355 @@ ++// +build linux ++ ++package devices ++ ++import ( ++ "bufio" ++ "io" ++ "regexp" ++ "sort" ++ "strconv" ++ ++ "github.com/opencontainers/runc/libcontainer/configs" ++ ++ "github.com/pkg/errors" ++) ++ ++// deviceMeta is a DeviceRule without the Allow or Permissions fields, and no ++// wildcard-type support. It's effectively the "match" portion of a metadata ++// rule, for the purposes of our emulation. ++type deviceMeta struct { ++ node configs.DeviceType ++ major int64 ++ minor int64 ++} ++ ++// deviceRule is effectively the tuple (deviceMeta, DevicePermissions). ++type deviceRule struct { ++ meta deviceMeta ++ perms configs.DevicePermissions ++} ++ ++// deviceRules is a mapping of device metadata rules to the associated ++// permissions in the ruleset. ++type deviceRules map[deviceMeta]configs.DevicePermissions ++ ++func (r deviceRules) orderedEntries() []deviceRule { ++ var rules []deviceRule ++ for meta, perms := range r { ++ rules = append(rules, deviceRule{meta: meta, perms: perms}) ++ } ++ sort.Slice(rules, func(i, j int) bool { ++ // Sort by (major, minor, type). ++ a, b := rules[i].meta, rules[j].meta ++ return a.major < b.major || ++ (a.major == b.major && a.minor < b.minor) || ++ (a.major == b.major && a.minor == b.minor && a.node < b.node) ++ }) ++ return rules ++} ++ ++type Emulator struct { ++ defaultAllow bool ++ rules deviceRules ++} ++ ++func (e *Emulator) IsBlacklist() bool { ++ return e.defaultAllow ++} ++ ++func (e *Emulator) IsAllowAll() bool { ++ return e.IsBlacklist() && len(e.rules) == 0 ++} ++ ++var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`) ++ ++func parseLine(line string) (*deviceRule, error) { ++ matches := devicesListRegexp.FindStringSubmatch(line) ++ if matches == nil { ++ return nil, errors.Errorf("line doesn't match devices.list format") ++ } ++ var ( ++ rule deviceRule ++ node = matches[1] ++ major = matches[2] ++ minor = matches[3] ++ perms = matches[4] ++ ) ++ ++ // Parse the node type. ++ switch node { ++ case "a": ++ // Super-special case -- "a" always means every device with every ++ // access mode. In fact, for devices.list this actually indicates that ++ // the cgroup is in black-list mode. ++ // TODO: Double-check that the entire file is "a *:* rwm". ++ return nil, nil ++ case "b": ++ rule.meta.node = configs.BlockDevice ++ case "c": ++ rule.meta.node = configs.CharDevice ++ default: ++ // Should never happen! ++ return nil, errors.Errorf("unknown device type %q", node) ++ } ++ ++ // Parse the major number. ++ if major == "*" { ++ rule.meta.major = configs.Wildcard ++ } else { ++ val, err := strconv.ParseUint(major, 10, 32) ++ if err != nil { ++ return nil, errors.Wrap(err, "parse major number") ++ } ++ rule.meta.major = int64(val) ++ } ++ ++ // Parse the minor number. ++ if minor == "*" { ++ rule.meta.minor = configs.Wildcard ++ } else { ++ val, err := strconv.ParseUint(minor, 10, 32) ++ if err != nil { ++ return nil, errors.Wrap(err, "parse minor number") ++ } ++ rule.meta.minor = int64(val) ++ } ++ ++ // Parse the access permissions. ++ rule.perms = configs.DevicePermissions(perms) ++ if !rule.perms.IsValid() || rule.perms.IsEmpty() { ++ // Should never happen! ++ return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) ++ } ++ return &rule, nil ++} ++ ++func (e *Emulator) addRule(rule deviceRule) error { ++ if e.rules == nil { ++ e.rules = make(map[deviceMeta]configs.DevicePermissions) ++ } ++ ++ // Merge with any pre-existing permissions. ++ oldPerms := e.rules[rule.meta] ++ newPerms := rule.perms.Union(oldPerms) ++ e.rules[rule.meta] = newPerms ++ return nil ++} ++ ++func (e *Emulator) rmRule(rule deviceRule) error { ++ // Give an error if any of the permissions requested to be removed are ++ // present in a partially-matching wildcard rule, because such rules will ++ // be ignored by cgroupv1. ++ // ++ // This is a diversion from cgroupv1, but is necessary to avoid leading ++ // users into a false sense of security. cgroupv1 will silently(!) ignore ++ // requests to remove partial exceptions, but we really shouldn't do that. ++ // ++ // It may seem like we could just "split" wildcard rules which hit this ++ // issue, but unfortunately there are 2^32 possible major and minor ++ // numbers, which would exhaust kernel memory quickly if we did this. Not ++ // to mention it'd be really slow (the kernel side is implemented as a ++ // linked-list of exceptions). ++ for _, partialMeta := range []deviceMeta{ ++ {node: rule.meta.node, major: configs.Wildcard, minor: rule.meta.minor}, ++ {node: rule.meta.node, major: rule.meta.major, minor: configs.Wildcard}, ++ {node: rule.meta.node, major: configs.Wildcard, minor: configs.Wildcard}, ++ } { ++ // This wildcard rule is equivalent to the requested rule, so skip it. ++ if rule.meta == partialMeta { ++ continue ++ } ++ // Only give an error if the set of permissions overlap. ++ partialPerms := e.rules[partialMeta] ++ if !partialPerms.Intersection(rule.perms).IsEmpty() { ++ return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) ++ } ++ } ++ ++ // Subtract all of the permissions listed from the full match rule. If the ++ // rule didn't exist, all of this is a no-op. ++ newPerms := e.rules[rule.meta].Difference(rule.perms) ++ if newPerms.IsEmpty() { ++ delete(e.rules, rule.meta) ++ } else { ++ e.rules[rule.meta] = newPerms ++ } ++ // TODO: The actual cgroup code doesn't care if an exception didn't exist ++ // during removal, so not erroring out here is /accurate/ but quite ++ // worrying. Maybe we should do additional validation, but again we ++ // have to worry about backwards-compatibility. ++ return nil ++} ++ ++func (e *Emulator) allow(rule *deviceRule) error { ++ // This cgroup is configured as a black-list. Reset the entire emulator, ++ // and put is into black-list mode. ++ if rule == nil || rule.meta.node == configs.WildcardDevice { ++ *e = Emulator{ ++ defaultAllow: true, ++ rules: nil, ++ } ++ return nil ++ } ++ ++ var err error ++ if e.defaultAllow { ++ err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception") ++ } else { ++ err = errors.Wrap(e.addRule(*rule), "add 'allow' exception") ++ } ++ return err ++} ++ ++func (e *Emulator) deny(rule *deviceRule) error { ++ // This cgroup is configured as a white-list. Reset the entire emulator, ++ // and put is into white-list mode. ++ if rule == nil || rule.meta.node == configs.WildcardDevice { ++ *e = Emulator{ ++ defaultAllow: false, ++ rules: nil, ++ } ++ return nil ++ } ++ ++ var err error ++ if e.defaultAllow { ++ err = errors.Wrap(e.addRule(*rule), "add 'deny' exception") ++ } else { ++ err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception") ++ } ++ return err ++} ++ ++func (e *Emulator) Apply(rule configs.DeviceRule) error { ++ if !rule.Type.CanCgroup() { ++ return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) ++ } ++ ++ innerRule := &deviceRule{ ++ meta: deviceMeta{ ++ node: rule.Type, ++ major: rule.Major, ++ minor: rule.Minor, ++ }, ++ perms: rule.Permissions, ++ } ++ if innerRule.meta.node == configs.WildcardDevice { ++ innerRule = nil ++ } ++ ++ if rule.Allow { ++ return e.allow(innerRule) ++ } else { ++ return e.deny(innerRule) ++ } ++} ++ ++// EmulatorFromList takes a reader to a "devices.list"-like source, and returns ++// a new Emulator that represents the state of the devices cgroup. Note that ++// black-list devices cgroups cannot be fully reconstructed, due to limitations ++// in the devices cgroup API. Instead, such cgroups are always treated as ++// "allow all" cgroups. ++func EmulatorFromList(list io.Reader) (*Emulator, error) { ++ // Normally cgroups are in black-list mode by default, but the way we ++ // figure out the current mode is whether or not devices.list has an ++ // allow-all rule. So we default to a white-list, and the existence of an ++ // "a *:* rwm" entry will tell us otherwise. ++ e := &Emulator{ ++ defaultAllow: false, ++ } ++ ++ // Parse the "devices.list". ++ s := bufio.NewScanner(list) ++ for s.Scan() { ++ line := s.Text() ++ deviceRule, err := parseLine(line) ++ if err != nil { ++ return nil, errors.Wrapf(err, "parsing line %q", line) ++ } ++ // "devices.list" is an allow list. Note that this means that in ++ // black-list mode, we have no idea what rules are in play. As a ++ // result, we need to be very careful in Transition(). ++ if err := e.allow(deviceRule); err != nil { ++ return nil, errors.Wrapf(err, "adding devices.list rule") ++ } ++ } ++ if err := s.Err(); err != nil { ++ return nil, errors.Wrap(err, "reading devices.list lines") ++ } ++ return e, nil ++} ++ ++// Transition calculates what is the minimally-disruptive set of rules need to ++// be applied to a devices cgroup in order to transition to the given target. ++// This means that any already-existing rules will not be applied, and ++// disruptive rules (like denying all device access) will only be applied if ++// necessary. ++// ++// This function is the sole reason for all of Emulator -- to allow us ++// to figure out how to update a containers' cgroups without causing spurrious ++// device errors (if possible). ++func (source *Emulator) Transition(target *Emulator) ([]*configs.DeviceRule, error) { ++ var transitionRules []*configs.DeviceRule ++ oldRules := source.rules ++ ++ // If the default policy doesn't match, we need to include a "disruptive" ++ // rule (either allow-all or deny-all) in order to switch the cgroup to the ++ // correct default policy. ++ // ++ // However, due to a limitation in "devices.list" we cannot be sure what ++ // deny rules are in place in a black-list cgroup. Thus if the source is a ++ // black-list we also have to include a disruptive rule. ++ if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { ++ transitionRules = append(transitionRules, &configs.DeviceRule{ ++ Type: 'a', ++ Major: -1, ++ Minor: -1, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: target.defaultAllow, ++ }) ++ // The old rules are only relevant if we aren't starting out with a ++ // disruptive rule. ++ oldRules = nil ++ } ++ ++ // NOTE: We traverse through the rules in a sorted order so we always write ++ // the same set of rules (this is to aid testing). ++ ++ // First, we create inverse rules for any old rules not in the new set. ++ // This includes partial-inverse rules for specific permissions. This is a ++ // no-op if we added a disruptive rule, since oldRules will be empty. ++ for _, rule := range oldRules.orderedEntries() { ++ meta, oldPerms := rule.meta, rule.perms ++ newPerms := target.rules[meta] ++ droppedPerms := oldPerms.Difference(newPerms) ++ if !droppedPerms.IsEmpty() { ++ transitionRules = append(transitionRules, &configs.DeviceRule{ ++ Type: meta.node, ++ Major: meta.major, ++ Minor: meta.minor, ++ Permissions: droppedPerms, ++ Allow: target.defaultAllow, ++ }) ++ } ++ } ++ ++ // Add any additional rules which weren't in the old set. We happen to ++ // filter out rules which are present in both sets, though this isn't ++ // strictly necessary. ++ for _, rule := range target.rules.orderedEntries() { ++ meta, newPerms := rule.meta, rule.perms ++ oldPerms := oldRules[meta] ++ gainedPerms := newPerms.Difference(oldPerms) ++ if !gainedPerms.IsEmpty() { ++ transitionRules = append(transitionRules, &configs.DeviceRule{ ++ Type: meta.node, ++ Major: meta.major, ++ Minor: meta.minor, ++ Permissions: gainedPerms, ++ Allow: !target.defaultAllow, ++ }) ++ } ++ } ++ return transitionRules, nil ++} +diff --git a/libcontainer/cgroups/devices/devices_emulator_test.go b/libcontainer/cgroups/devices/devices_emulator_test.go +new file mode 100644 +index 000000000000..f813accef239 +--- /dev/null ++++ b/libcontainer/cgroups/devices/devices_emulator_test.go +@@ -0,0 +1,1096 @@ ++package devices ++ ++import ( ++ "bytes" ++ "reflect" ++ "testing" ++ ++ "github.com/opencontainers/runc/libcontainer/configs" ++) ++ ++func TestDeviceEmulatorLoad(t *testing.T) { ++ tests := []struct { ++ name, list string ++ expected *Emulator ++ }{ ++ { ++ name: "BlacklistMode", ++ list: `a *:* rwm`, ++ expected: &Emulator{ ++ defaultAllow: true, ++ }, ++ }, ++ { ++ name: "WhitelistBasic", ++ list: `c 4:2 rw`, ++ expected: &Emulator{ ++ defaultAllow: false, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 4, ++ minor: 2, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ }, ++ { ++ name: "WhitelistWildcard", ++ list: `b 0:* m`, ++ expected: &Emulator{ ++ defaultAllow: false, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 0, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("m"), ++ }, ++ }, ++ }, ++ { ++ name: "WhitelistDuplicate", ++ list: `c *:* rwm ++c 1:1 r`, ++ expected: &Emulator{ ++ defaultAllow: false, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: configs.Wildcard, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ // To match the kernel, we allow redundant rules. ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 1, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ }, ++ { ++ name: "WhitelistComplicated", ++ list: `c *:* m ++b *:* m ++c 1:3 rwm ++c 1:5 rwm ++c 1:7 rwm ++c 1:8 rwm ++c 1:9 rwm ++c 5:0 rwm ++c 5:2 rwm ++c 136:* rwm ++c 10:200 rwm`, ++ expected: &Emulator{ ++ defaultAllow: false, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: configs.Wildcard, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("m"), ++ { ++ node: configs.BlockDevice, ++ major: configs.Wildcard, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("m"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 3, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 7, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 8, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 9, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 5, ++ minor: 0, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 5, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 136, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 10, ++ minor: 200, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ }, ++ // Some invalid lists. ++ { ++ name: "InvalidFieldNumber", ++ list: `b 1:0`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidDeviceType", ++ list: `p *:* rwm`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidMajorNumber1", ++ list: `p -1:3 rwm`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidMajorNumber2", ++ list: `c foo:27 rwm`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidMinorNumber1", ++ list: `b 1:-4 rwm`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidMinorNumber2", ++ list: `b 1:foo rwm`, ++ expected: nil, ++ }, ++ { ++ name: "InvalidPermissions", ++ list: `b 1:7 rwk`, ++ expected: nil, ++ }, ++ } ++ ++ for _, test := range tests { ++ test := test // capture range variable ++ t.Run(test.name, func(t *testing.T) { ++ list := bytes.NewBufferString(test.list) ++ emu, err := EmulatorFromList(list) ++ if err != nil && test.expected != nil { ++ t.Fatalf("unexpected failure when creating emulator: %v", err) ++ } else if err == nil && test.expected == nil { ++ t.Fatalf("unexpected success when creating emulator: %#v", emu) ++ } ++ ++ if !reflect.DeepEqual(emu, test.expected) { ++ t.Errorf("final emulator state mismatch: %#v != %#v", emu, test.expected) ++ } ++ }) ++ } ++} ++ ++func testDeviceEmulatorApply(t *testing.T, baseDefaultAllow bool) { ++ tests := []struct { ++ name string ++ rule configs.DeviceRule ++ base, expected *Emulator ++ }{ ++ // Switch between default modes. ++ { ++ name: "SwitchToOtherMode", ++ rule: configs.DeviceRule{ ++ Type: configs.WildcardDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: configs.Wildcard, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 1, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: !baseDefaultAllow, ++ rules: nil, ++ }, ++ }, ++ { ++ name: "SwitchToSameModeNoop", ++ rule: configs.DeviceRule{ ++ Type: configs.WildcardDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: nil, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: nil, ++ }, ++ }, ++ { ++ name: "SwitchToSameMode", ++ rule: configs.DeviceRule{ ++ Type: configs.WildcardDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: configs.Wildcard, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 1, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: nil, ++ }, ++ }, ++ // Rule addition logic. ++ { ++ name: "RuleAdditionBasic", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleAdditionBasicDuplicate", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ // To match the kernel, we allow redundant rules. ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleAdditionBasicDuplicateNoop", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleAdditionMerge", ++ rule: configs.DeviceRule{ ++ Type: configs.BlockDevice, ++ Major: 5, ++ Minor: 12, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: 12, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: 12, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleAdditionMergeWildcard", ++ rule: configs.DeviceRule{ ++ Type: configs.BlockDevice, ++ Major: 5, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleAdditionMergeNoop", ++ rule: configs.DeviceRule{ ++ Type: configs.BlockDevice, ++ Major: 5, ++ Minor: 12, ++ Permissions: configs.DevicePermissions("r"), ++ Allow: !baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: 12, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 2, ++ minor: 1, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 5, ++ minor: 12, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ }, ++ // Rule removal logic. ++ { ++ name: "RuleRemovalBasic", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rm"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleRemovalNonexistent", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 4, ++ Minor: 1, ++ Permissions: configs.DevicePermissions("rw"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleRemovalFull", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rw"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("w"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ }, ++ { ++ name: "RuleRemovalPartial", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("r"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rm"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("m"), ++ { ++ node: configs.BlockDevice, ++ major: 1, ++ minor: 5, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ }, ++ // Check our non-canonical behaviour when it comes to try to "punch ++ // out" holes in a wildcard rule. ++ { ++ name: "RuleRemovalWildcardPunchoutImpossible", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("r"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("rm"), ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: nil, ++ }, ++ { ++ name: "RuleRemovalWildcardPunchoutPossible", ++ rule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("r"), ++ Allow: baseDefaultAllow, ++ }, ++ base: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("wm"), ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ expected: &Emulator{ ++ defaultAllow: baseDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("wm"), ++ }, ++ }, ++ }, ++ } ++ ++ for _, test := range tests { ++ test := test ++ t.Run(test.name, func(t *testing.T) { ++ err := test.base.Apply(test.rule) ++ if err != nil && test.expected != nil { ++ t.Fatalf("unexpected failure when applying apply rule: %v", err) ++ } else if err == nil && test.expected == nil { ++ t.Fatalf("unexpected success when applying apply rule: %#v", test.base) ++ } ++ ++ if test.expected != nil && !reflect.DeepEqual(test.base, test.expected) { ++ t.Errorf("final emulator state mismatch: %#v != %#v", test.base, test.expected) ++ } ++ }) ++ } ++} ++ ++func TestDeviceEmulatorWhitelistApply(t *testing.T) { ++ testDeviceEmulatorApply(t, false) ++} ++ ++func TestDeviceEmulatorBlacklistApply(t *testing.T) { ++ testDeviceEmulatorApply(t, true) ++} ++ ++func testDeviceEmulatorTransition(t *testing.T, sourceDefaultAllow bool) { ++ tests := []struct { ++ name string ++ source, target *Emulator ++ expected []*configs.DeviceRule ++ }{ ++ // No-op changes. ++ { ++ name: "Noop", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("wm"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("wm"), ++ }, ++ }, ++ // Identical white-lists produce no extra rules. ++ expected: nil, ++ }, ++ // Switching modes. ++ { ++ name: "SwitchToOtherMode", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: !sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.BlockDevice, ++ major: 42, ++ minor: configs.Wildcard, ++ }: configs.DevicePermissions("wm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ // Clear-all rule. ++ { ++ Type: configs.WildcardDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: !sourceDefaultAllow, ++ }, ++ // The actual rule-set. ++ { ++ Type: configs.BlockDevice, ++ Major: 42, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("wm"), ++ Allow: sourceDefaultAllow, ++ }, ++ }, ++ }, ++ // Rule changes. ++ { ++ name: "RuleAddition", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.BlockDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: !sourceDefaultAllow, ++ }, ++ }, ++ }, ++ { ++ name: "RuleRemoval", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 42, ++ minor: 1337, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.BlockDevice, ++ Major: 42, ++ Minor: 1337, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: sourceDefaultAllow, ++ }, ++ }, ++ }, ++ { ++ name: "RuleMultipleAdditionRemoval", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ { ++ node: configs.BlockDevice, ++ major: 3, ++ minor: 9, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.BlockDevice, ++ Major: 3, ++ Minor: 9, ++ Permissions: configs.DevicePermissions("rw"), ++ Allow: sourceDefaultAllow, ++ }, ++ }, ++ }, ++ // Modifying the access permissions. ++ { ++ name: "RulePartialAddition", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("r"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rwm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 2, ++ Permissions: configs.DevicePermissions("wm"), ++ Allow: !sourceDefaultAllow, ++ }, ++ }, ++ }, ++ { ++ name: "RulePartialRemoval", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("w"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 2, ++ Permissions: configs.DevicePermissions("r"), ++ Allow: sourceDefaultAllow, ++ }, ++ }, ++ }, ++ { ++ name: "RulePartialBoth", ++ source: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rw"), ++ }, ++ }, ++ target: &Emulator{ ++ defaultAllow: sourceDefaultAllow, ++ rules: deviceRules{ ++ { ++ node: configs.CharDevice, ++ major: 1, ++ minor: 2, ++ }: configs.DevicePermissions("rm"), ++ }, ++ }, ++ expected: []*configs.DeviceRule{ ++ { ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 2, ++ Permissions: configs.DevicePermissions("w"), ++ Allow: sourceDefaultAllow, ++ }, ++ { ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 2, ++ Permissions: configs.DevicePermissions("m"), ++ Allow: !sourceDefaultAllow, ++ }, ++ }, ++ }, ++ } ++ ++ for _, test := range tests { ++ test := test ++ t.Run(test.name, func(t *testing.T) { ++ // If we are in black-list mode, we need to prepend the relevant ++ // clear-all rule (the expected rule lists are written with ++ // white-list mode in mind), and then make a full copy of the ++ // target rules. ++ if sourceDefaultAllow && test.source.defaultAllow == test.target.defaultAllow { ++ test.expected = []*configs.DeviceRule{{ ++ Type: configs.WildcardDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: test.target.defaultAllow, ++ }} ++ for _, rule := range test.target.rules.orderedEntries() { ++ test.expected = append(test.expected, &configs.DeviceRule{ ++ Type: rule.meta.node, ++ Major: rule.meta.major, ++ Minor: rule.meta.minor, ++ Permissions: rule.perms, ++ Allow: !test.target.defaultAllow, ++ }) ++ } ++ } ++ ++ rules, err := test.source.Transition(test.target) ++ if err != nil { ++ t.Fatalf("unexpected error while calculating transition rules: %#v", err) ++ } ++ ++ if !reflect.DeepEqual(rules, test.expected) { ++ t.Errorf("rules don't match expected set: %#v != %#v", rules, test.expected) ++ } ++ ++ // Apply the rules to the source to see if it actually transitions ++ // correctly. This is all emulated but it's a good thing to ++ // double-check. ++ for _, rule := range rules { ++ if err := test.source.Apply(*rule); err != nil { ++ t.Fatalf("error while applying transition rule [%#v]: %v", rule, err) ++ } ++ } ++ if !reflect.DeepEqual(test.source, test.target) { ++ t.Errorf("transition incomplete after applying all rules: %#v != %#v", test.source, test.target) ++ } ++ }) ++ } ++} ++ ++func TestDeviceEmulatorTransitionFromBlacklist(t *testing.T) { ++ testDeviceEmulatorTransition(t, true) ++} ++ ++func TestDeviceEmulatorTransitionFromWhitelist(t *testing.T) { ++ testDeviceEmulatorTransition(t, false) ++} +diff --git a/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go b/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go +index 847ce8ef1a0d..af9275451cd8 100644 +--- a/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go ++++ b/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go +@@ -22,7 +22,7 @@ const ( + ) + + // DeviceFilter returns eBPF device filter program and its license string +-func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) { ++func DeviceFilter(devices []*configs.DeviceRule) (asm.Instructions, string, error) { + p := &program{} + p.init() + for i := len(devices) - 1; i >= 0; i-- { +@@ -67,7 +67,7 @@ func (p *program) init() { + } + + // appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element. +-func (p *program) appendDevice(dev *configs.Device) error { ++func (p *program) appendDevice(dev *configs.DeviceRule) error { + if p.blockID < 0 { + return errors.New("the program is finalized") + } +diff --git a/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go b/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go +index 59ff4b49bd35..885bf4db4ef5 100644 +--- a/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go ++++ b/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go +@@ -20,7 +20,7 @@ func hash(s, comm string) string { + return strings.Join(res, "\n") + } + +-func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) { ++func testDeviceFilter(t testing.TB, devices []*configs.DeviceRule, expectedStr string) { + insts, _, err := DeviceFilter(devices) + if err != nil { + t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices) +@@ -81,71 +81,69 @@ block-2: + 18: Exit + block-3: + 19: JNEImm dst: r2 off: -1 imm: 2 +- 20: JNEImm dst: r4 off: -1 imm: 5 +- 21: JNEImm dst: r5 off: -1 imm: 1 ++ 20: JNEImm dst: r4 off: -1 imm: 1 ++ 21: JNEImm dst: r5 off: -1 imm: 9 + 22: Mov32Imm dst: r0 imm: 1 + 23: Exit + block-4: + 24: JNEImm dst: r2 off: -1 imm: 2 + 25: JNEImm dst: r4 off: -1 imm: 1 +- 26: JNEImm dst: r5 off: -1 imm: 9 ++ 26: JNEImm dst: r5 off: -1 imm: 5 + 27: Mov32Imm dst: r0 imm: 1 + 28: Exit + block-5: + 29: JNEImm dst: r2 off: -1 imm: 2 +- 30: JNEImm dst: r4 off: -1 imm: 1 +- 31: JNEImm dst: r5 off: -1 imm: 5 ++ 30: JNEImm dst: r4 off: -1 imm: 5 ++ 31: JNEImm dst: r5 off: -1 imm: 0 + 32: Mov32Imm dst: r0 imm: 1 + 33: Exit + block-6: + 34: JNEImm dst: r2 off: -1 imm: 2 +- 35: JNEImm dst: r4 off: -1 imm: 5 +- 36: JNEImm dst: r5 off: -1 imm: 0 ++ 35: JNEImm dst: r4 off: -1 imm: 1 ++ 36: JNEImm dst: r5 off: -1 imm: 7 + 37: Mov32Imm dst: r0 imm: 1 + 38: Exit + block-7: + 39: JNEImm dst: r2 off: -1 imm: 2 + 40: JNEImm dst: r4 off: -1 imm: 1 +- 41: JNEImm dst: r5 off: -1 imm: 7 ++ 41: JNEImm dst: r5 off: -1 imm: 8 + 42: Mov32Imm dst: r0 imm: 1 + 43: Exit + block-8: + 44: JNEImm dst: r2 off: -1 imm: 2 + 45: JNEImm dst: r4 off: -1 imm: 1 +- 46: JNEImm dst: r5 off: -1 imm: 8 ++ 46: JNEImm dst: r5 off: -1 imm: 3 + 47: Mov32Imm dst: r0 imm: 1 + 48: Exit + block-9: +- 49: JNEImm dst: r2 off: -1 imm: 2 +- 50: JNEImm dst: r4 off: -1 imm: 1 +- 51: JNEImm dst: r5 off: -1 imm: 3 +- 52: Mov32Imm dst: r0 imm: 1 +- 53: Exit +-block-10: + // (b, wildcard, wildcard, m, true) +- 54: JNEImm dst: r2 off: -1 imm: 1 +- 55: Mov32Reg dst: r1 src: r3 +- 56: And32Imm dst: r1 imm: 1 +- 57: JEqImm dst: r1 off: -1 imm: 0 +- 58: Mov32Imm dst: r0 imm: 1 +- 59: Exit +-block-11: ++ 49: JNEImm dst: r2 off: -1 imm: 1 ++ 50: Mov32Reg dst: r1 src: r3 ++ 51: And32Imm dst: r1 imm: 1 ++ 52: JEqImm dst: r1 off: -1 imm: 0 ++ 53: Mov32Imm dst: r0 imm: 1 ++ 54: Exit ++block-10: + // (c, wildcard, wildcard, m, true) +- 60: JNEImm dst: r2 off: -1 imm: 2 +- 61: Mov32Reg dst: r1 src: r3 +- 62: And32Imm dst: r1 imm: 1 +- 63: JEqImm dst: r1 off: -1 imm: 0 +- 64: Mov32Imm dst: r0 imm: 1 +- 65: Exit +-block-12: +- 66: Mov32Imm dst: r0 imm: 0 +- 67: Exit ++ 55: JNEImm dst: r2 off: -1 imm: 2 ++ 56: Mov32Reg dst: r1 src: r3 ++ 57: And32Imm dst: r1 imm: 1 ++ 58: JEqImm dst: r1 off: -1 imm: 0 ++ 59: Mov32Imm dst: r0 imm: 1 ++ 60: Exit ++block-11: ++ 61: Mov32Imm dst: r0 imm: 0 ++ 62: Exit + ` +- testDeviceFilter(t, specconv.AllowedDevices, expected) ++ var devices []*configs.DeviceRule ++ for _, device := range specconv.AllowedDevices { ++ devices = append(devices, &device.DeviceRule) ++ } ++ testDeviceFilter(t, devices, expected) + } + + func TestDeviceFilter_Privileged(t *testing.T) { +- devices := []*configs.Device{ ++ devices := []*configs.DeviceRule{ + { + Type: 'a', + Major: -1, +@@ -171,7 +169,7 @@ block-0: + } + + func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) { +- devices := []*configs.Device{ ++ devices := []*configs.DeviceRule{ + { + Type: 'a', + Major: -1, +@@ -210,7 +208,7 @@ block-1: + } + + func TestDeviceFilter_Weird(t *testing.T) { +- devices := []*configs.Device{ ++ devices := []*configs.DeviceRule{ + { + Type: 'b', + Major: 8, +diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go +index ec148b489051..11a15660beb1 100644 +--- a/libcontainer/cgroups/fs/apply_raw.go ++++ b/libcontainer/cgroups/fs/apply_raw.go +@@ -162,9 +162,6 @@ func (m *Manager) Apply(pid int) (err error) { + } + + for _, sys := range m.getSubsystems() { +- // TODO: Apply should, ideally, be reentrant or be broken up into a separate +- // create and join phase so that the cgroup hierarchy for a container can be +- // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(sys.Name()) + if err != nil { + // The non-presence of the devices subsystem is +@@ -177,10 +174,10 @@ func (m *Manager) Apply(pid int) (err error) { + m.Paths[sys.Name()] = p + + if err := sys.Apply(d); err != nil { +- // In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't +- // been set, we don't bail on error in case of permission problems. +- // Cases where limits have been set (and we couldn't create our own +- // cgroup) are handled by Set. ++ // In the case of rootless (including euid=0 in userns), where an ++ // explicit cgroup path hasn't been set, we don't bail on error in ++ // case of permission problems. Cases where limits have been set ++ // (and we couldn't create our own cgroup) are handled by Set. + if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" { + delete(m.Paths, sys.Name()) + continue +@@ -272,22 +269,26 @@ func (m *Manager) Set(container *configs.Config) error { + + // Freeze toggles the container's freezer cgroup depending on the state + // provided +-func (m *Manager) Freeze(state configs.FreezerState) error { +- if m.Cgroups == nil { ++func (m *Manager) Freeze(state configs.FreezerState) (Err error) { ++ path := m.GetPaths()["freezer"] ++ if m.Cgroups == nil || path == "" { + return errors.New("cannot toggle freezer: cgroups not configured for container") + } + +- paths := m.GetPaths() +- dir := paths["freezer"] + prevState := m.Cgroups.Resources.Freezer + m.Cgroups.Resources.Freezer = state ++ defer func() { ++ if Err != nil { ++ m.Cgroups.Resources.Freezer = prevState ++ } ++ }() ++ + freezer, err := m.getSubsystems().Get("freezer") + if err != nil { + return err + } +- err = freezer.Set(dir, m.Cgroups) ++ err = freezer.Set(path, m.Cgroups) + if err != nil { +- m.Cgroups.Resources.Freezer = prevState + return err + } + return nil +@@ -409,3 +410,15 @@ func CheckCpushares(path string, c uint64) error { + func (m *Manager) GetCgroups() (*configs.Cgroup, error) { + return m.Cgroups, nil + } ++ ++func (m *Manager) GetFreezerState() (configs.FreezerState, error) { ++ paths := m.GetPaths() ++ dir := paths["freezer"] ++ freezer, err := m.getSubsystems().Get("freezer") ++ ++ // If the container doesn't have the freezer cgroup, say it's undefined. ++ if err != nil || dir == "" { ++ return configs.Undefined, nil ++ } ++ return freezer.(*FreezerGroup).GetState(dir) ++} +diff --git a/libcontainer/cgroups/fs/devices.go b/libcontainer/cgroups/fs/devices.go +index 036c8db773e5..4fc3951de913 100644 +--- a/libcontainer/cgroups/fs/devices.go ++++ b/libcontainer/cgroups/fs/devices.go +@@ -3,13 +3,19 @@ + package fs + + import ( ++ "bytes" ++ "fmt" ++ "reflect" ++ + "github.com/opencontainers/runc/libcontainer/cgroups" ++ "github.com/opencontainers/runc/libcontainer/cgroups/devices" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + ) + + type DevicesGroup struct { ++ testingSkipFinalCheck bool + } + + func (s *DevicesGroup) Name() string { +@@ -26,49 +32,74 @@ func (s *DevicesGroup) Apply(d *cgroupData) error { + return nil + } + ++func loadEmulator(path string) (*devices.Emulator, error) { ++ list, err := fscommon.ReadFile(path, "devices.list") ++ if err != nil { ++ return nil, err ++ } ++ return devices.EmulatorFromList(bytes.NewBufferString(list)) ++} ++ ++func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) { ++ // This defaults to a white-list -- which is what we want! ++ emu := &devices.Emulator{} ++ for _, rule := range rules { ++ if err := emu.Apply(*rule); err != nil { ++ return nil, err ++ } ++ } ++ return emu, nil ++} ++ + func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if system.RunningInUserNS() { + return nil + } + +- devices := cgroup.Resources.Devices +- if len(devices) > 0 { +- for _, dev := range devices { +- file := "devices.deny" +- if dev.Allow { +- file = "devices.allow" +- } +- if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil { +- return err +- } +- } +- return nil ++ // Generate two emulators, one for the current state of the cgroup and one ++ // for the requested state by the user. ++ current, err := loadEmulator(path) ++ if err != nil { ++ return err ++ } ++ target, err := buildEmulator(cgroup.Resources.Devices) ++ if err != nil { ++ return err + } +- if cgroup.Resources.AllowAllDevices != nil { +- if *cgroup.Resources.AllowAllDevices == false { +- if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil { +- return err +- } + +- for _, dev := range cgroup.Resources.AllowedDevices { +- if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil { +- return err +- } +- } +- return nil ++ // Compute the minimal set of transition rules needed to achieve the ++ // requested state. ++ transitionRules, err := current.Transition(target) ++ if err != nil { ++ return err ++ } ++ for _, rule := range transitionRules { ++ file := "devices.deny" ++ if rule.Allow { ++ file = "devices.allow" + } +- +- if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil { ++ if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil { + return err + } + } + +- for _, dev := range cgroup.Resources.DeniedDevices { +- if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil { ++ // Final safety check -- ensure that the resulting state is what was ++ // requested. This is only really correct for white-lists, but for ++ // black-lists we can at least check that the cgroup is in the right mode. ++ // ++ // This safety-check is skipped for the unit tests because we cannot ++ // currently mock devices.list correctly. ++ if !s.testingSkipFinalCheck { ++ currentAfter, err := loadEmulator(path) ++ if err != nil { + return err + } ++ if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) { ++ return fmt.Errorf("resulting devices cgroup doesn't precisely match target") ++ } else if target.IsBlacklist() != currentAfter.IsBlacklist() { ++ return fmt.Errorf("resulting devices cgroup doesn't match target mode") ++ } + } +- + return nil + } + +diff --git a/libcontainer/cgroups/fs/devices_test.go b/libcontainer/cgroups/fs/devices_test.go +index 648f4a2ff47b..da7ccb01ecc1 100644 +--- a/libcontainer/cgroups/fs/devices_test.go ++++ b/libcontainer/cgroups/fs/devices_test.go +@@ -9,91 +9,44 @@ import ( + "github.com/opencontainers/runc/libcontainer/configs" + ) + +-var ( +- allowedDevices = []*configs.Device{ +- { +- Path: "/dev/zero", +- Type: 'c', +- Major: 1, +- Minor: 5, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- } +- allowedList = "c 1:5 rwm" +- deniedDevices = []*configs.Device{ +- { +- Path: "/dev/null", +- Type: 'c', +- Major: 1, +- Minor: 3, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- } +- deniedList = "c 1:3 rwm" +-) +- + func TestDevicesSetAllow(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ +- "devices.deny": "a", ++ "devices.allow": "", ++ "devices.deny": "", ++ "devices.list": "a *:* rwm", + }) +- allowAllDevices := false +- helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices +- helper.CgroupData.config.Resources.AllowedDevices = allowedDevices +- devices := &DevicesGroup{} +- if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { +- t.Fatal(err) +- } +- +- value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") +- if err != nil { +- t.Fatalf("Failed to parse devices.allow - %s", err) +- } +- +- if value != allowedList { +- t.Fatal("Got the wrong value, set devices.allow failed.") +- } + +- // When AllowAllDevices is nil, devices.allow file should not be modified. +- helper.CgroupData.config.Resources.AllowAllDevices = nil +- if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { +- t.Fatal(err) +- } +- value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") +- if err != nil { +- t.Fatalf("Failed to parse devices.allow - %s", err) +- } +- if value != allowedList { +- t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.") ++ helper.CgroupData.config.Resources.Devices = []*configs.DeviceRule{ ++ { ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 5, ++ Permissions: configs.DevicePermissions("rwm"), ++ Allow: true, ++ }, + } +-} +- +-func TestDevicesSetDeny(t *testing.T) { +- helper := NewCgroupTestUtil("devices", t) +- defer helper.cleanup() + +- helper.writeFileContents(map[string]string{ +- "devices.allow": "a", +- }) +- +- allowAllDevices := true +- helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices +- helper.CgroupData.config.Resources.DeniedDevices = deniedDevices +- devices := &DevicesGroup{} ++ devices := &DevicesGroup{testingSkipFinalCheck: true} + if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { + t.Fatal(err) + } + ++ // The default deny rule must be written. + value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny") + if err != nil { +- t.Fatalf("Failed to parse devices.deny - %s", err) ++ t.Fatalf("Failed to parse devices.deny: %s", err) ++ } ++ if value[0] != 'a' { ++ t.Errorf("Got the wrong value (%q), set devices.deny failed.", value) + } + +- if value != deniedList { +- t.Fatal("Got the wrong value, set devices.deny failed.") ++ // Permitted rule must be written. ++ if value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil { ++ t.Fatalf("Failed to parse devices.allow: %s", err) ++ } else if value != "c 1:5 rwm" { ++ t.Errorf("Got the wrong value (%q), set devices.allow failed.", value) + } + } +diff --git a/libcontainer/cgroups/fs/freezer.go b/libcontainer/cgroups/fs/freezer.go +index 9dc81bdb84c3..a6c622958c8c 100644 +--- a/libcontainer/cgroups/fs/freezer.go ++++ b/libcontainer/cgroups/fs/freezer.go +@@ -4,12 +4,15 @@ package fs + + import ( + "fmt" ++ "os" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" ++ "github.com/pkg/errors" ++ "golang.org/x/sys/unix" + ) + + type FreezerGroup struct { +@@ -39,11 +42,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + return err + } + +- state, err := fscommon.ReadFile(path, "freezer.state") ++ state, err := s.GetState(path) + if err != nil { + return err + } +- if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { ++ if state == cgroup.Resources.Freezer { + break + } + +@@ -65,3 +68,30 @@ func (s *FreezerGroup) Remove(d *cgroupData) error { + func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil + } ++ ++func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) { ++ for { ++ state, err := fscommon.ReadFile(path, "freezer.state") ++ if err != nil { ++ // If the kernel is too old, then we just treat the freezer as ++ // being in an "undefined" state. ++ if os.IsNotExist(err) || errors.Cause(err) == unix.ENODEV { ++ err = nil ++ } ++ return configs.Undefined, err ++ } ++ switch strings.TrimSpace(state) { ++ case "THAWED": ++ return configs.Thawed, nil ++ case "FROZEN": ++ return configs.Frozen, nil ++ case "FREEZING": ++ // Make sure we get a stable freezer state, so retry if the cgroup ++ // is still undergoing freezing. This should be a temporary delay. ++ time.Sleep(1 * time.Millisecond) ++ continue ++ default: ++ return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state) ++ } ++ } ++} +diff --git a/libcontainer/cgroups/fs2/devices.go b/libcontainer/cgroups/fs2/devices.go +index e0fd685402a2..ee180bdaf50b 100644 +--- a/libcontainer/cgroups/fs2/devices.go ++++ b/libcontainer/cgroups/fs2/devices.go +@@ -10,12 +10,10 @@ import ( + "golang.org/x/sys/unix" + ) + +-func isRWM(cgroupPermissions string) bool { +- r := false +- w := false +- m := false +- for _, rn := range cgroupPermissions { +- switch rn { ++func isRWM(perms configs.DevicePermissions) bool { ++ var r, w, m bool ++ for _, perm := range perms { ++ switch perm { + case 'r': + r = true + case 'w': +@@ -39,22 +37,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool { + } + + func setDevices(dirPath string, cgroup *configs.Cgroup) error { ++ // XXX: This is currently a white-list (but all callers pass a blacklist of ++ // devices). This is bad for a whole variety of reasons, but will need ++ // to be fixed with co-ordinated effort with downstreams. + devices := cgroup.Devices +- if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil { +- // never set by OCI specconv, but *allowAllDevices=false is still used by the integration test +- if *allowAllDevices == true { +- return errors.New("libcontainer AllowAllDevices is not supported, use Devices") +- } +- for _, ad := range cgroup.Resources.AllowedDevices { +- d := *ad +- d.Allow = true +- devices = append(devices, &d) +- } +- } +- if len(cgroup.Resources.DeniedDevices) != 0 { +- // never set by OCI specconv +- return errors.New("libcontainer DeniedDevices is not supported, use Devices") +- } + insts, license, err := devicefilter.DeviceFilter(devices) + if err != nil { + return err +@@ -64,6 +50,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error { + return errors.Errorf("cannot get dir FD for %s", dirPath) + } + defer unix.Close(dirFD) ++ // XXX: This code is currently incorrect when it comes to updating an ++ // existing cgroup with new rules (new rulesets are just appended to ++ // the program list because this uses BPF_F_ALLOW_MULTI). If we didn't ++ // use BPF_F_ALLOW_MULTI we could actually atomically swap the ++ // programs. ++ // ++ // The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a ++ // race-free blacklist because it acts as a whitelist by default, and ++ // having a deny-everything program cannot be overriden by other ++ // programs. You could temporarily insert a deny-everything program ++ // but that would result in spurrious failures during updates. + if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { + if !canSkipEBPFError(cgroup) { + return err +diff --git a/libcontainer/cgroups/fs2/freezer.go b/libcontainer/cgroups/fs2/freezer.go +index 130c63f3e2bf..3dd4a527bf0f 100644 +--- a/libcontainer/cgroups/fs2/freezer.go ++++ b/libcontainer/cgroups/fs2/freezer.go +@@ -3,32 +3,48 @@ + package fs2 + + import ( +- "strconv" ++ "os" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" ++ "golang.org/x/sys/unix" + ) + + func setFreezer(dirPath string, state configs.FreezerState) error { +- var desired int ++ if err := supportsFreezer(dirPath); err != nil { ++ // We can ignore this request as long as the user didn't ask us to ++ // freeze the container (since without the freezer cgroup, that's a ++ // no-op). ++ if state == configs.Undefined || state == configs.Thawed { ++ err = nil ++ } ++ return errors.Wrap(err, "freezer not supported") ++ } ++ ++ var stateStr string + switch state { + case configs.Undefined: + return nil + case configs.Frozen: +- desired = 1 ++ stateStr = "1" + case configs.Thawed: +- desired = 0 ++ stateStr = "0" + default: +- return errors.Errorf("unknown freezer state %+v", state) ++ return errors.Errorf("invalid freezer state %q requested", state) ++ } ++ ++ if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil { ++ return err + } +- supportedErr := supportsFreezer(dirPath) +- if supportedErr != nil && desired != 0 { +- // can ignore error if desired == 1 +- return errors.Wrap(supportedErr, "freezer not supported") ++ // Confirm that the cgroup did actually change states. ++ if actualState, err := getFreezer(dirPath); err != nil { ++ return err ++ } else if actualState != state { ++ return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState) + } +- return freezeWithInt(dirPath, desired) ++ return nil + } + + func supportsFreezer(dirPath string) error { +@@ -36,18 +52,22 @@ func supportsFreezer(dirPath string) error { + return err + } + +-// freeze writes desired int to "cgroup.freeze". +-func freezeWithInt(dirPath string, desired int) error { +- desiredS := strconv.Itoa(desired) +- if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil { +- return err +- } +- got, err := fscommon.ReadFile(dirPath, "cgroup.freeze") ++func getFreezer(dirPath string) (configs.FreezerState, error) { ++ state, err := fscommon.ReadFile(dirPath, "cgroup.freeze") + if err != nil { +- return err ++ // If the kernel is too old, then we just treat the freezer as being in ++ // an "undefined" state. ++ if os.IsNotExist(err) || errors.Cause(err) == unix.ENODEV { ++ err = nil ++ } ++ return configs.Undefined, err + } +- if gotS := strings.TrimSpace(string(got)); gotS != desiredS { +- return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS) ++ switch strings.TrimSpace(state) { ++ case "0": ++ return configs.Thawed, nil ++ case "1": ++ return configs.Frozen, nil ++ default: ++ return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state) + } +- return nil + } +diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go +index 4bb7091a1832..82d1d08430c7 100644 +--- a/libcontainer/cgroups/fs2/fs2.go ++++ b/libcontainer/cgroups/fs2/fs2.go +@@ -212,3 +212,7 @@ func (m *manager) Set(container *configs.Config) error { + func (m *manager) GetCgroups() (*configs.Cgroup, error) { + return m.config, nil + } ++ ++func (m *manager) GetFreezerState() (configs.FreezerState, error) { ++ return getFreezer(m.dirPath) ++} +diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go +index c4b19b3e6d4f..e2f94d1b1254 100644 +--- a/libcontainer/cgroups/systemd/apply_systemd.go ++++ b/libcontainer/cgroups/systemd/apply_systemd.go +@@ -3,7 +3,8 @@ + package systemd + + import ( +- "errors" ++ "bufio" ++ stdErrors "errors" + "fmt" + "io/ioutil" + "math" +@@ -16,8 +17,10 @@ import ( + systemdDbus "github.com/coreos/go-systemd/dbus" + "github.com/godbus/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" ++ "github.com/opencontainers/runc/libcontainer/cgroups/devices" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" ++ "github.com/pkg/errors" + "github.com/sirupsen/logrus" + ) + +@@ -36,7 +39,7 @@ type subsystem interface { + Set(path string, cgroup *configs.Cgroup) error + } + +-var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") ++var errSubsystemDoesNotExist = stdErrors.New("cgroup: subsystem does not exist") + + type subsystemSet []subsystem + +@@ -70,6 +73,209 @@ const ( + testSliceWait = 4 + ) + ++func groupPrefix(ruleType configs.DeviceType) (string, error) { ++ switch ruleType { ++ case configs.BlockDevice: ++ return "block-", nil ++ case configs.CharDevice: ++ return "char-", nil ++ default: ++ return "", errors.Errorf("device type %v has no group prefix", ruleType) ++ } ++} ++ ++// findDeviceGroup tries to find the device group name (as listed in ++// /proc/devices) with the type prefixed as requried for DeviceAllow, for a ++// given (type, major) combination. If more than one device group exists, an ++// arbitrary one is chosen. ++func findDeviceGroup(ruleType configs.DeviceType, ruleMajor int64) (string, error) { ++ fh, err := os.Open("/proc/devices") ++ if err != nil { ++ return "", err ++ } ++ defer fh.Close() ++ ++ prefix, err := groupPrefix(ruleType) ++ if err != nil { ++ return "", err ++ } ++ ++ scanner := bufio.NewScanner(fh) ++ var currentType configs.DeviceType ++ for scanner.Scan() { ++ // We need to strip spaces because the first number is column-aligned. ++ line := strings.TrimSpace(scanner.Text()) ++ ++ // Handle the "header" lines. ++ switch line { ++ case "Block devices:": ++ currentType = configs.BlockDevice ++ continue ++ case "Character devices:": ++ currentType = configs.CharDevice ++ continue ++ case "": ++ continue ++ } ++ ++ // Skip lines unrelated to our type. ++ if currentType != ruleType { ++ continue ++ } ++ ++ // Parse out the (major, name). ++ var ( ++ currMajor int64 ++ currName string ++ ) ++ if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 { ++ if err == nil { ++ err = errors.Errorf("wrong number of fields") ++ } ++ return "", errors.Wrapf(err, "scan /proc/devices line %q", line) ++ } ++ ++ if currMajor == ruleMajor { ++ return prefix + currName, nil ++ } ++ } ++ if err := scanner.Err(); err != nil { ++ return "", errors.Wrap(err, "reading /proc/devices") ++ } ++ // Couldn't find the device group. ++ return "", nil ++} ++ ++// generateDeviceProperties takes the configured device rules and generates a ++// corresponding set of systemd properties to configure the devices correctly. ++func generateDeviceProperties(rules []*configs.DeviceRule) ([]systemdDbus.Property, error) { ++ // DeviceAllow is the type "a(ss)" which means we need a temporary struct ++ // to represent it in Go. ++ type deviceAllowEntry struct { ++ Path string ++ Perms string ++ } ++ ++ properties := []systemdDbus.Property{ ++ // Always run in the strictest white-list mode. ++ newProp("DevicePolicy", "strict"), ++ // Empty the DevicesAllow array before filling it. ++ newProp("DeviceAllow", []deviceAllowEntry{}), ++ } ++ ++ // Figure out the set of rules. ++ configEmu := &devices.Emulator{} ++ for _, rule := range rules { ++ if err := configEmu.Apply(*rule); err != nil { ++ return nil, errors.Wrap(err, "apply rule for systemd") ++ } ++ } ++ // systemd doesn't support blacklists. So we log a warning, and tell ++ // systemd to act as a deny-all whitelist. This ruleset will be replaced ++ // with our normal fallback code. This may result in spurrious errors, but ++ // the only other option is to error out here. ++ if configEmu.IsBlacklist() { ++ // However, if we're dealing with an allow-all rule then we can do it. ++ if configEmu.IsAllowAll() { ++ return []systemdDbus.Property{ ++ // Run in white-list mode by setting to "auto" and removing all ++ // DeviceAllow rules. ++ newProp("DevicePolicy", "auto"), ++ newProp("DeviceAllow", []deviceAllowEntry{}), ++ }, nil ++ } ++ logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") ++ return properties, nil ++ } ++ ++ // Now generate the set of rules we actually need to apply. Unlike the ++ // normal devices cgroup, in "strict" mode systemd defaults to a deny-all ++ // whitelist which is the default for devices.Emulator. ++ baseEmu := &devices.Emulator{} ++ finalRules, err := baseEmu.Transition(configEmu) ++ if err != nil { ++ return nil, errors.Wrap(err, "get simplified rules for systemd") ++ } ++ var deviceAllowList []deviceAllowEntry ++ for _, rule := range finalRules { ++ if !rule.Allow { ++ // Should never happen. ++ return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule) ++ } ++ switch rule.Type { ++ case configs.BlockDevice, configs.CharDevice: ++ default: ++ // Should never happen. ++ return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type) ++ } ++ ++ entry := deviceAllowEntry{ ++ Perms: string(rule.Permissions), ++ } ++ ++ // systemd has a fairly odd (though understandable) syntax here, and ++ // because of the OCI configuration format we have to do quite a bit of ++ // trickery to convert things: ++ // ++ // * Concrete rules with non-wildcard major/minor numbers have to use ++ // /dev/{block,char} paths. This is slightly odd because it means ++ // that we cannot add whitelist rules for devices that don't exist, ++ // but there's not too much we can do about that. ++ // ++ // However, path globbing is not support for path-based rules so we ++ // need to handle wildcards in some other manner. ++ // ++ // * Wildcard-minor rules have to specify a "device group name" (the ++ // second column in /proc/devices). ++ // ++ // * Wildcard (major and minor) rules can just specify a glob with the ++ // type ("char-*" or "block-*"). ++ // ++ // The only type of rule we can't handle is wildcard-major rules, and ++ // so we'll give a warning in that case (note that the fallback code ++ // will insert any rules systemd couldn't handle). What amazing fun. ++ ++ if rule.Major == configs.Wildcard { ++ // "_ *:n _" rules aren't supported by systemd. ++ if rule.Minor != configs.Wildcard { ++ logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule) ++ continue ++ } ++ ++ // "_ *:* _" rules just wildcard everything. ++ prefix, err := groupPrefix(rule.Type) ++ if err != nil { ++ return nil, err ++ } ++ entry.Path = prefix + "*" ++ } else if rule.Minor == configs.Wildcard { ++ // "_ n:* _" rules require a device group from /proc/devices. ++ group, err := findDeviceGroup(rule.Type, rule.Major) ++ if err != nil { ++ return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major) ++ } ++ if group == "" { ++ // Couldn't find a group. ++ logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule) ++ continue ++ } ++ entry.Path = group ++ } else { ++ // "_ n:m _" rules are just a path in /dev/{block,char}/. ++ switch rule.Type { ++ case configs.BlockDevice: ++ entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor) ++ case configs.CharDevice: ++ entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor) ++ } ++ } ++ deviceAllowList = append(deviceAllowList, entry) ++ } ++ ++ properties = append(properties, newProp("DeviceAllow", deviceAllowList)) ++ return properties, nil ++} ++ + var ( + connLock sync.Mutex + theConn *systemdDbus.Conn +@@ -196,6 +402,12 @@ func (m *LegacyManager) Apply(pid int) error { + properties = append(properties, + newProp("DefaultDependencies", false)) + ++ deviceProperties, err := generateDeviceProperties(c.Resources.Devices) ++ if err != nil { ++ return err ++ } ++ properties = append(properties, deviceProperties...) ++ + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Resources.Memory))) +@@ -476,7 +688,6 @@ func (m *LegacyManager) Set(container *configs.Config) error { + if err != nil && !cgroups.IsNotFound(err) { + return err + } +- + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } +@@ -532,3 +743,15 @@ func isUnitExists(err error) bool { + func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) { + return m.Cgroups, nil + } ++ ++func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) { ++ path, err := getSubsystemPath(m.Cgroups, "freezer") ++ if err != nil && !cgroups.IsNotFound(err) { ++ return configs.Undefined, err ++ } ++ freezer, err := legacySubsystems.Get("freezer") ++ if err != nil { ++ return configs.Undefined, err ++ } ++ return freezer.(*fs.FreezerGroup).GetState(path) ++} +diff --git a/libcontainer/cgroups/systemd/unified_hierarchy.go b/libcontainer/cgroups/systemd/unified_hierarchy.go +index 6605099190ca..2bfcccd130ac 100644 +--- a/libcontainer/cgroups/systemd/unified_hierarchy.go ++++ b/libcontainer/cgroups/systemd/unified_hierarchy.go +@@ -87,6 +87,12 @@ func (m *UnifiedManager) Apply(pid int) error { + properties = append(properties, + newProp("DefaultDependencies", false)) + ++ deviceProperties, err := generateDeviceProperties(c.Resources.Devices) ++ if err != nil { ++ return err ++ } ++ properties = append(properties, deviceProperties...) ++ + if c.Resources.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Resources.Memory))) +@@ -310,3 +316,11 @@ func (m *UnifiedManager) Set(container *configs.Config) error { + func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) { + return m.Cgroups, nil + } ++ ++func (m *UnifiedManager) GetFreezerState() (configs.FreezerState, error) { ++ fsMgr, err := m.fsManager() ++ if err != nil { ++ return configs.Undefined, err ++ } ++ return fsMgr.GetFreezerState() ++} +diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go +index 58ed19c9e78e..e83bb72dc334 100644 +--- a/libcontainer/configs/cgroup_linux.go ++++ b/libcontainer/configs/cgroup_linux.go +@@ -32,15 +32,8 @@ type Cgroup struct { + } + + type Resources struct { +- // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. +- // Deprecated +- AllowAllDevices *bool `json:"allow_all_devices,omitempty"` +- // Deprecated +- AllowedDevices []*Device `json:"allowed_devices,omitempty"` +- // Deprecated +- DeniedDevices []*Device `json:"denied_devices,omitempty"` +- +- Devices []*Device `json:"devices"` ++ // Devices is the set of access rules for devices in the container. ++ Devices []*DeviceRule `json:"devices"` + + // Memory limit (in bytes) + Memory int64 `json:"memory"` +diff --git a/libcontainer/configs/device.go b/libcontainer/configs/device.go +index 8701bb212d7b..24c5bbfa6ae8 100644 +--- a/libcontainer/configs/device.go ++++ b/libcontainer/configs/device.go +@@ -1,8 +1,12 @@ + package configs + + import ( ++ "errors" + "fmt" + "os" ++ "strconv" ++ ++ "golang.org/x/sys/unix" + ) + + const ( +@@ -12,21 +16,11 @@ const ( + // TODO Windows: This can be factored out in the future + + type Device struct { +- // Device type, block, char, etc. +- Type rune `json:"type"` ++ DeviceRule + + // Path to the device. + Path string `json:"path"` + +- // Major is the device's major number. +- Major int64 `json:"major"` +- +- // Minor is the device's minor number. +- Minor int64 `json:"minor"` +- +- // Cgroup permissions format, rwm. +- Permissions string `json:"permissions"` +- + // FileMode permission bits for the device. + FileMode os.FileMode `json:"file_mode"` + +@@ -35,23 +29,154 @@ type Device struct { + + // Gid of the device. + Gid uint32 `json:"gid"` ++} + +- // Write the file to the allowed list +- Allow bool `json:"allow"` ++// DevicePermissions is a cgroupv1-style string to represent device access. It ++// has to be a string for backward compatibility reasons, hence why it has ++// methods to do set operations. ++type DevicePermissions string ++ ++const ( ++ deviceRead uint = (1 << iota) ++ deviceWrite ++ deviceMknod ++) ++ ++func (p DevicePermissions) toSet() uint { ++ var set uint ++ for _, perm := range p { ++ switch perm { ++ case 'r': ++ set |= deviceRead ++ case 'w': ++ set |= deviceWrite ++ case 'm': ++ set |= deviceMknod ++ } ++ } ++ return set ++} ++ ++func fromSet(set uint) DevicePermissions { ++ var perm string ++ if set&deviceRead == deviceRead { ++ perm += "r" ++ } ++ if set&deviceWrite == deviceWrite { ++ perm += "w" ++ } ++ if set&deviceMknod == deviceMknod { ++ perm += "m" ++ } ++ return DevicePermissions(perm) ++} ++ ++// Union returns the union of the two sets of DevicePermissions. ++func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions { ++ lhs := p.toSet() ++ rhs := o.toSet() ++ return fromSet(lhs | rhs) ++} ++ ++// Difference returns the set difference of the two sets of DevicePermissions. ++// In set notation, A.Difference(B) gives you A\B. ++func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions { ++ lhs := p.toSet() ++ rhs := o.toSet() ++ return fromSet(lhs &^ rhs) ++} ++ ++// Intersection computes the intersection of the two sets of DevicePermissions. ++func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions { ++ lhs := p.toSet() ++ rhs := o.toSet() ++ return fromSet(lhs & rhs) ++} ++ ++// IsEmpty returns whether the set of permissions in a DevicePermissions is ++// empty. ++func (p DevicePermissions) IsEmpty() bool { ++ return p == DevicePermissions("") ++} ++ ++// IsValid returns whether the set of permissions is a subset of valid ++// permissions (namely, {r,w,m}). ++func (p DevicePermissions) IsValid() bool { ++ return p == fromSet(p.toSet()) ++} ++ ++type DeviceType rune ++ ++const ( ++ WildcardDevice DeviceType = 'a' ++ BlockDevice DeviceType = 'b' ++ CharDevice DeviceType = 'c' // or 'u' ++ FifoDevice DeviceType = 'p' ++) ++ ++func (t DeviceType) IsValid() bool { ++ switch t { ++ case WildcardDevice, BlockDevice, CharDevice, FifoDevice: ++ return true ++ default: ++ return false ++ } + } + +-func (d *Device) CgroupString() string { +- return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) ++func (t DeviceType) CanMknod() bool { ++ switch t { ++ case BlockDevice, CharDevice, FifoDevice: ++ return true ++ default: ++ return false ++ } ++} ++ ++func (t DeviceType) CanCgroup() bool { ++ switch t { ++ case WildcardDevice, BlockDevice, CharDevice: ++ return true ++ default: ++ return false ++ } + } + +-func (d *Device) Mkdev() int { +- return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) ++type DeviceRule struct { ++ // Type of device ('c' for char, 'b' for block). If set to 'a', this rule ++ // acts as a wildcard and all fields other than Allow are ignored. ++ Type DeviceType `json:"type"` ++ ++ // Major is the device's major number. ++ Major int64 `json:"major"` ++ ++ // Minor is the device's minor number. ++ Minor int64 `json:"minor"` ++ ++ // Permissions is the set of permissions that this rule applies to (in the ++ // cgroupv1 format -- any combination of "rwm"). ++ Permissions DevicePermissions `json:"permissions"` ++ ++ // Allow specifies whether this rule is allowed. ++ Allow bool `json:"allow"` ++} ++ ++func (d *DeviceRule) CgroupString() string { ++ var ( ++ major = strconv.FormatInt(d.Major, 10) ++ minor = strconv.FormatInt(d.Minor, 10) ++ ) ++ if d.Major == Wildcard { ++ major = "*" ++ } ++ if d.Minor == Wildcard { ++ minor = "*" ++ } ++ return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions) + } + +-// deviceNumberString converts the device number to a string return result. +-func deviceNumberString(number int64) string { +- if number == Wildcard { +- return "*" ++func (d *DeviceRule) Mkdev() (uint64, error) { ++ if d.Major == Wildcard || d.Minor == Wildcard { ++ return 0, errors.New("cannot mkdev() device with wildcards") + } +- return fmt.Sprint(number) ++ return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil + } +diff --git a/libcontainer/configs/device_defaults.go b/libcontainer/configs/device_defaults.go +deleted file mode 100644 +index e4f423c523ff..000000000000 +--- a/libcontainer/configs/device_defaults.go ++++ /dev/null +@@ -1,111 +0,0 @@ +-// +build linux +- +-package configs +- +-var ( +- // DefaultSimpleDevices are devices that are to be both allowed and created. +- DefaultSimpleDevices = []*Device{ +- // /dev/null and zero +- { +- Path: "/dev/null", +- Type: 'c', +- Major: 1, +- Minor: 3, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- { +- Path: "/dev/zero", +- Type: 'c', +- Major: 1, +- Minor: 5, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- +- { +- Path: "/dev/full", +- Type: 'c', +- Major: 1, +- Minor: 7, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- +- // consoles and ttys +- { +- Path: "/dev/tty", +- Type: 'c', +- Major: 5, +- Minor: 0, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- +- // /dev/urandom,/dev/random +- { +- Path: "/dev/urandom", +- Type: 'c', +- Major: 1, +- Minor: 9, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- { +- Path: "/dev/random", +- Type: 'c', +- Major: 1, +- Minor: 8, +- Permissions: "rwm", +- FileMode: 0666, +- }, +- } +- DefaultAllowedDevices = append([]*Device{ +- // allow mknod for any device +- { +- Type: 'c', +- Major: Wildcard, +- Minor: Wildcard, +- Permissions: "m", +- }, +- { +- Type: 'b', +- Major: Wildcard, +- Minor: Wildcard, +- Permissions: "m", +- }, +- +- { +- Path: "/dev/console", +- Type: 'c', +- Major: 5, +- Minor: 1, +- Permissions: "rwm", +- }, +- // /dev/pts/ - pts namespaces are "coming soon" +- { +- Path: "", +- Type: 'c', +- Major: 136, +- Minor: Wildcard, +- Permissions: "rwm", +- }, +- { +- Path: "", +- Type: 'c', +- Major: 5, +- Minor: 2, +- Permissions: "rwm", +- }, +- +- // tuntap +- { +- Path: "", +- Type: 'c', +- Major: 10, +- Minor: 200, +- Permissions: "rwm", +- }, +- }, DefaultSimpleDevices...) +- DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) +-) +diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go +index fe70c937aad9..e59f29588164 100644 +--- a/libcontainer/container_linux.go ++++ b/libcontainer/container_linux.go +@@ -1812,27 +1812,11 @@ func (c *linuxContainer) runType() (Status, error) { + } + + func (c *linuxContainer) isPaused() (bool, error) { +- fcg := c.cgroupManager.GetPaths()["freezer"] +- if fcg == "" { +- // A container doesn't have a freezer cgroup +- return false, nil +- } +- pausedState := "FROZEN" +- filename := "freezer.state" +- if cgroups.IsCgroup2UnifiedMode() { +- filename = "cgroup.freeze" +- pausedState = "1" +- } +- +- data, err := ioutil.ReadFile(filepath.Join(fcg, filename)) ++ state, err := c.cgroupManager.GetFreezerState() + if err != nil { +- // If freezer cgroup is not mounted, the container would just be not paused. +- if os.IsNotExist(err) || err == syscall.ENODEV { +- return false, nil +- } +- return false, newSystemErrorWithCause(err, "checking if container is paused") ++ return false, err + } +- return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil ++ return state == configs.Frozen, nil + } + + func (c *linuxContainer) currentState() (*State, error) { +diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go +index f8af05d75fb0..2d972212ce02 100644 +--- a/libcontainer/container_linux_test.go ++++ b/libcontainer/container_linux_test.go +@@ -61,10 +61,15 @@ func (m *mockCgroupManager) GetUnifiedPath() (string, error) { + func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { + return nil + } ++ + func (m *mockCgroupManager) GetCgroups() (*configs.Cgroup, error) { + return nil, nil + } + ++func (m *mockCgroupManager) GetFreezerState() (configs.FreezerState, error) { ++ return configs.Thawed, nil ++} ++ + func (m *mockIntelRdtManager) Apply(pid int) error { + return nil + } +diff --git a/libcontainer/devices/devices.go b/libcontainer/devices/devices.go +index 5dabe06cef5b..702f913ec942 100644 +--- a/libcontainer/devices/devices.go ++++ b/libcontainer/devices/devices.go +@@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) { + } + + var ( ++ devType configs.DeviceType ++ mode = stat.Mode + devNumber = uint64(stat.Rdev) + major = unix.Major(devNumber) + minor = unix.Minor(devNumber) + ) +- if major == 0 { +- return nil, ErrNotADevice +- } +- +- var ( +- devType rune +- mode = stat.Mode +- ) + switch { + case mode&unix.S_IFBLK == unix.S_IFBLK: +- devType = 'b' ++ devType = configs.BlockDevice + case mode&unix.S_IFCHR == unix.S_IFCHR: +- devType = 'c' ++ devType = configs.CharDevice ++ case mode&unix.S_IFIFO == unix.S_IFIFO: ++ devType = configs.FifoDevice ++ default: ++ return nil, ErrNotADevice + } + return &configs.Device{ +- Type: devType, +- Path: path, +- Major: int64(major), +- Minor: int64(minor), +- Permissions: permissions, +- FileMode: os.FileMode(mode), +- Uid: stat.Uid, +- Gid: stat.Gid, ++ DeviceRule: configs.DeviceRule{ ++ Type: devType, ++ Major: int64(major), ++ Minor: int64(minor), ++ Permissions: configs.DevicePermissions(permissions), ++ }, ++ Path: path, ++ FileMode: os.FileMode(mode), ++ Uid: stat.Uid, ++ Gid: stat.Gid, + }, nil + } + +diff --git a/libcontainer/integration/template_test.go b/libcontainer/integration/template_test.go +index 5f7cab53b67c..30503a98bb99 100644 +--- a/libcontainer/integration/template_test.go ++++ b/libcontainer/integration/template_test.go +@@ -2,6 +2,7 @@ package integration + + import ( + "github.com/opencontainers/runc/libcontainer/configs" ++ "github.com/opencontainers/runc/libcontainer/specconv" + + "golang.org/x/sys/unix" + ) +@@ -20,7 +21,10 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + // it uses a network strategy of just setting a loopback interface + // and the default setup for devices + func newTemplateConfig(rootfs string) *configs.Config { +- allowAllDevices := false ++ var allowedDevices []*configs.DeviceRule ++ for _, device := range specconv.AllowedDevices { ++ allowedDevices = append(allowedDevices, &device.DeviceRule) ++ } + return &configs.Config{ + Rootfs: rootfs, + Capabilities: &configs.Capabilities{ +@@ -116,8 +120,7 @@ func newTemplateConfig(rootfs string) *configs.Config { + Path: "integration/test", + Resources: &configs.Resources{ + MemorySwappiness: nil, +- AllowAllDevices: &allowAllDevices, +- AllowedDevices: configs.DefaultAllowedDevices, ++ Devices: allowedDevices, + }, + }, + MaskPaths: []string{ +@@ -127,7 +130,7 @@ func newTemplateConfig(rootfs string) *configs.Config { + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, +- Devices: configs.DefaultAutoCreatedDevices, ++ Devices: specconv.AllowedDevices, + Hostname: "integration", + Mounts: []*configs.Mount{ + { +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index 106c4c2b98bf..4285eb8acaaa 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -624,11 +624,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error { + + // Creates the device node in the rootfs of the container. + func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { ++ if node.Path == "" { ++ // The node only exists for cgroup reasons, ignore it here. ++ return nil ++ } + dest := filepath.Join(rootfs, node.Path) + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } +- + if bind { + return bindMountDeviceNode(dest, node) + } +@@ -646,16 +649,20 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { + func mknodDevice(dest string, node *configs.Device) error { + fileMode := node.FileMode + switch node.Type { +- case 'c', 'u': +- fileMode |= unix.S_IFCHR +- case 'b': ++ case configs.BlockDevice: + fileMode |= unix.S_IFBLK +- case 'p': ++ case configs.CharDevice: ++ fileMode |= unix.S_IFCHR ++ case configs.FifoDevice: + fileMode |= unix.S_IFIFO + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } +- if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil { ++ dev, err := node.Mkdev() ++ if err != nil { ++ return err ++ } ++ if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil { + return err + } + return unix.Chown(dest, int(node.Uid), int(node.Gid)) +diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go +index d9e73c46be10..69bbc7c0b889 100644 +--- a/libcontainer/specconv/spec_linux.go ++++ b/libcontainer/specconv/spec_linux.go +@@ -43,104 +43,147 @@ var mountPropagationMapping = map[string]int{ + "": 0, + } + +-// AllowedDevices is exposed for devicefilter_test.go ++// AllowedDevices is the set of devices which are automatically included for ++// all containers. ++// ++// XXX (cyphar) ++// This behaviour is at the very least "questionable" (if not outright ++// wrong) according to the runtime-spec. ++// ++// Yes, we have to include certain devices other than the ones the user ++// specifies, but several devices listed here are not part of the spec ++// (including "mknod for any device"?!). In addition, these rules are ++// appended to the user-provided set which means that users *cannot disable ++// this behaviour*. ++// ++// ... unfortunately I'm too scared to change this now because who knows how ++// many people depend on this (incorrect and arguably insecure) behaviour. + var AllowedDevices = []*configs.Device{ + // allow mknod for any device + { +- Type: 'c', +- Major: wildcard, +- Minor: wildcard, +- Permissions: "m", +- Allow: true, +- }, +- { +- Type: 'b', +- Major: wildcard, +- Minor: wildcard, +- Permissions: "m", +- Allow: true, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: "m", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/null", +- Major: 1, +- Minor: 3, +- Permissions: "rwm", +- Allow: true, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.BlockDevice, ++ Major: configs.Wildcard, ++ Minor: configs.Wildcard, ++ Permissions: "m", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/random", +- Major: 1, +- Minor: 8, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/null", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 3, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/full", +- Major: 1, +- Minor: 7, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/random", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 8, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/tty", +- Major: 5, +- Minor: 0, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/full", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 7, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/zero", +- Major: 1, +- Minor: 5, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/tty", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 5, ++ Minor: 0, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Type: 'c', +- Path: "/dev/urandom", +- Major: 1, +- Minor: 9, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/zero", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 5, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Path: "/dev/console", +- Type: 'c', +- Major: 5, +- Minor: 1, +- Permissions: "rwm", +- Allow: true, ++ Path: "/dev/urandom", ++ FileMode: 0666, ++ Uid: 0, ++ Gid: 0, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 1, ++ Minor: 9, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + // /dev/pts/ - pts namespaces are "coming soon" + { +- Path: "", +- Type: 'c', +- Major: 136, +- Minor: wildcard, +- Permissions: "rwm", +- Allow: true, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 136, ++ Minor: configs.Wildcard, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + { +- Path: "", +- Type: 'c', +- Major: 5, +- Minor: 2, +- Permissions: "rwm", +- Allow: true, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 5, ++ Minor: 2, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + // tuntap + { +- Path: "", +- Type: 'c', +- Major: 10, +- Minor: 200, +- Permissions: "rwm", +- Allow: true, ++ DeviceRule: configs.DeviceRule{ ++ Type: configs.CharDevice, ++ Major: 10, ++ Minor: 200, ++ Permissions: "rwm", ++ Allow: true, ++ }, + }, + } + +@@ -342,251 +385,198 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { + + // In rootless containers, any attempt to make cgroup changes is likely to fail. + // libcontainer will validate this but ignores the error. +- c.Resources.AllowedDevices = AllowedDevices + if spec.Linux != nil { + r := spec.Linux.Resources +- if r == nil { +- return c, nil +- } +- for i, d := range spec.Linux.Resources.Devices { +- var ( +- t = "a" +- major = int64(-1) +- minor = int64(-1) +- ) +- if d.Type != "" { +- t = d.Type +- } +- if d.Major != nil { +- major = *d.Major +- } +- if d.Minor != nil { +- minor = *d.Minor +- } +- if d.Access == "" { +- return nil, fmt.Errorf("device access at %d field cannot be empty", i) +- } +- dt, err := stringToCgroupDeviceRune(t) +- if err != nil { +- return nil, err +- } +- dd := &configs.Device{ +- Type: dt, +- Major: major, +- Minor: minor, +- Permissions: d.Access, +- Allow: d.Allow, +- } +- c.Resources.Devices = append(c.Resources.Devices, dd) +- } +- if r.Memory != nil { +- if r.Memory.Limit != nil { +- c.Resources.Memory = *r.Memory.Limit +- } +- if r.Memory.Reservation != nil { +- c.Resources.MemoryReservation = *r.Memory.Reservation +- } +- if r.Memory.Swap != nil { +- c.Resources.MemorySwap = *r.Memory.Swap +- } +- if r.Memory.Kernel != nil { +- c.Resources.KernelMemory = *r.Memory.Kernel +- } +- if r.Memory.KernelTCP != nil { +- c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP +- } +- if r.Memory.Swappiness != nil { +- c.Resources.MemorySwappiness = r.Memory.Swappiness +- } +- if r.Memory.DisableOOMKiller != nil { +- c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller +- } +- } +- if r.CPU != nil { +- if r.CPU.Shares != nil { +- c.Resources.CpuShares = *r.CPU.Shares +- } +- if r.CPU.Quota != nil { +- c.Resources.CpuQuota = *r.CPU.Quota +- } +- if r.CPU.Period != nil { +- c.Resources.CpuPeriod = *r.CPU.Period +- } +- if r.CPU.RealtimeRuntime != nil { +- c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime +- } +- if r.CPU.RealtimePeriod != nil { +- c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod +- } +- if r.CPU.Cpus != "" { +- c.Resources.CpusetCpus = r.CPU.Cpus ++ if r != nil { ++ for i, d := range spec.Linux.Resources.Devices { ++ var ( ++ t = "a" ++ major = int64(-1) ++ minor = int64(-1) ++ ) ++ if d.Type != "" { ++ t = d.Type ++ } ++ if d.Major != nil { ++ major = *d.Major ++ } ++ if d.Minor != nil { ++ minor = *d.Minor ++ } ++ if d.Access == "" { ++ return nil, fmt.Errorf("device access at %d field cannot be empty", i) ++ } ++ dt, err := stringToCgroupDeviceRune(t) ++ if err != nil { ++ return nil, err ++ } ++ c.Resources.Devices = append(c.Resources.Devices, &configs.DeviceRule{ ++ Type: dt, ++ Major: major, ++ Minor: minor, ++ Permissions: configs.DevicePermissions(d.Access), ++ Allow: d.Allow, ++ }) + } +- if r.CPU.Mems != "" { +- c.Resources.CpusetMems = r.CPU.Mems ++ if r.Memory != nil { ++ if r.Memory.Limit != nil { ++ c.Resources.Memory = *r.Memory.Limit ++ } ++ if r.Memory.Reservation != nil { ++ c.Resources.MemoryReservation = *r.Memory.Reservation ++ } ++ if r.Memory.Swap != nil { ++ c.Resources.MemorySwap = *r.Memory.Swap ++ } ++ if r.Memory.Kernel != nil { ++ c.Resources.KernelMemory = *r.Memory.Kernel ++ } ++ if r.Memory.KernelTCP != nil { ++ c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP ++ } ++ if r.Memory.Swappiness != nil { ++ c.Resources.MemorySwappiness = r.Memory.Swappiness ++ } ++ if r.Memory.DisableOOMKiller != nil { ++ c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller ++ } + } +- } +- if r.Pids != nil { +- c.Resources.PidsLimit = r.Pids.Limit +- } +- if r.BlockIO != nil { +- if r.BlockIO.Weight != nil { +- c.Resources.BlkioWeight = *r.BlockIO.Weight ++ if r.CPU != nil { ++ if r.CPU.Shares != nil { ++ c.Resources.CpuShares = *r.CPU.Shares ++ } ++ if r.CPU.Quota != nil { ++ c.Resources.CpuQuota = *r.CPU.Quota ++ } ++ if r.CPU.Period != nil { ++ c.Resources.CpuPeriod = *r.CPU.Period ++ } ++ if r.CPU.RealtimeRuntime != nil { ++ c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime ++ } ++ if r.CPU.RealtimePeriod != nil { ++ c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod ++ } ++ if r.CPU.Cpus != "" { ++ c.Resources.CpusetCpus = r.CPU.Cpus ++ } ++ if r.CPU.Mems != "" { ++ c.Resources.CpusetMems = r.CPU.Mems ++ } + } +- if r.BlockIO.LeafWeight != nil { +- c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight ++ if r.Pids != nil { ++ c.Resources.PidsLimit = r.Pids.Limit + } +- if r.BlockIO.WeightDevice != nil { +- for _, wd := range r.BlockIO.WeightDevice { +- var weight, leafWeight uint16 +- if wd.Weight != nil { +- weight = *wd.Weight +- } +- if wd.LeafWeight != nil { +- leafWeight = *wd.LeafWeight ++ if r.BlockIO != nil { ++ if r.BlockIO.Weight != nil { ++ c.Resources.BlkioWeight = *r.BlockIO.Weight ++ } ++ if r.BlockIO.LeafWeight != nil { ++ c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight ++ } ++ if r.BlockIO.WeightDevice != nil { ++ for _, wd := range r.BlockIO.WeightDevice { ++ var weight, leafWeight uint16 ++ if wd.Weight != nil { ++ weight = *wd.Weight ++ } ++ if wd.LeafWeight != nil { ++ leafWeight = *wd.LeafWeight ++ } ++ weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) ++ c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) + } +- weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) +- c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) + } +- } +- if r.BlockIO.ThrottleReadBpsDevice != nil { +- for _, td := range r.BlockIO.ThrottleReadBpsDevice { +- rate := td.Rate +- throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) +- c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) ++ if r.BlockIO.ThrottleReadBpsDevice != nil { ++ for _, td := range r.BlockIO.ThrottleReadBpsDevice { ++ rate := td.Rate ++ throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) ++ c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) ++ } + } +- } +- if r.BlockIO.ThrottleWriteBpsDevice != nil { +- for _, td := range r.BlockIO.ThrottleWriteBpsDevice { +- rate := td.Rate +- throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) +- c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) ++ if r.BlockIO.ThrottleWriteBpsDevice != nil { ++ for _, td := range r.BlockIO.ThrottleWriteBpsDevice { ++ rate := td.Rate ++ throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) ++ c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) ++ } + } +- } +- if r.BlockIO.ThrottleReadIOPSDevice != nil { +- for _, td := range r.BlockIO.ThrottleReadIOPSDevice { +- rate := td.Rate +- throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) +- c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) ++ if r.BlockIO.ThrottleReadIOPSDevice != nil { ++ for _, td := range r.BlockIO.ThrottleReadIOPSDevice { ++ rate := td.Rate ++ throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) ++ c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) ++ } + } +- } +- if r.BlockIO.ThrottleWriteIOPSDevice != nil { +- for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { +- rate := td.Rate +- throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) +- c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) ++ if r.BlockIO.ThrottleWriteIOPSDevice != nil { ++ for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { ++ rate := td.Rate ++ throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) ++ c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) ++ } + } + } +- } +- for _, l := range r.HugepageLimits { +- c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ +- Pagesize: l.Pagesize, +- Limit: l.Limit, +- }) +- } +- if r.Network != nil { +- if r.Network.ClassID != nil { +- c.Resources.NetClsClassid = *r.Network.ClassID +- } +- for _, m := range r.Network.Priorities { +- c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ +- Interface: m.Name, +- Priority: int64(m.Priority), ++ for _, l := range r.HugepageLimits { ++ c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ ++ Pagesize: l.Pagesize, ++ Limit: l.Limit, + }) + } ++ if r.Network != nil { ++ if r.Network.ClassID != nil { ++ c.Resources.NetClsClassid = *r.Network.ClassID ++ } ++ for _, m := range r.Network.Priorities { ++ c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ ++ Interface: m.Name, ++ Priority: int64(m.Priority), ++ }) ++ } ++ } + } + } +- // append the default allowed devices to the end of the list +- c.Resources.Devices = append(c.Resources.Devices, AllowedDevices...) ++ // Append the default allowed devices to the end of the list. ++ // XXX: Really this should be prefixed... ++ for _, device := range AllowedDevices { ++ c.Resources.Devices = append(c.Resources.Devices, &device.DeviceRule) ++ } + return c, nil + } + +-func stringToCgroupDeviceRune(s string) (rune, error) { ++func stringToCgroupDeviceRune(s string) (configs.DeviceType, error) { + switch s { + case "a": +- return 'a', nil ++ return configs.WildcardDevice, nil + case "b": +- return 'b', nil ++ return configs.BlockDevice, nil + case "c": +- return 'c', nil ++ return configs.CharDevice, nil + default: + return 0, fmt.Errorf("invalid cgroup device type %q", s) + } + } + +-func stringToDeviceRune(s string) (rune, error) { ++func stringToDeviceRune(s string) (configs.DeviceType, error) { + switch s { + case "p": +- return 'p', nil +- case "u": +- return 'u', nil ++ return configs.FifoDevice, nil ++ case "u", "c": ++ return configs.CharDevice, nil + case "b": +- return 'b', nil +- case "c": +- return 'c', nil ++ return configs.BlockDevice, nil + default: + return 0, fmt.Errorf("invalid device type %q", s) + } + } + + func createDevices(spec *specs.Spec, config *configs.Config) error { +- // add whitelisted devices +- config.Devices = []*configs.Device{ +- { +- Type: 'c', +- Path: "/dev/null", +- Major: 1, +- Minor: 3, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, +- { +- Type: 'c', +- Path: "/dev/random", +- Major: 1, +- Minor: 8, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, +- { +- Type: 'c', +- Path: "/dev/full", +- Major: 1, +- Minor: 7, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, +- { +- Type: 'c', +- Path: "/dev/tty", +- Major: 5, +- Minor: 0, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, +- { +- Type: 'c', +- Path: "/dev/zero", +- Major: 1, +- Minor: 5, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, +- { +- Type: 'c', +- Path: "/dev/urandom", +- Major: 1, +- Minor: 9, +- FileMode: 0666, +- Uid: 0, +- Gid: 0, +- }, ++ // Add default set of devices. ++ for _, device := range AllowedDevices { ++ if device.Path != "" { ++ config.Devices = append(config.Devices, device) ++ } + } +- // merge in additional devices from the spec ++ // Merge in additional devices from the spec. + if spec.Linux != nil { + for _, d := range spec.Linux.Devices { + var uid, gid uint32 +@@ -606,10 +596,12 @@ func createDevices(spec *specs.Spec, config *configs.Config) error { + filemode = *d.FileMode + } + device := &configs.Device{ +- Type: dt, ++ DeviceRule: configs.DeviceRule{ ++ Type: dt, ++ Major: d.Major, ++ Minor: d.Minor, ++ }, + Path: d.Path, +- Major: d.Major, +- Minor: d.Minor, + FileMode: filemode, + Uid: uid, + Gid: gid, +-- +2.26.2 + diff --git a/runc.changes b/runc.changes index 8b6ae98..5ebb3e5 100644 --- a/runc.changes +++ b/runc.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Wed May 13 06:49:44 UTC 2020 - Aleksa Sarai + +- Backport https://github.com/opencontainers/runc/pull/2391 to help fix + bsc#1168481. + + bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch + ------------------------------------------------------------------- Tue Apr 14 10:16:21 UTC 2020 - Ralf Haferkamp diff --git a/runc.spec b/runc.spec index 471b945..d5cb9fa 100644 --- a/runc.spec +++ b/runc.spec @@ -51,8 +51,10 @@ Source0: https://github.com/opencontainers/runc/releases/download/v%{_ver Source1: https://github.com/opencontainers/runc/releases/download/v%{_version}/runc.tar.xz.asc#/runc-%{_version}.tar.xz.asc Source2: runc.keyring Source3: runc-rpmlintrc -# FIX-UPSTREAM: cherry pick of https://github.com/opencontainers/runc/pull/1807 +# FIX-UPSTREAM: Backport of https://github.com/opencontainers/runc/pull/1807. bsc#1149954 Patch0: bsc1149954-0001-sd-notify-do-not-hang-when-NOTIFY_SOCKET-is-used-wit.patch +# FIX-UPSTREAM: Backport of https://github.com/opencontainers/runc/pull/2391. bsc#1168481 +Patch1: bsc1168481-0001-cgroup-devices-major-cleanups-and-minimal-transition.patch BuildRequires: fdupes BuildRequires: go-go-md2man BuildRequires: golang(API) >= %{go_version} @@ -86,7 +88,10 @@ Test package for runc. It contains the source code and the tests. %prep %setup -q -n %{name}-%{_version} -%patch0 -p1 +# bsc#1149954 +%patch0 -p1 +# bsc#1168481 +%patch1 -p1 %build # Do not use symlinks. If you want to run the unit tests for this package at