Skip to content

Commit df4eae4

Browse files
rootless: fix /sys/fs/cgroup mounts
It was found that rootless runc makes `/sys/fs/cgroup` writable in the following conditions: 1. when runc is executed inside the user namespace, and the config.json does not specify the cgroup namespace to be unshared (e.g., `(docker|podman|nerdctl) run --cgroupns=host`, with Rootless Docker/Podman/nerdctl) 2. or, when runc is executed outside the user namespace, and `/sys` is mounted with `rbind, ro` (e.g., `runc spec --rootless`; this condition is very rare) A container may gain write access to the user-owned cgroup hierarchy `/sys/fs/cgroup/user.slice/...` on the host. Other users' cgroup hierarchies are not affected. To fix the issue, this commit does the following: 1. Remount `/sys/fs/cgroup` to apply `MS_RDONLY` when it is being bind-mounted 2. Mask `/sys/fs/cgroup` when the bind source is unavailable Fix CVE-2023-25809 (GHSA-m8cg-xc2p-r3fc) Co-authored-by: Kir Kolyshkin <[email protected]> Signed-off-by: Akihiro Suda <[email protected]>
1 parent d5be3e2 commit df4eae4

File tree

2 files changed

+51
-19
lines changed

2 files changed

+51
-19
lines changed

libcontainer/rootfs_linux.go

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -306,26 +306,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
306306
if err := os.MkdirAll(dest, 0o755); err != nil {
307307
return err
308308
}
309-
return utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
310-
if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
311-
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
312-
if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) {
313-
src := fs2.UnifiedMountpoint
314-
if c.cgroupns && c.cgroup2Path != "" {
315-
// Emulate cgroupns by bind-mounting
316-
// the container cgroup path rather than
317-
// the whole /sys/fs/cgroup.
318-
src = c.cgroup2Path
319-
}
320-
err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
321-
if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
322-
err = nil
323-
}
324-
}
325-
return err
326-
}
327-
return nil
309+
err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
310+
return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data)
328311
})
312+
if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) {
313+
return err
314+
}
315+
316+
// When we are in UserNS but CgroupNS is not unshared, we cannot mount
317+
// cgroup2 (#2158), so fall back to bind mount.
318+
bindM := &configs.Mount{
319+
Device: "bind",
320+
Source: fs2.UnifiedMountpoint,
321+
Destination: m.Destination,
322+
Flags: unix.MS_BIND | m.Flags,
323+
PropagationFlags: m.PropagationFlags,
324+
}
325+
if c.cgroupns && c.cgroup2Path != "" {
326+
// Emulate cgroupns by bind-mounting the container cgroup path
327+
// rather than the whole /sys/fs/cgroup.
328+
bindM.Source = c.cgroup2Path
329+
}
330+
// mountToRootfs() handles remounting for MS_RDONLY.
331+
// No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate().
332+
err = mountToRootfs(bindM, c)
333+
if c.rootlessCgroups && errors.Is(err, unix.ENOENT) {
334+
// ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed
335+
// outside the userns+mountns.
336+
//
337+
// Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
338+
// with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
339+
err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
340+
return maskPath(procfd, c.label)
341+
})
342+
}
343+
return err
329344
}
330345

331346
func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {

tests/integration/mounts.bats

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,20 @@ function teardown() {
6363
runc run test_busybox
6464
[ "$status" -eq 0 ]
6565
}
66+
67+
# https://github.com/opencontainers/runc/security/advisories/GHSA-m8cg-xc2p-r3fc
68+
@test "runc run [ro /sys/fs/cgroup mount]" {
69+
# With cgroup namespace
70+
update_config '.process.args |= ["sh", "-euc", "for f in `grep /sys/fs/cgroup /proc/mounts | awk \"{print \\\\$2}\"| uniq`; do grep -w $f /proc/mounts | tail -n1; done"]'
71+
runc run test_busybox
72+
[ "$status" -eq 0 ]
73+
[ "${#lines[@]}" -ne 0 ]
74+
for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
75+
76+
# Without cgroup namespace
77+
update_config '.linux.namespaces -= [{"type": "cgroup"}]'
78+
runc run test_busybox
79+
[ "$status" -eq 0 ]
80+
[ "${#lines[@]}" -ne 0 ]
81+
for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done
82+
}

0 commit comments

Comments
 (0)