Skip to content

Commit 7168d59

Browse files
committed
runc create/run: warn on rootless + shared pidns + no cgroup
Shared pid namespace means `runc kill` (or `runc delete -f`) have to kill all container process, not just init. To do so, it needs a cgroup to read the PIDs from. If there is no cgroup, it's impossible to kill all container processes. Therefore, such configuration is bad and should not be allowed. To keep backward compatibility, though, let's merely warn about this for now. Alas, the only way to know if cgroup access is available is by returning an error from Manager.Apply. Amend fs cgroup managers to do so (systemd doesn't need it, since v1 can't work with rootless, and cgroup v2 does not have a special rootless case). Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent fb11156 commit 7168d59

File tree

5 files changed

+53
-15
lines changed

5 files changed

+53
-15
lines changed

libcontainer/cgroups/cgroups.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ var (
1111
// is not configured to set device rules.
1212
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
1313

14+
// ErrRootless is returned by [Manager.Apply] when there is an error
15+
// creating cgroup directory, and cgroup.Rootless is set. In general,
16+
// this error is to be ignored.
17+
ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")
18+
1419
// DevicesSetV1 and DevicesSetV2 are functions to set devices for
1520
// cgroup v1 and v2, respectively. Unless
1621
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]

libcontainer/cgroups/fs/fs.go

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,7 @@ func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) {
8888
// sense of the word). This includes EROFS (which for an unprivileged user is
8989
// basically a permission error) and EACCES (for similar reasons) as well as
9090
// the normal EPERM.
91-
func isIgnorableError(rootless bool, err error) bool {
92-
// We do not ignore errors if we are root.
93-
if !rootless {
94-
return false
95-
}
91+
func isIgnorableError(err error) bool {
9692
// Is it an ordinary EPERM?
9793
if errors.Is(err, os.ErrPermission) {
9894
return true
@@ -105,7 +101,7 @@ func isIgnorableError(rootless bool, err error) bool {
105101
return false
106102
}
107103

108-
func (m *Manager) Apply(pid int) (err error) {
104+
func (m *Manager) Apply(pid int) (retErr error) {
109105
m.mu.Lock()
110106
defer m.mu.Unlock()
111107

@@ -128,15 +124,16 @@ func (m *Manager) Apply(pid int) (err error) {
128124
// Cases where limits for the subsystem have been set are handled
129125
// later by Set, which fails with a friendly error (see
130126
// if path == "" in Set).
131-
if isIgnorableError(c.Rootless, err) && c.Path == "" {
127+
if c.Rootless && c.Path == "" && isIgnorableError(err) {
128+
retErr = cgroups.ErrRootless
132129
delete(m.paths, name)
133130
continue
134131
}
135132
return err
136133
}
137134

138135
}
139-
return nil
136+
return retErr
140137
}
141138

142139
func (m *Manager) Destroy() error {

libcontainer/cgroups/fs2/fs2.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,11 @@ func (m *Manager) Apply(pid int) error {
6868
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
6969
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
7070
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
71-
if m.config.Rootless {
72-
if m.config.Path == "" {
73-
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
74-
return nil
75-
}
76-
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
71+
if m.config.Rootless && m.config.Path == "" {
72+
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
73+
return cgroups.ErrRootless
7774
}
75+
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
7876
}
7977
return err
8078
}

libcontainer/process_linux.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,18 @@ func (p *initProcess) start() (retErr error) {
580580
// cgroup. We don't need to worry about not doing this and not being root
581581
// because we'd be using the rootless cgroup manager in that case.
582582
if err := p.manager.Apply(p.pid()); err != nil {
583-
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
583+
if errors.Is(err, cgroups.ErrRootless) {
584+
// ErrRootless is to be ignored except when the
585+
// container doesn't have private pidns.
586+
if !p.config.Config.Namespaces.IsPrivate(configs.NEWPID) {
587+
// TODO: make this an error in runc 1.3.
588+
logrus.Warn("Creating a rootless container with no cgroup and no private pid namespace. " +
589+
"Such configuration is strongly discouraged (as it is impossible to properly kill all container's processes) " +
590+
"and will result in an error in a future runc version.")
591+
}
592+
} else {
593+
return fmt.Errorf("unable to apply cgroup configuration: %w", err)
594+
}
584595
}
585596
if p.intelRdtManager != nil {
586597
if err := p.intelRdtManager.Apply(p.pid()); err != nil {

tests/integration/create.bats

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,30 @@ function teardown() {
8181

8282
testcontainer test_busybox running
8383
}
84+
85+
# https://github.com/opencontainers/runc/issues/4394#issuecomment-2334926257
86+
@test "runc create [shared pidns + rootless]" {
87+
# Remove pidns so it's shared with the host.
88+
update_config ' .linux.namespaces -= [{"type": "pid"}]'
89+
if [ $EUID -ne 0 ]; then
90+
if rootless_cgroup; then
91+
set_cgroups_path
92+
fi
93+
# Can't mount real /proc when rootless + no pidns,
94+
# so change it to a bind-mounted one from the host.
95+
update_config ' .mounts |= map((select(.type == "proc")
96+
| .type = "none"
97+
| .source = "/proc"
98+
| .options = ["rbind", "nosuid", "nodev", "noexec"]
99+
) // .)'
100+
fi
101+
102+
exp="Such configuration is strongly discouraged"
103+
runc create --console-socket "$CONSOLE_SOCKET" test
104+
[ "$status" -eq 0 ]
105+
if [ $EUID -ne 0 ] && ! rootless_cgroup; then
106+
[[ "$output" = *"$exp"* ]]
107+
else
108+
[[ "$output" != *"$exp"* ]]
109+
fi
110+
}

0 commit comments

Comments
 (0)