Skip to content

Commit 8ec02ea

Browse files
kolyshkin authored and cyphar committed
nsexec: retry unshare on EINVAL
Older kernels may return EINVAL on unshare when a process is reading runc's /proc/$PID/status or /proc/$PID/maps. This was fixed by kernel commit 12c641ab8270f ("unshare: Unsharing a thread does not require unsharing a vm") in Linux v4.3.

For CentOS 7, the fix was backported to CentOS 7.7 (kernel 3.10.0-1062).

To work around this kernel bug, let's retry on EINVAL a few times.

Reported-by: zzyyzte <[email protected]>
Signed-off-by: Kir Kolyshkin <[email protected]>
(cherry picked from commit cecb039)
Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 059d773 commit 8ec02ea

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

libcontainer/nsenter/nsexec.c

+22-8
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,25 @@ void send_mountsources(int sockfd, pid_t child, char *mountsources, size_t mount
832832
bail("failed to close container mount namespace fd %d", container_mntns_fd);
833833
}
834834

835+
void try_unshare(int flags, const char *msg)
836+
{
837+
write_log(DEBUG, "unshare %s", msg);
838+
/*
839+
* Kernels prior to v4.3 may return EINVAL on unshare when another process
840+
* reads runc's /proc/$PID/status or /proc/$PID/maps. To work around this,
841+
* retry on EINVAL a few times.
842+
*/
843+
int retries = 5;
844+
for (; retries > 0; retries--) {
845+
if (unshare(flags) == 0) {
846+
return;
847+
}
848+
if (errno != EINVAL)
849+
break;
850+
}
851+
bail("failed to unshare %s", msg);
852+
}
853+
835854
void nsexec(void)
836855
{
837856
int pipenum;
@@ -1170,9 +1189,7 @@ void nsexec(void)
11701189
* problem.
11711190
*/
11721191
if (config.cloneflags & CLONE_NEWUSER) {
1173-
write_log(DEBUG, "unshare user namespace");
1174-
if (unshare(CLONE_NEWUSER) < 0)
1175-
bail("failed to unshare user namespace");
1192+
try_unshare(CLONE_NEWUSER, "user namespace");
11761193
config.cloneflags &= ~CLONE_NEWUSER;
11771194

11781195
/*
@@ -1224,9 +1241,7 @@ void nsexec(void)
12241241
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
12251242
* was broken, so we'll just do it the long way anyway.
12261243
*/
1227-
write_log(DEBUG, "unshare remaining namespace (except cgroupns)");
1228-
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
1229-
bail("failed to unshare remaining namespaces (except cgroupns)");
1244+
try_unshare(config.cloneflags & ~CLONE_NEWCGROUP, "remaining namespaces (except cgroupns)");
12301245

12311246
/* Ask our parent to send the mount sources fds. */
12321247
if (config.mountsources) {
@@ -1344,8 +1359,7 @@ void nsexec(void)
13441359
}
13451360

13461361
if (config.cloneflags & CLONE_NEWCGROUP) {
1347-
if (unshare(CLONE_NEWCGROUP) < 0)
1348-
bail("failed to unshare cgroup namespace");
1362+
try_unshare(CLONE_NEWCGROUP, "cgroup namespace");
13491363
}
13501364

13511365
write_log(DEBUG, "signal completion to stage-0");

0 commit comments

Comments
 (0)