Skip to content

Commit 7fc8312

Browse files
prattmic and cherrymui
authored and committed
[release-branch.go1.23] os: add clone(CLONE_PIDFD) check to pidfd feature check
clone(CLONE_PIDFD) was added in Linux 5.2 and pidfd_open was added in Linux 5.3. Thus our feature check for pidfd_open should be sufficient to ensure that clone(CLONE_PIDFD) works. Unfortunately, some alternative Linux implementations may not follow this strict ordering. For example, QEMU 7.2 (Dec 2022) added pidfd_open, but clone(CLONE_PIDFD) was only added in QEMU 8.0 (Apr 2023). Debian bookworm provides QEMU 7.2 by default. For #68976. Fixes #69259. Change-Id: Ie3f3dc51f0cd76944871bf98690abf59f68fd7bf Reviewed-on: https://go-review.googlesource.com/c/go/+/592078 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Cherry Mui <[email protected]> (cherry picked from commit 7a5fc9b) Reviewed-on: https://go-review.googlesource.com/c/go/+/612218
1 parent cc16cdf commit 7fc8312

File tree

2 files changed

+102
-3
lines changed

2 files changed

+102
-3
lines changed

src/os/pidfd_linux.go

+21-3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
// v5.3: pidfd_open syscall, clone3 syscall;
99
// v5.4: P_PIDFD idtype support for waitid syscall;
1010
// v5.6: pidfd_getfd syscall.
11+
//
12+
// N.B. Alternative Linux implementations may not follow this ordering. e.g.,
13+
// QEMU user mode 7.2 added pidfd_open, but CLONE_PIDFD was not added until
14+
// 8.0.
1115

1216
package os
1317

@@ -140,9 +144,9 @@ func pidfdWorks() bool {
140144

141145
var checkPidfdOnce = sync.OnceValue(checkPidfd)
142146

143-
// checkPidfd checks whether all required pidfd-related syscalls work.
144-
// This consists of pidfd_open and pidfd_send_signal syscalls, and waitid
145-
// syscall with idtype of P_PIDFD.
147+
// checkPidfd checks whether all required pidfd-related syscalls work. This
148+
// consists of pidfd_open and pidfd_send_signal syscalls, waitid syscall with
149+
// idtype of P_PIDFD, and clone(CLONE_PIDFD).
146150
//
147151
// Reasons for non-working pidfd syscalls include an older kernel and an
148152
// execution environment in which the above system calls are restricted by
@@ -180,9 +184,23 @@ func checkPidfd() error {
180184
return NewSyscallError("pidfd_send_signal", err)
181185
}
182186

187+
// Verify that clone(CLONE_PIDFD) works.
188+
//
189+
// This shouldn't be necessary since pidfd_open was added in Linux 5.3,
190+
// after CLONE_PIDFD in Linux 5.2, but some alternative Linux
191+
// implementations may not adhere to this ordering.
192+
if err := checkClonePidfd(); err != nil {
193+
return err
194+
}
195+
183196
return nil
184197
}
185198

199+
// checkClonePidfd verifies that clone(CLONE_PIDFD) works, returning nil on
// success and a non-nil error otherwise. It has no body here: the
// implementation is provided by package syscall (os_checkClonePidfd) and is
// bound to this declaration via go:linkname.
200+
//
201+
//go:linkname checkClonePidfd
202+
func checkClonePidfd() error
203+
186204
// Provided by runtime.
187205
//
188206
//go:linkname ignoreSIGSYS

src/syscall/exec_linux.go

+81
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package syscall
88

99
import (
10+
errpkg "errors"
1011
"internal/itoa"
1112
"runtime"
1213
"unsafe"
@@ -328,6 +329,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att
328329
if clone3 != nil {
329330
pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3), 0)
330331
} else {
332+
// N.B. Keep in sync with doCheckClonePidfd.
331333
flags |= uintptr(SIGCHLD)
332334
if runtime.GOARCH == "s390x" {
333335
// On Linux/s390, the first two arguments of clone(2) are swapped.
@@ -743,3 +745,82 @@ func forkAndExecFailureCleanup(attr *ProcAttr, sys *SysProcAttr) {
743745
*sys.PidFD = -1
744746
}
745747
}
748+
749+
// checkClonePidfd verifies that clone(CLONE_PIDFD) works by actually doing a
750+
// clone.
751+
//
752+
//go:linkname os_checkClonePidfd os.checkClonePidfd
753+
func os_checkClonePidfd() error {
754+
pidfd := int32(-1)
755+
pid, errno := doCheckClonePidfd(&pidfd)
756+
if errno != 0 {
757+
return errno
758+
}
759+
760+
if pidfd == -1 {
761+
// Bad: CLONE_PIDFD failed to provide a pidfd. Reap the process
762+
// before returning.
763+
764+
var err error
765+
for {
766+
var status WaitStatus
767+
_, err = Wait4(int(pid), &status, 0, nil)
768+
if err != EINTR {
769+
break
770+
}
771+
}
772+
if err != nil {
773+
return err
774+
}
775+
776+
return errpkg.New("clone(CLONE_PIDFD) failed to return pidfd")
777+
}
778+
779+
// Good: CLONE_PIDFD provided a pidfd. Reap the process and close the
780+
// pidfd.
781+
defer Close(int(pidfd))
782+
783+
for {
784+
const _P_PIDFD = 3
785+
_, _, errno = Syscall6(SYS_WAITID, _P_PIDFD, uintptr(pidfd), 0, WEXITED, 0, 0)
786+
if errno != EINTR {
787+
break
788+
}
789+
}
790+
if errno != 0 {
791+
return errno
792+
}
793+
794+
return nil
795+
}
796+
797+
// doCheckClonePidfd implements the actual clone call of os_checkClonePidfd and
798+
// child execution. This is a separate function so we can separate the child's
799+
// and parent's stack frames if we're using vfork.
800+
//
// pidfd is the location whose address is handed to clone as its third
// argument; the caller pre-sets it to -1 and afterwards checks whether a
// pidfd was stored there.
//
// In the parent, the results of rawVforkSyscall are returned unchanged: pid
// is the child's pid and errno is zero on success, or errno is non-zero if
// the clone failed. In the child (pid == 0 and errno == 0) this function
// never returns; it only issues the exit syscall.
//
801+
// This is go:noinline because the point is to keep the stack frames of this
802+
// and os_checkClonePidfd separate.
803+
//
804+
//go:noinline
805+
func doCheckClonePidfd(pidfd *int32) (pid uintptr, errno Errno) {
806+
// SIGCHLD is OR'ed into the clone flags alongside the CLONE_* bits.
// N.B. forkAndExecInChild1 carries a "keep in sync" note referring to
// this function.
flags := uintptr(CLONE_VFORK|CLONE_VM|CLONE_PIDFD|SIGCHLD)
807+
if runtime.GOARCH == "s390x" {
808+
// On Linux/s390, the first two arguments of clone(2) are swapped.
809+
pid, errno = rawVforkSyscall(SYS_CLONE, 0, flags, uintptr(unsafe.Pointer(pidfd)))
810+
} else {
811+
pid, errno = rawVforkSyscall(SYS_CLONE, flags, 0, uintptr(unsafe.Pointer(pidfd)))
812+
}
813+
if errno != 0 || pid != 0 {
814+
// If we're in the parent, we must return immediately
815+
// so we're not in the same stack frame as the child.
816+
// This can at most use the return PC, which the child
817+
// will not modify, and the results of
818+
// rawVforkSyscall, which must have been written after
819+
// the child was replaced.
820+
return
821+
}
822+
823+
// Child: do nothing except exit with status 0. The exit syscall is
// issued in a loop so that control never falls out of this function in
// the child, even if the syscall were to return.
for {
824+
RawSyscall(SYS_EXIT, 0, 0, 0)
825+
}
826+
}

0 commit comments

Comments
 (0)