Skip to content

Debug rawhide failures #2625

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from

Conversation

avagin
Copy link
Member

@avagin avagin commented Mar 18, 2025

No description provided.

@avagin avagin marked this pull request as draft March 18, 2025 06:58
Signed-off-by: Andrei Vagin <[email protected]>
@avagin
Copy link
Member Author

avagin commented Mar 18, 2025

Vagrant Fedora Rawhide fails with this error:

b'(00.003569) Handling VMA with the following smaps entry: 7f35b45e8000-7f35b4de9000 rw-p 00000000 00:00 0'^M
...
b'(00.005116) pagemap-cache: 65: filling VMA 7f35b45e8000-7f35b4de9000 (8196K) [l:7f35b4400000 h:7f35b4600000]'^M
b'(00.005123) page-pipe: Add iov to page pipe (1 iovs, 3/2723 total)'^M
b'(00.005125) page-pipe: Add iov to page pipe (2 iovs, 4/2723 total)'^M
...
b'(00.005298) page-pipe: \t\t0x7f35b45e8000 1'^M
...
b'pie: 65: __fetched msg: 66 0 0'^M
b'(00.005371) Sent msg to daemon 66 0 0'^M
b'pie: 65: Error (criu/pie/parasite.c:99): iov_base 0x7f35b45e8000 iov_len 0x1000'^M
b"pie: 65: Error (criu/pie/parasite.c:100): Can't splice pages to pipe (-14/2/3)"^M
b'pie: 65: __sent ack msg: 66 66 -1'^M

@adrianreber @rst0git do you know anything about this issue?

@mihalicyn
Copy link
Member

It looks like it's something to do with glibc and pthreads. I've got a local reproducer with Fedora Rawhide userspace.

Also, if we look on the failed tests:

################### 20 TEST(S) FAILED (TOTAL 473/SKIPPED 41) ###################
 * zdtm/static/cgroup_threads(h)
 * zdtm/static/cgroupv2_01(ns)
 * zdtm/static/clone_fs(h)
 * zdtm/static/different_creds(h)
 * zdtm/static/fpu03(h)
 * zdtm/static/futex(ns)
 * zdtm/static/futex-rl(ns)
 * zdtm/static/pthread00(ns)
 * zdtm/static/pthread00-pac(h)
 * zdtm/static/pthread01(h)
 * zdtm/static/pthread_timers(h)
 * zdtm/static/pthread_timers_h(h)
 * zdtm/static/seccomp_filter_threads(h)
 * zdtm/static/seccomp_filter_tsync(ns)
 * zdtm/static/sigaltstack(ns)
 * zdtm/static/sigpending(uns)
 * zdtm/static/socket_aio(uns)
 * zdtm/static/thread_different_uid_gid(uns)
 * zdtm/transition/file_aio(ns)
 * zdtm/transition/rseq01(h)
##################################### FAIL ####################################

each of these test is using pthread and only these!

@mihalicyn
Copy link
Member

I suspect this bminor/glibc@a6fbe36 change can be somehow connected with these failures. Digging deeper...

@mihalicyn
Copy link
Member

mihalicyn commented Mar 18, 2025

Confirmation:

bash-5.2# strace -f ./test/zdtm/static/sigaltstack --pidfile=test.pid --outfile=test.out
execve("./test/zdtm/static/sigaltstack", ["./test/zdtm/static/sigaltstack", "--pidfile=test.pid", "--outfile=test.out"], 0x7ffffa374ce8 /* 12 vars */) = 0
brk(NULL)                               = 0x39e38000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0eac6fa000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=11517, ...}) = 0
mmap(NULL, 11517, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f0eac6f7000
close(3)                                = 0
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\00007\0\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
fstat(3, {st_mode=S_IFREG|0755, st_size=2447544, ...}) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2038864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f0eac505000
mmap(0x7f0eac674000, 479232, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16f000) = 0x7f0eac674000
mmap(0x7f0eac6e9000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e3000) = 0x7f0eac6e9000
mmap(0x7f0eac6ef000, 31824, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f0eac6ef000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0eac502000
arch_prctl(ARCH_SET_FS, 0x7f0eac502740) = 0
set_tid_address(0x7f0eac502a10)         = 976
set_robust_list(0x7f0eac502a20, 24)     = 0
rseq(0x7f0eac502680, 0x20, 0, 0x53053053) = 0
mprotect(0x7f0eac6e9000, 16384, PROT_READ) = 0
mprotect(0x408000, 4096, PROT_READ)     = 0
mprotect(0x7f0eac737000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7f0eac6f7000, 11517)           = 0
prctl(PR_SET_DUMPABLE, SUID_DUMP_USER)  = 0
rt_sigaction(SIGTERM, {sa_handler=0x4012f0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f0eac51ed70}, NULL, 8) = 0
rt_sigaction(SIGCHLD, {sa_handler=0x4012f0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f0eac51ed70}, NULL, 8) = 0
access("test.out", F_OK)                = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, ".", O_RDONLY)         = 3
openat(AT_FDCWD, "test.out.inprogress", O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644) = 4
dup2(4, 2)                              = 2
dup2(4, 1)                              = 1
close(4)                                = 0
openat(AT_FDCWD, "/dev/null", O_RDWR)   = 4
dup2(4, 0)                              = 0
close(4)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0) = 0x7f0eac6f9000
rt_sigprocmask(SIG_BLOCK, ~[], [], 8)   = 0
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLDstrace: Process 977 attached
, child_tidptr=0x7f0eac502a10) = 977
[pid   976] rt_sigprocmask(SIG_SETMASK, [],  <unfinished ...>
[pid   977] set_robust_list(0x7f0eac502a20, 24) = 0
[pid   976] <... rt_sigprocmask resumed>NULL, 8) = 0
[pid   977] rt_sigprocmask(SIG_SETMASK, [],  <unfinished ...>
[pid   976] futex(0x7f0eac6f9000, FUTEX_WAIT, 0, NULL <unfinished ...>
[pid   977] <... rt_sigprocmask resumed>NULL, 8) = 0
[pid   977] setsid()                    = 977
[pid   977] getpid()                    = 977
[pid   977] rt_sigaction(SIGCHLD, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f0eac51ed70}, NULL, 8) = 0
[pid   977] pipe2([4, 5], 0)            = 0
[pid   977] rt_sigaction(SIGUSR1, {sa_handler=0x400e30, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART, sa_restorer=0x7f0eac51ed70}, NULL, 8) = 0
[pid   977] rt_sigaction(SIGRT_1, {sa_handler=0x7f0eac573bf0, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f0eac51ed70}, NULL, 8) = 0
[pid   977] rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
[pid   977] mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f0eabd01000
[pid   977] madvise(0x7f0eabd01000, 4096, MADV_GUARD_INSTALL) = 0
[pid   977] getrandom("\x1b\x99\xe6\x9a\x1f\xb1\x06\xea", 8, GRND_NONBLOCK) = 8
[pid   977] brk(NULL)                   = 0x39e38000
[pid   977] brk(0x39e59000)             = 0x39e59000
[pid   977] rt_sigprocmask(SIG_BLOCK, ~[], [], 8) = 0
[pid   977] clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f0eac501990, parent_tid=0x7f0eac501990, exit_signal=0, stack=0x7f0eabd01000, stack_size=0x7fff80, tls=0x7f0eac5016c0}strace: Process 978 attached
 => {parent_tid=[978]}, 88) = 978
[pid   978] rseq(0x7f0eac501600, 0x20, 0, 0x53053053 <unfinished ...>
[pid   977] rt_sigprocmask(SIG_SETMASK, [],  <unfinished ...>
[pid   978] <... rseq resumed>)         = 0
[pid   977] <... rt_sigprocmask resumed>NULL, 8) = 0
[pid   978] set_robust_list(0x7f0eac5019a0, 24 <unfinished ...>
[pid   977] sigaltstack({ss_sp=0x409480, ss_flags=0, ss_size=18424},  <unfinished ...>
[pid   978] <... set_robust_list resumed>) = 0
[pid   977] <... sigaltstack resumed>NULL) = 0
[pid   978] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid   977] write(5, "\1\0\0\0", 4 <unfinished ...>
[pid   978] rt_sigaction(SIGUSR2, {sa_handler=0x400eb0, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART, sa_restorer=0x7f0eac51ed70},  <unfinished ...>
[pid   977] <... write resumed>)        = 4

we clearly have madvise(0x7f0eabd01000, 4096, MADV_GUARD_INSTALL) = 0 call.

More info:

bash-5.2# cat /proc/988/maps
00400000-00404000 r-xp 00000000 08:02 33377                              /criu/test/zdtm/static/sigaltstack
00404000-00408000 r--p 00004000 08:02 33377                              /criu/test/zdtm/static/sigaltstack
00408000-00409000 r--p 00007000 08:02 33377                              /criu/test/zdtm/static/sigaltstack
00409000-0040a000 rw-p 00008000 08:02 33377                              /criu/test/zdtm/static/sigaltstack
0040a000-00413000 rw-p 00000000 00:00 0 
1f212000-1f233000 rw-p 00000000 00:00 0                                  [heap]
7fef1c000000-7fef1c021000 rw-p 00000000 00:00 0 
7fef1c021000-7fef20000000 ---p 00000000 00:00 0 
7fef2187a000-7fef2207b000 rw-p 00000000 00:00 0 
7fef2207b000-7fef2207e000 rw-p 00000000 00:00 0 
7fef2207e000-7fef221ed000 r-xp 00000000 00:3b 38629                      /usr/lib64/libc.so.6
7fef221ed000-7fef22262000 r--p 0016f000 00:3b 38629                      /usr/lib64/libc.so.6
7fef22262000-7fef22266000 r--p 001e3000 00:3b 38629                      /usr/lib64/libc.so.6
7fef22266000-7fef22268000 rw-p 001e7000 00:3b 38629                      /usr/lib64/libc.so.6
7fef22268000-7fef22270000 rw-p 00000000 00:00 0 
7fef22272000-7fef22273000 rw-s 00000000 00:01 2494                       /dev/zero (deleted)
7fef22273000-7fef22275000 rw-p 00000000 00:00 0 
7fef22275000-7fef22277000 r--p 00000000 00:00 0                          [vvar]
7fef22277000-7fef22279000 r--p 00000000 00:00 0                          [vvar_vclock]
7fef22279000-7fef2227b000 r-xp 00000000 00:00 0                          [vdso]
7fef2227b000-7fef222a5000 r-xp 00000000 00:3b 38601                      /usr/lib64/ld-linux-x86-64.so.2
7fef222a5000-7fef222b0000 r--p 0002a000 00:3b 38601                      /usr/lib64/ld-linux-x86-64.so.2
7fef222b0000-7fef222b2000 r--p 00035000 00:3b 38601                      /usr/lib64/ld-linux-x86-64.so.2
7fef222b2000-7fef222b3000 rw-p 00037000 00:3b 38601                      /usr/lib64/ld-linux-x86-64.so.2
7fef222b3000-7fef222b4000 rw-p 00000000 00:00 0 
7ffe49714000-7ffe49757000 rw-p 00000000 00:00 0                          [stack]
ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]

bash-5.2# cat /proc/988/smaps

<...>

7fef2187a000-7fef2207b000 rw-p 00000000 00:00 0 
Size:               8196 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Rss:                   8 kB
Pss:                   8 kB
Pss_Dirty:             8 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:         8 kB
Referenced:            8 kB
Anonymous:             8 kB
KSM:                   0 kB
LazyFree:              0 kB
AnonHugePages:         0 kB
ShmemPmdMapped:        0 kB
FilePmdMapped:         0 kB
Shared_Hugetlb:        0 kB
Private_Hugetlb:       0 kB
Swap:                  0 kB
SwapPss:               0 kB
Locked:                0 kB
THPeligible:           0
ProtectionKey:         0
VmFlags: rd wr mr mw me ac sd nh 


bash-5.2# cat sigaltstack.strace.log | grep madvise
988   madvise(0x7fef2187a000, 4096, MADV_GUARD_INSTALL) = 0

@rst0git
Copy link
Member

rst0git commented Mar 19, 2025

@mihalicyn Thank you for investigating! I was able to replicate the error locally with 6.13.7-300.vanilla.fc41.x86_64 kernel and Fedora Rawhide container.

@avagin avagin closed this Mar 24, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants