
Commit a361b71

restorer: Add a lock around cgroupd communication.
Threads are put into cgroups through the cgroupd thread, which communicates
with the other threads over a socketpair. Previously, each thread received a
dup'd copy of the socket and did the following:

    sendmsg(socket_dup_fd, my_cgroup_set);
    // Wait for an ack.
    while (1) {
        recvmsg(socket_dup_fd, &h, MSG_PEEK);
        if (h.pid != my_pid)
            continue;
        recvmsg(socket_dup_fd, &h, 0);
        break;
    }
    close(socket_dup_fd);

When restoring many threads, many of them would be spinning in the above loop
waiting for their PID to appear. In my test case, restoring a process with an
11.5G heap and 491 threads could take anywhere between 10 and 60 seconds to
complete.

To avoid the spinning, we drop the loop and MSG_PEEK, and add a lock around
the above code. This does not decrease parallelism, as the cgroupd daemon
uses a single thread anyway.

With the lock in place, the same restore consistently takes around 10 seconds
on my machine (Thinkpad P14s, AMD Ryzen 8840HS).

There is a similar "daemon" thread for user namespaces. That one is already
protected by a similar userns_sync_lock in __userns_call().

Fixes #2614

Signed-off-by: Han-Wen Nienhuys <[email protected]>
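For contrast, here is a condensed sketch of the new per-thread sequence,
in the same simplified pseudocode style as the snippet above (the real
implementation is in the criu/pie/restorer.c hunks below):

    mutex_lock(&task_entries->cgroupd_sync_lock);

    // Ask cgroupd to move this thread into its cgroup set.
    err = send_cg_set(cgroupd_sk, my_cgroup_set);

    // Only one request/reply is in flight per lock holder, so a plain
    // recvmsg() suffices; no MSG_PEEK polling loop is needed.
    if (!err)
        err = recv_cg_set_restore_ack(cgroupd_sk);

    mutex_unlock(&task_entries->cgroupd_sync_lock);
    close(cgroupd_sk);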
1 parent: 5b4c819

3 files changed: +31 -32 lines

criu/cr-restore.c (+1)

@@ -2329,6 +2329,7 @@ int prepare_task_entries(void)
 	task_entries->nr_helpers = 0;
 	futex_set(&task_entries->start, CR_STATE_FAIL);
 	mutex_init(&task_entries->userns_sync_lock);
+	mutex_init(&task_entries->cgroupd_sync_lock);
 	mutex_init(&task_entries->last_pid_mutex);
 
 	return 0;

criu/include/rst_info.h (+1)

@@ -14,6 +14,7 @@ struct task_entries {
 	futex_t start;
 	atomic_t cr_err;
 	mutex_t userns_sync_lock;
+	mutex_t cgroupd_sync_lock;
 	mutex_t last_pid_mutex;
 };
 
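The new mutex_t lives in struct task_entries, which CRIU maps into shared
memory visible to every restored task, so the lock serializes across
processes as well as threads. As a generic POSIX analogue (not CRIU's
pie-safe mutex_t), a mutex placed in shared memory has to be marked
process-shared, roughly like this:

    #include <pthread.h>
    #include <sys/mman.h>

    /* Allocate a mutex in anonymous shared memory and mark it
     * process-shared, so that tasks forked later can take it too. */
    static pthread_mutex_t *shared_mutex_new(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t *m;

        m = mmap(NULL, sizeof(*m), PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (m == MAP_FAILED)
            return NULL;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
        pthread_mutex_init(m, &attr);
        pthread_mutexattr_destroy(&attr);
        return m;
    }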

criu/pie/restorer.c (+29 -32)

@@ -704,9 +704,8 @@ static int send_cg_set(int sk, int cg_set)
 }
 
 /*
- * As this socket is shared among threads, recvmsg(MSG_PEEK)
- * from the socket until getting its own thread id as an
- * acknowledge of successful threaded cgroup fixup
+ * As the cgroupd socket is shared among threads and processes, this
+ * should be called with task_entries->cgroupd_sync_lock held.
  */
 static int recv_cg_set_restore_ack(int sk)
 {
@@ -719,33 +718,22 @@ static int recv_cg_set_restore_ack(int sk)
 	h.msg_control = cmsg;
 	h.msg_controllen = sizeof(cmsg);
 
-	while (1) {
-		ret = sys_recvmsg(sk, &h, MSG_PEEK);
-		if (ret < 0) {
-			pr_err("Unable to peek from cgroupd %d\n", ret);
-			return -1;
-		}
-
-		if (h.msg_controllen != sizeof(cmsg)) {
-			pr_err("The message from cgroupd is truncated\n");
-			return -1;
-		}
-
-		ch = CMSG_FIRSTHDR(&h);
-		cred = (struct ucred *)CMSG_DATA(ch);
-		if (cred->pid != sys_gettid())
-			continue;
+	ret = sys_recvmsg(sk, &h, 0);
+	if (ret < 0) {
+		pr_err("Unable to receive from cgroupd %d\n", ret);
+		return -1;
+	}
 
-		/*
-		 * Actual remove message from recv queue of socket
-		 */
-		ret = sys_recvmsg(sk, &h, 0);
-		if (ret < 0) {
-			pr_err("Unable to receive from cgroupd %d\n", ret);
-			return -1;
-		}
+	if (h.msg_controllen != sizeof(cmsg)) {
+		pr_err("The message from cgroupd is truncated\n");
+		return -1;
+	}
 
-		break;
+	ch = CMSG_FIRSTHDR(&h);
+	cred = (struct ucred *)CMSG_DATA(ch);
+	if (cred->pid != sys_gettid()) {
+		pr_err("cred pid %d != gettid\n", cred->pid);
+		return -1;
 	}
 	return 0;
 }
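The sender pid checked above arrives as an SCM_CREDENTIALS control message.
As a standalone illustration using the standard glibc API (not the pie-safe
sys_recvmsg() used above), receiving and validating such credentials looks
roughly like this, assuming SO_PASSCRED is enabled on the socket:

    #define _GNU_SOURCE
    #include <sys/types.h>
    #include <sys/socket.h>

    /* Receive one message and return the kernel-verified pid of the
     * sender, taken from its SCM_CREDENTIALS control message. */
    static int recv_sender_pid(int sk, pid_t *pid)
    {
        char buf[1];
        char cbuf[CMSG_SPACE(sizeof(struct ucred))];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        struct msghdr h = {
            .msg_iov = &iov,
            .msg_iovlen = 1,
            .msg_control = cbuf,
            .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *ch;

        if (recvmsg(sk, &h, 0) < 0)
            return -1;

        ch = CMSG_FIRSTHDR(&h);
        if (!ch || ch->cmsg_level != SOL_SOCKET ||
            ch->cmsg_type != SCM_CREDENTIALS)
            return -1;

        *pid = ((struct ucred *)CMSG_DATA(ch))->pid;
        return 0;
    }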
@@ -782,12 +770,21 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
 	rt_sigframe = (void *)&args->mz->rt_sigframe;
 
 	if (args->cg_set != -1) {
+		int err = 0;
+
+		mutex_lock(&task_entries_local->cgroupd_sync_lock);
+
 		pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set);
-		if (send_cg_set(args->cgroupd_sk, args->cg_set))
-			goto core_restore_end;
-		if (recv_cg_set_restore_ack(args->cgroupd_sk))
-			goto core_restore_end;
+
+		err = send_cg_set(args->cgroupd_sk, args->cg_set);
+		if (!err)
+			err = recv_cg_set_restore_ack(args->cgroupd_sk);
+
+		mutex_unlock(&task_entries_local->cgroupd_sync_lock);
 		sys_close(args->cgroupd_sk);
+
+		if (err)
+			goto core_restore_end;
 	}
 
 	if (restore_thread_common(args))
