|
| 1 | +From c1bf9b90ce85dd46f958b9f0e60143d58f885963 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Ido Schimmel < [email protected]> |
| 3 | +Date: Tue, 1 Oct 2024 14:20:35 +0300 |
| 4 | +Subject: [PATCH] devlink: Fix RCU stall when unregistering a devlink instance |
| 5 | + |
| 6 | +When a devlink instance is unregistered the following happens (among |
| 7 | +other things): |
| 8 | + |
| 9 | +t0 - The instance is marked with 'DEVLINK_UNREGISTERING'. |
| 10 | +t1 - Blocking until an RCU grace period passes. |
| 11 | +t2 - The 'DEVLINK_UNREGISTERING' mark is cleared from the instance. |
| 12 | + |
| 13 | +When iterating over devlink instances (f.e., when requesting a dump of |
| 14 | +available instances) and encountering an instance that is currently |
| 15 | +being unregistered, the current code will loop around until the |
| 16 | +'DEVLINK_UNREGISTERING' mark is cleared. |
| 17 | + |
| 18 | +The iteration over devlink instances happens in an RCU critical section, |
| 19 | +so if the instance that is currently being unregistered was encountered |
| 20 | +between t0 and t1, the system will deadlock and RCU stalls will be |
| 21 | +reported [1]. The task unregistering the instance will forever wait for an |
| 22 | +RCU grace period to pass and the task iterating over the instances will |
| 23 | +forever wait for the mark to be cleared. |
| 24 | + |
| 25 | +The issue can be reliably reproduced by increasing the time window |
| 26 | +between t0 and t1 (used a 60 seconds sleep) and running the following |
| 27 | +reproducer [2]. |
| 28 | + |
| 29 | +Fix by skipping over instances that are currently being unregistered. |
| 30 | + |
| 31 | +[1] |
| 32 | +rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: |
| 33 | +rcu: Tasks blocked on level-0 rcu_node (CPUs 0-7): P344 |
| 34 | + (detected by 4, t=26002 jiffies, g=5773, q=12 ncpus=8) |
| 35 | +task:devlink state:R running task stack:25568 pid:344 ppid:260 flags:0x00004002 |
| 36 | +[...] |
| 37 | +Call Trace: |
| 38 | + xa_get_mark+0x184/0x3e0 |
| 39 | + devlinks_xa_find_get.constprop.0+0xc6/0x2e0 |
| 40 | + devlink_nl_cmd_get_dumpit+0x105/0x3f0 |
| 41 | + netlink_dump+0x568/0xff0 |
| 42 | + __netlink_dump_start+0x651/0x900 |
| 43 | + genl_family_rcv_msg_dumpit+0x201/0x340 |
| 44 | + genl_rcv_msg+0x573/0x780 |
| 45 | + netlink_rcv_skb+0x15f/0x430 |
| 46 | + genl_rcv+0x29/0x40 |
| 47 | + netlink_unicast+0x546/0x800 |
| 48 | + netlink_sendmsg+0x958/0xe60 |
| 49 | + __sys_sendto+0x3a2/0x480 |
| 50 | + __x64_sys_sendto+0xe1/0x1b0 |
| 51 | + do_syscall_64+0x35/0x80 |
| 52 | + entry_SYSCALL_64_after_hwframe+0x68/0xd2 |
| 53 | + |
| 54 | +[2] |
| 55 | + # echo 10 > /sys/bus/netdevsim/new_device |
| 56 | + # echo 10 > /sys/bus/netdevsim/del_device & |
| 57 | + # devlink dev |
| 58 | + |
| 59 | +Fixes: c2368b19807a ("net: devlink: introduce "unregistering" mark and use it during devlinks iteration") |
| 60 | +Reported-by: Vivek Reddy Karri < [email protected]> |
| 61 | +Signed-off-by: Ido Schimmel < [email protected]> |
| 62 | +--- |
| 63 | + net/devlink/leftover.c | 5 +++-- |
| 64 | + 1 file changed, 3 insertions(+), 2 deletions(-) |
| 65 | + |
| 66 | +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c |
| 67 | +index 032c7af06..c6f781a08 100644 |
| 68 | +--- a/net/devlink/leftover.c |
| 69 | ++++ b/net/devlink/leftover.c |
| 70 | +@@ -301,6 +301,9 @@ devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, |
| 71 | + if (!devlink) |
| 72 | + goto unlock; |
| 73 | + |
| 74 | ++ /* For a possible retry, the xa_find_after() should be always used */ |
| 75 | ++ xa_find_fn = xa_find_after; |
| 76 | ++ |
| 77 | + /* In case devlink_unregister() was already called and "unregistering" |
| 78 | + * mark was set, do not allow to get a devlink reference here. |
| 79 | + * This prevents live-lock of devlink_unregister() wait for completion. |
| 80 | +@@ -308,8 +311,6 @@ devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, |
| 81 | + if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING)) |
| 82 | + goto retry; |
| 83 | + |
| 84 | +- /* For a possible retry, the xa_find_after() should be always used */ |
| 85 | +- xa_find_fn = xa_find_after; |
| 86 | + if (!devlink_try_get(devlink)) |
| 87 | + goto retry; |
| 88 | + if (!net_eq(devlink_net(devlink), net)) { |
| 89 | +-- |
| 90 | +2.43.2 |
| 91 | + |
0 commit comments