Skip to content

Commit 3bad7d6

Browse files
committed
zed: Add synchronous zedlets
Historically, ZED has blindly spawned off zedlets in parallel and never worried about their completion order. This means that you can potentially have zedlets for event number 2 starting before zedlets for event number 1 have finished. Most of the time this is fine, and it actually helps a lot when the system is getting spammed with hundreds of events. However, there are times when you want your zedlets to be executed in sequence with the event ID. That is where synchronous zedlets come in. ZED will wait for all previously spawned zedlets to finish before running a synchronous zedlet. Synchronous zedlets are guaranteed to be the only zedlet running. No other zedlets may run in parallel with a synchronous zedlet. Users should be careful to only use synchronous zedlets when needed, since they decrease parallelism. To make a zedlet synchronous, simply add a "-sync-" immediately following the event name in the zedlet's file name: EVENT_NAME-sync-ZEDLETNAME.sh For example, if you wanted a synchronous statechange script: statechange-sync-myzedlet.sh Signed-off-by: Tony Hutter <[email protected]>
1 parent b2284ae commit 3bad7d6

File tree

5 files changed

+254
-22
lines changed

5 files changed

+254
-22
lines changed

cmd/zed/zed_exec.c

Lines changed: 93 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -196,38 +196,33 @@ _nop(int sig)
196196
(void) sig;
197197
}
198198

199-
static void *
200-
_reap_children(void *arg)
199+
static void
200+
wait_for_children(boolean_t do_pause, boolean_t wait)
201201
{
202-
(void) arg;
203-
struct launched_process_node node, *pnode;
204202
pid_t pid;
205-
int status;
206203
struct rusage usage;
207-
struct sigaction sa = {};
208-
209-
(void) sigfillset(&sa.sa_mask);
210-
(void) sigdelset(&sa.sa_mask, SIGCHLD);
211-
(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
212-
213-
(void) sigemptyset(&sa.sa_mask);
214-
sa.sa_handler = _nop;
215-
sa.sa_flags = SA_NOCLDSTOP;
216-
(void) sigaction(SIGCHLD, &sa, NULL);
204+
int status;
205+
struct launched_process_node node, *pnode;
217206

218207
for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
219208
(void) pthread_mutex_lock(&_launched_processes_lock);
220-
pid = wait4(0, &status, WNOHANG, &usage);
221-
209+
pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage);
222210
if (pid == 0 || pid == (pid_t)-1) {
223211
(void) pthread_mutex_unlock(&_launched_processes_lock);
224-
if (pid == 0 || errno == ECHILD)
225-
pause();
226-
else if (errno != EINTR)
212+
if ((pid == 0) || (errno == ECHILD)) {
213+
if (do_pause)
214+
pause();
215+
} else if (errno != EINTR)
227216
zed_log_msg(LOG_WARNING,
228217
"Failed to wait for children: %s",
229218
strerror(errno));
219+
zed_log_msg(LOG_INFO, "badpid exit %d", pid);
220+
if (!do_pause)
221+
return;
222+
230223
} else {
224+
zed_log_msg(LOG_INFO, "normal pid %d", pid);
225+
231226
memset(&node, 0, sizeof (node));
232227
node.pid = pid;
233228
pnode = avl_find(&_launched_processes, &node, NULL);
@@ -278,6 +273,25 @@ _reap_children(void *arg)
278273
}
279274
}
280275

276+
}
277+
278+
static void *
279+
_reap_children(void *arg)
280+
{
281+
(void) arg;
282+
struct sigaction sa = {};
283+
284+
(void) sigfillset(&sa.sa_mask);
285+
(void) sigdelset(&sa.sa_mask, SIGCHLD);
286+
(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
287+
288+
(void) sigemptyset(&sa.sa_mask);
289+
sa.sa_handler = _nop;
290+
sa.sa_flags = SA_NOCLDSTOP;
291+
(void) sigaction(SIGCHLD, &sa, NULL);
292+
293+
wait_for_children(B_TRUE, B_FALSE);
294+
281295
return (NULL);
282296
}
283297

@@ -306,6 +320,45 @@ zed_exec_fini(void)
306320
_reap_children_tid = (pthread_t)-1;
307321
}
308322

323+
/*
324+
* Check if the zedlet name indicates if it is a synchronous zedlet
325+
*
326+
* Synchronous zedlets have a "-sync-" immediately following the event name in
327+
* their zedlet filename, like:
328+
*
329+
* EVENT_NAME-sync-ZEDLETNAME.sh
330+
*
331+
* For example, if you wanted a synchronous statechange script:
332+
*
333+
* statechange-sync-myzedlet.sh
334+
*
335+
* Synchronous zedlets are guaranteed to be the only zedlet running. No other
336+
* zedlets may run in parallel with a synchronous zedlet. A synchronously
337+
* zedlet will wait for all previously spawned zedlets to finish before running.
338+
* Users should be careful to only use synchronous zedlets when needed, since
339+
* they decrease parallelism.
340+
*/
341+
static boolean_t
342+
zedlet_is_sync(const char *zedlet, const char *event)
343+
{
344+
const char *sync_str = "-sync-";
345+
size_t sync_str_len;
346+
size_t zedlet_len;
347+
size_t event_len;
348+
349+
sync_str_len = strlen(sync_str);
350+
zedlet_len = strlen(zedlet);
351+
event_len = strlen(event);
352+
353+
if (event_len + sync_str_len >= zedlet_len)
354+
return (B_FALSE);
355+
356+
if (strncmp(&zedlet[event_len], sync_str, sync_str_len) == 0)
357+
return (B_TRUE);
358+
359+
return (B_FALSE);
360+
}
361+
309362
/*
310363
* Process the event [eid] by synchronously invoking all zedlets with a
311364
* matching class prefix.
@@ -368,9 +421,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
368421
z = zed_strings_next(zcp->zedlets)) {
369422
for (csp = class_strings; *csp; csp++) {
370423
n = strlen(*csp);
371-
if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
424+
if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) {
425+
boolean_t is_sync = zedlet_is_sync(z, *csp);
426+
427+
if (is_sync) {
428+
/*
429+
* Wait for previous zedlets to
430+
* finish
431+
*/
432+
wait_for_children(B_FALSE, B_TRUE);
433+
}
434+
372435
_zed_exec_fork_child(eid, zcp->zedlet_dir,
373436
z, e, zcp->zevent_fd, zcp->do_foreground);
437+
438+
if (is_sync) {
439+
/*
440+
* Wait for sync zedlet we just launched
441+
* to finish.
442+
*/
443+
wait_for_children(B_FALSE, B_TRUE);
444+
}
445+
}
374446
}
375447
}
376448
free(e);

man/man8/zed.8.in

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ Multiple ZEDLETs may be invoked for a given zevent.
158158
ZEDLETs are executables invoked by the ZED in response to a given zevent.
159159
They should be written under the presumption they can be invoked concurrently,
160160
and they should use appropriate locking to access any shared resources.
161+
The one exception to this is "synchronous zedlets", which are described later
162+
in this page.
161163
Common variables used by ZEDLETs can be stored in the default rc file which
162164
is sourced by scripts; these variables should be prefixed with
163165
.Sy ZED_ .
@@ -233,6 +235,36 @@ and
233235
.Sy ZPOOL .
234236
These variables may be overridden in the rc file.
235237
.
238+
.Sh Synchronous ZEDLETS
239+
ZED's normal behavior is to spawn off zedlets in parallel and ignore their
240+
completion order.
241+
This means that ZED can potentially
242+
have zedlets for event ID number 2 starting before zedlets for event ID number
243+
1 have finished.
244+
Most of the time this is fine, and it actually helps when the system is getting
245+
hammered with hundreds of events.
246+
.Pp
247+
However, there are times when you want your zedlets to be executed in sequence
248+
with the event ID.
249+
That is where synchronous zedlets come in.
250+
.Pp
251+
ZED will wait for all previously spawned zedlets to finish before running
252+
a synchronous zedlet.
253+
Synchronous zedlets are guaranteed to be the only
254+
zedlet running.
255+
No other zedlets may run in parallel with a synchronous zedlet.
256+
Users should be careful to only use synchronous zedlets when needed, since
257+
they decrease parallelism.
258+
.Pp
259+
To make a zedlet synchronous, simply add a "-sync-" immediately following the
260+
event name in the zedlet's file name:
261+
.Pp
262+
.Sy EVENT_NAME-sync-ZEDLETNAME.sh
263+
.Pp
264+
For example, if you wanted a synchronous statechange script:
265+
.Pp
266+
.Sy statechange-sync-myzedlet.sh
267+
.
236268
.Sh FILES
237269
.Bl -tag -width "-c"
238270
.It Pa @sysconfdir@/zfs/zed.d

tests/runfiles/linux.run

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ tags = ['functional', 'direct']
109109
[tests/functional/events:Linux]
110110
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
111111
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
112-
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
112+
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple',
113+
'zed_synchronous_zedlet']
113114
tags = ['functional', 'events']
114115

115116
[tests/functional/fadvise:Linux]

tests/zfs-tests/tests/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
15181518
functional/events/zed_rc_filter.ksh \
15191519
functional/events/zed_slow_io.ksh \
15201520
functional/events/zed_slow_io_many_vdevs.ksh \
1521+
functional/events/zed_synchronous_zedlet.ksh \
15211522
functional/exec/cleanup.ksh \
15221523
functional/exec/exec_001_pos.ksh \
15231524
functional/exec/exec_002_neg.ksh \
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/bin/ksh -p
2+
# SPDX-License-Identifier: CDDL-1.0
3+
#
4+
# CDDL HEADER START
5+
#
6+
# The contents of this file are subject to the terms of the
7+
# Common Development and Distribution License (the "License").
8+
# You may not use this file except in compliance with the License.
9+
#
10+
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11+
# or https://opensource.org/licenses/CDDL-1.0.
12+
# See the License for the specific language governing permissions
13+
# and limitations under the License.
14+
#
15+
# When distributing Covered Code, include this CDDL HEADER in each
16+
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17+
# If applicable, add the following below this CDDL HEADER, with the
18+
# fields enclosed by brackets "[]" replaced with your own identifying
19+
# information: Portions Copyright [yyyy] [name of copyright owner]
20+
#
21+
# CDDL HEADER END
22+
#
23+
24+
#
25+
# Copyright (c) 2025 by Lawrence Livermore National Security, LLC.
26+
#
27+
28+
# DESCRIPTION:
29+
# Verify ZED synchronous zedlets work as expected
30+
#
31+
# STRATEGY:
32+
# 1. Create a scrub_start zedlet that runs quickly
33+
# 2. Create a scrub_start zedlet that runs slowly (takes seconds)
34+
# 3. Create a scrub_finish zedlet that is synchronous
35+
# 4. Scrub the pool
36+
# 5. Verify the synchronous scrub_finish zedlet waited for the scrub_start
37+
# zedlets to finish (including the slow one). If the scrub_finish zedlet
38+
# was not synchronous, it would have completed before the slow scrub_start
39+
# zedlet.
40+
41+
. $STF_SUITE/include/libtest.shlib
42+
. $STF_SUITE/tests/functional/events/events_common.kshlib
43+
44+
verify_runnable "both"
45+
46+
OUR_ZEDLETS="scrub_start-async.sh scrub_start-slow.sh scrub_finish-sync-test.sh"
47+
48+
OUTFILE="$TEST_BASE_DIR/zed_synchronous_zedlet_lines"
49+
TESTPOOL2=testpool2
50+
51+
# Tear down everything this test may have created: stop ZED, remove the test
# zedlets, destroy the scratch pool and delete its backing file and the
# zedlet output file.
#
# This is registered with log_onexit, so it can run after a failure at any
# point in the test.  Use "rm -f" throughout so that zedlets which were never
# written do not make log_must fail before the pool and files are cleaned up.
function cleanup
{
	zed_stop

	for i in $OUR_ZEDLETS ; do
		log_must rm -f $ZEDLET_DIR/$i
	done
	destroy_pool $TESTPOOL2
	log_must rm -f $TEST_BASE_DIR/vdev-file-sync-zedlet
	log_must rm -f $OUTFILE
}
62+
63+
log_assert "Verify ZED synchronous zedlets work as expected"
64+
65+
log_onexit cleanup
66+
67+
# Make a pool
68+
log_must truncate -s 100M $TEST_BASE_DIR/vdev-file-sync-zedlet
69+
log_must zpool create $TESTPOOL2 $TEST_BASE_DIR/vdev-file-sync-zedlet
70+
71+
# Do an initial scrub
72+
log_must zpool scrub -w $TESTPOOL2
73+
74+
log_must zpool events -c
75+
76+
# Create zedlets
77+
cat << EOF > $ZEDLET_DIR/scrub_start-async.sh
78+
#!/bin/ksh -p
79+
echo "\$(date) \$(basename \$0)" >> $OUTFILE
80+
EOF
81+
82+
cat << EOF > $ZEDLET_DIR/scrub_start-slow.sh
83+
#!/bin/ksh -p
84+
sleep 3
85+
echo "\$(date) \$(basename \$0)" >> $OUTFILE
86+
EOF
87+
88+
cat << EOF > $ZEDLET_DIR/scrub_finish-sync-test.sh
89+
#!/bin/ksh -p
90+
echo "\$(date) \$(basename \$0)" >> $OUTFILE
91+
EOF
92+
93+
for i in $OUR_ZEDLETS ; do
94+
chmod +x $ZEDLET_DIR/$i
95+
done
96+
97+
log_must zed_start
98+
99+
# Do a scrub - it should be instantaneous.
100+
log_must zpool scrub -w $TESTPOOL2
101+
102+
# Wait for scrub_finish event to happen for sanity.
103+
log_must file_wait_event $ZED_DEBUG_LOG 'sysevent\.fs\.zfs\.scrub_finish' 10
104+
105+
# At a minimum, scrub_start-slow.sh will take 3 sec to run, so wait for
106+
# 4 sec to be sure.
107+
sleep 4
108+
109+
# If our zedlets were run in the right order, with sync correctly honored, you
110+
# will see this ordering in $OUTFILE:
111+
#
112+
# Thu Apr 10 14:47:00 PDT 2025 scrub_start-async.sh
113+
# Thu Apr 10 14:47:07 PDT 2025 scrub_start-slow.sh
114+
# Thu Apr 10 14:47:08 PDT 2025 scrub_finish-sync-test.sh
115+
#
116+
# Check for this ordering
117+
118+
# Get a list of just the script names in the order they were executed
119+
# from OUTFILE
120+
lines="$(echo $(grep -Eo 'scrub_.+\.sh$' $OUTFILE))"
121+
122+
# Compare it to the ordering we expect
123+
expected="scrub_start-async.sh scrub_start-slow.sh scrub_finish-sync-test.sh"
124+
log_must test "$lines" == "$expected"
125+
126+
log_pass "Verified synchronous zedlets"

0 commit comments

Comments
 (0)