Skip to content

Commit d1fa653

Browse files
authored
Merge pull request #277 from ixhamza/NAS-134106-ft
NAS-134106 / 25.04.0 / arc: avoid possible deadlock in arc_read
2 parents 2c37149 + 182cc05 commit d1fa653

File tree

6 files changed

+40
-16
lines changed

6 files changed

+40
-16
lines changed

cmd/zdb/zdb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9038,7 +9038,7 @@ zdb_read_block(char *thing, spa_t *spa)
90389038
const blkptr_t *b = (const blkptr_t *)(void *)
90399039
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90409040
if (zfs_blkptr_verify(spa, b,
9041-
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY) == B_FALSE) {
9041+
BLK_CONFIG_NEEDED, BLK_VERIFY_ONLY)) {
90429042
abd_return_buf_copy(pabd, buf, lsize);
90439043
borrowed = B_FALSE;
90449044
buf = lbuf;
@@ -9047,7 +9047,7 @@ zdb_read_block(char *thing, spa_t *spa)
90479047
b = (const blkptr_t *)(void *)
90489048
((uintptr_t)buf + (uintptr_t)blkptr_offset);
90499049
if (lsize == -1 || zfs_blkptr_verify(spa, b,
9050-
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG) == B_FALSE) {
9050+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
90519051
printf("invalid block pointer at this DVA\n");
90529052
goto out;
90539053
}

include/sys/zio.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ enum blk_verify_flag {
546546
enum blk_config_flag {
547547
BLK_CONFIG_HELD, // SCL_VDEV held for writer
548548
BLK_CONFIG_NEEDED, // SCL_VDEV should be obtained for reader
549+
BLK_CONFIG_NEEDED_TRY, // Try with SCL_VDEV for reader
549550
BLK_CONFIG_SKIP, // skip checks which require SCL_VDEV
550551
};
551552

@@ -663,7 +664,7 @@ extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
663664
extern int zio_resume(spa_t *spa);
664665
extern void zio_resume_wait(spa_t *spa);
665666

666-
extern boolean_t zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
667+
extern int zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
667668
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify);
668669

669670
/*

module/zfs/arc.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5568,6 +5568,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
55685568
boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
55695569
arc_buf_t *buf = NULL;
55705570
int rc = 0;
5571+
boolean_t bp_validation = B_FALSE;
55715572

55725573
ASSERT(!embedded_bp ||
55735574
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
@@ -5610,7 +5611,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
56105611
* should always be the case since the blkptr is protected by
56115612
* a checksum.
56125613
*/
5613-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
5614+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_SKIP,
56145615
BLK_VERIFY_LOG)) {
56155616
mutex_exit(hash_lock);
56165617
rc = SET_ERROR(ECKSUM);
@@ -5762,6 +5763,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
57625763
abd_t *hdr_abd;
57635764
int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
57645765
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
5766+
int config_lock;
5767+
int error;
57655768

57665769
if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
57675770
if (hash_lock != NULL)
@@ -5770,16 +5773,31 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
57705773
goto done;
57715774
}
57725775

5776+
if (zio_flags & ZIO_FLAG_CONFIG_WRITER) {
5777+
config_lock = BLK_CONFIG_HELD;
5778+
} else if (hash_lock != NULL) {
5779+
/*
5780+
* Prevent lock order reversal
5781+
*/
5782+
config_lock = BLK_CONFIG_NEEDED_TRY;
5783+
} else {
5784+
config_lock = BLK_CONFIG_NEEDED;
5785+
}
5786+
57735787
/*
57745788
* Verify the block pointer contents are reasonable. This
57755789
* should always be the case since the blkptr is protected by
57765790
* a checksum.
57775791
*/
5778-
if (!zfs_blkptr_verify(spa, bp,
5779-
(zio_flags & ZIO_FLAG_CONFIG_WRITER) ?
5780-
BLK_CONFIG_HELD : BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
5792+
if (!bp_validation && (error = zfs_blkptr_verify(spa, bp,
5793+
config_lock, BLK_VERIFY_LOG))) {
57815794
if (hash_lock != NULL)
57825795
mutex_exit(hash_lock);
5796+
if (error == EBUSY && !zfs_blkptr_verify(spa, bp,
5797+
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
5798+
bp_validation = B_TRUE;
5799+
goto top;
5800+
}
57835801
rc = SET_ERROR(ECKSUM);
57845802
goto done;
57855803
}

module/zfs/dsl_scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2304,7 +2304,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
23042304
DMU_USERUSED_OBJECT, tx);
23052305
}
23062306
arc_buf_destroy(buf, &buf);
2307-
} else if (!zfs_blkptr_verify(spa, bp,
2307+
} else if (zfs_blkptr_verify(spa, bp,
23082308
BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
23092309
/*
23102310
* Sanity check the block pointer contents, this is handled

module/zfs/spa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2778,7 +2778,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
27782778
* When damaged consider it to be a metadata error since we cannot
27792779
* trust the BP_GET_TYPE and BP_GET_LEVEL values.
27802780
*/
2781-
if (!zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
2781+
if (zfs_blkptr_verify(spa, bp, BLK_CONFIG_NEEDED, BLK_VERIFY_LOG)) {
27822782
atomic_inc_64(&sle->sle_meta_count);
27832783
return (0);
27842784
}

module/zfs/zio.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
10981098
* it only contains known object types, checksum/compression identifiers,
10991099
* block sizes within the maximum allowed limits, valid DVAs, etc.
11001100
*
1101-
* If everything checks out B_TRUE is returned. The zfs_blkptr_verify
1101+
* If everything checks out 0 is returned. The zfs_blkptr_verify
11021102
* argument controls the behavior when an invalid field is detected.
11031103
*
11041104
* Values for blk_verify_flag:
@@ -1113,7 +1113,7 @@ zfs_blkptr_verify_log(spa_t *spa, const blkptr_t *bp,
11131113
* BLK_CONFIG_SKIP: skip checks which require SCL_VDEV, for better
11141114
* performance
11151115
*/
1116-
boolean_t
1116+
int
11171117
zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11181118
enum blk_config_flag blk_config, enum blk_verify_flag blk_verify)
11191119
{
@@ -1145,7 +1145,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11451145
"blkptr at %px has invalid PSIZE %llu",
11461146
bp, (longlong_t)BPE_GET_PSIZE(bp));
11471147
}
1148-
return (errors == 0);
1148+
return (errors ? ECKSUM : 0);
11491149
}
11501150
if (unlikely(BP_GET_CHECKSUM(bp) >= ZIO_CHECKSUM_FUNCTIONS)) {
11511151
errors += zfs_blkptr_verify_log(spa, bp, blk_verify,
@@ -1163,7 +1163,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11631163
* will be done once the zio is executed in vdev_mirror_map_alloc.
11641164
*/
11651165
if (unlikely(!spa->spa_trust_config))
1166-
return (errors == 0);
1166+
return (errors ? ECKSUM : 0);
11671167

11681168
switch (blk_config) {
11691169
case BLK_CONFIG_HELD:
@@ -1172,8 +1172,12 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
11721172
case BLK_CONFIG_NEEDED:
11731173
spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
11741174
break;
1175+
case BLK_CONFIG_NEEDED_TRY:
1176+
if (!spa_config_tryenter(spa, SCL_VDEV, bp, RW_READER))
1177+
return (EBUSY);
1178+
break;
11751179
case BLK_CONFIG_SKIP:
1176-
return (errors == 0);
1180+
return (errors ? ECKSUM : 0);
11771181
default:
11781182
panic("invalid blk_config %u", blk_config);
11791183
}
@@ -1228,10 +1232,11 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp,
12281232
bp, i, (longlong_t)offset);
12291233
}
12301234
}
1231-
if (blk_config == BLK_CONFIG_NEEDED)
1235+
if (blk_config == BLK_CONFIG_NEEDED || blk_config ==
1236+
BLK_CONFIG_NEEDED_TRY)
12321237
spa_config_exit(spa, SCL_VDEV, bp);
12331238

1234-
return (errors == 0);
1239+
return (errors ? ECKSUM : 0);
12351240
}
12361241

12371242
boolean_t

0 commit comments

Comments
 (0)