@@ -163,12 +163,6 @@ static int dump_bpobj_cb(void *arg, const blkptr_t *bp, boolean_t free,
163
163
dmu_tx_t * tx );
164
164
165
165
typedef struct sublivelist_verify {
166
- /* all ALLOC'd blkptr_t in one sub-livelist */
167
- zfs_btree_t sv_all_allocs ;
168
-
169
- /* all FREE'd blkptr_t in one sub-livelist */
170
- zfs_btree_t sv_all_frees ;
171
-
172
166
/* FREE's that haven't yet matched to an ALLOC, in one sub-livelist */
173
167
zfs_btree_t sv_pair ;
174
168
@@ -227,29 +221,68 @@ typedef struct sublivelist_verify_block {
227
221
228
222
static void zdb_print_blkptr (const blkptr_t * bp , int flags );
229
223
224
+ typedef struct sublivelist_verify_block_refcnt {
225
+ /* block pointer entry in livelist being verified */
226
+ blkptr_t svbr_blk ;
227
+
228
+ /*
229
+ * Refcount gets incremented to 1 when we encounter the first
230
+ * FREE entry for the svbr_blk block pointer and a node for it
231
+ * is created in our ZDB verification/tracking metadata.
232
+ *
233
+ * As we encounter more FREE entries we increment this counter
234
+ * and similarly decrement it whenever we find the respective
235
+ * ALLOC entries for this block.
236
+ *
237
+ * When the refcount gets to 0 it means that all the FREE and
238
+ * ALLOC entries of this block have paired up and we no longer
239
+ * need to track it in our verification logic (e.g. the node
240
+ * containing this struct in our verification data structure
241
+ * should be freed).
242
+ *
243
+ * [refer to sublivelist_verify_blkptr() for the actual code]
244
+ */
245
+ uint32_t svbr_refcnt ;
246
+ } sublivelist_verify_block_refcnt_t ;
247
+
248
+ static int
249
+ sublivelist_block_refcnt_compare (const void * larg , const void * rarg )
250
+ {
251
+ const sublivelist_verify_block_refcnt_t * l = larg ;
252
+ const sublivelist_verify_block_refcnt_t * r = rarg ;
253
+ return (livelist_compare (& l -> svbr_blk , & r -> svbr_blk ));
254
+ }
255
+
230
256
static int
231
257
sublivelist_verify_blkptr (void * arg , const blkptr_t * bp , boolean_t free ,
232
258
dmu_tx_t * tx )
233
259
{
234
260
ASSERT3P (tx , = = , NULL );
235
261
struct sublivelist_verify * sv = arg ;
236
- char blkbuf [BP_SPRINTF_LEN ];
262
+ sublivelist_verify_block_refcnt_t current = {
263
+ .svbr_blk = * bp ,
264
+
265
+ /*
266
+ * Start with 1 in case this is the first free entry.
267
+ * This field is not used for our B-Tree comparisons
268
+ * anyway.
269
+ */
270
+ .svbr_refcnt = 1 ,
271
+ };
272
+
237
273
zfs_btree_index_t where ;
274
+ sublivelist_verify_block_refcnt_t * pair =
275
+ zfs_btree_find (& sv -> sv_pair , & current , & where );
238
276
if (free ) {
239
- zfs_btree_add (& sv -> sv_pair , bp );
240
- /* Check if the FREE is a duplicate */
241
- if (zfs_btree_find (& sv -> sv_all_frees , bp , & where ) != NULL ) {
242
- snprintf_blkptr_compact (blkbuf , sizeof (blkbuf ), bp ,
243
- free );
244
- (void ) printf ("\tERROR: Duplicate FREE: %s\n" , blkbuf );
277
+ if (pair == NULL ) {
278
+ /* first free entry for this block pointer */
279
+ zfs_btree_add (& sv -> sv_pair , & current );
245
280
} else {
246
- zfs_btree_add_idx ( & sv -> sv_all_frees , bp , & where ) ;
281
+ pair -> svbr_refcnt ++ ;
247
282
}
248
283
} else {
249
- /* Check if the ALLOC has been freed */
250
- if (zfs_btree_find (& sv -> sv_pair , bp , & where ) != NULL ) {
251
- zfs_btree_remove_idx (& sv -> sv_pair , & where );
252
- } else {
284
+ if (pair == NULL ) {
285
+ /* block that is currently marked as allocated */
253
286
for (int i = 0 ; i < SPA_DVAS_PER_BP ; i ++ ) {
254
287
if (DVA_IS_EMPTY (& bp -> blk_dva [i ]))
255
288
break ;
@@ -264,49 +297,39 @@ sublivelist_verify_blkptr(void *arg, const blkptr_t *bp, boolean_t free,
264
297
& svb , & where );
265
298
}
266
299
}
267
- }
268
- /* Check if the ALLOC is a duplicate */
269
- if (zfs_btree_find (& sv -> sv_all_allocs , bp , & where ) != NULL ) {
270
- snprintf_blkptr_compact (blkbuf , sizeof (blkbuf ), bp ,
271
- free );
272
- (void ) printf ("\tERROR: Duplicate ALLOC: %s\n" , blkbuf );
273
300
} else {
274
- zfs_btree_add_idx (& sv -> sv_all_allocs , bp , & where );
301
+ /* alloc matches a free entry */
302
+ pair -> svbr_refcnt -- ;
303
+ if (pair -> svbr_refcnt == 0 ) {
304
+ /* all allocs and frees have been matched */
305
+ zfs_btree_remove_idx (& sv -> sv_pair , & where );
306
+ }
275
307
}
276
308
}
309
+
277
310
return (0 );
278
311
}
279
312
280
313
static int
281
314
sublivelist_verify_func (void * args , dsl_deadlist_entry_t * dle )
282
315
{
283
316
int err ;
284
- char blkbuf [BP_SPRINTF_LEN ];
285
317
struct sublivelist_verify * sv = args ;
286
318
287
- zfs_btree_create (& sv -> sv_all_allocs , livelist_compare ,
288
- sizeof (blkptr_t ));
289
-
290
- zfs_btree_create (& sv -> sv_all_frees , livelist_compare ,
291
- sizeof (blkptr_t ));
292
-
293
- zfs_btree_create (& sv -> sv_pair , livelist_compare ,
294
- sizeof (blkptr_t ));
319
+ zfs_btree_create (& sv -> sv_pair , sublivelist_block_refcnt_compare ,
320
+ sizeof (sublivelist_verify_block_refcnt_t ));
295
321
296
322
err = bpobj_iterate_nofree (& dle -> dle_bpobj , sublivelist_verify_blkptr ,
297
323
sv , NULL );
298
324
299
- zfs_btree_clear (& sv -> sv_all_allocs );
300
- zfs_btree_destroy (& sv -> sv_all_allocs );
301
-
302
- zfs_btree_clear (& sv -> sv_all_frees );
303
- zfs_btree_destroy (& sv -> sv_all_frees );
304
-
305
- blkptr_t * e ;
325
+ sublivelist_verify_block_refcnt_t * e ;
306
326
zfs_btree_index_t * cookie = NULL ;
307
327
while ((e = zfs_btree_destroy_nodes (& sv -> sv_pair , & cookie )) != NULL ) {
308
- snprintf_blkptr_compact (blkbuf , sizeof (blkbuf ), e , B_TRUE );
309
- (void ) printf ("\tERROR: Unmatched FREE: %s\n" , blkbuf );
328
+ char blkbuf [BP_SPRINTF_LEN ];
329
+ snprintf_blkptr_compact (blkbuf , sizeof (blkbuf ),
330
+ & e -> svbr_blk , B_TRUE );
331
+ (void ) printf ("\tERROR: %d unmatched FREE(s): %s\n" ,
332
+ e -> svbr_refcnt , blkbuf );
310
333
}
311
334
zfs_btree_destroy (& sv -> sv_pair );
312
335
@@ -615,10 +638,14 @@ mv_populate_livelist_allocs(metaslab_verify_t *mv, sublivelist_verify_t *sv)
615
638
/*
616
639
* [Livelist Check]
617
640
* Iterate through all the sublivelists and:
618
- * - report leftover frees
619
- * - report double ALLOCs/FREEs
641
+ * - report leftover frees (**)
620
642
* - record leftover ALLOCs together with their TXG [see Cross Check]
621
643
*
644
+ * (**) Note: Double ALLOCs are valid in datasets that have dedup
645
+ * enabled. Similarly double FREEs are allowed as well but
646
+ * only if they pair up with a corresponding ALLOC entry once
647
+ * we are done with our sublivelist iteration.
648
+ *
622
649
* [Spacemap Check]
623
650
* for each metaslab:
624
651
* - iterate over spacemap and then the metaslab's entries in the
0 commit comments