@@ -1621,6 +1621,9 @@ spa_vdev_remove_thread(void *arg)
         vca.vca_read_error_bytes = 0;
         vca.vca_write_error_bytes = 0;
 
+        zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
+            NULL, 0, 0);
+
         mutex_enter(&svr->svr_lock);
 
         /*
@@ -1633,7 +1636,9 @@ spa_vdev_remove_thread(void *arg)
                 metaslab_t *msp = vd->vdev_ms[msi];
                 ASSERT3U(msi, <=, vd->vdev_ms_count);
 
+again:
                 ASSERT0(zfs_range_tree_space(svr->svr_allocd_segs));
+                mutex_exit(&svr->svr_lock);
 
                 mutex_enter(&msp->ms_sync_lock);
                 mutex_enter(&msp->ms_lock);
@@ -1646,36 +1651,49 @@ spa_vdev_remove_thread(void *arg)
                 }
 
                 /*
-                 * If the metaslab has ever been allocated from (ms_sm != NULL),
+                 * If the metaslab has ever been synced (ms_sm != NULL),
                  * read the allocated segments from the space map object
                  * into svr_allocd_segs. Since we do this while holding
-                 * svr_lock and ms_sync_lock, concurrent frees (which
+                 * ms_lock and ms_sync_lock, concurrent frees (which
                  * would have modified the space map) will wait for us
                  * to finish loading the spacemap, and then take the
                  * appropriate action (see free_from_removing_vdev()).
                  */
-                if (msp->ms_sm != NULL) {
-                        VERIFY0(space_map_load(msp->ms_sm,
-                            svr->svr_allocd_segs, SM_ALLOC));
-
-                        zfs_range_tree_walk(msp->ms_unflushed_allocs,
-                            zfs_range_tree_add, svr->svr_allocd_segs);
-                        zfs_range_tree_walk(msp->ms_unflushed_frees,
-                            zfs_range_tree_remove, svr->svr_allocd_segs);
-                        zfs_range_tree_walk(msp->ms_freeing,
-                            zfs_range_tree_remove, svr->svr_allocd_segs);
+                if (msp->ms_sm != NULL)
+                        VERIFY0(space_map_load(msp->ms_sm, segs, SM_ALLOC));
 
-                        /*
-                         * When we are resuming from a paused removal (i.e.
-                         * when importing a pool with a removal in progress),
-                         * discard any state that we have already processed.
-                         */
-                        zfs_range_tree_clear(svr->svr_allocd_segs, 0,
-                            start_offset);
+                /*
+                 * We cannot hold svr_lock while loading the space map, or we
+                 * could hit a deadlock in the ZIO pipeline while waiting for
+                 * it. But we also cannot block on it here under the metaslab
+                 * locks, or it would be a lock ordering violation.
+                 */
+                if (!mutex_tryenter(&svr->svr_lock)) {
+                        mutex_exit(&msp->ms_lock);
+                        mutex_exit(&msp->ms_sync_lock);
+                        zfs_range_tree_vacate(segs, NULL, NULL);
+                        mutex_enter(&svr->svr_lock);
+                        goto again;
                 }
+
+                zfs_range_tree_swap(&segs, &svr->svr_allocd_segs);
+                zfs_range_tree_walk(msp->ms_unflushed_allocs,
+                    zfs_range_tree_add, svr->svr_allocd_segs);
+                zfs_range_tree_walk(msp->ms_unflushed_frees,
+                    zfs_range_tree_remove, svr->svr_allocd_segs);
+                zfs_range_tree_walk(msp->ms_freeing,
+                    zfs_range_tree_remove, svr->svr_allocd_segs);
+
                 mutex_exit(&msp->ms_lock);
                 mutex_exit(&msp->ms_sync_lock);
 
+                /*
+                 * When we are resuming from a paused removal (i.e.
+                 * when importing a pool with a removal in progress),
+                 * discard any state that we have already processed.
+                 */
+                zfs_range_tree_clear(svr->svr_allocd_segs, 0, start_offset);
+
                 vca.vca_msp = msp;
                 zfs_dbgmsg("copying %llu segments for metaslab %llu",
                     (u_longlong_t)zfs_btree_numnodes(
@@ -1751,6 +1769,8 @@ spa_vdev_remove_thread(void *arg)
 
         spa_config_exit(spa, SCL_CONFIG, FTAG);
 
+        zfs_range_tree_destroy(segs);
+
         /*
          * Wait for all copies to finish before cleaning up the vca.
          */
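
The heart of the change is the lock juggling around svr_lock: the space map is now loaded into a private range tree (segs) without svr_lock held, and svr_lock is then retaken with mutex_tryenter(), because blocking on it while holding the metaslab locks would violate the lock ordering. Below is a minimal, self-contained sketch of that trylock-and-retry pattern using plain pthreads; the names (outer_lock, inner_lock, load_scratch, shared_total, process_one) are hypothetical stand-ins, not ZFS code.

/*
 * Sketch of the trylock-and-retry pattern from the patch above.
 * Lock order is outer_lock -> inner_lock (like svr_lock -> ms_lock).
 */
#include <pthread.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_total;                /* protected by outer_lock */

/* Stand-in for the slow work (the space map load in the patch). */
static int
load_scratch(void)
{
        return (42);
}

static void
process_one(void)
{
        int scratch;

        /* Mirrors the patch: the outer lock is held on entry. */
        pthread_mutex_lock(&outer_lock);
again:
        /* Drop the outer lock before doing the slow work. */
        pthread_mutex_unlock(&outer_lock);

        pthread_mutex_lock(&inner_lock);
        scratch = load_scratch();       /* slow; outer_lock not held */

        /*
         * Blocking on outer_lock while holding inner_lock would invert
         * the lock order, so only try it. On contention, drop the inner
         * lock, discard the scratch result, block on the outer lock
         * where that is safe, and redo the work from the top.
         */
        if (pthread_mutex_trylock(&outer_lock) != 0) {
                pthread_mutex_unlock(&inner_lock);
                pthread_mutex_lock(&outer_lock);
                goto again;
        }

        shared_total += scratch;        /* publish under both locks */
        pthread_mutex_unlock(&outer_lock);
        pthread_mutex_unlock(&inner_lock);
}

int
main(void)
{
        process_one();
        return (0);
}

The key point is that the blocking lock on outer_lock happens only after inner_lock has been dropped, and the privately computed result is simply thrown away and recomputed on retry, which is what zfs_range_tree_vacate(segs, NULL, NULL) followed by goto again does in the patch.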