Skip to content

Commit 7f5644e

Browse files
committed
Linux: O_TMPFILE and inode lifetime rework
Commit message TODO Signed-off-by: Pavel Snajdr <[email protected]>
1 parent d188001 commit 7f5644e

File tree

8 files changed

+113
-141
lines changed

8 files changed

+113
-141
lines changed

include/os/linux/zfs/sys/zfs_znode_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ extern "C" {
4848
#endif
4949

5050
#define ZNODE_OS_FIELDS \
51+
boolean_t z_is_tmpfile; /* file is a tmpfile */ \
5152
inode_timespec_t z_btime; /* creation/birth time (cached) */ \
5253
struct inode z_inode;
5354

include/sys/zfs_znode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@ typedef struct znode {
192192
boolean_t z_zn_prefetch; /* Prefetch znodes? */
193193
boolean_t z_is_sa; /* are we native sa? */
194194
boolean_t z_is_ctldir; /* are we .zfs entry */
195-
boolean_t z_suspended; /* extra ref from a suspend? */
196195
uint_t z_blksz; /* block size in bytes */
197196
uint_t z_seq; /* modification sequence number */
198197
uint64_t z_mapcnt; /* number of pages mapped to file */
@@ -280,6 +279,7 @@ extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
280279
extern int zfs_rezget(znode_t *);
281280
extern void zfs_zinactive(znode_t *);
282281
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
282+
extern void zfs_znode_delete_held(znode_t *, dmu_tx_t *);
283283
extern void zfs_remove_op_tables(void);
284284
extern int zfs_create_op_tables(void);
285285
extern dev_t zfs_cmpldev(uint64_t);

module/os/linux/zfs/zfs_dir.c

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -654,8 +654,6 @@ zfs_rmnode(znode_t *zp)
654654
objset_t *os = zfsvfs->z_os;
655655
znode_t *xzp = NULL;
656656
dmu_tx_t *tx;
657-
znode_hold_t *zh;
658-
uint64_t z_id = zp->z_id;
659657
uint64_t acl_obj;
660658
uint64_t xattr_obj;
661659
uint64_t links;
@@ -673,9 +671,7 @@ zfs_rmnode(znode_t *zp)
673671
* Not enough space to delete some xattrs.
674672
* Leave it in the unlinked set.
675673
*/
676-
zh = zfs_znode_hold_enter(zfsvfs, z_id);
677674
zfs_znode_dmu_fini(zp);
678-
zfs_znode_hold_exit(zfsvfs, zh);
679675
return;
680676
}
681677
}
@@ -694,9 +690,7 @@ zfs_rmnode(znode_t *zp)
694690
* Not enough space or we were interrupted by unmount.
695691
* Leave the file in the unlinked set.
696692
*/
697-
zh = zfs_znode_hold_enter(zfsvfs, z_id);
698693
zfs_znode_dmu_fini(zp);
699-
zfs_znode_hold_exit(zfsvfs, zh);
700694
return;
701695
}
702696
}
@@ -736,9 +730,7 @@ zfs_rmnode(znode_t *zp)
736730
* which point we'll call zfs_unlinked_drain() to process it).
737731
*/
738732
dmu_tx_abort(tx);
739-
zh = zfs_znode_hold_enter(zfsvfs, z_id);
740733
zfs_znode_dmu_fini(zp);
741-
zfs_znode_hold_exit(zfsvfs, zh);
742734
goto out;
743735
}
744736

@@ -775,7 +767,7 @@ zfs_rmnode(znode_t *zp)
775767

776768
dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
777769

778-
zfs_znode_delete(zp, tx);
770+
zfs_znode_delete_held(zp, tx);
779771

780772
dmu_tx_commit(tx);
781773
out:
@@ -816,7 +808,8 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
816808
mutex_enter(&zp->z_lock);
817809

818810
if (!(flag & ZRENAMING)) {
819-
if (zp->z_unlinked) { /* no new links to unlinked zp */
811+
if (zp->z_unlinked && !zp->z_is_tmpfile) {
812+
/* no new links to unlinked zp */
820813
ASSERT(!(flag & (ZNEW | ZEXISTS)));
821814
mutex_exit(&zp->z_lock);
822815
return (SET_ERROR(ENOENT));

module/os/linux/zfs/zfs_vfsops.c

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,29 +1326,19 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
13261326
}
13271327

13281328
/*
1329-
* At this point there are no VFS ops active, and any new VFS ops
1330-
* will fail with EIO since we have z_teardown_lock for writer (only
1329+
* At this point there are no vops active, and any new vops will
1330+
* fail with EIO since we have z_teardown_lock for writer (only
13311331
* relevant for forced unmount).
13321332
*
1333-
* Release all holds on dbufs. We also grab an extra reference to all
1334-
* the remaining inodes so that the kernel does not attempt to free
1335-
* any inodes of a suspended fs. This can cause deadlocks since the
1336-
* zfs_resume_fs() process may involve starting threads, which might
1337-
* attempt to free unreferenced inodes to free up memory for the new
1338-
* thread.
1333+
* Release all holds on dbufs.
13391334
*/
1340-
if (!unmounting) {
1341-
mutex_enter(&zfsvfs->z_znodes_lock);
1342-
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1343-
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1344-
if (zp->z_sa_hdl)
1345-
zfs_znode_dmu_fini(zp);
1346-
if (igrab(ZTOI(zp)) != NULL)
1347-
zp->z_suspended = B_TRUE;
1348-
1349-
}
1350-
mutex_exit(&zfsvfs->z_znodes_lock);
1335+
mutex_enter(&zfsvfs->z_znodes_lock);
1336+
for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1337+
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1338+
if (zp->z_sa_hdl)
1339+
zfs_znode_dmu_fini(zp);
13511340
}
1341+
mutex_exit(&zfsvfs->z_znodes_lock);
13521342

13531343
/*
13541344
* If we are unmounting, set the unmounted flag and let new VFS ops
@@ -1717,7 +1707,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
17171707
* Must have an existing ref, so igrab()
17181708
* cannot return NULL
17191709
*/
1720-
VERIFY3P(igrab(*ipp), !=, NULL);
1710+
zhold(ITOZ(*ipp));
17211711
}
17221712
zfs_exit(zfsvfs, FTAG);
17231713
return (0);
@@ -1790,7 +1780,7 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs)
17901780
int
17911781
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
17921782
{
1793-
int err, err2;
1783+
int err;
17941784
znode_t *zp;
17951785

17961786
ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
@@ -1827,20 +1817,11 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
18271817
* VFS prunes the dentry holding the remaining references
18281818
* on the stale inode.
18291819
*/
1820+
pr_info("Resuming file system: rezget\n");
18301821
mutex_enter(&zfsvfs->z_znodes_lock);
18311822
for (zp = list_head(&zfsvfs->z_all_znodes); zp;
18321823
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1833-
err2 = zfs_rezget(zp);
1834-
if (err2) {
1835-
zpl_d_drop_aliases(ZTOI(zp));
1836-
remove_inode_hash(ZTOI(zp));
1837-
}
1838-
1839-
/* see comment in zfs_suspend_fs() */
1840-
if (zp->z_suspended) {
1841-
zfs_zrele_async(zp);
1842-
zp->z_suspended = B_FALSE;
1843-
}
1824+
(void) zfs_rezget(zp);
18441825
}
18451826
mutex_exit(&zfsvfs->z_znodes_lock);
18461827

module/os/linux/zfs/zfs_vnops_os.c

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,6 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
768768
* delete the newly created dnode.
769769
*/
770770
zfs_znode_delete(zp, tx);
771-
remove_inode_hash(ZTOI(zp));
772771
zfs_acl_ids_free(&acl_ids);
773772
dmu_tx_commit(tx);
774773
goto out;
@@ -954,9 +953,6 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
954953
if (fuid_dirtied)
955954
zfs_fuid_sync(zfsvfs, tx);
956955

957-
/* Add to unlinked set */
958-
zp->z_unlinked = B_TRUE;
959-
zfs_unlinked_add(zp, tx);
960956
zfs_acl_ids_free(&acl_ids);
961957
dmu_tx_commit(tx);
962958
out:
@@ -1372,7 +1368,6 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
13721368
error = zfs_link_create(dl, zp, tx, ZNEW);
13731369
if (error != 0) {
13741370
zfs_znode_delete(zp, tx);
1375-
remove_inode_hash(ZTOI(zp));
13761371
goto out;
13771372
}
13781373

@@ -3177,10 +3172,12 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
31773172
zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
31783173
error = zfs_link_create(sdl, wzp, tx, ZNEW);
31793174
if (error) {
3175+
unlock_new_inode(ZTOI(wzp));
31803176
zfs_znode_delete(wzp, tx);
3181-
remove_inode_hash(ZTOI(wzp));
31823177
goto commit_unlink_td_szp;
31833178
}
3179+
VERIFY0(insert_inode_locked(ZTOI(wzp)));
3180+
unlock_new_inode(ZTOI(wzp));
31843181
break;
31853182
}
31863183

@@ -3415,7 +3412,6 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
34153412
error = zfs_link_create(dl, zp, tx, ZNEW);
34163413
if (error != 0) {
34173414
zfs_znode_delete(zp, tx);
3418-
remove_inode_hash(ZTOI(zp));
34193415
} else {
34203416
if (flags & FIGNORECASE)
34213417
txtype |= TX_CI;
@@ -3512,11 +3508,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
35123508
uint64_t parent;
35133509
uid_t owner;
35143510
boolean_t waited = B_FALSE;
3515-
boolean_t is_tmpfile = 0;
35163511
uint64_t txg;
35173512

3518-
is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
3519-
35203513
ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));
35213514

35223515
if (name == NULL)
@@ -3619,7 +3612,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
36193612
tx = dmu_tx_create(zfsvfs->z_os);
36203613
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
36213614
dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3622-
if (is_tmpfile)
3615+
if (szp->z_is_tmpfile && szp->z_unlinked)
36233616
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
36243617

36253618
zfs_sa_upgrade_txholds(tx, szp);
@@ -3637,41 +3630,43 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
36373630
zfs_exit(zfsvfs, FTAG);
36383631
return (error);
36393632
}
3640-
/* unmark z_unlinked so zfs_link_create will not reject */
3641-
if (is_tmpfile)
3642-
szp->z_unlinked = B_FALSE;
36433633
error = zfs_link_create(dl, szp, tx, 0);
36443634

36453635
if (error == 0) {
36463636
uint64_t txtype = TX_LINK;
36473637
/*
3648-
* tmpfile is created to be in z_unlinkedobj, so remove it.
3649-
* Also, we don't log in ZIL, because all previous file
3638+
* We don't log tmpfile in ZIL, because all previous file
36503639
* operations on the tmpfile are ignored by ZIL. Instead we
36513640
* always wait for txg to sync to make sure all previous
36523641
* operations are sync safe.
36533642
*/
3654-
if (is_tmpfile) {
3655-
VERIFY(zap_remove_int(zfsvfs->z_os,
3656-
zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
3657-
} else {
3643+
if (!szp->z_is_tmpfile || !szp->z_unlinked) {
36583644
if (flags & FIGNORECASE)
36593645
txtype |= TX_CI;
36603646
zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
36613647
}
3662-
} else if (is_tmpfile) {
3663-
/* restore z_unlinked since when linking failed */
3664-
szp->z_unlinked = B_TRUE;
3648+
if (szp->z_is_tmpfile) {
3649+
mutex_enter(&szp->z_lock);
3650+
if (szp->z_unlinked) {
3651+
szp->z_unlinked = B_FALSE;
3652+
VERIFY0(zap_remove_int(zfsvfs->z_os,
3653+
zfsvfs->z_unlinkedobj,
3654+
szp->z_id, tx));
3655+
}
3656+
mutex_exit(&szp->z_lock);
3657+
}
36653658
}
36663659
txg = dmu_tx_get_txg(tx);
36673660
dmu_tx_commit(tx);
36683661

36693662
zfs_dirent_unlock(dl);
36703663

3671-
if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3664+
if ((!szp->z_is_tmpfile || !szp->z_unlinked) &&
3665+
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
36723666
zil_commit(zilog, 0);
36733667

3674-
if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
3668+
if (szp->z_is_tmpfile && szp->z_unlinked &&
3669+
(zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED))
36753670
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
36763671

36773672
zfs_znode_update_vfs(tdzp);
@@ -4024,6 +4019,7 @@ zfs_inactive(struct inode *ip)
40244019
need_unlock = 1;
40254020
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
40264021
}
4022+
40274023
if (zp->z_sa_hdl == NULL) {
40284024
if (need_unlock)
40294025
rw_exit(&zfsvfs->z_teardown_inactive_lock);

0 commit comments

Comments
 (0)