summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Caputi <[email protected]>2019-08-27 12:55:51 -0400
committerBrian Behlendorf <[email protected]>2019-08-27 09:55:51 -0700
commite7a2fa70c3b0d8c8cee2b484038bb5623c7c1ea9 (patch)
treea216b26e20a92c1cd0635a19ab9a285ce5e6fda0
parent142f84dd19f20b47157bbbf45aaba489b6577c88 (diff)
Fix deadlock in 'zfs rollback'
Currently, the 'zfs rollback' code can end up deadlocked due to the way the kernel handles unreferenced inodes on a suspended fs. Essentially, the zfs_resume_fs() code path may cause zfs to spawn new threads as it reinstantiates the suspended fs's zil. When a new thread is spawned, the kernel may attempt to free memory for that thread by freeing some unreferenced inodes. If it happens to select inodes that are a a part of the suspended fs a deadlock will occur because freeing inodes requires holding the fs's z_teardown_inactive_lock which is still held from the suspend. This patch corrects this issue by adding an additional reference to all inodes that are still present when a suspend is initiated. This prevents them from being freed by the kernel for any reason. Reviewed-by: Alek Pinchuk <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Tom Caputi <[email protected]> Closes #9203
-rw-r--r--include/sys/zfs_znode.h1
-rw-r--r--module/zfs/zfs_vfsops.c16
-rw-r--r--module/zfs/zfs_znode.c1
3 files changed, 17 insertions, 1 deletions
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index a0a3dd1ad..acaaf2884 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -200,6 +200,7 @@ typedef struct znode {
boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
boolean_t z_is_stale; /* are we stale due to rollback? */
+ boolean_t z_suspended; /* extra ref from a suspend? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index af82c7bc4..34f4842d7 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1737,7 +1737,12 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
* will fail with EIO since we have z_teardown_lock for writer (only
* relevant for forced unmount).
*
- * Release all holds on dbufs.
+ * Release all holds on dbufs. We also grab an extra reference to all
+ * the remaining inodes so that the kernel does not attempt to free
+ * any inodes of a suspended fs. This can cause deadlocks since the
+ * zfs_resume_fs() process may involve starting threads, which might
+ * attempt to free unreferenced inodes to free up memory for the new
+ * thread.
*/
if (!unmounting) {
mutex_enter(&zfsvfs->z_znodes_lock);
@@ -1745,6 +1750,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
if (zp->z_sa_hdl)
zfs_znode_dmu_fini(zp);
+ if (igrab(ZTOI(zp)) != NULL)
+ zp->z_suspended = B_TRUE;
+
}
mutex_exit(&zfsvfs->z_znodes_lock);
}
@@ -2202,6 +2210,12 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
remove_inode_hash(ZTOI(zp));
zp->z_is_stale = B_TRUE;
}
+
+ /* see comment in zfs_suspend_fs() */
+ if (zp->z_suspended) {
+ zfs_iput_async(ZTOI(zp));
+ zp->z_suspended = B_FALSE;
+ }
}
mutex_exit(&zfsvfs->z_znodes_lock);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 498547758..8512db9bc 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -545,6 +545,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
zp->z_is_stale = B_FALSE;
+ zp->z_suspended = B_FALSE;
zp->z_sa_hdl = NULL;
zp->z_mapcnt = 0;
zp->z_id = db->db_object;