summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGunnar Beutner <[email protected]>2011-06-20 10:36:58 -0700
committerBrian Behlendorf <[email protected]>2011-06-20 13:47:03 -0700
commitb00131d43ca344d4b205a03ab3eb771a060e5087 (patch)
treeba1b2999ba0bd65c8422cbd85b96b0f5136c57bb
parent6f0cf71e0d191abb850a45f6d216cb5af158a6dd (diff)
Fix unlink/xattr deadlock
The problem here is that prune_icache() tries to evict/delete both the xattr directory inode as well as at least one xattr inode contained in that directory. Here's what happens: 1. File is created. 2. xattr is created for that file (behind the scenes a xattr directory and a file in that xattr directory are created) 3. File is deleted. 4. Both the xattr directory inode and at least one xattr inode from that directory are evicted by prune_icache(); prune_icache() acquires a lock on both inodes before it calls ->evict() on the inodes When the xattr directory inode is evicted zfs_zinactive attempts to delete the xattr files contained in that directory. While enumerating these files zfs_zget() is called to obtain a reference to the xattr file znode - which tries to lock the xattr inode. However that very same xattr inode was already locked by prune_icache() further up the call stack, thus leading to a deadlock. This can be reliably reproduced like this: $ touch test $ attr -s a -V b test $ rm test $ echo 3 > /proc/sys/vm/drop_caches This patch fixes the deadlock by moving the zfs_purgedir() call to zfs_unlinked_drain(). Instead zfs_rmnode() now checks whether the xattr dir is empty and leaves the xattr dir in the unlinked set if it finds any xattrs. To ensure zfs_unlinked_drain() never accesses a stale super block zfsvfs_teardown() has been update to block until the iput taskq has been drained. This avoids a potential race where a file with an xattr directory is removed and the file system is immediately unmounted. Signed-off-by: Brian Behlendorf <[email protected]> Closes #266
-rw-r--r--module/zfs/zfs_dir.c139
-rw-r--r--module/zfs/zfs_vfsops.c6
2 files changed, 90 insertions, 55 deletions
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index 58a18bcfc..0df7cc1f8 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -474,57 +474,6 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
}
/*
- * Clean up any znodes that had no links when we either crashed or
- * (force) umounted the file system.
- */
-void
-zfs_unlinked_drain(zfs_sb_t *zsb)
-{
- zap_cursor_t zc;
- zap_attribute_t zap;
- dmu_object_info_t doi;
- znode_t *zp;
- int error;
-
- /*
- * Interate over the contents of the unlinked set.
- */
- for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
- zap_cursor_retrieve(&zc, &zap) == 0;
- zap_cursor_advance(&zc)) {
-
- /*
- * See what kind of object we have in list
- */
-
- error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
- if (error != 0)
- continue;
-
- ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
- (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
- /*
- * We need to re-mark these list entries for deletion,
- * so we pull them back into core and set zp->z_unlinked.
- */
- error = zfs_zget(zsb, zap.za_first_integer, &zp);
-
- /*
- * We may pick up znodes that are already marked for deletion.
- * This could happen during the purge of an extended attribute
- * directory. All we need to do is skip over them, since they
- * are already in the system marked z_unlinked.
- */
- if (error != 0)
- continue;
-
- zp->z_unlinked = B_TRUE;
- iput(ZTOI(zp));
- }
- zap_cursor_fini(&zc);
-}
-
-/*
* Delete the entire contents of a directory. Return a count
* of the number of entries that could not be deleted. If we encounter
* an error, return a count of at least one so that the directory stays
@@ -590,6 +539,71 @@ zfs_purgedir(znode_t *dzp)
return (skipped);
}
+/*
+ * Clean up any znodes that had no links when we either crashed or
+ * (force) umounted the file system.
+ */
+void
+zfs_unlinked_drain(zfs_sb_t *zsb)
+{
+ zap_cursor_t zc;
+ zap_attribute_t zap;
+ dmu_object_info_t doi;
+ znode_t *zp;
+ int error;
+
+ /*
+ * Interate over the contents of the unlinked set.
+ */
+ for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
+ zap_cursor_retrieve(&zc, &zap) == 0;
+ zap_cursor_advance(&zc)) {
+
+ /*
+ * See what kind of object we have in list
+ */
+
+ error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
+ if (error != 0)
+ continue;
+
+ ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
+ (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
+ /*
+ * We need to re-mark these list entries for deletion,
+ * so we pull them back into core and set zp->z_unlinked.
+ */
+ error = zfs_zget(zsb, zap.za_first_integer, &zp);
+
+ /*
+ * We may pick up znodes that are already marked for deletion.
+ * This could happen during the purge of an extended attribute
+ * directory. All we need to do is skip over them, since they
+ * are already in the system marked z_unlinked.
+ */
+ if (error != 0)
+ continue;
+
+ zp->z_unlinked = B_TRUE;
+
+ /*
+ * If this is an attribute directory, purge its contents.
+ */
+ if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
+ /*
+ * We don't need to check the return value of
+ * zfs_purgedir here, because zfs_rmnode will just
+ * return this xattr directory to the unlinked set
+ * until all of its xattrs are gone.
+ */
+ (void) zfs_purgedir(zp);
+ }
+
+ iput(ZTOI(zp));
+ }
+ zap_cursor_fini(&zc);
+}
+
void
zfs_rmnode(znode_t *zp)
{
@@ -599,6 +613,7 @@ zfs_rmnode(znode_t *zp)
dmu_tx_t *tx;
uint64_t acl_obj;
uint64_t xattr_obj;
+ uint64_t count;
int error;
ASSERT(zp->z_links == 0);
@@ -608,13 +623,27 @@ zfs_rmnode(znode_t *zp)
* If this is an attribute directory, purge its contents.
*/
if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
- if (zfs_purgedir(zp) != 0) {
+ error = zap_count(os, zp->z_id, &count);
+ if (error) {
+ zfs_znode_dmu_fini(zp);
+ return;
+ }
+
+ if (count > 0) {
+ taskq_t *taskq;
+
/*
- * Not enough space to delete some xattrs.
- * Leave it in the unlinked set.
+ * There are still directory entries in this xattr
+ * directory. Let zfs_unlinked_drain() deal with
+ * them to avoid deadlocking this process in the
+ * zfs_purgedir()->zfs_zget()->ilookup() callpath
+ * on the xattr inode's I_FREEING bit.
*/
- zfs_znode_dmu_fini(zp);
+ taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
+ taskq_dispatch(taskq, (task_func_t *)
+ zfs_unlinked_drain, zsb, TQ_SLEEP);
+ zfs_znode_dmu_fini(zp);
return;
}
}
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index e71aa91e1..257563887 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1092,6 +1092,12 @@ zfsvfs_teardown(zfs_sb_t *zsb, boolean_t unmounting)
}
/*
+ * Drain the iput_taskq to ensure all active references to the
+ * zfs_sb_t have been handled only then can it be safely destroyed.
+ */
+ taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
+
+ /*
* Close the zil. NB: Can't close the zil while zfs_inactive
* threads are blocked as zil_close can call zfs_inactive.
*/