Revert "Revert "Fix unlink/xattr deadlock""

This reverts commit 53c7411919a64d6f0889aa0d6974610f6cd35744 effectively reinstating the asynchronous xattr cleanup code. These Linux changes were reverted because after testing and careful contemplation I was convinced that due to the 89260a1c8851ce05ea04b23606ba438b271d890 commit they were no longer required. Unfortunately, the deadlock described in #1176 was a case which wasn't considered. At mount zfs_unlinked_drain() can occur which will unlink a list of znodes in effectively a random order which isn't safe. The only reason it was safe to originally revert this change was the we could guarantee that the VFS would always prune the xattr leaves before the parents. Therefore, until we can cleanly resolve this deadlock for all cases we need to keep this change in spite of the xattr unlink performance penalty associated with it. Signed-off-by: Brian Behlendorf <[email protected]> Closes #1176 Issue #457
author: Brian Behlendorf <[email protected]> 2013-01-17 10:05:42 -0800
committer: Brian Behlendorf <[email protected]> 2013-01-17 11:24:20 -0800
commit: 7973e464de4f93b6e669f7f04a316e013767224e (patch)
tree: f1c1911ebd701749d2b3dc3dfaeebe9cc3edf386 /module
parent: 7b3e34ba5a7ee8d0fda44d214f6f11eb16cdb26f (diff)
2 files changed, 90 insertions, 55 deletions
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index a373537dd..670e31398 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -483,57 +483,6 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 }
 
 /*
- * Clean up any znodes that had no links when we either crashed or
- * (force) umounted the file system.
- */
-void
-zfs_unlinked_drain(zfs_sb_t *zsb)
-{
-	zap_cursor_t	zc;
-	zap_attribute_t zap;
-	dmu_object_info_t doi;
-	znode_t		*zp;
-	int		error;
-
-	/*
-	 * Interate over the contents of the unlinked set.
-	 */
-	for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
-	    zap_cursor_retrieve(&zc, &zap) == 0;
-	    zap_cursor_advance(&zc)) {
-
-		/*
-		 * See what kind of object we have in list
-		 */
-
-		error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
-		if (error != 0)
-			continue;
-
-		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
-		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
-		/*
-		 * We need to re-mark these list entries for deletion,
-		 * so we pull them back into core and set zp->z_unlinked.
-		 */
-		error = zfs_zget(zsb, zap.za_first_integer, &zp);
-
-		/*
-		 * We may pick up znodes that are already marked for deletion.
-		 * This could happen during the purge of an extended attribute
-		 * directory.  All we need to do is skip over them, since they
-		 * are already in the system marked z_unlinked.
-		 */
-		if (error != 0)
-			continue;
-
-		zp->z_unlinked = B_TRUE;
-		iput(ZTOI(zp));
-	}
-	zap_cursor_fini(&zc);
-}
-
-/*
  * Delete the entire contents of a directory.  Return a count
  * of the number of entries that could not be deleted. If we encounter
  * an error, return a count of at least one so that the directory stays
@@ -599,6 +548,71 @@ zfs_purgedir(znode_t *dzp)
 	return (skipped);
 }
 
+/*
+ * Clean up any znodes that had no links when we either crashed or
+ * (force) umounted the file system.
+ */
+void
+zfs_unlinked_drain(zfs_sb_t *zsb)
+{
+	zap_cursor_t	zc;
+	zap_attribute_t zap;
+	dmu_object_info_t doi;
+	znode_t		*zp;
+	int		error;
+
+	/*
+	 * Interate over the contents of the unlinked set.
+	 */
+	for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
+	    zap_cursor_retrieve(&zc, &zap) == 0;
+	    zap_cursor_advance(&zc)) {
+
+		/*
+		 * See what kind of object we have in list
+		 */
+
+		error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
+		if (error != 0)
+			continue;
+
+		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
+		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
+		/*
+		 * We need to re-mark these list entries for deletion,
+		 * so we pull them back into core and set zp->z_unlinked.
+		 */
+		error = zfs_zget(zsb, zap.za_first_integer, &zp);
+
+		/*
+		 * We may pick up znodes that are already marked for deletion.
+		 * This could happen during the purge of an extended attribute
+		 * directory.  All we need to do is skip over them, since they
+		 * are already in the system marked z_unlinked.
+		 */
+		if (error != 0)
+			continue;
+
+		zp->z_unlinked = B_TRUE;
+
+		/*
+		 * If this is an attribute directory, purge its contents.
+		 */
+		if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
+			/*
+			 * We don't need to check the return value of
+			 * zfs_purgedir here, because zfs_rmnode will just
+			 * return this xattr directory to the unlinked set
+			 * until all of its xattrs are gone.
+			 */
+			(void) zfs_purgedir(zp);
+		}
+
+		iput(ZTOI(zp));
+	}
+	zap_cursor_fini(&zc);
+}
+
 void
 zfs_rmnode(znode_t *zp)
 {
@@ -608,6 +622,7 @@ zfs_rmnode(znode_t *zp)
 	dmu_tx_t	*tx;
 	uint64_t	acl_obj;
 	uint64_t	xattr_obj;
+	uint64_t	count;
 	int		error;
 
 	ASSERT(zp->z_links == 0);
@@ -617,13 +632,27 @@ zfs_rmnode(znode_t *zp)
 	 * If this is an attribute directory, purge its contents.
 	 */
 	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
-		if (zfs_purgedir(zp) != 0) {
+		error = zap_count(os, zp->z_id, &count);
+		if (error) {
+			zfs_znode_dmu_fini(zp);
+			return;
+		}
+
+		if (count > 0) {
+			taskq_t *taskq;
+
 			/*
-			 * Not enough space to delete some xattrs.
-			 * Leave it in the unlinked set.
+			 * There are still directory entries in this xattr
+			 * directory.  Let zfs_unlinked_drain() deal with
+			 * them to avoid deadlocking this process in the
+			 * zfs_purgedir()->zfs_zget()->ilookup() callpath
+			 * on the xattr inode's I_FREEING bit.
 			 */
-			zfs_znode_dmu_fini(zp);
+			taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
+			taskq_dispatch(taskq, (task_func_t *)
+			    zfs_unlinked_drain, zsb, TQ_SLEEP);
 
+			zfs_znode_dmu_fini(zp);
 			return;
 		}
 	}
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index ac5c317ce..f44524238 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1056,6 +1056,12 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
 	}
 
 	/*
+	 * Drain the iput_taskq to ensure all active references to the
+	 * zfs_sb_t have been handled only then can it be safely destroyed.
+	 */
+	taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
+
+	/*
 	 * Close the zil. NB: Can't close the zil while zfs_inactive
 	 * threads are blocked as zil_close can call zfs_inactive.
 	 */
author	Brian Behlendorf <[email protected]>	2013-01-17 10:05:42 -0800
committer	Brian Behlendorf <[email protected]>	2013-01-17 11:24:20 -0800
commit	7973e464de4f93b6e669f7f04a316e013767224e (patch)
tree	f1c1911ebd701749d2b3dc3dfaeebe9cc3edf386 /module
parent	7b3e34ba5a7ee8d0fda44d214f6f11eb16cdb26f (diff)