Reintroduce zfs_remove() synchronous deletes

Reintroduce a slightly adapted version of the Illumos logic for synchronous unlinks. The basic idea here is that only files smaller than zfs_delete_blocks (20480) blocks should be deleted synchronously. Unlinking larger files should be handled asynchronously to minimize impact to the caller. To accomplish this iput() which is responsible for calling zfs_znode_delete() on Linux is only called in the delete_now path. Otherwise zfs_async_iput() is used which allows the last reference to be dropped by a taskq thread effectively making the removal asynchronous. Porting notes: - Add zfs_delete_blocks module option for performance analysis. The default value is DMU_MAX_DELETEBLKCNT which is the same as upstream. Reducing this value means that smaller files will be unlinked asynchronously like large files. - All occurrences of zfsvfs changes to zsb. Ported-by: KernelOfTruth [email protected] Signed-off-by: Brian Behlendorf <[email protected]>
author: kernelOfTruth <[email protected]> 2015-08-21 03:43:10 +0200
committer: Brian Behlendorf <[email protected]> 2016-01-26 15:26:02 -0800
commit: a966c5640e432f723753e63354394406a7c3dcf8 (patch)
tree: 85256b0cae2619ed097320ba16b51dcf97178642 /module
parent: 460a021391d32b7f2e9eb05a9a01946714174e3b (diff)
1 files changed, 66 insertions, 12 deletions
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 2f003de9f..f386b98a7 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -418,6 +418,7 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
 #endif /* _KERNEL */
 
 unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
+unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
 
 /*
  * Read bytes from specified file into supplied buffer.
@@ -1520,13 +1521,13 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
 	struct inode	*ip;
 	zfs_sb_t	*zsb = ITOZSB(dip);
 	zilog_t		*zilog;
-	uint64_t	xattr_obj;
+	uint64_t	acl_obj, xattr_obj;
 	uint64_t	xattr_obj_unlinked = 0;
 	uint64_t	obj = 0;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
-	boolean_t	may_delete_now;
-	boolean_t	unlinked;
+	boolean_t	may_delete_now, delete_now = FALSE;
+	boolean_t	unlinked, toobig = FALSE;
 	uint64_t	txtype;
 	pathname_t	*realnmp = NULL;
 #ifdef HAVE_PN_UTILS
@@ -1590,9 +1591,10 @@ top:
 	mutex_exit(&zp->z_lock);
 
 	/*
-	 * We never delete the znode and always place it in the unlinked
-	 * set.  The dentry cache will always hold the last reference and
-	 * is responsible for safely freeing the znode.
+	 * We may delete the znode now, or we may put it in the unlinked set;
+	 * it depends on whether we're the last link, and on whether there are
+	 * other holds on the inode.  So we dmu_tx_hold() the right things to
+	 * allow for either case.
 	 */
 	obj = zp->z_id;
 	tx = dmu_tx_create(zsb->z_os);
@@ -1600,6 +1602,12 @@ top:
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
+	if (may_delete_now) {
+		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
+		/* if the file is too big, only hold_free a token amount */
+		dmu_tx_hold_free(tx, zp->z_id, 0,
+		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
+	}
 
 	/* are there any extended attributes? */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
@@ -1611,6 +1619,11 @@ top:
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 
+	mutex_enter(&zp->z_lock);
+	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
+		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+	mutex_exit(&zp->z_lock);
+
 	/* charge as an update -- would be nice not to charge at all */
 	dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
 
@@ -1662,6 +1675,42 @@ top:
 		mutex_enter(&zp->z_lock);
 		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
+		delete_now = may_delete_now && !toobig &&
+		    atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) &&
+		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
+		    acl_obj;
+	}
+
+	if (delete_now) {
+		if (xattr_obj_unlinked) {
+			ASSERT3U(xzp->z_links, ==, 2);
+			mutex_enter(&xzp->z_lock);
+			xzp->z_unlinked = 1;
+			xzp->z_links = 0;
+			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
+			    &xzp->z_links, sizeof (xzp->z_links), tx);
+			ASSERT3U(error,  ==,  0);
+			mutex_exit(&xzp->z_lock);
+			zfs_unlinked_add(xzp, tx);
+
+			if (zp->z_is_sa)
+				error = sa_remove(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zsb), tx);
+			else
+				error = sa_update(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zsb), &null_xattr,
+				    sizeof (uint64_t), tx);
+			ASSERT0(error);
+		}
+		/*
+		 * Add to the unlinked set because a new reference could be
+		 * taken concurrently resulting in a deferred destruction.
+		 */
+		zfs_unlinked_add(zp, tx);
+		mutex_exit(&zp->z_lock);
+		zfs_inode_update(zp);
+		iput(ip);
+	} else if (unlinked) {
 		mutex_exit(&zp->z_lock);
 		zfs_unlinked_add(zp, tx);
 	}
@@ -1682,13 +1731,16 @@ out:
 
 	zfs_dirent_unlock(dl);
 	zfs_inode_update(dzp);
-	zfs_inode_update(zp);
-	if (xzp)
-		zfs_inode_update(xzp);
 
-	iput(ip);
-	if (xzp)
-		iput(ZTOI(xzp));
+	if (!delete_now) {
+		zfs_inode_update(zp);
+		zfs_iput_async(ip);
+	}
+
+	if (xzp) {
+		zfs_inode_update(xzp);
+		zfs_iput_async(ZTOI(xzp));
+	}
 
 	if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
@@ -4710,6 +4762,8 @@ zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
 #endif /* HAVE_UIO_ZEROCOPY */
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(zfs_delete_blocks, ulong, 0644);
+MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 module_param(zfs_read_chunk_size, long, 0644);
 MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
 #endif
author	kernelOfTruth <[email protected]>	2015-08-21 03:43:10 +0200
committer	Brian Behlendorf <[email protected]>	2016-01-26 15:26:02 -0800
commit	a966c5640e432f723753e63354394406a7c3dcf8 (patch)
tree	85256b0cae2619ed097320ba16b51dcf97178642 /module
parent	460a021391d32b7f2e9eb05a9a01946714174e3b (diff)