linux: zvol: avoid heap allocation for zvol_request_sync=1

The spl_kmem_alloc showed up in some flamegraphs in a single-threaded 4k sync write workload at 85k IOPS on an Intel(R) Xeon(R) Silver 4215 CPU @ 2.50GHz. Certainly not a huge win but I believe the change is clean and easy to maintain down the road. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Signed-off-by: Christian Schwarz <[email protected]> Closes #11666
author: Christian Schwarz <[email protected]> 2021-03-03 17:15:28 +0100
committer: GitHub <[email protected]> 2021-03-03 08:15:28 -0800
commit: e439ee83c1316b58d480148c9c851f42c1458305 (patch)
tree: f42344ef9b355b258a0f3ed097f8ddb36d859b37 /module
parent: 3242b5358ea816909b3ade995cf95007f181b280 (diff)
1 files changed, 64 insertions, 29 deletions
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 0d62b1490..9a5913a38 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -56,12 +56,32 @@ struct zvol_state_os {
 taskq_t *zvol_taskq;
 static struct ida zvol_ida;
 
-typedef struct zv_request {
+typedef struct zv_request_stack {
 	zvol_state_t	*zv;
 	struct bio	*bio;
-	taskq_ent_t	ent;
 } zv_request_t;
 
+typedef struct zv_request_task {
+	zv_request_t zvr;
+	taskq_ent_t	ent;
+} zv_request_task_t;
+
+static zv_request_task_t *
+zv_request_task_create(zv_request_t zvr)
+{
+	zv_request_task_t *task;
+	task = kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
+	taskq_init_ent(&task->ent);
+	task->zvr = zvr;
+	return (task);
+}
+
+static void
+zv_request_task_free(zv_request_task_t *task)
+{
+	kmem_free(task, sizeof (*task));
+}
+
 /*
  * Given a path, return TRUE if path is a ZVOL.
  */
@@ -80,9 +100,8 @@ zvol_is_zvol_impl(const char *path)
 }
 
 static void
-zvol_write(void *arg)
+zvol_write(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -102,7 +121,6 @@ zvol_write(void *arg)
 	if (uio.uio_resid == 0) {
 		rw_exit(&zv->zv_suspend_lock);
 		BIO_END_IO(bio, 0);
-		kmem_free(zvr, sizeof (zv_request_t));
 		return;
 	}
 
@@ -162,13 +180,19 @@ zvol_write(void *arg)
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_discard(void *arg)
+zvol_write_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_write(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_discard(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	zvol_state_t *zv = zvr->zv;
 	uint64_t start = BIO_BI_SECTOR(bio) << 9;
@@ -238,13 +262,19 @@ unlock:
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
 }
 
 static void
-zvol_read(void *arg)
+zvol_discard_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_discard(&task->zvr);
+	zv_request_task_free(task);
+}
+
+static void
+zvol_read(zv_request_t *zvr)
 {
-	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
 	int error = 0;
 	zfs_uio_t uio;
@@ -295,7 +325,14 @@ zvol_read(void *arg)
 		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 
 	BIO_END_IO(bio, -error);
-	kmem_free(zvr, sizeof (zv_request_t));
+}
+
+static void
+zvol_read_task(void *arg)
+{
+	zv_request_task_t *task = arg;
+	zvol_read(&task->zvr);
+	zv_request_task_free(task);
 }
 
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
@@ -318,7 +355,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 	uint64_t offset = BIO_BI_SECTOR(bio) << 9;
 	uint64_t size = BIO_BI_SIZE(bio);
 	int rw = bio_data_dir(bio);
-	zv_request_t *zvr;
 
 	if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
 		printk(KERN_INFO
@@ -331,6 +367,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		goto out;
 	}
 
+	zv_request_t zvr = {
+		.zv = zv,
+		.bio = bio,
+	};
+	zv_request_task_t *task;
+
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
 			BIO_END_IO(bio, -SET_ERROR(EROFS));
@@ -361,11 +403,6 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			rw_downgrade(&zv->zv_suspend_lock);
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		/*
 		 * We don't want this thread to be blocked waiting for i/o to
 		 * complete, so we instead wait from a taskq callback. The
@@ -398,17 +435,19 @@ zvol_request(struct request_queue *q, struct bio *bio)
 		 */
 		if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
 			if (zvol_request_sync) {
-				zvol_discard(zvr);
+				zvol_discard(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_discard, zvr, 0, &zvr->ent);
+				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
 			if (zvol_request_sync) {
-				zvol_write(zvr);
+				zvol_write(&zvr);
 			} else {
+				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(zvol_taskq,
-				    zvol_write, zvr, 0, &zvr->ent);
+				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
 	} else {
@@ -422,19 +461,15 @@ zvol_request(struct request_queue *q, struct bio *bio)
 			goto out;
 		}
 
-		zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
-		zvr->zv = zv;
-		zvr->bio = bio;
-		taskq_init_ent(&zvr->ent);
-
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/* See comment in WRITE case above. */
 		if (zvol_request_sync) {
-			zvol_read(zvr);
+			zvol_read(&zvr);
 		} else {
+			task = zv_request_task_create(zvr);
 			taskq_dispatch_ent(zvol_taskq,
-			    zvol_read, zvr, 0, &zvr->ent);
+			    zvol_read_task, task, 0, &task->ent);
 		}
 	}
author	Christian Schwarz <[email protected]>	2021-03-03 17:15:28 +0100
committer	GitHub <[email protected]>	2021-03-03 08:15:28 -0800
commit	e439ee83c1316b58d480148c9c851f42c1458305 (patch)
tree	f42344ef9b355b258a0f3ed097f8ddb36d859b37 /module
parent	3242b5358ea816909b3ade995cf95007f181b280 (diff)