Illumos #734: Use taskq_dispatch_ent() interface

It has been observed that some of the hottest locks are those of the zio taskqs. Contention on these locks can limit the rate at which zios are dispatched which limits performance. This upstream change from Illumos uses new interface to the taskqs which allow them to utilize a prealloc'ed taskq_ent_t. This removes the need to perform an allocation at dispatch time while holding the contended lock. This has the effect of improving system performance. Reviewed by: Albert Lee <[email protected]> Reviewed by: Richard Lowe <[email protected]> Reviewed by: Alexey Zaytsev <[email protected]> Reviewed by: Jason Brian King <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Adam Leventhal <[email protected]> Approved by: Gordon Ross <[email protected]> References to Illumos issue: https://www.illumos.org/issues/734 Ported-by: Prakash Surya <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #482
author: Garrett D'Amore <[email protected]> 2011-11-07 16:26:52 -0800
committer: Brian Behlendorf <[email protected]> 2011-12-14 09:19:30 -0800
commit: a38718a63d79116d6cb614dd2821e2a3955e5c8c (patch)
tree: 05bdaf166ff065cedd2951380d92c65c7545b9cd /lib
parent: 30a9524e45f76d4c7860bcbf5567aeaa8aeb7a82 (diff)
1 files changed, 91 insertions, 35 deletions
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
index 36c0ec7df..6143a9189 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -22,19 +22,15 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
 
 #include <sys/zfs_context.h>
 
 int taskq_now;
 taskq_t *system_taskq;
 
-typedef struct task {
-	struct task	*task_next;
-	struct task	*task_prev;
-	task_func_t	*task_func;
-	void		*task_arg;
-} task_t;
-
 #define	TASKQ_ACTIVE	0x00010000
 
 struct taskq {
@@ -51,18 +47,19 @@ struct taskq {
 	int		tq_maxalloc;
 	kcondvar_t	tq_maxalloc_cv;
 	int		tq_maxalloc_wait;
-	task_t		*tq_freelist;
-	task_t		tq_task;
+	taskq_ent_t	*tq_freelist;
+	taskq_ent_t	tq_task;
 };
 
-static task_t *
+static taskq_ent_t *
 task_alloc(taskq_t *tq, int tqflags)
 {
-	task_t *t;
+	taskq_ent_t *t;
 	int rv;
 
 again:	if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
-		tq->tq_freelist = t->task_next;
+		ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+		tq->tq_freelist = t->tqent_next;
 	} else {
 		if (tq->tq_nalloc >= tq->tq_maxalloc) {
 			if (!(tqflags & KM_SLEEP))
@@ -87,25 +84,28 @@ again:	if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
 		}
 		mutex_exit(&tq->tq_lock);
 
-		t = kmem_alloc(sizeof (task_t), tqflags);
+		t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
 
 		mutex_enter(&tq->tq_lock);
-		if (t != NULL)
+		if (t != NULL) {
+			/* Make sure we start without any flags */
+			t->tqent_flags = 0;
 			tq->tq_nalloc++;
+		}
 	}
 	return (t);
 }
 
 static void
-task_free(taskq_t *tq, task_t *t)
+task_free(taskq_t *tq, taskq_ent_t *t)
 {
 	if (tq->tq_nalloc <= tq->tq_minalloc) {
-		t->task_next = tq->tq_freelist;
+		t->tqent_next = tq->tq_freelist;
 		tq->tq_freelist = t;
 	} else {
 		tq->tq_nalloc--;
 		mutex_exit(&tq->tq_lock);
-		kmem_free(t, sizeof (task_t));
+		kmem_free(t, sizeof (taskq_ent_t));
 		mutex_enter(&tq->tq_lock);
 	}
 
@@ -116,7 +116,7 @@ task_free(taskq_t *tq, task_t *t)
 taskqid_t
 taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
 {
-	task_t *t;
+	taskq_ent_t *t;
 
 	if (taskq_now) {
 		func(arg);
@@ -130,26 +130,77 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
 		return (0);
 	}
 	if (tqflags & TQ_FRONT) {
-		t->task_next = tq->tq_task.task_next;
-		t->task_prev = &tq->tq_task;
+		t->tqent_next = tq->tq_task.tqent_next;
+		t->tqent_prev = &tq->tq_task;
 	} else {
-		t->task_next = &tq->tq_task;
-		t->task_prev = tq->tq_task.task_prev;
+		t->tqent_next = &tq->tq_task;
+		t->tqent_prev = tq->tq_task.tqent_prev;
 	}
-	t->task_next->task_prev = t;
-	t->task_prev->task_next = t;
-	t->task_func = func;
-	t->task_arg = arg;
+	t->tqent_next->tqent_prev = t;
+	t->tqent_prev->tqent_next = t;
+	t->tqent_func = func;
+	t->tqent_arg = arg;
+
+	ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
 	cv_signal(&tq->tq_dispatch_cv);
 	mutex_exit(&tq->tq_lock);
 	return (1);
 }
 
+int
+taskq_empty_ent(taskq_ent_t *t)
+{
+	return t->tqent_next == NULL;
+}
+
+void
+taskq_init_ent(taskq_ent_t *t)
+{
+	t->tqent_next = NULL;
+	t->tqent_prev = NULL;
+	t->tqent_func = NULL;
+	t->tqent_arg = NULL;
+	t->tqent_flags = 0;
+}
+
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+    taskq_ent_t *t)
+{
+	ASSERT(func != NULL);
+	ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
+
+	/*
+	 * Mark it as a prealloc'd task.  This is important
+	 * to ensure that we don't free it later.
+	 */
+	t->tqent_flags |= TQENT_FLAG_PREALLOC;
+	/*
+	 * Enqueue the task to the underlying queue.
+	 */
+	mutex_enter(&tq->tq_lock);
+
+	if (flags & TQ_FRONT) {
+		t->tqent_next = tq->tq_task.tqent_next;
+		t->tqent_prev = &tq->tq_task;
+	} else {
+		t->tqent_next = &tq->tq_task;
+		t->tqent_prev = tq->tq_task.tqent_prev;
+	}
+	t->tqent_next->tqent_prev = t;
+	t->tqent_prev->tqent_next = t;
+	t->tqent_func = func;
+	t->tqent_arg = arg;
+	cv_signal(&tq->tq_dispatch_cv);
+	mutex_exit(&tq->tq_lock);
+}
+
 void
 taskq_wait(taskq_t *tq)
 {
 	mutex_enter(&tq->tq_lock);
-	while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
+	while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
 		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
 	mutex_exit(&tq->tq_lock);
 }
@@ -158,27 +209,32 @@ static void
 taskq_thread(void *arg)
 {
 	taskq_t *tq = arg;
-	task_t *t;
+	taskq_ent_t *t;
+	boolean_t prealloc;
 
 	mutex_enter(&tq->tq_lock);
 	while (tq->tq_flags & TASKQ_ACTIVE) {
-		if ((t = tq->tq_task.task_next) == &tq->tq_task) {
+		if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
 			if (--tq->tq_active == 0)
 				cv_broadcast(&tq->tq_wait_cv);
 			cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
 			tq->tq_active++;
 			continue;
 		}
-		t->task_prev->task_next = t->task_next;
-		t->task_next->task_prev = t->task_prev;
+		t->tqent_prev->tqent_next = t->tqent_next;
+		t->tqent_next->tqent_prev = t->tqent_prev;
+		t->tqent_next = NULL;
+		t->tqent_prev = NULL;
+		prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
 		mutex_exit(&tq->tq_lock);
 
 		rw_enter(&tq->tq_threadlock, RW_READER);
-		t->task_func(t->task_arg);
+		t->tqent_func(t->tqent_arg);
 		rw_exit(&tq->tq_threadlock);
 
 		mutex_enter(&tq->tq_lock);
-		task_free(tq, t);
+		if (!prealloc)
+			task_free(tq, t);
 	}
 	tq->tq_nthreads--;
 	cv_broadcast(&tq->tq_wait_cv);
@@ -217,8 +273,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
 	tq->tq_nthreads = nthreads;
 	tq->tq_minalloc = minalloc;
 	tq->tq_maxalloc = maxalloc;
-	tq->tq_task.task_next = &tq->tq_task;
-	tq->tq_task.task_prev = &tq->tq_task;
+	tq->tq_task.tqent_next = &tq->tq_task;
+	tq->tq_task.tqent_prev = &tq->tq_task;
 	tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
 
 	if (flags & TASKQ_PREPOPULATE) {
author	Garrett D'Amore <[email protected]>	2011-11-07 16:26:52 -0800
committer	Brian Behlendorf <[email protected]>	2011-12-14 09:19:30 -0800
commit	a38718a63d79116d6cb614dd2821e2a3955e5c8c (patch)
tree	05bdaf166ff065cedd2951380d92c65c7545b9cd /lib
parent	30a9524e45f76d4c7860bcbf5567aeaa8aeb7a82 (diff)