summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/zfs_context.h17
-rw-r--r--include/sys/zio.h6
-rw-r--r--lib/libzpool/taskq.c126
-rw-r--r--module/zfs/spa.c5
-rw-r--r--module/zfs/zio.c21
5 files changed, 134 insertions, 41 deletions
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index a32848941..4abafcc6f 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -22,6 +22,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
#ifndef _SYS_ZFS_CONTEXT_H
#define _SYS_ZFS_CONTEXT_H
@@ -365,6 +368,16 @@ typedef struct taskq taskq_t;
typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *);
+typedef struct taskq_ent {
+ struct taskq_ent *tqent_next;
+ struct taskq_ent *tqent_prev;
+ task_func_t *tqent_func;
+ void *tqent_arg;
+ uintptr_t tqent_flags;
+} taskq_ent_t;
+
+#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */
+
#define TASKQ_PREPOPULATE 0x0001
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
@@ -385,6 +398,10 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
(taskq_create(a, b, maxclsyspri, d, e, f))
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
+extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
+ taskq_ent_t *);
+extern int taskq_empty_ent(taskq_ent_t *);
+extern void taskq_init_ent(taskq_ent_t *);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, kthread_t *);
diff --git a/include/sys/zio.h b/include/sys/zio.h
index a46918174..c0da4e2d7 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
#ifndef _ZIO_H
#define _ZIO_H
@@ -423,6 +426,9 @@ struct zio {
/* FMA state */
zio_cksum_report_t *io_cksum_report;
uint64_t io_ena;
+
+ /* Taskq dispatching state */
+ taskq_ent_t io_tqent;
};
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
index 36c0ec7df..6143a9189 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -22,19 +22,15 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
#include <sys/zfs_context.h>
int taskq_now;
taskq_t *system_taskq;
-typedef struct task {
- struct task *task_next;
- struct task *task_prev;
- task_func_t *task_func;
- void *task_arg;
-} task_t;
-
#define TASKQ_ACTIVE 0x00010000
struct taskq {
@@ -51,18 +47,19 @@ struct taskq {
int tq_maxalloc;
kcondvar_t tq_maxalloc_cv;
int tq_maxalloc_wait;
- task_t *tq_freelist;
- task_t tq_task;
+ taskq_ent_t *tq_freelist;
+ taskq_ent_t tq_task;
};
-static task_t *
+static taskq_ent_t *
task_alloc(taskq_t *tq, int tqflags)
{
- task_t *t;
+ taskq_ent_t *t;
int rv;
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
- tq->tq_freelist = t->task_next;
+ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+ tq->tq_freelist = t->tqent_next;
} else {
if (tq->tq_nalloc >= tq->tq_maxalloc) {
if (!(tqflags & KM_SLEEP))
@@ -87,25 +84,28 @@ again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
}
mutex_exit(&tq->tq_lock);
- t = kmem_alloc(sizeof (task_t), tqflags);
+ t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
mutex_enter(&tq->tq_lock);
- if (t != NULL)
+ if (t != NULL) {
+ /* Make sure we start without any flags */
+ t->tqent_flags = 0;
tq->tq_nalloc++;
+ }
}
return (t);
}
static void
-task_free(taskq_t *tq, task_t *t)
+task_free(taskq_t *tq, taskq_ent_t *t)
{
if (tq->tq_nalloc <= tq->tq_minalloc) {
- t->task_next = tq->tq_freelist;
+ t->tqent_next = tq->tq_freelist;
tq->tq_freelist = t;
} else {
tq->tq_nalloc--;
mutex_exit(&tq->tq_lock);
- kmem_free(t, sizeof (task_t));
+ kmem_free(t, sizeof (taskq_ent_t));
mutex_enter(&tq->tq_lock);
}
@@ -116,7 +116,7 @@ task_free(taskq_t *tq, task_t *t)
taskqid_t
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
{
- task_t *t;
+ taskq_ent_t *t;
if (taskq_now) {
func(arg);
@@ -130,26 +130,77 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
return (0);
}
if (tqflags & TQ_FRONT) {
- t->task_next = tq->tq_task.task_next;
- t->task_prev = &tq->tq_task;
+ t->tqent_next = tq->tq_task.tqent_next;
+ t->tqent_prev = &tq->tq_task;
} else {
- t->task_next = &tq->tq_task;
- t->task_prev = tq->tq_task.task_prev;
+ t->tqent_next = &tq->tq_task;
+ t->tqent_prev = tq->tq_task.tqent_prev;
}
- t->task_next->task_prev = t;
- t->task_prev->task_next = t;
- t->task_func = func;
- t->task_arg = arg;
+ t->tqent_next->tqent_prev = t;
+ t->tqent_prev->tqent_next = t;
+ t->tqent_func = func;
+ t->tqent_arg = arg;
+
+ ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
+
cv_signal(&tq->tq_dispatch_cv);
mutex_exit(&tq->tq_lock);
return (1);
}
+int
+taskq_empty_ent(taskq_ent_t *t)
+{
+ return t->tqent_next == NULL;
+}
+
+void
+taskq_init_ent(taskq_ent_t *t)
+{
+ t->tqent_next = NULL;
+ t->tqent_prev = NULL;
+ t->tqent_func = NULL;
+ t->tqent_arg = NULL;
+ t->tqent_flags = 0;
+}
+
+void
+taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
+ taskq_ent_t *t)
+{
+ ASSERT(func != NULL);
+ ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
+
+ /*
+ * Mark it as a prealloc'd task. This is important
+ * to ensure that we don't free it later.
+ */
+ t->tqent_flags |= TQENT_FLAG_PREALLOC;
+ /*
+ * Enqueue the task to the underlying queue.
+ */
+ mutex_enter(&tq->tq_lock);
+
+ if (flags & TQ_FRONT) {
+ t->tqent_next = tq->tq_task.tqent_next;
+ t->tqent_prev = &tq->tq_task;
+ } else {
+ t->tqent_next = &tq->tq_task;
+ t->tqent_prev = tq->tq_task.tqent_prev;
+ }
+ t->tqent_next->tqent_prev = t;
+ t->tqent_prev->tqent_next = t;
+ t->tqent_func = func;
+ t->tqent_arg = arg;
+ cv_signal(&tq->tq_dispatch_cv);
+ mutex_exit(&tq->tq_lock);
+}
+
void
taskq_wait(taskq_t *tq)
{
mutex_enter(&tq->tq_lock);
- while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
+ while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
mutex_exit(&tq->tq_lock);
}
@@ -158,27 +209,32 @@ static void
taskq_thread(void *arg)
{
taskq_t *tq = arg;
- task_t *t;
+ taskq_ent_t *t;
+ boolean_t prealloc;
mutex_enter(&tq->tq_lock);
while (tq->tq_flags & TASKQ_ACTIVE) {
- if ((t = tq->tq_task.task_next) == &tq->tq_task) {
+ if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
if (--tq->tq_active == 0)
cv_broadcast(&tq->tq_wait_cv);
cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
tq->tq_active++;
continue;
}
- t->task_prev->task_next = t->task_next;
- t->task_next->task_prev = t->task_prev;
+ t->tqent_prev->tqent_next = t->tqent_next;
+ t->tqent_next->tqent_prev = t->tqent_prev;
+ t->tqent_next = NULL;
+ t->tqent_prev = NULL;
+ prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
mutex_exit(&tq->tq_lock);
rw_enter(&tq->tq_threadlock, RW_READER);
- t->task_func(t->task_arg);
+ t->tqent_func(t->tqent_arg);
rw_exit(&tq->tq_threadlock);
mutex_enter(&tq->tq_lock);
- task_free(tq, t);
+ if (!prealloc)
+ task_free(tq, t);
}
tq->tq_nthreads--;
cv_broadcast(&tq->tq_wait_cv);
@@ -217,8 +273,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
tq->tq_nthreads = nthreads;
tq->tq_minalloc = minalloc;
tq->tq_maxalloc = maxalloc;
- tq->tq_task.task_next = &tq->tq_task;
- tq->tq_task.task_prev = &tq->tq_task;
+ tq->tq_task.tqent_next = &tq->tq_task;
+ tq->tq_task.tqent_prev = &tq->tq_task;
tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
if (flags & TASKQ_PREPOPULATE) {
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 868a0d9d2..0b9649723 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
/*
* This file contains all the routines used when modifying on-disk SPA state.
@@ -665,7 +668,7 @@ spa_create_zio_taskqs(spa_t *spa)
const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
enum zti_modes mode = ztip->zti_mode;
uint_t value = ztip->zti_value;
- uint_t flags = TASKQ_PREPOPULATE;
+ uint_t flags = 0;
char name[32];
if (t == ZIO_TYPE_WRITE)
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 6b03be6f3..c96442d0b 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -570,6 +571,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio_add_child(pio, zio);
}
+ taskq_init_ent(&zio->io_tqent);
+
return (zio);
}
@@ -1073,7 +1076,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
{
spa_t *spa = zio->io_spa;
zio_type_t t = zio->io_type;
- int flags = TQ_NOSLEEP | (cutinline ? TQ_FRONT : 0);
+ int flags = (cutinline ? TQ_FRONT : 0);
/*
* If we're a config writer or a probe, the normal issue and
@@ -1098,8 +1101,14 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
- while (taskq_dispatch(spa->spa_zio_taskq[t][q],
- (task_func_t *)zio_execute, zio, flags) == 0); /* do nothing */
+ /*
+ * NB: We are assuming that the zio can only be dispatched
+ * to a single taskq at a time. It would be a grievous error
+ * to dispatch the zio to another taskq at the same time.
+ */
+ ASSERT(taskq_empty_ent(&zio->io_tqent));
+ taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
+ (task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
}
static boolean_t
@@ -2947,9 +2956,11 @@ zio_done(zio_t *zio)
* Reexecution is potentially a huge amount of work.
* Hand it off to the otherwise-unused claim taskq.
*/
- (void) taskq_dispatch(
+ ASSERT(taskq_empty_ent(&zio->io_tqent));
+ (void) taskq_dispatch_ent(
zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
- (task_func_t *)zio_reexecute, zio, TQ_SLEEP);
+ (task_func_t *)zio_reexecute, zio, 0,
+ &zio->io_tqent);
}
return (ZIO_PIPELINE_STOP);
}