aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/spa.c203
-rw-r--r--module/zfs/vdev_file.c2
-rw-r--r--module/zfs/zio.c28
3 files changed, 158 insertions, 75 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 22fa7878c..e022c3258 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -78,23 +78,25 @@
#include "zfs_comutil.h"
typedef enum zti_modes {
- zti_mode_fixed, /* value is # of threads (min 1) */
- zti_mode_online_percent, /* value is % of online CPUs */
- zti_mode_batch, /* cpu-intensive; value is ignored */
- zti_mode_null, /* don't create a taskq */
- zti_nmodes
+ ZTI_MODE_FIXED, /* value is # of threads (min 1) */
+ ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
+ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
+ ZTI_MODE_NULL, /* don't create a taskq */
+ ZTI_NMODES
} zti_modes_t;
-#define ZTI_FIX(n) { zti_mode_fixed, (n) }
-#define ZTI_PCT(n) { zti_mode_online_percent, (n) }
-#define ZTI_BATCH { zti_mode_batch, 0 }
-#define ZTI_NULL { zti_mode_null, 0 }
+#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
+#define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
+#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
+#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
-#define ZTI_ONE ZTI_FIX(1)
+#define ZTI_N(n) ZTI_P(n, 1)
+#define ZTI_ONE ZTI_N(1)
typedef struct zio_taskq_info {
- enum zti_modes zti_mode;
+ zti_modes_t zti_mode;
uint_t zti_value;
+ uint_t zti_count;
} zio_taskq_info_t;
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
@@ -102,17 +104,30 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
};
/*
- * Define the taskq threads for the following I/O types:
- * NULL, READ, WRITE, FREE, CLAIM, and IOCTL
+ * This table defines the taskq settings for each ZFS I/O type. When
+ * initializing a pool, we use this table to create an appropriately sized
+ * taskq. Some operations are low volume and therefore have a small, static
+ * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
+ * macros. Other operations process a large amount of data; the ZTI_BATCH
+ * macro causes us to create a taskq oriented for throughput. Some operations
+ * are so high frequency and short-lived that the taskq itself can become a a
+ * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
+ * additional degree of parallelism specified by the number of threads per-
+ * taskq and the number of taskqs; when dispatching an event in this case, the
+ * particular taskq is chosen at random.
+ *
+ * The different taskq priorities are to handle the different contexts (issue
+ * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
+ * need to be handled with minimum delay.
*/
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL },
- { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(16), ZTI_FIX(5) },
- { ZTI_FIX(8), ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
- { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
+ { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */
+ { ZTI_BATCH, ZTI_N(5), ZTI_N(16), ZTI_N(5) }, /* WRITE */
+ { ZTI_P(4, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
+ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
};
static dsl_syncfunc_t spa_sync_version;
@@ -794,48 +809,121 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
offsetof(spa_error_entry_t, se_avl));
}
-static taskq_t *
-spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
- uint_t value)
+static void
+spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
{
- uint_t flags = TASKQ_PREPOPULATE;
+ const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
+ enum zti_modes mode = ztip->zti_mode;
+ uint_t value = ztip->zti_value;
+ uint_t count = ztip->zti_count;
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ char name[32];
+ uint_t i, flags = 0;
boolean_t batch = B_FALSE;
- switch (mode) {
- case zti_mode_null:
- return (NULL); /* no taskq needed */
+ if (mode == ZTI_MODE_NULL) {
+ tqs->stqs_count = 0;
+ tqs->stqs_taskq = NULL;
+ return;
+ }
- case zti_mode_fixed:
- ASSERT3U(value, >=, 1);
- value = MAX(value, 1);
- break;
+ ASSERT3U(count, >, 0);
- case zti_mode_batch:
- batch = B_TRUE;
- flags |= TASKQ_THREADS_CPU_PCT;
- value = zio_taskq_batch_pct;
- break;
+ tqs->stqs_count = count;
+ tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
- case zti_mode_online_percent:
- flags |= TASKQ_THREADS_CPU_PCT;
- break;
+ for (i = 0; i < count; i++) {
+ taskq_t *tq;
- default:
- panic("unrecognized mode for %s taskq (%u:%u) in "
- "spa_activate()",
- name, mode, value);
- break;
+ switch (mode) {
+ case ZTI_MODE_FIXED:
+ ASSERT3U(value, >=, 1);
+ value = MAX(value, 1);
+ break;
+
+ case ZTI_MODE_BATCH:
+ batch = B_TRUE;
+ flags |= TASKQ_THREADS_CPU_PCT;
+ value = zio_taskq_batch_pct;
+ break;
+
+ case ZTI_MODE_ONLINE_PERCENT:
+ flags |= TASKQ_THREADS_CPU_PCT;
+ break;
+
+ default:
+ panic("unrecognized mode for %s_%s taskq (%u:%u) in "
+ "spa_activate()",
+ zio_type_name[t], zio_taskq_types[q], mode, value);
+ break;
+ }
+
+ if (count > 1) {
+ (void) snprintf(name, sizeof (name), "%s_%s_%u",
+ zio_type_name[t], zio_taskq_types[q], i);
+ } else {
+ (void) snprintf(name, sizeof (name), "%s_%s",
+ zio_type_name[t], zio_taskq_types[q]);
+ }
+
+ if (zio_taskq_sysdc && spa->spa_proc != &p0) {
+ if (batch)
+ flags |= TASKQ_DC_BATCH;
+
+ tq = taskq_create_sysdc(name, value, 50, INT_MAX,
+ spa->spa_proc, zio_taskq_basedc, flags);
+ } else {
+ tq = taskq_create_proc(name, value, maxclsyspri, 50,
+ INT_MAX, spa->spa_proc, flags);
+ }
+
+ tqs->stqs_taskq[i] = tq;
}
+}
+
+static void
+spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
+{
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ uint_t i;
- if (zio_taskq_sysdc && spa->spa_proc != &p0) {
- if (batch)
- flags |= TASKQ_DC_BATCH;
+ if (tqs->stqs_taskq == NULL) {
+ ASSERT3U(tqs->stqs_count, ==, 0);
+ return;
+ }
- return (taskq_create_sysdc(name, value, 50, INT_MAX,
- spa->spa_proc, zio_taskq_basedc, flags));
+ for (i = 0; i < tqs->stqs_count; i++) {
+ ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
+ taskq_destroy(tqs->stqs_taskq[i]);
}
- return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
- spa->spa_proc, flags));
+
+ kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *));
+ tqs->stqs_taskq = NULL;
+}
+
+/*
+ * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
+ * Note that a type may have multiple discrete taskqs to avoid lock contention
+ * on the taskq itself. In that case we choose which taskq at random by using
+ * the low bits of gethrtime().
+ */
+void
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+ task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
+{
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ taskq_t *tq;
+
+ ASSERT3P(tqs->stqs_taskq, !=, NULL);
+ ASSERT3U(tqs->stqs_count, !=, 0);
+
+ if (tqs->stqs_count == 1) {
+ tq = tqs->stqs_taskq[0];
+ } else {
+ tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count];
+ }
+
+ taskq_dispatch_ent(tq, func, arg, flags, ent);
}
static void
@@ -845,16 +933,7 @@ spa_create_zio_taskqs(spa_t *spa)
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
- const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
- enum zti_modes mode = ztip->zti_mode;
- uint_t value = ztip->zti_value;
- char name[32];
-
- (void) snprintf(name, sizeof (name),
- "%s_%s", zio_type_name[t], zio_taskq_types[q]);
-
- spa->spa_zio_taskq[t][q] =
- spa_taskq_create(spa, name, mode, value);
+ spa_taskqs_init(spa, t, q);
}
}
}
@@ -1017,9 +1096,7 @@ spa_deactivate(spa_t *spa)
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
- if (spa->spa_zio_taskq[t][q] != NULL)
- taskq_destroy(spa->spa_zio_taskq[t][q]);
- spa->spa_zio_taskq[t][q] = NULL;
+ spa_taskqs_fini(spa, t, q);
}
}
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index 45c917b09..06999a880 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -185,7 +185,7 @@ vdev_file_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
- taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE],
+ spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE,
vdev_file_io_strategy, zio, 0, &zio->io_tqent);
return (ZIO_PIPELINE_STOP);
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index fda76123e..099349186 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1165,7 +1165,7 @@ zio_free_bp_init(zio_t *zio)
*/
static void
-zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
+zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
{
spa_t *spa = zio->io_spa;
zio_type_t t = zio->io_type;
@@ -1186,10 +1186,11 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
t = ZIO_TYPE_NULL;
/*
- * If this is a high priority I/O, then use the high priority taskq.
+ * If this is a high priority I/O, then use the high priority taskq if
+ * available.
*/
if (zio->io_priority == ZIO_PRIORITY_NOW &&
- spa->spa_zio_taskq[t][q + 1] != NULL)
+ spa->spa_zio_taskq[t][q + 1].stqs_count != 0)
q++;
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
@@ -1200,20 +1201,25 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
* to dispatch the zio to another taskq at the same time.
*/
ASSERT(taskq_empty_ent(&zio->io_tqent));
- taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
- (task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
+ spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio,
+ flags, &zio->io_tqent);
}
static boolean_t
-zio_taskq_member(zio_t *zio, enum zio_taskq_type q)
+zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
{
kthread_t *executor = zio->io_executor;
spa_t *spa = zio->io_spa;
zio_type_t t;
- for (t = 0; t < ZIO_TYPES; t++)
- if (taskq_member(spa->spa_zio_taskq[t][q], executor))
- return (B_TRUE);
+ for (t = 0; t < ZIO_TYPES; t++) {
+ spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+ uint_t i;
+ for (i = 0; i < tqs->stqs_count; i++) {
+ if (taskq_member(tqs->stqs_taskq[i], executor))
+ return (B_TRUE);
+ }
+ }
return (B_FALSE);
}
@@ -3078,8 +3084,8 @@ zio_done(zio_t *zio)
* Hand it off to the otherwise-unused claim taskq.
*/
ASSERT(taskq_empty_ent(&zio->io_tqent));
- taskq_dispatch_ent(
- zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
+ spa_taskq_dispatch_ent(zio->io_spa,
+ ZIO_TYPE_CLAIM, ZIO_TASKQ_ISSUE,
(task_func_t *)zio_reexecute, zio, 0,
&zio->io_tqent);
}