about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author George Wilson <[email protected]> 2014-10-20 22:07:45 +0000
committer Brian Behlendorf <[email protected]> 2015-04-30 15:07:47 -0700
commit 98b254188a730553361adfabca9f658421be2b82 (patch)
tree af1e47047131dec542f7f0637557cba5c22a1d46
parent 8dd86a10cf836d64cddd9c8693f449686e35788c (diff)
Illumos #5244 - zio pipeline callers should explicitly invoke next stage
5244 zio pipeline callers should explicitly invoke next stage

Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Alex Reece <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Richard Elling <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Reviewed by: Steven Hartland <[email protected]>
Approved by: Gordon Ross <[email protected]>

References:
    https://www.illumos.org/issues/5244
    https://github.com/illumos/illumos-gate/commit/738f37b

Porting Notes:

1. The unported "2932 support crash dumps to raidz, etc. pools" caused a
   merge conflict due to a copyright difference in module/zfs/vdev_raidz.c.
2. The unported "4128 disks in zpools never go away when pulled" and
   additional Linux-specific changes caused merge conflicts in
   module/zfs/vdev_disk.c.

Ported-by: Richard Yao <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2828
-rw-r--r--  include/sys/vdev_impl.h    2
-rw-r--r--  include/sys/zio.h          3
-rw-r--r--  lib/libzpool/taskq.c       4
-rw-r--r--  module/zfs/vdev_disk.c    19
-rw-r--r--  module/zfs/vdev_file.c    14
-rw-r--r--  module/zfs/vdev_mirror.c   9
-rw-r--r--  module/zfs/vdev_missing.c  6
-rw-r--r--  module/zfs/vdev_raidz.c    9
-rw-r--r--  module/zfs/zio.c          23
9 files changed, 55 insertions, 34 deletions
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 1048dec5e..43c4c7941 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -60,7 +60,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
uint64_t *ashift);
typedef void vdev_close_func_t(vdev_t *vd);
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
-typedef int vdev_io_start_func_t(zio_t *zio);
+typedef void vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
typedef void vdev_hold_func_t(vdev_t *vd);
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 0368d9c59..d31b2acdd 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -153,9 +153,6 @@ typedef enum zio_priority {
ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
} zio_priority_t;
-#define ZIO_PIPELINE_CONTINUE 0x100
-#define ZIO_PIPELINE_STOP 0x101
-
enum zio_flag {
/*
* Flags inherited by gang, ddt, and vdev children,
diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c
index 72807f6a3..d63bc28e2 100644
--- a/lib/libzpool/taskq.c
+++ b/lib/libzpool/taskq.c
@@ -25,6 +25,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2012 Garrett D'Amore <[email protected]>. All rights reserved.
+ * Copyright (c) 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -33,8 +34,10 @@ int taskq_now;
taskq_t *system_taskq;
#define TASKQ_ACTIVE 0x00010000
+#define TASKQ_NAMELEN 31
struct taskq {
+ char tq_name[TASKQ_NAMELEN + 1];
kmutex_t tq_lock;
krwlock_t tq_threadlock;
kcondvar_t tq_dispatch_cv;
@@ -280,6 +283,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
+ (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1);
tq->tq_flags = flags | TASKQ_ACTIVE;
tq->tq_active = nthreads;
tq->tq_nthreads = nthreads;
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index 7f2263457..db13b7bc4 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -657,7 +657,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
return (0);
}
-static int
+static void
vdev_disk_io_start(zio_t *zio)
{
vdev_t *v = zio->io_vd;
@@ -669,7 +669,8 @@ vdev_disk_io_start(zio_t *zio)
if (!vdev_readable(v)) {
zio->io_error = SET_ERROR(ENXIO);
- return (ZIO_PIPELINE_CONTINUE);
+ zio_interrupt(zio);
+ return;
}
switch (zio->io_cmd) {
@@ -685,7 +686,7 @@ vdev_disk_io_start(zio_t *zio)
error = vdev_disk_io_flush(vd->vd_bdev, zio);
if (error == 0)
- return (ZIO_PIPELINE_STOP);
+ return;
zio->io_error = error;
if (error == ENOTSUP)
@@ -697,8 +698,8 @@ vdev_disk_io_start(zio_t *zio)
zio->io_error = SET_ERROR(ENOTSUP);
}
- return (ZIO_PIPELINE_CONTINUE);
-
+ zio_execute(zio);
+ return;
case ZIO_TYPE_WRITE:
flags = WRITE;
break;
@@ -709,17 +710,17 @@ vdev_disk_io_start(zio_t *zio)
default:
zio->io_error = SET_ERROR(ENOTSUP);
- return (ZIO_PIPELINE_CONTINUE);
+ zio_interrupt(zio);
+ return;
}
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
zio->io_size, zio->io_offset, flags);
if (error) {
zio->io_error = error;
- return (ZIO_PIPELINE_CONTINUE);
+ zio_interrupt(zio);
+ return;
}
-
- return (ZIO_PIPELINE_STOP);
}
static void
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index 7f43ad800..3c3a13993 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -172,7 +172,7 @@ vdev_file_io_fsync(void *arg)
zio_interrupt(zio);
}
-static int
+static void
vdev_file_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
@@ -182,7 +182,8 @@ vdev_file_io_start(zio_t *zio)
/* XXPOLICY */
if (!vdev_readable(vd)) {
zio->io_error = SET_ERROR(ENXIO);
- return (ZIO_PIPELINE_CONTINUE);
+ zio_interrupt(zio);
+ return;
}
switch (zio->io_cmd) {
@@ -201,7 +202,7 @@ vdev_file_io_start(zio_t *zio)
if (spl_fstrans_check()) {
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
- return (ZIO_PIPELINE_STOP);
+ return;
}
zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
@@ -211,13 +212,12 @@ vdev_file_io_start(zio_t *zio)
zio->io_error = SET_ERROR(ENOTSUP);
}
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
+ return;
}
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
TQ_SLEEP), !=, 0);
-
- return (ZIO_PIPELINE_STOP);
}
/* ARGSUSED */
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
index 77c3d8d38..6b699e883 100644
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -327,7 +327,7 @@ vdev_mirror_child_select(zio_t *zio)
return (-1);
}
-static int
+static void
vdev_mirror_io_start(zio_t *zio)
{
mirror_map_t *mm;
@@ -352,7 +352,8 @@ vdev_mirror_io_start(zio_t *zio)
zio->io_type, zio->io_priority, 0,
vdev_mirror_scrub_done, mc));
}
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
+ return;
}
/*
* For normal reads just pick one child.
@@ -378,7 +379,7 @@ vdev_mirror_io_start(zio_t *zio)
c++;
}
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
}
static int
diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c
index b9eb99d18..228757334 100644
--- a/module/zfs/vdev_missing.c
+++ b/module/zfs/vdev_missing.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
/*
@@ -66,11 +66,11 @@ vdev_missing_close(vdev_t *vd)
}
/* ARGSUSED */
-static int
+static void
vdev_missing_io_start(zio_t *zio)
{
zio->io_error = SET_ERROR(ENOTSUP);
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
}
/* ARGSUSED */
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index 493b332c4..b9479092c 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -1567,7 +1567,7 @@ vdev_raidz_child_done(zio_t *zio)
* vdevs have had errors, then create zio read operations to the parity
* columns' VDevs as well.
*/
-static int
+static void
vdev_raidz_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
@@ -1611,7 +1611,8 @@ vdev_raidz_io_start(zio_t *zio)
ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL));
}
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
+ return;
}
ASSERT(zio->io_type == ZIO_TYPE_READ);
@@ -1651,7 +1652,7 @@ vdev_raidz_io_start(zio_t *zio)
}
}
- return (ZIO_PIPELINE_CONTINUE);
+ zio_execute(zio);
}
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 066f04f18..032341378 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -59,6 +59,9 @@ kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
int zio_delay_max = ZIO_DELAY_MAX;
+#define ZIO_PIPELINE_CONTINUE 0x100
+#define ZIO_PIPELINE_STOP 0x101
+
/*
* The following actions directly effect the spa's sync-to-convergence logic.
* The values below define the sync pass when we start performing the action.
@@ -2526,6 +2529,18 @@ zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp)
* Read and write to physical devices
* ==========================================================================
*/
+
+
+/*
+ * Issue an I/O to the underlying vdev. Typically the issue pipeline
+ * stops after this stage and will resume upon I/O completion.
+ * However, there are instances where the vdev layer may need to
+ * continue the pipeline when an I/O was not issued. Since the I/O
+ * that was sent to the vdev layer might be different than the one
+ * currently active in the pipeline (see vdev_queue_io()), we explicitly
+ * force the underlying vdev layers to call either zio_execute() or
+ * zio_interrupt() to ensure that the pipeline continues with the correct I/O.
+ */
static int
zio_vdev_io_start(zio_t *zio)
{
@@ -2543,7 +2558,8 @@ zio_vdev_io_start(zio_t *zio)
/*
* The mirror_ops handle multiple DVAs in a single BP.
*/
- return (vdev_mirror_ops.vdev_op_io_start(zio));
+ vdev_mirror_ops.vdev_op_io_start(zio);
+ return (ZIO_PIPELINE_STOP);
}
/*
@@ -2551,7 +2567,7 @@ zio_vdev_io_start(zio_t *zio)
* can quickly react to certain workloads. In particular, we care
* about non-scrubbing, top-level reads and writes with the following
* characteristics:
- * - synchronous writes of user data to non-slog devices
+ * - synchronous writes of user data to non-slog devices
* - any reads of user data
* When these conditions are met, adjust the timestamp of spa_last_io
* which allows the scan thread to adjust its workload accordingly.
@@ -2637,7 +2653,8 @@ zio_vdev_io_start(zio_t *zio)
}
}
- return (vd->vdev_ops->vdev_op_io_start(zio));
+ vd->vdev_ops->vdev_op_io_start(zio);
+ return (ZIO_PIPELINE_STOP);
}
static int