aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/zio.c
diff options
context:
space:
mode:
authorTom Caputi <[email protected]>2017-12-21 12:13:06 -0500
committerBrian Behlendorf <[email protected]>2017-12-21 09:13:06 -0800
commita8b2e30685c9214ccfd0181977540e080340df4e (patch)
treedb07450d097c27b1e3d627f8ae58387cad9d0038 /module/zfs/zio.c
parent993669a7bf17a26843630c547999be0b27483497 (diff)
Support re-prioritizing asynchronous prefetches
When sequential scrubs were merged, all calls to arc_read() (including prefetch IOs) were given ZIO_PRIORITY_ASYNC_READ. Unfortunately, this behaves badly with an existing issue where prefetch IOs cannot be re-prioritized after the issue. The result is that synchronous reads end up in the same vdev_queue as the scrub IOs and can have (in some workloads) multiple seconds of latency. This patch incorporates 2 changes. The first ensures that all scrub IOs are given ZIO_PRIORITY_SCRUB to allow the vdev_queue code to differentiate between these I/Os and user prefetches. Second, this patch introduces zio_change_priority() to provide the missing capability to upgrade a zio's priority. Reviewed by: George Wilson <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Tom Caputi <[email protected]> Closes #6921 Closes #6926
Diffstat (limited to 'module/zfs/zio.c')
-rw-r--r--module/zfs/zio.c43
1 file changed, 38 insertions(+), 5 deletions(-)
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 92e5a8dd8..c3b571e9a 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -539,6 +539,8 @@ zio_walk_children(zio_t *pio, zio_link_t **zl)
{
list_t *cl = &pio->io_child_list;
+ ASSERT(MUTEX_HELD(&pio->io_lock));
+
*zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
if (*zl == NULL)
return (NULL);
@@ -573,8 +575,8 @@ zio_add_child(zio_t *pio, zio_t *cio)
zl->zl_parent = pio;
zl->zl_child = cio;
- mutex_enter(&cio->io_lock);
mutex_enter(&pio->io_lock);
+ mutex_enter(&cio->io_lock);
ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
@@ -587,8 +589,8 @@ zio_add_child(zio_t *pio, zio_t *cio)
pio->io_child_count++;
cio->io_parent_count++;
- mutex_exit(&pio->io_lock);
mutex_exit(&cio->io_lock);
+ mutex_exit(&pio->io_lock);
}
static void
@@ -597,8 +599,8 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
ASSERT(zl->zl_parent == pio);
ASSERT(zl->zl_child == cio);
- mutex_enter(&cio->io_lock);
mutex_enter(&pio->io_lock);
+ mutex_enter(&cio->io_lock);
list_remove(&pio->io_child_list, zl);
list_remove(&cio->io_parent_list, zl);
@@ -606,8 +608,8 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
pio->io_child_count--;
cio->io_parent_count--;
- mutex_exit(&pio->io_lock);
mutex_exit(&cio->io_lock);
+ mutex_exit(&pio->io_lock);
kmem_cache_free(zio_link_cache, zl);
}
@@ -1963,14 +1965,16 @@ zio_reexecute(zio_t *pio)
* cannot be affected by any side effects of reexecuting 'cio'.
*/
zio_link_t *zl = NULL;
+ mutex_enter(&pio->io_lock);
for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
cio_next = zio_walk_children(pio, &zl);
- mutex_enter(&pio->io_lock);
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
pio->io_children[cio->io_child_type][w]++;
mutex_exit(&pio->io_lock);
zio_reexecute(cio);
+ mutex_enter(&pio->io_lock);
}
+ mutex_exit(&pio->io_lock);
/*
* Now that all children have been reexecuted, execute the parent.
@@ -3475,6 +3479,35 @@ zio_vdev_io_done(zio_t *zio)
}
/*
+ * This function is used to change the priority of an existing zio that is
+ * currently in-flight. This is used by the arc to upgrade priority in the
+ * event that a demand read is made for a block that is currently queued
+ * as a scrub or async read IO. Otherwise, the high priority read request
+ * would end up having to wait for the lower priority IO.
+ */
+void
+zio_change_priority(zio_t *pio, zio_priority_t priority)
+{
+ zio_t *cio, *cio_next;
+ zio_link_t *zl = NULL;
+
+ ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+
+ if (pio->io_vd != NULL && pio->io_vd->vdev_ops->vdev_op_leaf) {
+ vdev_queue_change_io_priority(pio, priority);
+ } else {
+ pio->io_priority = priority;
+ }
+
+ mutex_enter(&pio->io_lock);
+ for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
+ cio_next = zio_walk_children(pio, &zl);
+ zio_change_priority(cio, priority);
+ }
+ mutex_exit(&pio->io_lock);
+}
+
+/*
* For non-raidz ZIOs, we can just copy aside the bad data read from the
* disk, and use that to finish the checksum ereport later.
*/