author     Tom Caputi <[email protected]>          2017-12-21 12:13:06 -0500
committer  Brian Behlendorf <[email protected]>    2017-12-21 09:13:06 -0800
commit     a8b2e30685c9214ccfd0181977540e080340df4e (patch)
tree       db07450d097c27b1e3d627f8ae58387cad9d0038 /module/zfs/arc.c
parent     993669a7bf17a26843630c547999be0b27483497 (diff)
Support re-prioritizing asynchronous prefetches
When sequential scrubs were merged, all calls to arc_read()
(including prefetch IOs) were given ZIO_PRIORITY_ASYNC_READ.
Unfortunately, this interacts badly with an existing issue where
prefetch IOs cannot be re-prioritized after they have been
issued. The
result is that synchronous reads end up in the same vdev_queue
as the scrub IOs and can have (in some workloads) multiple
seconds of latency.
This patch incorporates two changes. The first ensures that all
scrub IOs are given ZIO_PRIORITY_SCRUB, allowing the vdev_queue
code to differentiate between these IOs and user prefetches.
The second introduces zio_change_priority() to provide the
missing capability to upgrade a zio's priority.
Reviewed by: George Wilson <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #6921
Closes #6926
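
To illustrate the idea behind the second change, the sketch below models
re-prioritization with a toy pending-I/O list. The names used here
(io_priority_t, pending_io_t, upgrade_pending_priority()) are hypothetical
stand-ins, not ZFS code; in the patch itself the promotion is performed by
zio_change_priority() on the in-flight zio.

#include <stdio.h>
#include <stdint.h>

typedef enum {
	PRIO_SYNC_READ,		/* demand reads: serviced first */
	PRIO_ASYNC_READ,	/* prefetches */
	PRIO_SCRUB		/* scrub/resilver reads */
} io_priority_t;

typedef struct pending_io {
	uint64_t offset;		/* block being read */
	io_priority_t priority;		/* current queue class */
	struct pending_io *next;
} pending_io_t;

/*
 * Hypothetical helper: if a prefetch or scrub read for 'offset' is still
 * pending, promote it to the demand-read class so it is no longer stuck
 * behind other asynchronous I/O.
 */
static void
upgrade_pending_priority(pending_io_t *list, uint64_t offset)
{
	for (pending_io_t *p = list; p != NULL; p = p->next) {
		if (p->offset == offset && p->priority != PRIO_SYNC_READ) {
			p->priority = PRIO_SYNC_READ;
			(void) printf("upgraded I/O at offset %llu to sync\n",
			    (unsigned long long)offset);
		}
	}
}

int
main(void)
{
	pending_io_t scrub = { 4096, PRIO_SCRUB, NULL };
	pending_io_t prefetch = { 8192, PRIO_ASYNC_READ, &scrub };

	/* A demand read for offset 8192 arrives while the prefetch is queued. */
	upgrade_pending_priority(&prefetch, 8192);
	return (0);
}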
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r--   module/zfs/arc.c   54
1 file changed, 28 insertions, 26 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 10b1c60d5..476351eb4 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -663,7 +663,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_dnode_limit;
 	kstat_named_t arcstat_meta_max;
 	kstat_named_t arcstat_meta_min;
-	kstat_named_t arcstat_sync_wait_for_async;
+	kstat_named_t arcstat_async_upgrade_sync;
 	kstat_named_t arcstat_demand_hit_predictive_prefetch;
 	kstat_named_t arcstat_demand_hit_prescient_prefetch;
 	kstat_named_t arcstat_need_free;
@@ -763,7 +763,7 @@ static arc_stats_t arc_stats = {
 	{ "arc_dnode_limit", KSTAT_DATA_UINT64 },
 	{ "arc_meta_max", KSTAT_DATA_UINT64 },
 	{ "arc_meta_min", KSTAT_DATA_UINT64 },
-	{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
+	{ "async_upgrade_sync", KSTAT_DATA_UINT64 },
 	{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
 	{ "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
 	{ "arc_need_free", KSTAT_DATA_UINT64 },
@@ -5911,32 +5911,20 @@ top:
 		*arc_flags |= ARC_FLAG_CACHED;
 
 		if (HDR_IO_IN_PROGRESS(hdr)) {
+			zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
+			ASSERT3P(head_zio, !=, NULL);
 			if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
 			    priority == ZIO_PRIORITY_SYNC_READ) {
 				/*
-				 * This sync read must wait for an
-				 * in-progress async read (e.g. a predictive
-				 * prefetch).  Async reads are queued
-				 * separately at the vdev_queue layer, so
-				 * this is a form of priority inversion.
-				 * Ideally, we would "inherit" the demand
-				 * i/o's priority by moving the i/o from
-				 * the async queue to the synchronous queue,
-				 * but there is currently no mechanism to do
-				 * so.  Track this so that we can evaluate
-				 * the magnitude of this potential performance
-				 * problem.
-				 *
-				 * Note that if the prefetch i/o is already
-				 * active (has been issued to the device),
-				 * the prefetch improved performance, because
-				 * we issued it sooner than we would have
-				 * without the prefetch.
+				 * This is a sync read that needs to wait for
+				 * an in-flight async read. Request that the
+				 * zio have its priority upgraded.
 				 */
-				DTRACE_PROBE1(arc__sync__wait__for__async,
+				zio_change_priority(head_zio, priority);
+				DTRACE_PROBE1(arc__async__upgrade__sync,
 				    arc_buf_hdr_t *, hdr);
-				ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+				ARCSTAT_BUMP(arcstat_async_upgrade_sync);
 			}
 			if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
 				arc_hdr_clear_flags(hdr,
@@ -5966,6 +5954,7 @@ top:
 			    spa, NULL, NULL, NULL, zio_flags);
 
 			ASSERT3P(acb->acb_done, !=, NULL);
+			acb->acb_zio_head = head_zio;
 			acb->acb_next = hdr->b_l1hdr.b_acb;
 			hdr->b_l1hdr.b_acb = acb;
 			mutex_exit(hash_lock);
@@ -6182,14 +6171,17 @@ top:
 		vd = NULL;
 	}
 
-	if (priority == ZIO_PRIORITY_ASYNC_READ)
+	/*
+	 * We count both async reads and scrub IOs as asynchronous so
+	 * that both can be upgraded in the event of a cache hit while
+	 * the read IO is still in-flight.
+	 */
+	if (priority == ZIO_PRIORITY_ASYNC_READ ||
+	    priority == ZIO_PRIORITY_SCRUB)
 		arc_hdr_set_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
 	else
 		arc_hdr_clear_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
 
-	if (hash_lock != NULL)
-		mutex_exit(hash_lock);
-
 	/*
 	 * At this point, we have a level 1 cache miss.  Try again in
 	 * L2ARC if possible.
@@ -6260,6 +6252,10 @@ top:
 			    ZIO_FLAG_CANFAIL |
 			    ZIO_FLAG_DONT_PROPAGATE |
 			    ZIO_FLAG_DONT_RETRY, B_FALSE);
+			acb->acb_zio_head = rzio;
+
+			if (hash_lock != NULL)
+				mutex_exit(hash_lock);
 
 			DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
 			    zio_t *, rzio);
@@ -6276,6 +6272,8 @@ top:
 				goto out;
 
 			/* l2arc read error; goto zio_read() */
+			if (hash_lock != NULL)
+				mutex_enter(hash_lock);
 		} else {
 			DTRACE_PROBE1(l2arc__miss,
 			    arc_buf_hdr_t *, hdr);
@@ -6296,6 +6294,10 @@ top:
 		rzio = zio_read(pio, spa, bp, hdr_abd, size,
 		    arc_read_done, hdr, priority, zio_flags, zb);
+		acb->acb_zio_head = rzio;
+
+		if (hash_lock != NULL)
+			mutex_exit(hash_lock);
 
 		if (*arc_flags & ARC_FLAG_WAIT) {
 			rc = zio_wait(rzio);
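
The zio_change_priority() function itself lives outside this file (the
diffstat above is limited to module/zfs/arc.c), so its implementation is not
shown here. Conceptually, upgrading an already-queued I/O means removing it
from the queue class it currently occupies and re-inserting it under the new
priority. The following is a minimal, self-contained sketch of that re-queue
step using assumed toy types (toy_queue_t, toy_io_t, toy_change_priority());
it is not the ZFS vdev_queue implementation.

#include <stddef.h>

#define	TOY_NUM_CLASSES	3

typedef struct toy_io {
	int priority;			/* index of the class it is queued in */
	struct toy_io *next;
} toy_io_t;

typedef struct toy_queue {
	toy_io_t *heads[TOY_NUM_CLASSES];	/* one list per priority class */
} toy_queue_t;

static void
toy_remove(toy_queue_t *q, toy_io_t *io)
{
	toy_io_t **pp = &q->heads[io->priority];

	/* Unlink 'io' from the class it is currently queued in. */
	while (*pp != NULL && *pp != io)
		pp = &(*pp)->next;
	if (*pp == io)
		*pp = io->next;
}

static void
toy_insert(toy_queue_t *q, toy_io_t *io)
{
	io->next = q->heads[io->priority];
	q->heads[io->priority] = io;
}

/* Re-queue 'io' under 'new_priority'; this is the essence of a priority upgrade. */
static void
toy_change_priority(toy_queue_t *q, toy_io_t *io, int new_priority)
{
	toy_remove(q, io);
	io->priority = new_priority;
	toy_insert(q, io);
}

int
main(void)
{
	toy_queue_t q = { { NULL } };
	toy_io_t prefetch = { 1, NULL };	/* queued in the async class (index 1) */

	toy_insert(&q, &prefetch);
	toy_change_priority(&q, &prefetch, 0);	/* promote to the sync class (index 0) */
	return (0);
}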