diff options
author | Matthew Ahrens <[email protected]> | 2015-12-26 22:10:31 +0100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2016-01-12 09:02:33 -0800 |
commit | 7f60329a261bd48558a498fb10e9b29638eab33b (patch) | |
tree | 5e6616fbac91442ce424ca44a8288bf9f46db3cc /module/zfs/arc.c | |
parent | ab5cbbd1078bf007b50b084bb31fd58c7c5652f4 (diff) |
Illumos 5987 - zfs prefetch code needs work
5987 zfs prefetch code needs work
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Paul Dagnelie <[email protected]>
Approved by: Gordon Ross <[email protected]>
References:
https://www.illumos.org/issues/5987 zfs prefetch code needs work
illumos/illumos-gate@cf6106c 5987 zfs prefetch code needs work
Porting notes:
- [module/zfs/dbuf.c]
- 5f6d0b6 Handle block pointers with a corrupt logical size
- [module/zfs/dmu_zfetch.c]
- c65aa5b Fix gcc missing parenthesis warnings
- 428870f Update core ZFS code from build 121 to build 141.
- 79c76d5 Change KM_PUSHPAGE -> KM_SLEEP
- b8d06fc Switch KM_SLEEP to KM_PUSHPAGE
- Account for ISO C90 - mixed declarations and code - warnings
- Module parameters (new/changed):
- Replaced zfetch_block_cap with zfetch_max_distance
(Max bytes to prefetch per stream (default 8MB; 8 * 1024 * 1024))
- Preserved zfs_prefetch_disable as 'int' for consistency with
existing Linux module options.
- [include/sys/trace_arc.h]
- Added new tracepoints
- DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__sync__wait__for__async);
- DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
- [man/man5/zfs-module-parameters.5]
- Updated man page
Ported-by: kernelOfTruth [email protected]
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r-- | module/zfs/arc.c | 74 |
1 files changed, 67 insertions, 7 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index c3d88679f..b8707474d 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -474,6 +474,8 @@ typedef struct arc_stats { kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_meta_max; kstat_named_t arcstat_meta_min; + kstat_named_t arcstat_sync_wait_for_async; + kstat_named_t arcstat_demand_hit_predictive_prefetch; kstat_named_t arcstat_need_free; kstat_named_t arcstat_sys_free; } arc_stats_t; @@ -568,6 +570,8 @@ static arc_stats_t arc_stats = { { "arc_meta_limit", KSTAT_DATA_UINT64 }, { "arc_meta_max", KSTAT_DATA_UINT64 }, { "arc_meta_min", KSTAT_DATA_UINT64 }, + { "sync_wait_for_async", KSTAT_DATA_UINT64 }, + { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 }, { "arc_need_free", KSTAT_DATA_UINT64 }, { "arc_sys_free", KSTAT_DATA_UINT64 } }; @@ -4244,6 +4248,36 @@ top: if (HDR_IO_IN_PROGRESS(hdr)) { + if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) && + priority == ZIO_PRIORITY_SYNC_READ) { + /* + * This sync read must wait for an + * in-progress async read (e.g. a predictive + * prefetch). Async reads are queued + * separately at the vdev_queue layer, so + * this is a form of priority inversion. + * Ideally, we would "inherit" the demand + * i/o's priority by moving the i/o from + * the async queue to the synchronous queue, + * but there is currently no mechanism to do + * so. Track this so that we can evaluate + * the magnitude of this potential performance + * problem. + * + * Note that if the prefetch i/o is already + * active (has been issued to the device), + * the prefetch improved performance, because + * we issued it sooner than we would have + * without the prefetch. + */ + DTRACE_PROBE1(arc__sync__wait__for__async, + arc_buf_hdr_t *, hdr); + ARCSTAT_BUMP(arcstat_sync_wait_for_async); + } + if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { + hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH; + } + if (*arc_flags & ARC_FLAG_WAIT) { cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); mutex_exit(hash_lock); @@ -4252,7 +4286,7 @@ top: ASSERT(*arc_flags & ARC_FLAG_NOWAIT); if (done) { - arc_callback_t *acb = NULL; + arc_callback_t *acb = NULL; acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); @@ -4277,6 +4311,19 @@ top: hdr->b_l1hdr.b_state == arc_mfu); if (done) { + if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { + /* + * This is a demand read which does not have to + * wait for i/o because we did a predictive + * prefetch i/o for it, which has completed. + */ + DTRACE_PROBE1( + arc__demand__hit__predictive__prefetch, + arc_buf_hdr_t *, hdr); + ARCSTAT_BUMP( + arcstat_demand_hit_predictive_prefetch); + hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH; + } add_reference(hdr, hash_lock, private); /* * If this block is already in use, create a new @@ -4349,12 +4396,16 @@ top: goto top; /* restart the IO request */ } - /* if this is a prefetch, we don't have a reference */ - if (*arc_flags & ARC_FLAG_PREFETCH) { + /* + * If there is a callback, we pass our reference to + * it; otherwise we remove our reference. + */ + if (done == NULL) { (void) remove_reference(hdr, hash_lock, private); - hdr->b_flags |= ARC_FLAG_PREFETCH; } + if (*arc_flags & ARC_FLAG_PREFETCH) + hdr->b_flags |= ARC_FLAG_PREFETCH; if (*arc_flags & ARC_FLAG_L2CACHE) hdr->b_flags |= ARC_FLAG_L2CACHE; if (*arc_flags & ARC_FLAG_L2COMPRESS) @@ -4377,11 +4428,13 @@ top: ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - /* if this is a prefetch, we don't have a reference */ + /* + * If there is a callback, we pass a reference to it. + */ + if (done != NULL) + add_reference(hdr, hash_lock, private); if (*arc_flags & ARC_FLAG_PREFETCH) hdr->b_flags |= ARC_FLAG_PREFETCH; - else - add_reference(hdr, hash_lock, private); if (*arc_flags & ARC_FLAG_L2CACHE) hdr->b_flags |= ARC_FLAG_L2CACHE; if (*arc_flags & ARC_FLAG_L2COMPRESS) @@ -4399,6 +4452,8 @@ top: arc_access(hdr, hash_lock); } + if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH) + hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH; ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state)); acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); @@ -4438,6 +4493,11 @@ top: demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); + if (priority == ZIO_PRIORITY_ASYNC_READ) + hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ; + else + hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ; + if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) { /* * Read from the L2ARC if the following are true: |