Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r--   module/zfs/arc.c   74
1 file changed, 67 insertions, 7 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index c3d88679f..b8707474d 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -474,6 +474,8 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_meta_limit;
 	kstat_named_t arcstat_meta_max;
 	kstat_named_t arcstat_meta_min;
+	kstat_named_t arcstat_sync_wait_for_async;
+	kstat_named_t arcstat_demand_hit_predictive_prefetch;
 	kstat_named_t arcstat_need_free;
 	kstat_named_t arcstat_sys_free;
 } arc_stats_t;
@@ -568,6 +570,8 @@ static arc_stats_t arc_stats = {
 	{ "arc_meta_limit",		KSTAT_DATA_UINT64 },
 	{ "arc_meta_max",		KSTAT_DATA_UINT64 },
 	{ "arc_meta_min",		KSTAT_DATA_UINT64 },
+	{ "sync_wait_for_async",	KSTAT_DATA_UINT64 },
+	{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
 	{ "arc_need_free",		KSTAT_DATA_UINT64 },
 	{ "arc_sys_free",		KSTAT_DATA_UINT64 }
 };
@@ -4244,6 +4248,36 @@ top:
 	if (HDR_IO_IN_PROGRESS(hdr)) {
+		if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
+		    priority == ZIO_PRIORITY_SYNC_READ) {
+			/*
+			 * This sync read must wait for an
+			 * in-progress async read (e.g. a predictive
+			 * prefetch).  Async reads are queued
+			 * separately at the vdev_queue layer, so
+			 * this is a form of priority inversion.
+			 * Ideally, we would "inherit" the demand
+			 * i/o's priority by moving the i/o from
+			 * the async queue to the synchronous queue,
+			 * but there is currently no mechanism to do
+			 * so.  Track this so that we can evaluate
+			 * the magnitude of this potential performance
+			 * problem.
+			 *
+			 * Note that if the prefetch i/o is already
+			 * active (has been issued to the device),
+			 * the prefetch improved performance, because
+			 * we issued it sooner than we would have
+			 * without the prefetch.
+			 */
+			DTRACE_PROBE1(arc__sync__wait__for__async,
+			    arc_buf_hdr_t *, hdr);
+			ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+		}
+		if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+			hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+		}
+
 		if (*arc_flags & ARC_FLAG_WAIT) {
 			cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
 			mutex_exit(hash_lock);
@@ -4252,7 +4286,7 @@ top:
 			ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
 
 			if (done) {
-				arc_callback_t *acb = NULL;
+				arc_callback_t *acb = NULL;
 
 				acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
@@ -4277,6 +4311,19 @@ top:
 		    hdr->b_l1hdr.b_state == arc_mfu);
 
 		if (done) {
+			if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+				/*
+				 * This is a demand read which does not have to
+				 * wait for i/o because we did a predictive
+				 * prefetch i/o for it, which has completed.
+				 */
+				DTRACE_PROBE1(
+				    arc__demand__hit__predictive__prefetch,
+				    arc_buf_hdr_t *, hdr);
+				ARCSTAT_BUMP(
+				    arcstat_demand_hit_predictive_prefetch);
+				hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH;
+			}
 			add_reference(hdr, hash_lock, private);
 			/*
 			 * If this block is already in use, create a new
@@ -4349,12 +4396,16 @@ top:
 			goto top; /* restart the IO request */
 		}
 
-		/* if this is a prefetch, we don't have a reference */
-		if (*arc_flags & ARC_FLAG_PREFETCH) {
+		/*
+		 * If there is a callback, we pass our reference to
+		 * it; otherwise we remove our reference.
+		 */
+		if (done == NULL) {
 			(void) remove_reference(hdr, hash_lock, private);
-			hdr->b_flags |= ARC_FLAG_PREFETCH;
 		}
+		if (*arc_flags & ARC_FLAG_PREFETCH)
+			hdr->b_flags |= ARC_FLAG_PREFETCH;
 		if (*arc_flags & ARC_FLAG_L2CACHE)
 			hdr->b_flags |= ARC_FLAG_L2CACHE;
 		if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4377,11 +4428,13 @@ top:
 		ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
 
-		/* if this is a prefetch, we don't have a reference */
+		/*
+		 * If there is a callback, we pass a reference to it.
+		 */
+		if (done != NULL)
+			add_reference(hdr, hash_lock, private);
 		if (*arc_flags & ARC_FLAG_PREFETCH)
 			hdr->b_flags |= ARC_FLAG_PREFETCH;
-		else
-			add_reference(hdr, hash_lock, private);
 		if (*arc_flags & ARC_FLAG_L2CACHE)
 			hdr->b_flags |= ARC_FLAG_L2CACHE;
 		if (*arc_flags & ARC_FLAG_L2COMPRESS)
@@ -4399,6 +4452,8 @@ top:
 			arc_access(hdr, hash_lock);
 		}
 
+		if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
+			hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH;
 		ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
 
 		acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
@@ -4438,6 +4493,11 @@ top:
 		    demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
 		    data, metadata, misses);
 
+		if (priority == ZIO_PRIORITY_ASYNC_READ)
+			hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ;
+		else
+			hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ;
+
 		if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
 			/*
 			 * Read from the L2ARC if the following are true:
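
The two counters added above are exported through the arcstats kstat. As a minimal sketch (not part of this commit), and assuming a ZFS-on-Linux system where the kstat appears as /proc/spl/kstat/zfs/arcstats with "name type data" columns, they could be read from userspace like this:

/*
 * Illustrative only: print the two counters introduced by this patch.
 * The /proc path and the three-column line format are assumptions
 * about the Linux SPL kstat interface, not part of the patch itself.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *path = "/proc/spl/kstat/zfs/arcstats";
	char line[256], name[128];
	unsigned int type;
	unsigned long long data;
	FILE *fp = fopen(path, "r");

	if (fp == NULL) {
		perror(path);
		return (1);
	}
	while (fgets(line, sizeof (line), fp) != NULL) {
		/* Each data line is "name  type  value". */
		if (sscanf(line, "%127s %u %llu", name, &type, &data) != 3)
			continue;
		if (strcmp(name, "sync_wait_for_async") == 0 ||
		    strcmp(name, "demand_hit_predictive_prefetch") == 0)
			printf("%s = %llu\n", name, data);
	}
	(void) fclose(fp);
	return (0);
}

Per the comments in the patch, a steadily growing sync_wait_for_async count indicates demand (sync) reads stalling behind queued async prefetch i/o, the priority-inversion case the commit sets out to measure, while demand_hit_predictive_prefetch counts demand reads that were satisfied without waiting because a predictive prefetch had already completed.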