diff options
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r-- | module/zfs/arc.c | 123 |
1 files changed, 84 insertions, 39 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index cd343b04e..698357632 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -357,7 +357,8 @@ int arc_no_grow_shift = 5; * minimum lifespan of a prefetch block in clock ticks * (initialized in arc_init()) */ -static int arc_min_prefetch_lifespan; +static int arc_min_prefetch_ms; +static int arc_min_prescient_prefetch_ms; /* * If this percent of memory is free, don't throttle. @@ -407,7 +408,8 @@ unsigned long zfs_arc_dnode_limit_percent = 10; * These tunables are Linux specific */ unsigned long zfs_arc_sys_free = 0; -int zfs_arc_min_prefetch_lifespan = 0; +int zfs_arc_min_prefetch_ms = 0; +int zfs_arc_min_prescient_prefetch_ms = 0; int zfs_arc_p_aggressive_disable = 1; int zfs_arc_p_dampener_disable = 1; int zfs_arc_meta_prune = 10000; @@ -663,6 +665,7 @@ typedef struct arc_stats { kstat_named_t arcstat_meta_min; kstat_named_t arcstat_sync_wait_for_async; kstat_named_t arcstat_demand_hit_predictive_prefetch; + kstat_named_t arcstat_demand_hit_prescient_prefetch; kstat_named_t arcstat_need_free; kstat_named_t arcstat_sys_free; kstat_named_t arcstat_raw_size; @@ -762,6 +765,7 @@ static arc_stats_t arc_stats = { { "arc_meta_min", KSTAT_DATA_UINT64 }, { "sync_wait_for_async", KSTAT_DATA_UINT64 }, { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 }, + { "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 }, { "arc_need_free", KSTAT_DATA_UINT64 }, { "arc_sys_free", KSTAT_DATA_UINT64 }, { "arc_raw_size", KSTAT_DATA_UINT64 } @@ -861,6 +865,8 @@ static taskq_t *arc_prune_taskq; #define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) #define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR) #define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH) +#define HDR_PRESCIENT_PREFETCH(hdr) \ + ((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) #define HDR_COMPRESSION_ENABLED(hdr) \ ((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC) @@ -3778,6 +3784,8 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) { arc_state_t *evicted_state, *state; int64_t bytes_evicted = 0; + int min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ? + arc_min_prescient_prefetch_ms : arc_min_prefetch_ms; ASSERT(MUTEX_HELD(hash_lock)); ASSERT(HDR_HAS_L1HDR(hdr)); @@ -3831,8 +3839,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) /* prefetch buffers have a minimum lifespan */ if (HDR_IO_IN_PROGRESS(hdr) || ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) && - ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < - arc_min_prefetch_lifespan)) { + ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < min_lifetime * hz)) { ARCSTAT_BUMP(arcstat_evict_skip); return (bytes_evicted); } @@ -5492,13 +5499,15 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * - move the buffer to the head of the list if this is * another prefetch (to make it less likely to be evicted). */ - if (HDR_PREFETCH(hdr)) { + if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { /* link protected by hash lock */ ASSERT(multilist_link_active( &hdr->b_l1hdr.b_arc_node)); } else { - arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH); + arc_hdr_clear_flags(hdr, + ARC_FLAG_PREFETCH | + ARC_FLAG_PRESCIENT_PREFETCH); atomic_inc_32(&hdr->b_l1hdr.b_mru_hits); ARCSTAT_BUMP(arcstat_mru_hits); } @@ -5532,10 +5541,13 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * MFU state. */ - if (HDR_PREFETCH(hdr)) { + if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { new_state = arc_mru; - if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) - arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH); + if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) { + arc_hdr_clear_flags(hdr, + ARC_FLAG_PREFETCH | + ARC_FLAG_PRESCIENT_PREFETCH); + } DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); } else { new_state = arc_mfu; @@ -5557,11 +5569,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * If it was a prefetch, we will explicitly move it to * the head of the list now. */ - if ((HDR_PREFETCH(hdr)) != 0) { - ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - /* link protected by hash_lock */ - ASSERT(multilist_link_active(&hdr->b_l1hdr.b_arc_node)); - } + atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits); ARCSTAT_BUMP(arcstat_mfu_hits); hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); @@ -5573,12 +5581,11 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * MFU state. */ - if (HDR_PREFETCH(hdr)) { + if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) { /* * This is a prefetch access... * move this block back to the MRU state. */ - ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt)); new_state = arc_mru; } @@ -5605,20 +5612,25 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) /* a generic arc_read_done_func_t which you can use */ /* ARGSUSED */ void -arc_bcopy_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) +arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, + arc_buf_t *buf, void *arg) { - if (error == 0) - bcopy(buf->b_data, arg, arc_buf_size(buf)); + if (buf == NULL) + return; + + bcopy(buf->b_data, arg, arc_buf_size(buf)); arc_buf_destroy(buf, arg); } /* a generic arc_read_done_func_t */ +/* ARGSUSED */ void -arc_getbuf_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) +arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, + arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; - if (error != 0) { - arc_buf_destroy(buf, arg); + + if (buf == NULL) { *bufp = NULL; } else { *bufp = buf; @@ -5652,7 +5664,6 @@ arc_read_done(zio_t *zio) arc_callback_t *callback_list; arc_callback_t *acb; boolean_t freeable = B_FALSE; - boolean_t no_zio_error = (zio->io_error == 0); /* * The hdr was inserted into hash-table and removed from lists @@ -5699,7 +5710,7 @@ arc_read_done(zio_t *zio) } } - if (no_zio_error) { + if (zio->io_error == 0) { /* byteswap if necessary */ if (BP_SHOULD_BYTESWAP(zio->io_bp)) { if (BP_GET_LEVEL(zio->io_bp) > 0) { @@ -5720,7 +5731,8 @@ arc_read_done(zio_t *zio) callback_list = hdr->b_l1hdr.b_acb; ASSERT3P(callback_list, !=, NULL); - if (hash_lock && no_zio_error && hdr->b_l1hdr.b_state == arc_anon) { + if (hash_lock && zio->io_error == 0 && + hdr->b_l1hdr.b_state == arc_anon) { /* * Only call arc_access on anonymous buffers. This is because * if we've issued an I/O for an evicted buffer, we've already @@ -5741,13 +5753,19 @@ arc_read_done(zio_t *zio) if (!acb->acb_done) continue; - /* This is a demand read since prefetches don't use callbacks */ callback_cnt++; + if (zio->io_error != 0) + continue; + int error = arc_buf_alloc_impl(hdr, zio->io_spa, acb->acb_dsobj, acb->acb_private, acb->acb_encrypted, - acb->acb_compressed, acb->acb_noauth, no_zio_error, + acb->acb_compressed, acb->acb_noauth, B_TRUE, &acb->acb_buf); + if (error != 0) { + arc_buf_destroy(acb->acb_buf, acb->acb_private); + acb->acb_buf = NULL; + } /* * Assert non-speculative zios didn't fail because an @@ -5770,9 +5788,8 @@ arc_read_done(zio_t *zio) } } - if (no_zio_error) { + if (zio->io_error == 0) zio->io_error = error; - } } hdr->b_l1hdr.b_acb = NULL; arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); @@ -5782,7 +5799,7 @@ arc_read_done(zio_t *zio) ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || callback_list != NULL); - if (no_zio_error) { + if (zio->io_error == 0) { arc_hdr_verify(hdr, zio->io_bp); } else { arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR); @@ -5816,8 +5833,8 @@ arc_read_done(zio_t *zio) /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { if (acb->acb_done) { - acb->acb_done(zio, zio->io_error, acb->acb_buf, - acb->acb_private); + acb->acb_done(zio, &zio->io_bookmark, zio->io_bp, + acb->acb_buf, acb->acb_private); } if (acb->acb_zio_dummy != NULL) { @@ -5974,12 +5991,25 @@ top: arc_hdr_clear_flags(hdr, ARC_FLAG_PREDICTIVE_PREFETCH); } + + if (hdr->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) { + ARCSTAT_BUMP( + arcstat_demand_hit_prescient_prefetch); + arc_hdr_clear_flags(hdr, + ARC_FLAG_PRESCIENT_PREFETCH); + } + ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp)); /* Get a buf with the desired data in it. */ rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset, private, encrypted_read, compressed_read, noauth_read, B_TRUE, &buf); + if (rc != 0) { + arc_buf_destroy(buf, private); + buf = NULL; + } + ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0); } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { @@ -5987,6 +6017,8 @@ top: } DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); arc_access(hdr, hash_lock); + if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) + arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); if (*arc_flags & ARC_FLAG_L2CACHE) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); mutex_exit(hash_lock); @@ -5996,7 +6028,7 @@ top: data, metadata, hits); if (done) - done(NULL, rc, buf, private); + done(NULL, zb, bp, buf, private); } else { uint64_t lsize = BP_GET_LSIZE(bp); uint64_t psize = BP_GET_PSIZE(bp); @@ -6112,6 +6144,8 @@ top: if (*arc_flags & ARC_FLAG_PREFETCH && refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); + if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) + arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); if (*arc_flags & ARC_FLAG_L2CACHE) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); if (BP_IS_AUTHENTICATED(bp)) @@ -7223,9 +7257,15 @@ arc_tuning_update(void) if (zfs_arc_p_min_shift) arc_p_min_shift = zfs_arc_p_min_shift; - /* Valid range: 1 - N ticks */ - if (zfs_arc_min_prefetch_lifespan) - arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan; + /* Valid range: 1 - N ms */ + if (zfs_arc_min_prefetch_ms) + arc_min_prefetch_ms = zfs_arc_min_prefetch_ms; + + /* Valid range: 1 - N ms */ + if (zfs_arc_min_prescient_prefetch_ms) { + arc_min_prescient_prefetch_ms = + zfs_arc_min_prescient_prefetch_ms; + } /* Valid range: 0 - 100 */ if ((zfs_arc_lotsfree_percent >= 0) && @@ -7368,7 +7408,8 @@ arc_init(void) cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL); /* Convert seconds to clock ticks */ - arc_min_prefetch_lifespan = 1 * hz; + arc_min_prefetch_ms = 1; + arc_min_prescient_prefetch_ms = 6; #ifdef _KERNEL /* @@ -9006,8 +9047,12 @@ MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size"); module_param(zfs_compressed_arc_enabled, int, 0644); MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers"); -module_param(zfs_arc_min_prefetch_lifespan, int, 0644); -MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block"); +module_param(zfs_arc_min_prefetch_ms, int, 0644); +MODULE_PARM_DESC(zfs_arc_min_prefetch_ms, "Min life of prefetch block in ms"); + +module_param(zfs_arc_min_prescient_prefetch_ms, int, 0644); +MODULE_PARM_DESC(zfs_arc_min_prescient_prefetch_ms, + "Min life of prescient prefetched block in ms"); module_param(l2arc_write_max, ulong, 0644); MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval"); |