summaryrefslogtreecommitdiffstats
path: root/module/zfs/arc.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs/arc.c')
-rw-r--r--module/zfs/arc.c123
1 files changed, 84 insertions, 39 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index cd343b04e..698357632 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -357,7 +357,8 @@ int arc_no_grow_shift = 5;
* minimum lifespan of a prefetch block in clock ticks
* (initialized in arc_init())
*/
-static int arc_min_prefetch_lifespan;
+static int arc_min_prefetch_ms;
+static int arc_min_prescient_prefetch_ms;
/*
* If this percent of memory is free, don't throttle.
@@ -407,7 +408,8 @@ unsigned long zfs_arc_dnode_limit_percent = 10;
* These tunables are Linux specific
*/
unsigned long zfs_arc_sys_free = 0;
-int zfs_arc_min_prefetch_lifespan = 0;
+int zfs_arc_min_prefetch_ms = 0;
+int zfs_arc_min_prescient_prefetch_ms = 0;
int zfs_arc_p_aggressive_disable = 1;
int zfs_arc_p_dampener_disable = 1;
int zfs_arc_meta_prune = 10000;
@@ -663,6 +665,7 @@ typedef struct arc_stats {
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_sync_wait_for_async;
kstat_named_t arcstat_demand_hit_predictive_prefetch;
+ kstat_named_t arcstat_demand_hit_prescient_prefetch;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
kstat_named_t arcstat_raw_size;
@@ -762,6 +765,7 @@ static arc_stats_t arc_stats = {
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
+ { "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
{ "arc_sys_free", KSTAT_DATA_UINT64 },
{ "arc_raw_size", KSTAT_DATA_UINT64 }
@@ -861,6 +865,8 @@ static taskq_t *arc_prune_taskq;
#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS)
#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR)
#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
+#define HDR_PRESCIENT_PREFETCH(hdr) \
+ ((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH)
#define HDR_COMPRESSION_ENABLED(hdr) \
((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
@@ -3778,6 +3784,8 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
{
arc_state_t *evicted_state, *state;
int64_t bytes_evicted = 0;
+ int min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
+ arc_min_prescient_prefetch_ms : arc_min_prefetch_ms;
ASSERT(MUTEX_HELD(hash_lock));
ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3831,8 +3839,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
/* prefetch buffers have a minimum lifespan */
if (HDR_IO_IN_PROGRESS(hdr) ||
((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
- ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <
- arc_min_prefetch_lifespan)) {
+ ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < min_lifetime * hz)) {
ARCSTAT_BUMP(arcstat_evict_skip);
return (bytes_evicted);
}
@@ -5492,13 +5499,15 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* - move the buffer to the head of the list if this is
* another prefetch (to make it less likely to be evicted).
*/
- if (HDR_PREFETCH(hdr)) {
+ if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
/* link protected by hash lock */
ASSERT(multilist_link_active(
&hdr->b_l1hdr.b_arc_node));
} else {
- arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_PREFETCH |
+ ARC_FLAG_PRESCIENT_PREFETCH);
atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
}
@@ -5532,10 +5541,13 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* MFU state.
*/
- if (HDR_PREFETCH(hdr)) {
+ if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
new_state = arc_mru;
- if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0)
- arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+ if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) {
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_PREFETCH |
+ ARC_FLAG_PRESCIENT_PREFETCH);
+ }
DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
} else {
new_state = arc_mfu;
@@ -5557,11 +5569,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* If it was a prefetch, we will explicitly move it to
* the head of the list now.
*/
- if ((HDR_PREFETCH(hdr)) != 0) {
- ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- /* link protected by hash_lock */
- ASSERT(multilist_link_active(&hdr->b_l1hdr.b_arc_node));
- }
+
atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
ARCSTAT_BUMP(arcstat_mfu_hits);
hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
@@ -5573,12 +5581,11 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* MFU state.
*/
- if (HDR_PREFETCH(hdr)) {
+ if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
/*
* This is a prefetch access...
* move this block back to the MRU state.
*/
- ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
new_state = arc_mru;
}
@@ -5605,20 +5612,25 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
/* a generic arc_read_done_func_t which you can use */
/* ARGSUSED */
void
-arc_bcopy_func(zio_t *zio, int error, arc_buf_t *buf, void *arg)
+arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
+ arc_buf_t *buf, void *arg)
{
- if (error == 0)
- bcopy(buf->b_data, arg, arc_buf_size(buf));
+ if (buf == NULL)
+ return;
+
+ bcopy(buf->b_data, arg, arc_buf_size(buf));
arc_buf_destroy(buf, arg);
}
/* a generic arc_read_done_func_t */
+/* ARGSUSED */
void
-arc_getbuf_func(zio_t *zio, int error, arc_buf_t *buf, void *arg)
+arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
+ arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
- if (error != 0) {
- arc_buf_destroy(buf, arg);
+
+ if (buf == NULL) {
*bufp = NULL;
} else {
*bufp = buf;
@@ -5652,7 +5664,6 @@ arc_read_done(zio_t *zio)
arc_callback_t *callback_list;
arc_callback_t *acb;
boolean_t freeable = B_FALSE;
- boolean_t no_zio_error = (zio->io_error == 0);
/*
* The hdr was inserted into hash-table and removed from lists
@@ -5699,7 +5710,7 @@ arc_read_done(zio_t *zio)
}
}
- if (no_zio_error) {
+ if (zio->io_error == 0) {
/* byteswap if necessary */
if (BP_SHOULD_BYTESWAP(zio->io_bp)) {
if (BP_GET_LEVEL(zio->io_bp) > 0) {
@@ -5720,7 +5731,8 @@ arc_read_done(zio_t *zio)
callback_list = hdr->b_l1hdr.b_acb;
ASSERT3P(callback_list, !=, NULL);
- if (hash_lock && no_zio_error && hdr->b_l1hdr.b_state == arc_anon) {
+ if (hash_lock && zio->io_error == 0 &&
+ hdr->b_l1hdr.b_state == arc_anon) {
/*
* Only call arc_access on anonymous buffers. This is because
* if we've issued an I/O for an evicted buffer, we've already
@@ -5741,13 +5753,19 @@ arc_read_done(zio_t *zio)
if (!acb->acb_done)
continue;
- /* This is a demand read since prefetches don't use callbacks */
callback_cnt++;
+ if (zio->io_error != 0)
+ continue;
+
int error = arc_buf_alloc_impl(hdr, zio->io_spa,
acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
- acb->acb_compressed, acb->acb_noauth, no_zio_error,
+ acb->acb_compressed, acb->acb_noauth, B_TRUE,
&acb->acb_buf);
+ if (error != 0) {
+ arc_buf_destroy(acb->acb_buf, acb->acb_private);
+ acb->acb_buf = NULL;
+ }
/*
* Assert non-speculative zios didn't fail because an
@@ -5770,9 +5788,8 @@ arc_read_done(zio_t *zio)
}
}
- if (no_zio_error) {
+ if (zio->io_error == 0)
zio->io_error = error;
- }
}
hdr->b_l1hdr.b_acb = NULL;
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
@@ -5782,7 +5799,7 @@ arc_read_done(zio_t *zio)
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
callback_list != NULL);
- if (no_zio_error) {
+ if (zio->io_error == 0) {
arc_hdr_verify(hdr, zio->io_bp);
} else {
arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
@@ -5816,8 +5833,8 @@ arc_read_done(zio_t *zio)
/* execute each callback and free its structure */
while ((acb = callback_list) != NULL) {
if (acb->acb_done) {
- acb->acb_done(zio, zio->io_error, acb->acb_buf,
- acb->acb_private);
+ acb->acb_done(zio, &zio->io_bookmark, zio->io_bp,
+ acb->acb_buf, acb->acb_private);
}
if (acb->acb_zio_dummy != NULL) {
@@ -5974,12 +5991,25 @@ top:
arc_hdr_clear_flags(hdr,
ARC_FLAG_PREDICTIVE_PREFETCH);
}
+
+ if (hdr->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) {
+ ARCSTAT_BUMP(
+ arcstat_demand_hit_prescient_prefetch);
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_PRESCIENT_PREFETCH);
+ }
+
ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
/* Get a buf with the desired data in it. */
rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset,
private, encrypted_read, compressed_read,
noauth_read, B_TRUE, &buf);
+ if (rc != 0) {
+ arc_buf_destroy(buf, private);
+ buf = NULL;
+ }
+
ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0);
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
@@ -5987,6 +6017,8 @@ top:
}
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, hash_lock);
+ if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
if (*arc_flags & ARC_FLAG_L2CACHE)
arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
mutex_exit(hash_lock);
@@ -5996,7 +6028,7 @@ top:
data, metadata, hits);
if (done)
- done(NULL, rc, buf, private);
+ done(NULL, zb, bp, buf, private);
} else {
uint64_t lsize = BP_GET_LSIZE(bp);
uint64_t psize = BP_GET_PSIZE(bp);
@@ -6112,6 +6144,8 @@ top:
if (*arc_flags & ARC_FLAG_PREFETCH &&
refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+ if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
if (*arc_flags & ARC_FLAG_L2CACHE)
arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
if (BP_IS_AUTHENTICATED(bp))
@@ -7223,9 +7257,15 @@ arc_tuning_update(void)
if (zfs_arc_p_min_shift)
arc_p_min_shift = zfs_arc_p_min_shift;
- /* Valid range: 1 - N ticks */
- if (zfs_arc_min_prefetch_lifespan)
- arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
+ /* Valid range: 1 - N ms */
+ if (zfs_arc_min_prefetch_ms)
+ arc_min_prefetch_ms = zfs_arc_min_prefetch_ms;
+
+ /* Valid range: 1 - N ms */
+ if (zfs_arc_min_prescient_prefetch_ms) {
+ arc_min_prescient_prefetch_ms =
+ zfs_arc_min_prescient_prefetch_ms;
+ }
/* Valid range: 0 - 100 */
if ((zfs_arc_lotsfree_percent >= 0) &&
@@ -7368,7 +7408,8 @@ arc_init(void)
cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL);
/* Convert seconds to clock ticks */
- arc_min_prefetch_lifespan = 1 * hz;
+ arc_min_prefetch_ms = 1;
+ arc_min_prescient_prefetch_ms = 6;
#ifdef _KERNEL
/*
@@ -9006,8 +9047,12 @@ MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size");
module_param(zfs_compressed_arc_enabled, int, 0644);
MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers");
-module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
-MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");
+module_param(zfs_arc_min_prefetch_ms, int, 0644);
+MODULE_PARM_DESC(zfs_arc_min_prefetch_ms, "Min life of prefetch block in ms");
+
+module_param(zfs_arc_min_prescient_prefetch_ms, int, 0644);
+MODULE_PARM_DESC(zfs_arc_min_prescient_prefetch_ms,
+ "Min life of prescient prefetched block in ms");
module_param(l2arc_write_max, ulong, 0644);
MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");