about · summary · refs · log · tree · commit · diff · stats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r--  module/os/freebsd/zfs/sysctl_os.c |  17
-rw-r--r--  module/zfs/arc.c                  | 199
-rw-r--r--  module/zfs/dbuf.c                 |  99
-rw-r--r--  module/zfs/dmu.c                  |  18
-rw-r--r--  module/zfs/dmu_objset.c           |   2
-rw-r--r--  module/zfs/dmu_tx.c               |   7
6 files changed, 211 insertions(+), 131 deletions(-)
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c
index 48af1eaf8..05d58ad74 100644
--- a/module/os/freebsd/zfs/sysctl_os.c
+++ b/module/os/freebsd/zfs/sysctl_os.c
@@ -366,10 +366,10 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
- "size of anonymous state");
+ "size of metadata in anonymous state");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
- "size of anonymous state");
+ "size of data in anonymous state");
/* END CSTYLED */
extern arc_state_t ARC_mru;
@@ -424,6 +424,19 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
"size of data in mfu ghost state");
/* END CSTYLED */
+extern arc_state_t ARC_uncached;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_size, CTLFLAG_RD,
+ &ARC_uncached.arcs_size.rc_count, 0, "size of uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
+ &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+ "size of metadata in uncached state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
+ &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+ "size of data in uncached state");
+/* END CSTYLED */
+
extern arc_state_t ARC_l2c_only;
/* BEGIN CSTYLED */
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 1521caa6e..0d8aff947 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -333,6 +333,7 @@ static int arc_state_evict_marker_count;
static kmutex_t arc_evict_lock;
static boolean_t arc_evict_needed = B_FALSE;
+static clock_t arc_last_uncached_flush;
/*
* Count of bytes evicted since boot.
@@ -473,13 +474,14 @@ static uint_t zfs_arc_lotsfree_percent = 10;
*/
static int zfs_arc_prune_task_threads = 1;
-/* The 6 states: */
+/* The 7 states: */
arc_state_t ARC_anon;
arc_state_t ARC_mru;
arc_state_t ARC_mru_ghost;
arc_state_t ARC_mfu;
arc_state_t ARC_mfu_ghost;
arc_state_t ARC_l2c_only;
+arc_state_t ARC_uncached;
arc_stats_t arc_stats = {
{ "hits", KSTAT_DATA_UINT64 },
@@ -501,6 +503,7 @@ arc_stats_t arc_stats = {
{ "mru_ghost_hits", KSTAT_DATA_UINT64 },
{ "mfu_hits", KSTAT_DATA_UINT64 },
{ "mfu_ghost_hits", KSTAT_DATA_UINT64 },
+ { "uncached_hits", KSTAT_DATA_UINT64 },
{ "deleted", KSTAT_DATA_UINT64 },
{ "mutex_miss", KSTAT_DATA_UINT64 },
{ "access_skip", KSTAT_DATA_UINT64 },
@@ -549,6 +552,9 @@ arc_stats_t arc_stats = {
{ "mfu_ghost_size", KSTAT_DATA_UINT64 },
{ "mfu_ghost_evictable_data", KSTAT_DATA_UINT64 },
{ "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
+ { "uncached_size", KSTAT_DATA_UINT64 },
+ { "uncached_evictable_data", KSTAT_DATA_UINT64 },
+ { "uncached_evictable_metadata", KSTAT_DATA_UINT64 },
{ "l2_hits", KSTAT_DATA_UINT64 },
{ "l2_misses", KSTAT_DATA_UINT64 },
{ "l2_prefetch_asize", KSTAT_DATA_UINT64 },
@@ -702,6 +708,7 @@ taskq_t *arc_prune_taskq;
((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
+#define HDR_UNCACHED(hdr) ((hdr)->b_flags & ARC_FLAG_UNCACHED)
#define HDR_L2_READING(hdr) \
(((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
@@ -854,6 +861,7 @@ enum arc_hdr_alloc_flags {
ARC_HDR_ALLOC_RDATA = 0x1,
ARC_HDR_DO_ADAPT = 0x2,
ARC_HDR_USE_RESERVE = 0x4,
+ ARC_HDR_ALLOC_LINEAR = 0x8,
};
@@ -866,8 +874,10 @@ static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size,
const void *tag);
static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
static void arc_hdr_alloc_abd(arc_buf_hdr_t *, int);
+static void arc_hdr_destroy(arc_buf_hdr_t *);
static void arc_access(arc_buf_hdr_t *, arc_flags_t, boolean_t);
static void arc_buf_watch(arc_buf_t *);
+static void arc_change_state(arc_state_t *, arc_buf_hdr_t *);
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
@@ -2321,18 +2331,23 @@ remove_reference(arc_buf_hdr_t *hdr, const void *tag)
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(state == arc_anon || MUTEX_HELD(HDR_LOCK(hdr)));
- ASSERT(!GHOST_STATE(state));
+ ASSERT(!GHOST_STATE(state)); /* arc_l2c_only counts as a ghost. */
- /*
- * arc_l2c_only counts as a ghost state so we don't need to explicitly
- * check to prevent usage of the arc_l2c_only list.
- */
- if (((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
- (state != arc_anon)) {
- multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
- arc_evictable_space_increment(hdr, state);
+ if ((cnt = zfs_refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) != 0)
+ return (cnt);
+
+ if (state == arc_anon) {
+ arc_hdr_destroy(hdr);
+ return (0);
}
- return (cnt);
+ if (state == arc_uncached && !HDR_PREFETCH(hdr)) {
+ arc_change_state(arc_anon, hdr);
+ arc_hdr_destroy(hdr);
+ return (0);
+ }
+ multilist_insert(&state->arcs_list[arc_buf_type(hdr)], hdr);
+ arc_evictable_space_increment(hdr, state);
+ return (0);
}
/*
@@ -2439,8 +2454,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
if (refcnt == 0) {
if (old_state != arc_anon && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
- multilist_remove(&old_state->arcs_list[buftype], hdr);
- arc_evictable_space_decrement(hdr, old_state);
+ /* remove_reference() saves on insert. */
+ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
+ multilist_remove(&old_state->arcs_list[buftype],
+ hdr);
+ arc_evictable_space_decrement(hdr, old_state);
+ }
}
if (new_state != arc_anon && new_state != arc_l2c_only) {
/*
@@ -3845,7 +3864,6 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
VERIFY0(remove_reference(hdr, tag));
- arc_hdr_destroy(hdr);
return;
}
@@ -3858,8 +3876,8 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon);
ASSERT3P(buf->b_data, !=, NULL);
- (void) remove_reference(hdr, tag);
arc_buf_destroy_impl(buf);
+ (void) remove_reference(hdr, tag);
mutex_exit(hash_lock);
}
@@ -3874,6 +3892,7 @@ arc_buf_destroy(arc_buf_t *buf, const void *tag)
* - arc_mru_ghost -> deleted
* - arc_mfu_ghost -> arc_l2c_only
* - arc_mfu_ghost -> deleted
+ * - arc_uncached -> deleted
*
* Return total size of evicted data buffers for eviction progress tracking.
* When evicting from ghost states return logical buffer size to make eviction
@@ -3941,8 +3960,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
return (bytes_evicted);
}
- ASSERT(state == arc_mru || state == arc_mfu);
- evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
+ ASSERT(state == arc_mru || state == arc_mfu || state == arc_uncached);
+ evicted_state = (state == arc_uncached) ? arc_anon :
+ ((state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost);
/* prefetch buffers have a minimum lifespan */
if ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
@@ -4013,9 +4033,13 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
arc_hdr_free_abd(hdr, B_TRUE);
arc_change_state(evicted_state, hdr);
- ASSERT(HDR_IN_HASH_TABLE(hdr));
- arc_hdr_set_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr);
+ if (evicted_state == arc_anon) {
+ arc_hdr_destroy(hdr);
+ *real_evicted += HDR_FULL_SIZE;
+ } else {
+ ASSERT(HDR_IN_HASH_TABLE(hdr));
+ }
}
return (bytes_evicted);
@@ -4779,6 +4803,9 @@ arc_flush(spa_t *spa, boolean_t retry)
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry);
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
+
+ (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_DATA, retry);
+ (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry);
}
void
@@ -4910,7 +4937,16 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)
* which is held before this function is called, and is held by
* arc_wait_for_eviction() when it calls zthr_wakeup().
*/
- return (arc_evict_needed);
+ if (arc_evict_needed)
+ return (B_TRUE);
+
+ /*
+ * If we have buffers in uncached state, evict them periodically.
+ */
+ return ((zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_DATA]) +
+ zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]) &&
+ ddi_get_lbolt() - arc_last_uncached_flush >
+ MSEC_TO_TICK(arc_min_prefetch_ms / 2)));
}
/*
@@ -4925,8 +4961,14 @@ arc_evict_cb(void *arg, zthr_t *zthr)
uint64_t evicted = 0;
fstrans_cookie_t cookie = spl_fstrans_mark();
- /* Evict from cache */
- evicted = arc_evict();
+ /* Always try to evict from uncached state. */
+ arc_last_uncached_flush = ddi_get_lbolt();
+ evicted += arc_flush_state(arc_uncached, 0, ARC_BUFC_DATA, B_FALSE);
+ evicted += arc_flush_state(arc_uncached, 0, ARC_BUFC_METADATA, B_FALSE);
+
+ /* Evict from other states only if told to. */
+ if (arc_evict_needed)
+ evicted += arc_evict();
/*
* If evicted is zero, we couldn't evict anything
@@ -5205,12 +5247,10 @@ arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, const void *tag,
arc_buf_contents_t type = arc_buf_type(hdr);
arc_get_data_impl(hdr, size, tag, alloc_flags);
- if (type == ARC_BUFC_METADATA) {
- return (abd_alloc(size, B_TRUE));
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- return (abd_alloc(size, B_FALSE));
- }
+ if (alloc_flags & ARC_HDR_ALLOC_LINEAR)
+ return (abd_alloc_linear(size, type == ARC_BUFC_METADATA));
+ else
+ return (abd_alloc(size, type == ARC_BUFC_METADATA));
}
static void *
@@ -5475,16 +5515,22 @@ arc_access(arc_buf_hdr_t *hdr, arc_flags_t arc_flags, boolean_t hit)
clock_t now = ddi_get_lbolt();
if (hdr->b_l1hdr.b_state == arc_anon) {
+ arc_state_t *new_state;
/*
- * This buffer is not in the cache, and does not
- * appear in our "ghost" list. Add the new buffer
- * to the MRU state.
+ * This buffer is not in the cache, and does not appear in
+ * our "ghost" lists. Add it to the MRU or uncached state.
*/
ASSERT0(hdr->b_l1hdr.b_arc_access);
hdr->b_l1hdr.b_arc_access = now;
- DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
- arc_change_state(arc_mru, hdr);
-
+ if (HDR_UNCACHED(hdr)) {
+ new_state = arc_uncached;
+ DTRACE_PROBE1(new_state__uncached, arc_buf_hdr_t *,
+ hdr);
+ } else {
+ new_state = arc_mru;
+ DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
+ }
+ arc_change_state(new_state, hdr);
} else if (hdr->b_l1hdr.b_state == arc_mru) {
/*
* This buffer has been accessed once recently and either
@@ -5556,6 +5602,14 @@ arc_access(arc_buf_hdr_t *hdr, arc_flags_t arc_flags, boolean_t hit)
hdr->b_l1hdr.b_arc_access = now;
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
arc_change_state(arc_mfu, hdr);
+ } else if (hdr->b_l1hdr.b_state == arc_uncached) {
+ /*
+ * This buffer is uncacheable, but we got a hit. Probably
+ * a demand read after prefetch. Nothing more to do here.
+ */
+ if (!HDR_IO_IN_PROGRESS(hdr))
+ ARCSTAT_BUMP(arcstat_uncached_hits);
+ hdr->b_l1hdr.b_arc_access = now;
} else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
/*
* This buffer is on the 2nd Level ARC and was not accessed
@@ -5603,7 +5657,8 @@ arc_buf_access(arc_buf_t *buf)
mutex_exit(&buf->b_evict_lock);
ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
- hdr->b_l1hdr.b_state == arc_mfu);
+ hdr->b_l1hdr.b_state == arc_mfu ||
+ hdr->b_l1hdr.b_state == arc_uncached);
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, 0, B_TRUE);
@@ -5671,7 +5726,6 @@ arc_read_done(zio_t *zio)
kmutex_t *hash_lock = NULL;
arc_callback_t *callback_list;
arc_callback_t *acb;
- boolean_t freeable = B_FALSE;
/*
* The hdr was inserted into hash-table and removed from lists
@@ -5821,9 +5875,6 @@ arc_read_done(zio_t *zio)
*/
ASSERT(callback_cnt < 2 || hash_lock != NULL);
- arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
- (void) remove_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
-
if (zio->io_error == 0) {
arc_hdr_verify(hdr, zio->io_bp);
} else {
@@ -5832,7 +5883,6 @@ arc_read_done(zio_t *zio)
arc_change_state(arc_anon, hdr);
if (HDR_IN_HASH_TABLE(hdr))
buf_hash_remove(hdr);
- freeable = zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
}
/*
@@ -5842,18 +5892,11 @@ arc_read_done(zio_t *zio)
*/
cv_broadcast(&hdr->b_l1hdr.b_cv);
- if (hash_lock != NULL) {
+ arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
+ (void) remove_reference(hdr, hdr);
+
+ if (hash_lock != NULL)
mutex_exit(hash_lock);
- } else {
- /*
- * This block was freed while we waited for the read to
- * complete. It has been removed from the hash table and
- * moved to the anonymous state (so that it won't show up
- * in the cache).
- */
- ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
- freeable = zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
- }
/* execute each callback and free its structure */
while ((acb = callback_list) != NULL) {
@@ -5889,9 +5932,6 @@ arc_read_done(zio_t *zio)
kmem_free(acb, sizeof (arc_callback_t));
}
}
-
- if (freeable)
- arc_hdr_destroy(hdr);
}
/*
@@ -6072,7 +6112,8 @@ top:
}
ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
- hdr->b_l1hdr.b_state == arc_mfu);
+ hdr->b_l1hdr.b_state == arc_mfu ||
+ hdr->b_l1hdr.b_state == arc_uncached);
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, *arc_flags, B_TRUE);
@@ -6099,9 +6140,9 @@ top:
}
}
if (rc != 0) {
- (void) remove_reference(hdr, private);
arc_buf_destroy_impl(buf);
buf = NULL;
+ (void) remove_reference(hdr, private);
}
/* assert any errors weren't due to unloaded keys */
@@ -6191,6 +6232,11 @@ top:
goto top;
}
}
+ if (*arc_flags & ARC_FLAG_UNCACHED) {
+ arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
+ if (!encrypted_read)
+ alloc_flags |= ARC_HDR_ALLOC_LINEAR;
+ }
/*
* Call arc_adapt() explicitly before arc_access() to allow
@@ -6624,7 +6670,7 @@ arc_release(arc_buf_t *buf, const void *tag)
VERIFY3U(hdr->b_type, ==, type);
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
- (void) remove_reference(hdr, tag);
+ VERIFY3S(remove_reference(hdr, tag), >, 0);
if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) {
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
@@ -6897,7 +6943,8 @@ arc_write_ready(zio_t *zio)
arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
- } else if (!abd_size_alloc_linear(arc_buf_size(buf)) ||
+ } else if (!(HDR_UNCACHED(hdr) ||
+ abd_size_alloc_linear(arc_buf_size(buf))) ||
!arc_can_share(hdr, buf)) {
/*
* Ideally, we would always copy the io_abd into b_pabd, but the
@@ -7025,17 +7072,16 @@ arc_write_done(zio_t *zio)
}
}
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
- (void) remove_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
+ VERIFY3S(remove_reference(hdr, hdr), >, 0);
/* if it's not anon, we are doing a scrub */
if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon)
arc_access(hdr, 0, B_FALSE);
mutex_exit(hash_lock);
} else {
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
- (void) remove_reference(hdr, hdr); /* For IO_IN_PROGRESS. */
+ VERIFY3S(remove_reference(hdr, hdr), >, 0);
}
- ASSERT(!zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
callback->awcb_done(zio, buf, callback->awcb_private);
abd_free(zio->io_abd);
@@ -7044,7 +7090,7 @@ arc_write_done(zio_t *zio)
zio_t *
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
- blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc,
+ blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc,
const zio_prop_t *zp, arc_write_done_func_t *ready,
arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone,
arc_write_done_func_t *done, void *private, zio_priority_t priority,
@@ -7061,7 +7107,9 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
- if (l2arc)
+ if (uncached)
+ arc_hdr_set_flags(hdr, ARC_FLAG_UNCACHED);
+ else if (l2arc)
arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
if (ARC_BUF_ENCRYPTED(buf)) {
@@ -7283,6 +7331,8 @@ arc_kstat_update(kstat_t *ksp, int rw)
wmsum_value(&arc_sums.arcstat_mfu_hits);
as->arcstat_mfu_ghost_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
+ as->arcstat_uncached_hits.value.ui64 =
+ wmsum_value(&arc_sums.arcstat_uncached_hits);
as->arcstat_deleted.value.ui64 =
wmsum_value(&arc_sums.arcstat_deleted);
as->arcstat_mutex_miss.value.ui64 =
@@ -7352,6 +7402,10 @@ arc_kstat_update(kstat_t *ksp, int rw)
&as->arcstat_mfu_ghost_size,
&as->arcstat_mfu_ghost_evictable_data,
&as->arcstat_mfu_ghost_evictable_metadata);
+ arc_kstat_update_state(arc_uncached,
+ &as->arcstat_uncached_size,
+ &as->arcstat_uncached_evictable_data,
+ &as->arcstat_uncached_evictable_metadata);
as->arcstat_dnode_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_dnode_size);
@@ -7660,6 +7714,10 @@ arc_state_init(void)
arc_state_multilist_index_func, &num_sublists);
arc_state_multilist_init(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_uncached->arcs_list[ARC_BUFC_METADATA],
+ arc_state_multilist_index_func, &num_sublists);
+ arc_state_multilist_init(&arc_uncached->arcs_list[ARC_BUFC_DATA],
+ arc_state_multilist_index_func, &num_sublists);
/*
* L2 headers should never be on the L2 state list since they don't
@@ -7689,6 +7747,8 @@ arc_state_init(void)
zfs_refcount_create(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]);
zfs_refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]);
zfs_refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]);
+ zfs_refcount_create(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]);
+ zfs_refcount_create(&arc_uncached->arcs_esize[ARC_BUFC_DATA]);
zfs_refcount_create(&arc_anon->arcs_size);
zfs_refcount_create(&arc_mru->arcs_size);
@@ -7696,6 +7756,7 @@ arc_state_init(void)
zfs_refcount_create(&arc_mfu->arcs_size);
zfs_refcount_create(&arc_mfu_ghost->arcs_size);
zfs_refcount_create(&arc_l2c_only->arcs_size);
+ zfs_refcount_create(&arc_uncached->arcs_size);
wmsum_init(&arc_sums.arcstat_hits, 0);
wmsum_init(&arc_sums.arcstat_iohits, 0);
@@ -7716,6 +7777,7 @@ arc_state_init(void)
wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
+ wmsum_init(&arc_sums.arcstat_uncached_hits, 0);
wmsum_init(&arc_sums.arcstat_deleted, 0);
wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
wmsum_init(&arc_sums.arcstat_access_skip, 0);
@@ -7800,6 +7862,7 @@ arc_state_init(void)
arc_mfu->arcs_state = ARC_STATE_MFU;
arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
+ arc_uncached->arcs_state = ARC_STATE_UNCACHED;
}
static void
@@ -7817,6 +7880,8 @@ arc_state_fini(void)
zfs_refcount_destroy(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]);
zfs_refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]);
zfs_refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]);
+ zfs_refcount_destroy(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]);
+ zfs_refcount_destroy(&arc_uncached->arcs_esize[ARC_BUFC_DATA]);
zfs_refcount_destroy(&arc_anon->arcs_size);
zfs_refcount_destroy(&arc_mru->arcs_size);
@@ -7824,6 +7889,7 @@ arc_state_fini(void)
zfs_refcount_destroy(&arc_mfu->arcs_size);
zfs_refcount_destroy(&arc_mfu_ghost->arcs_size);
zfs_refcount_destroy(&arc_l2c_only->arcs_size);
+ zfs_refcount_destroy(&arc_uncached->arcs_size);
multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
@@ -7835,6 +7901,8 @@ arc_state_fini(void)
multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(&arc_uncached->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(&arc_uncached->arcs_list[ARC_BUFC_DATA]);
wmsum_fini(&arc_sums.arcstat_hits);
wmsum_fini(&arc_sums.arcstat_iohits);
@@ -7855,6 +7923,7 @@ arc_state_fini(void)
wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
wmsum_fini(&arc_sums.arcstat_mfu_hits);
wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
+ wmsum_fini(&arc_sums.arcstat_uncached_hits);
wmsum_fini(&arc_sums.arcstat_deleted);
wmsum_fini(&arc_sums.arcstat_mutex_miss);
wmsum_fini(&arc_sums.arcstat_access_skip);
@@ -8039,8 +8108,8 @@ arc_init(void)
arc_state_evict_markers =
arc_state_alloc_markers(arc_state_evict_marker_count);
- arc_evict_zthr = zthr_create("arc_evict",
- arc_evict_cb_check, arc_evict_cb, NULL, defclsyspri);
+ arc_evict_zthr = zthr_create_timer("arc_evict",
+ arc_evict_cb_check, arc_evict_cb, NULL, SEC2NSEC(1), defclsyspri);
arc_reap_zthr = zthr_create_timer("arc_reap",
arc_reap_cb_check, arc_reap_cb, NULL, SEC2NSEC(1), minclsyspri);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 5e1ed8386..efaa13317 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -1608,7 +1608,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
DTRACE_SET_STATE(db, "read issued");
mutex_exit(&db->db_mtx);
- if (dbuf_is_l2cacheable(db))
+ if (!DBUF_IS_CACHEABLE(db))
+ aflags |= ARC_FLAG_UNCACHED;
+ else if (dbuf_is_l2cacheable(db))
aflags |= ARC_FLAG_L2CACHE;
dbuf_add_ref(db, NULL);
@@ -1736,10 +1738,13 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dn = DB_DNODE(db);
prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
- (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL &&
- DBUF_IS_CACHEABLE(db);
+ (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL;
mutex_enter(&db->db_mtx);
+ if (flags & DB_RF_PARTIAL_FIRST)
+ db->db_partial_read = B_TRUE;
+ else if (!(flags & DB_RF_PARTIAL_MORE))
+ db->db_partial_read = B_FALSE;
if (db->db_state == DB_CACHED) {
/*
* Ensure that this block's dnode has been decrypted if
@@ -3463,8 +3468,9 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
dpa->dpa_cb = cb;
dpa->dpa_arg = arg;
- /* flag if L2ARC eligible, l2arc_noprefetch then decides */
- if (dnode_level_is_l2cacheable(&bp, dn, level))
+ if (!DNODE_LEVEL_IS_CACHEABLE(dn, level))
+ dpa->dpa_aflags |= ARC_FLAG_UNCACHED;
+ else if (dnode_level_is_l2cacheable(&bp, dn, level))
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
/*
@@ -3853,59 +3859,38 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, const void *tag, boolean_t evicting)
* This dbuf has anonymous data associated with it.
*/
dbuf_destroy(db);
- } else {
- boolean_t do_arc_evict = B_FALSE;
- blkptr_t bp;
- spa_t *spa = dmu_objset_spa(db->db_objset);
-
- if (!DBUF_IS_CACHEABLE(db) &&
- db->db_blkptr != NULL &&
- !BP_IS_HOLE(db->db_blkptr) &&
- !BP_IS_EMBEDDED(db->db_blkptr)) {
- do_arc_evict = B_TRUE;
- bp = *db->db_blkptr;
- }
-
- if (!DBUF_IS_CACHEABLE(db) ||
- db->db_pending_evict) {
- dbuf_destroy(db);
- } else if (!multilist_link_active(&db->db_cache_link)) {
- ASSERT3U(db->db_caching_status, ==,
- DB_NO_CACHE);
-
- dbuf_cached_state_t dcs =
- dbuf_include_in_metadata_cache(db) ?
- DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
- db->db_caching_status = dcs;
-
- multilist_insert(&dbuf_caches[dcs].cache, db);
- uint64_t db_size = db->db.db_size;
- size = zfs_refcount_add_many(
- &dbuf_caches[dcs].size, db_size, db);
- uint8_t db_level = db->db_level;
- mutex_exit(&db->db_mtx);
-
- if (dcs == DB_DBUF_METADATA_CACHE) {
- DBUF_STAT_BUMP(metadata_cache_count);
- DBUF_STAT_MAX(
- metadata_cache_size_bytes_max,
- size);
- } else {
- DBUF_STAT_BUMP(cache_count);
- DBUF_STAT_MAX(cache_size_bytes_max,
- size);
- DBUF_STAT_BUMP(cache_levels[db_level]);
- DBUF_STAT_INCR(
- cache_levels_bytes[db_level],
- db_size);
- }
+ } else if (!(DBUF_IS_CACHEABLE(db) || db->db_partial_read) ||
+ db->db_pending_evict) {
+ dbuf_destroy(db);
+ } else if (!multilist_link_active(&db->db_cache_link)) {
+ ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
+
+ dbuf_cached_state_t dcs =
+ dbuf_include_in_metadata_cache(db) ?
+ DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
+ db->db_caching_status = dcs;
+
+ multilist_insert(&dbuf_caches[dcs].cache, db);
+ uint64_t db_size = db->db.db_size;
+ size = zfs_refcount_add_many(
+ &dbuf_caches[dcs].size, db_size, db);
+ uint8_t db_level = db->db_level;
+ mutex_exit(&db->db_mtx);
- if (dcs == DB_DBUF_CACHE && !evicting)
- dbuf_evict_notify(size);
+ if (dcs == DB_DBUF_METADATA_CACHE) {
+ DBUF_STAT_BUMP(metadata_cache_count);
+ DBUF_STAT_MAX(metadata_cache_size_bytes_max,
+ size);
+ } else {
+ DBUF_STAT_BUMP(cache_count);
+ DBUF_STAT_MAX(cache_size_bytes_max, size);
+ DBUF_STAT_BUMP(cache_levels[db_level]);
+ DBUF_STAT_INCR(cache_levels_bytes[db_level],
+ db_size);
}
- if (do_arc_evict)
- arc_freed(spa, &bp);
+ if (dcs == DB_DBUF_CACHE && !evicting)
+ dbuf_evict_notify(size);
}
} else {
mutex_exit(&db->db_mtx);
@@ -5083,8 +5068,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
children_ready_cb = dbuf_write_children_ready;
dr->dr_zio = arc_write(pio, os->os_spa, txg,
- &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
- &zp, dbuf_write_ready,
+ &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db),
+ dbuf_is_l2cacheable(db), &zp, dbuf_write_ready,
children_ready_cb, dbuf_write_physdone,
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED, &zb);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 45304e7dd..d6a9f813c 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -549,14 +549,14 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset);
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
- DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
+ length <= zfetch_array_rd_sz) {
/*
* Prepare the zfetch before initiating the demand reads, so
* that if multiple threads block on same indirect block, we
* base predictions on the original less racy request order.
*/
- zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
- read && DNODE_IS_CACHEABLE(dn), B_TRUE);
+ zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks, read,
+ B_TRUE);
}
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
@@ -579,6 +579,14 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
* state will not yet be CACHED.
*/
if (read) {
+ if (i == nblks - 1 && blkid + i < dn->dn_maxblkid &&
+ offset + length < db->db.db_offset +
+ db->db.db_size) {
+ if (offset <= db->db.db_offset)
+ dbuf_flags |= DB_RF_PARTIAL_FIRST;
+ else
+ dbuf_flags |= DB_RF_PARTIAL_MORE;
+ }
(void) dbuf_read(db, zio, dbuf_flags);
if (db->db_state != DB_CACHED)
missed = B_TRUE;
@@ -1850,8 +1858,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_zgd = zgd;
dsa->dsa_tx = NULL;
- zio_nowait(arc_write(pio, os->os_spa, txg,
- zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
+ zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
+ dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index c17c829a0..5083b1763 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1694,7 +1694,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
}
zio = arc_write(pio, os->os_spa, tx->tx_txg,
- blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
+ blkptr_copy, os->os_phys_buf, B_FALSE, dmu_os_is_l2cacheable(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 28f64369d..a7a554691 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -214,7 +214,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
rw_exit(&dn->dn_struct_rwlock);
if (db == NULL)
return (SET_ERROR(EIO));
- err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH);
+ /*
+ * PARTIAL_FIRST allows caching for uncacheable blocks. It will
+ * be cleared after dmu_buf_will_dirty() calls dbuf_read() again.
+ */
+ err = dbuf_read(db, zio, DB_RF_CANFAIL | DB_RF_NOPREFETCH |
+ (level == 0 ? DB_RF_PARTIAL_FIRST : 0));
dbuf_rele(db, FTAG);
return (err);
}