aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/arc_impl.h2
-rw-r--r--module/os/freebsd/zfs/arc_os.c2
-rw-r--r--module/os/linux/zfs/arc_os.c2
-rw-r--r--module/zfs/arc.c113
4 files changed, 68 insertions, 51 deletions
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index a38083714..43818d410 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -989,7 +989,7 @@ extern unsigned long zfs_arc_max;
extern void arc_reduce_target_size(int64_t to_free);
extern boolean_t arc_reclaim_needed(void);
extern void arc_kmem_reap_soon(void);
-extern void arc_wait_for_eviction(uint64_t);
+extern void arc_wait_for_eviction(uint64_t, boolean_t);
extern void arc_lowmem_init(void);
extern void arc_lowmem_fini(void);
diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c
index 3b8b11cff..fddb1f0e8 100644
--- a/module/os/freebsd/zfs/arc_os.c
+++ b/module/os/freebsd/zfs/arc_os.c
@@ -233,7 +233,7 @@ arc_lowmem(void *arg __unused, int howto __unused)
* with ARC reclaim thread.
*/
if (curproc == pageproc)
- arc_wait_for_eviction(to_free);
+ arc_wait_for_eviction(to_free, B_FALSE);
}
void
diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c
index c3e889d54..f96cd1271 100644
--- a/module/os/linux/zfs/arc_os.c
+++ b/module/os/linux/zfs/arc_os.c
@@ -217,7 +217,7 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
* for the requested amount of data to be evicted.
*/
arc_reduce_target_size(ptob(sc->nr_to_scan));
- arc_wait_for_eviction(ptob(sc->nr_to_scan));
+ arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
if (current->reclaim_state != NULL)
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 677b80029..a89a9e65d 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -835,12 +835,13 @@ static kcondvar_t l2arc_rebuild_thr_cv;
enum arc_hdr_alloc_flags {
ARC_HDR_ALLOC_RDATA = 0x1,
ARC_HDR_DO_ADAPT = 0x2,
+ ARC_HDR_USE_RESERVE = 0x4,
};
-static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *, int);
static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
-static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, boolean_t);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *, int);
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
@@ -1855,7 +1856,8 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
* and then loan a buffer from it, rather than allocating a
* linear buffer and wrapping it in an abd later.
*/
- cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr, B_TRUE);
+ cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
+ ARC_HDR_DO_ADAPT);
tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
@@ -3170,7 +3172,6 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
{
uint64_t size;
boolean_t alloc_rdata = ((alloc_flags & ARC_HDR_ALLOC_RDATA) != 0);
- boolean_t do_adapt = ((alloc_flags & ARC_HDR_DO_ADAPT) != 0);
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
ASSERT(HDR_HAS_L1HDR(hdr));
@@ -3181,14 +3182,14 @@ arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, int alloc_flags)
size = HDR_GET_PSIZE(hdr);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
ARCSTAT_INCR(arcstat_raw_size, size);
} else {
size = arc_hdr_size(hdr);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr,
- do_adapt);
+ alloc_flags);
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
}
@@ -3234,13 +3235,34 @@ arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
}
+/*
+ * Allocate empty anonymous ARC header. The header will get its identity
+ * assigned and buffers attached later as part of read or write operations.
+ *
+ * In case of read arc_read() assigns header its identify (b_dva + b_birth),
+ * inserts it into ARC hash to become globally visible and allocates physical
+ * (b_pabd) or raw (b_rabd) ABD buffer to read into from disk. On disk read
+ * completion arc_read_done() allocates ARC buffer(s) as needed, potentially
+ * sharing one of them with the physical ABD buffer.
+ *
+ * In case of write arc_alloc_buf() allocates ARC buffer to be filled with
+ * data. Then after compression and/or encryption arc_write_ready() allocates
+ * and fills (or potentially shares) physical (b_pabd) or raw (b_rabd) ABD
+ * buffer. On disk write completion arc_write_done() assigns the header its
+ * new identity (b_dva + b_birth) and inserts into ARC hash.
+ *
+ * In case of partial overwrite the old data is read first as described. Then
+ * arc_release() either allocates new anonymous ARC header and moves the ARC
+ * buffer to it, or reuses the old ARC header by discarding its identity and
+ * removing it from ARC hash. After buffer modification normal write process
+ * follows as described.
+ */
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
- arc_buf_contents_t type, boolean_t alloc_rdata)
+ arc_buf_contents_t type)
{
arc_buf_hdr_t *hdr;
- int flags = ARC_HDR_DO_ADAPT;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
if (protected) {
@@ -3248,7 +3270,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
} else {
hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
}
- flags |= alloc_rdata ? ARC_HDR_ALLOC_RDATA : 0;
ASSERT(HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
@@ -3272,12 +3293,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_l1hdr.b_bufcnt = 0;
hdr->b_l1hdr.b_buf = NULL;
- /*
- * Allocate the hdr's buffer. This will contain either
- * the compressed or uncompressed data depending on the block
- * it references and compressed arc enablement.
- */
- arc_hdr_alloc_abd(hdr, flags);
ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
return (hdr);
@@ -3564,7 +3579,7 @@ arc_buf_t *
arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
+ B_FALSE, ZIO_COMPRESS_OFF, 0, type);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
@@ -3588,7 +3603,7 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
+ B_FALSE, compression_type, complevel, ARC_BUFC_DATA);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
@@ -3596,16 +3611,12 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
arc_buf_thaw(buf);
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
- if (!arc_buf_is_shared(buf)) {
- /*
- * To ensure that the hdr has the correct data in it if we call
- * arc_untransform() on this buf before it's been written to
- * disk, it's easiest if we just set up sharing between the
- * buf and the hdr.
- */
- arc_hdr_free_abd(hdr, B_FALSE);
- arc_share_buf(hdr, buf);
- }
+ /*
+ * To ensure that the hdr has the correct data in it if we call
+ * arc_untransform() on this buf before it's been written to disk,
+ * it's easiest if we just set up sharing between the buf and the hdr.
+ */
+ arc_share_buf(hdr, buf);
return (buf);
}
@@ -3627,7 +3638,7 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
- compression_type, complevel, type, B_TRUE);
+ compression_type, complevel, type);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
@@ -5125,7 +5136,7 @@ arc_adapt(int bytes, arc_state_t *state)
* zfs_arc_overflow_shift.
*/
static arc_ovf_level_t
-arc_is_overflowing(void)
+arc_is_overflowing(boolean_t use_reserve)
{
/* Always allow at least one block of overflow */
int64_t overflow = MAX(SPA_MAXBLOCKSIZE,
@@ -5142,17 +5153,19 @@ arc_is_overflowing(void)
*/
int64_t over = aggsum_lower_bound(&arc_sums.arcstat_size) -
arc_c - overflow / 2;
+ if (!use_reserve)
+ overflow /= 2;
return (over < 0 ? ARC_OVF_NONE :
over < overflow ? ARC_OVF_SOME : ARC_OVF_SEVERE);
}
static abd_t *
arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+ int alloc_flags)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, do_adapt);
+ arc_get_data_impl(hdr, size, tag, alloc_flags);
if (type == ARC_BUFC_METADATA) {
return (abd_alloc(size, B_TRUE));
} else {
@@ -5166,7 +5179,7 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
{
arc_buf_contents_t type = arc_buf_type(hdr);
- arc_get_data_impl(hdr, size, tag, B_TRUE);
+ arc_get_data_impl(hdr, size, tag, ARC_HDR_DO_ADAPT);
if (type == ARC_BUFC_METADATA) {
return (zio_buf_alloc(size));
} else {
@@ -5183,9 +5196,9 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
* of ARC behavior and settings. See arc_lowmem_init().
*/
void
-arc_wait_for_eviction(uint64_t amount)
+arc_wait_for_eviction(uint64_t amount, boolean_t use_reserve)
{
- switch (arc_is_overflowing()) {
+ switch (arc_is_overflowing(use_reserve)) {
case ARC_OVF_NONE:
return;
case ARC_OVF_SOME:
@@ -5262,12 +5275,12 @@ arc_wait_for_eviction(uint64_t amount)
*/
static void
arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
- boolean_t do_adapt)
+ int alloc_flags)
{
arc_state_t *state = hdr->b_l1hdr.b_state;
arc_buf_contents_t type = arc_buf_type(hdr);
- if (do_adapt)
+ if (alloc_flags & ARC_HDR_DO_ADAPT)
arc_adapt(size, state);
/*
@@ -5283,7 +5296,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
* ensure that that progress is also made towards getting arc_size
* under arc_c. See the comment above zfs_arc_eviction_pct.
*/
- arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100);
+ arc_wait_for_eviction(size * zfs_arc_eviction_pct / 100,
+ alloc_flags & ARC_HDR_USE_RESERVE);
VERIFY3U(hdr->b_type, ==, type);
if (type == ARC_BUFC_METADATA) {
@@ -6093,8 +6107,7 @@ top:
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
- encrypted_read);
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type);
if (!embedded_bp) {
hdr->b_dva = *BP_IDENTITY(bp);
@@ -6108,6 +6121,7 @@ top:
arc_hdr_destroy(hdr);
goto top; /* restart the IO request */
}
+ alloc_flags |= ARC_HDR_DO_ADAPT;
} else {
/*
* This block is in the ghost cache or encrypted data
@@ -6155,9 +6169,9 @@ top:
*/
arc_adapt(arc_hdr_size(hdr), hdr->b_l1hdr.b_state);
arc_access(hdr, hash_lock);
- arc_hdr_alloc_abd(hdr, alloc_flags);
}
+ arc_hdr_alloc_abd(hdr, alloc_flags);
if (encrypted_read) {
ASSERT(HDR_HAS_RABD(hdr));
size = HDR_GET_PSIZE(hdr);
@@ -6679,7 +6693,7 @@ arc_release(arc_buf_t *buf, void *tag)
* buffer which will be freed in arc_write().
*/
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
- compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
+ compress, hdr->b_complevel, type);
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -6859,7 +6873,8 @@ arc_write_ready(zio_t *zio)
if (ARC_BUF_ENCRYPTED(buf)) {
ASSERT3U(psize, >, 0);
ASSERT(ARC_BUF_COMPRESSED(buf));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT | ARC_HDR_ALLOC_RDATA |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
} else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
/*
@@ -6869,17 +6884,19 @@ arc_write_ready(zio_t *zio)
*/
if (BP_IS_ENCRYPTED(bp)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr,
- ARC_HDR_DO_ADAPT|ARC_HDR_ALLOC_RDATA);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_ALLOC_RDATA | ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
!ARC_BUF_COMPRESSED(buf)) {
ASSERT3U(psize, >, 0);
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
} else {
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
- arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT);
+ arc_hdr_alloc_abd(hdr, ARC_HDR_DO_ADAPT |
+ ARC_HDR_USE_RESERVE);
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
arc_buf_size(buf));
}
@@ -8689,7 +8706,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
*/
if (BP_IS_ENCRYPTED(bp)) {
abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
zio_crypt_decode_params_bp(bp, salt, iv);
zio_crypt_decode_mac_bp(bp, mac);
@@ -8726,7 +8743,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
!HDR_COMPRESSION_ENABLED(hdr)) {
abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr,
- B_TRUE);
+ ARC_HDR_DO_ADAPT | ARC_HDR_USE_RESERVE);
void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),