diff options
author | George Amanakis <[email protected]> | 2020-05-07 19:34:03 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2020-05-07 16:34:03 -0700 |
commit | 657fd33bcff17e44ad55dffdf294d7c107b4bf5d (patch) | |
tree | 33abf04e8eeb45e5c63a7f20719ddad67aabeb65 /module | |
parent | 108a454a4604df6ea3be817f3cf076726df2c67a (diff) |
Improvements on persistent L2ARC
Functional changes:
We implement refcounts of log blocks and their aligned size on the
cache device along with two corresponding arcstats. The refcounts are
reflected in the header of the device and provide valuable information
as to whether log blocks are accounted for correctly. These are
dynamically adjusted as log blocks are committed/evicted. zdb also uses
this information in the device header and compares it to the
corresponding values as reported by dump_l2arc_log_blocks() which
emulates l2arc_rebuild(). If the refcounts saved in the device header
report higher values, zdb exits with an error. For this feature to work
correctly there should be no active writes on the device. This is also
employed in the tests of persistent L2ARC. We extend the structure of
the cache device header by adding the two new variables mirroring the
refcounts after the existing variables to preserve backward
compatibility in terms of persistent L2ARC.
1) a new arcstat "l2_log_blk_asize" and refcount "l2ad_lb_asize" which
reflect the total aligned size of log blocks on the device. This is
also reflected in the header of the cache device as "dh_lb_asize".
2) a new arcstat "l2arc_log_blk_count" and refcount "l2ad_lb_count"
which reflect the total number of L2ARC log blocks present on cache
devices. It is also reflected in the header of the cache device as
"dh_lb_count".
In l2arc_rebuild_vdev() if the amount of committed log entries in a log
block is 0 and the device header is valid we update the device header.
This will facilitate trimming of the whole device in this case when
TRIM for L2ARC is implemented.
Improve loop protection in l2arc_rebuild() by using the starting offset
of the payload of each log block instead of the starting offset of the
log block.
If the zio in l2arc_write_buffers() fails, restore the lbps array in the
header of the device to its previous state in l2arc_write_done().
If l2arc_rebuild() ends the rebuild process without restoring any L2ARC
log blocks in ARC and without any other error, this means that the lbps
array in the header is pointing to non-existent or invalid log blocks.
Reset the device header in this case.
In l2arc_rebuild() change the zfs_dbgmsg messages to
spa_history_log_internal() making them user visible with zpool history
command.
Non-functional changes:
Make the first test in persistent L2ARC use `zdb -lll` to increase
coverage in `zdb.c`.
Rename psize with asize when referring to log blocks, since
L2ARC_SET_PSIZE stores the vdev aligned size for log blocks. Also
rename dh_log_blk_entries to dh_log_entries to make it clear that
it is a mirror of l2ad_log_entries. Added comments for both changes.
Fix inaccurate comments for example in l2arc_log_blk_restore().
Add asserts at the end in l2arc_evict() and l2arc_write_buffers().
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: George Amanakis <[email protected]>
Closes #10228
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/arc.c | 234 |
1 files changed, 159 insertions, 75 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 5b34d4abd..a6b739ec3 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -530,7 +530,9 @@ arc_stats_t arc_stats = { { "l2_asize", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, { "l2_log_blk_writes", KSTAT_DATA_UINT64 }, - { "l2_log_blk_avg_size", KSTAT_DATA_UINT64 }, + { "l2_log_blk_avg_asize", KSTAT_DATA_UINT64 }, + { "l2_log_blk_asize", KSTAT_DATA_UINT64 }, + { "l2_log_blk_count", KSTAT_DATA_UINT64 }, { "l2_data_to_meta_ratio", KSTAT_DATA_UINT64 }, { "l2_rebuild_success", KSTAT_DATA_UINT64 }, { "l2_rebuild_unsupported", KSTAT_DATA_UINT64 }, @@ -539,9 +541,9 @@ arc_stats_t arc_stats = { { "l2_rebuild_cksum_lb_errors", KSTAT_DATA_UINT64 }, { "l2_rebuild_lowmem", KSTAT_DATA_UINT64 }, { "l2_rebuild_size", KSTAT_DATA_UINT64 }, + { "l2_rebuild_asize", KSTAT_DATA_UINT64 }, { "l2_rebuild_bufs", KSTAT_DATA_UINT64 }, { "l2_rebuild_bufs_precached", KSTAT_DATA_UINT64 }, - { "l2_rebuild_psize", KSTAT_DATA_UINT64 }, { "l2_rebuild_log_blks", KSTAT_DATA_UINT64 }, { "memory_throttle_count", KSTAT_DATA_UINT64 }, { "memory_direct_count", KSTAT_DATA_UINT64 }, @@ -895,7 +897,7 @@ static void l2arc_log_blk_fetch_abort(zio_t *zio); /* L2ARC persistence block restoration routines. */ static void l2arc_log_blk_restore(l2arc_dev_t *dev, - const l2arc_log_blk_phys_t *lb, uint64_t lb_psize, uint64_t lb_daddr); + const l2arc_log_blk_phys_t *lb, uint64_t lb_asize, uint64_t lb_daddr); static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev); @@ -7864,6 +7866,7 @@ l2arc_write_done(zio_t *zio) l2arc_lb_abd_buf_t *abd_buf; l2arc_lb_ptr_buf_t *lb_ptr_buf; l2arc_dev_t *dev; + l2arc_dev_hdr_phys_t *l2dhdr; list_t *buflist; arc_buf_hdr_t *head, *hdr, *hdr_prev; kmutex_t *hash_lock; @@ -7872,6 +7875,7 @@ l2arc_write_done(zio_t *zio) cb = zio->io_private; ASSERT3P(cb, !=, NULL); dev = cb->l2wcb_dev; + l2dhdr = dev->l2ad_dev_hdr; ASSERT3P(dev, !=, NULL); head = cb->l2wcb_head; ASSERT3P(head, !=, NULL); @@ -7975,8 +7979,18 @@ top: zio_buf_free(abd_buf, sizeof (*abd_buf)); if (zio->io_error != 0) { lb_ptr_buf = list_remove_head(&dev->l2ad_lbptr_list); - bytes_dropped += + /* + * L2BLK_GET_PSIZE returns aligned size for log + * blocks. + */ + uint64_t asize = L2BLK_GET_PSIZE((lb_ptr_buf->lb_ptr)->lbp_prop); + bytes_dropped += asize; + ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize); + ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count); + zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize, + lb_ptr_buf); + zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf); kmem_free(lb_ptr_buf->lb_ptr, sizeof (l2arc_log_blkptr_t)); kmem_free(lb_ptr_buf, sizeof (l2arc_lb_ptr_buf_t)); @@ -7984,6 +7998,17 @@ top: } list_destroy(&cb->l2wcb_abd_list); + if (zio->io_error != 0) { + /* restore the lbps array in the header to its previous state */ + lb_ptr_buf = list_head(&dev->l2ad_lbptr_list); + for (int i = 0; i < 2; i++) { + bcopy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[i], + sizeof (l2arc_log_blkptr_t)); + lb_ptr_buf = list_next(&dev->l2ad_lbptr_list, + lb_ptr_buf); + } + } + atomic_inc_64(&l2arc_writes_done); list_remove(buflist, head); ASSERT(!HDR_HAS_L1HDR(head)); @@ -8277,21 +8302,21 @@ l2arc_sublist_lock(int list_num) /* * Calculates the maximum overhead of L2ARC metadata log blocks for a given - * L2ARC write size. l2arc_evict and l2arc_write_buffers need to include this + * L2ARC write size. l2arc_evict and l2arc_write_size need to include this * overhead in processing to make sure there is enough headroom available * when writing buffers. */ static inline uint64_t l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev) { - if (dev->l2ad_dev_hdr->dh_log_blk_ent == 0) { + if (dev->l2ad_log_entries == 0) { return (0); } else { uint64_t log_entries = write_sz >> SPA_MINBLOCKSHIFT; uint64_t log_blocks = (log_entries + - dev->l2ad_dev_hdr->dh_log_blk_ent - 1) / - dev->l2ad_dev_hdr->dh_log_blk_ent; + dev->l2ad_log_entries - 1) / + dev->l2ad_log_entries; return (vdev_psize_to_asize(dev->l2ad_vdev, sizeof (l2arc_log_blk_phys_t)) * log_blocks); @@ -8373,17 +8398,24 @@ retry: lb_ptr_buf_prev = list_prev(&dev->l2ad_lbptr_list, lb_ptr_buf); + /* L2BLK_GET_PSIZE returns aligned size for log blocks */ + uint64_t asize = L2BLK_GET_PSIZE( + (lb_ptr_buf->lb_ptr)->lbp_prop); + /* * We don't worry about log blocks left behind (ie - * lbp_daddr + psize < l2ad_hand) because l2arc_write_buffers() + * lbp_payload_start < l2ad_hand) because l2arc_write_buffers() * will never write more than l2arc_evict() evicts. */ if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) { break; } else { - vdev_space_update(dev->l2ad_vdev, - -L2BLK_GET_PSIZE( - (lb_ptr_buf->lb_ptr)->lbp_prop), 0, 0); + vdev_space_update(dev->l2ad_vdev, -asize, 0, 0); + ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize); + ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count); + zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize, + lb_ptr_buf); + zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf); list_remove(&dev->l2ad_lbptr_list, lb_ptr_buf); kmem_free(lb_ptr_buf->lb_ptr, sizeof (l2arc_log_blkptr_t)); @@ -8475,6 +8507,10 @@ out: dev->l2ad_first = B_FALSE; goto top; } + + ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end); + if (!dev->l2ad_first) + ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict); } /* @@ -8777,6 +8813,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) sizeof (l2arc_write_callback_t), KM_SLEEP); cb->l2wcb_dev = dev; cb->l2wcb_head = head; + /* + * Create a list to save allocated abd buffers + * for l2arc_log_blk_commit(). + */ list_create(&cb->l2wcb_abd_list, sizeof (l2arc_lb_abd_buf_t), offsetof(l2arc_lb_abd_buf_t, node)); @@ -8846,6 +8886,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) return (0); } + if (!dev->l2ad_first) + ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict); + ASSERT3U(write_asize, <=, target_sz); ARCSTAT_BUMP(arcstat_l2_writes_sent); ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize); @@ -9036,6 +9079,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd) vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand); zfs_refcount_create(&adddev->l2ad_alloc); + zfs_refcount_create(&adddev->l2ad_lb_asize); + zfs_refcount_create(&adddev->l2ad_lb_count); /* * Add device to global list @@ -9059,7 +9104,7 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen) uint64_t l2dhdr_asize; spa_t *spa; int err; - boolean_t rebuild = B_TRUE; + boolean_t l2dhdr_valid = B_TRUE; dev = l2arc_vdev_get(vd); ASSERT3P(dev, !=, NULL); @@ -9089,9 +9134,9 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen) * Read the device header, if an error is returned do not rebuild L2ARC. */ if ((err = l2arc_dev_hdr_read(dev)) != 0) - rebuild = B_FALSE; + l2dhdr_valid = B_FALSE; - if (rebuild && l2dhdr->dh_log_blk_ent > 0) { + if (l2dhdr_valid && dev->l2ad_log_entries > 0) { /* * If we are onlining a cache device (vdev_reopen) that was * still present (l2arc_vdev_present()) and rebuild is enabled, @@ -9117,12 +9162,10 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen) * async task which will call l2arc_spa_rebuild_start. */ dev->l2ad_rebuild = B_TRUE; - } else if (!rebuild && spa_writeable(spa)) { + } else if (spa_writeable(spa)) { /* - * The boolean rebuild is false if reading the device header - * returned an error. In this case create a new header. We - * zero out the memory holding the header to reset - * dh_start_lbps. + * In this case create a new header. We zero out the memory + * holding the header to reset dh_start_lbps. */ bzero(l2dhdr, l2dhdr_asize); l2arc_dev_hdr_update(dev); @@ -9172,6 +9215,8 @@ l2arc_remove_vdev(vdev_t *vd) list_destroy(&remdev->l2ad_lbptr_list); mutex_destroy(&remdev->l2ad_mtx); zfs_refcount_destroy(&remdev->l2ad_alloc); + zfs_refcount_destroy(&remdev->l2ad_lb_asize); + zfs_refcount_destroy(&remdev->l2ad_lb_count); kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); vmem_free(remdev, sizeof (l2arc_dev_t)); } @@ -9309,7 +9354,7 @@ l2arc_rebuild(l2arc_dev_t *dev) { vdev_t *vd = dev->l2ad_vdev; spa_t *spa = vd->vdev_spa; - int i = 0, err = 0; + int err = 0; l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr; l2arc_log_blk_phys_t *this_lb, *next_lb; zio_t *this_io = NULL, *next_io = NULL; @@ -9332,6 +9377,7 @@ l2arc_rebuild(l2arc_dev_t *dev) /* * Retrieve the persistent L2ARC device state. + * L2BLK_GET_PSIZE returns aligned size for log blocks. */ dev->l2ad_evict = MAX(l2dhdr->dh_evict, dev->l2ad_start); dev->l2ad_hand = MAX(l2dhdr->dh_start_lbps[0].lbp_daddr + @@ -9381,11 +9427,10 @@ l2arc_rebuild(l2arc_dev_t *dev) /* * Now that we know that the next_lb checks out alright, we * can start reconstruction from this log block. + * L2BLK_GET_PSIZE returns aligned size for log blocks. */ - l2arc_log_blk_restore(dev, this_lb, - L2BLK_GET_PSIZE((&lbps[0])->lbp_prop), - lbps[0].lbp_daddr); - i++; + uint64_t asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); + l2arc_log_blk_restore(dev, this_lb, asize, lbps[0].lbp_daddr); /* * log block restored, include its pointer in the list of @@ -9398,9 +9443,12 @@ l2arc_rebuild(l2arc_dev_t *dev) sizeof (l2arc_log_blkptr_t)); mutex_enter(&dev->l2ad_mtx); list_insert_tail(&dev->l2ad_lbptr_list, lb_ptr_buf); + ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize); + ARCSTAT_BUMP(arcstat_l2_log_blk_count); + zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf); + zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf); mutex_exit(&dev->l2ad_mtx); - vdev_space_update(vd, - L2BLK_GET_PSIZE((&lbps[0])->lbp_prop), 0, 0); + vdev_space_update(vd, asize, 0, 0); /* * Protection against loops of log blocks: @@ -9417,13 +9465,16 @@ l2arc_rebuild(l2arc_dev_t *dev) * l2arc_log_blkptr_valid() but the log block should not be * restored as it is overwritten by the payload of log block * (0). Only log blocks (0)-(3) should be restored. We check - * whether l2ad_evict lies in between the next log block - * offset (lbps[1].lbp_daddr) and the present log block offset - * (lbps[0].lbp_daddr). If true and this isn't the first pass, - * we are looping from the beginning and we should stop. + * whether l2ad_evict lies in between the payload starting + * offset of the next log block (lbps[1].lbp_payload_start) + * and the payload starting offset of the present log block + * (lbps[0].lbp_payload_start). If true and this isn't the + * first pass, we are looping from the beginning and we should + * stop. */ - if (l2arc_range_check_overlap(lbps[1].lbp_daddr, - lbps[0].lbp_daddr, dev->l2ad_evict) && !dev->l2ad_first) + if (l2arc_range_check_overlap(lbps[1].lbp_payload_start, + lbps[0].lbp_payload_start, dev->l2ad_evict) && + !dev->l2ad_first) goto out; for (;;) { @@ -9470,14 +9521,27 @@ out: vmem_free(next_lb, sizeof (*next_lb)); if (!l2arc_rebuild_enabled) { - zfs_dbgmsg("L2ARC rebuild disabled"); - } else if (err == 0 && i > 0) { + spa_history_log_internal(spa, "L2ARC rebuild", NULL, + "disabled"); + } else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) > 0) { ARCSTAT_BUMP(arcstat_l2_rebuild_success); - zfs_dbgmsg("L2ARC successfully rebuilt, " - "restored %d blocks", i); + spa_history_log_internal(spa, "L2ARC rebuild", NULL, + "successful, restored %llu blocks", + (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count)); + } else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) == 0) { + /* + * No error but also nothing restored, meaning the lbps array + * in the device header points to invalid/non-present log + * blocks. Reset the header. + */ + spa_history_log_internal(spa, "L2ARC rebuild", NULL, + "no valid log blocks"); + bzero(l2dhdr, dev->l2ad_dev_hdr_asize); + l2arc_dev_hdr_update(dev); } else if (err != 0) { - zfs_dbgmsg("L2ARC rebuild aborted, " - "restored %d blocks", i); + spa_history_log_internal(spa, "L2ARC rebuild", NULL, + "aborted, restored %llu blocks", + (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count)); } if (lock_held) @@ -9527,7 +9591,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev) l2dhdr->dh_spa_guid != guid || l2dhdr->dh_vdev_guid != dev->l2ad_vdev->vdev_guid || l2dhdr->dh_version != L2ARC_PERSISTENT_VERSION || - l2dhdr->dh_log_blk_ent != dev->l2ad_log_entries || + l2dhdr->dh_log_entries != dev->l2ad_log_entries || l2dhdr->dh_end != dev->l2ad_end || !l2arc_range_check_overlap(dev->l2ad_start, dev->l2ad_end, l2dhdr->dh_evict)) { @@ -9578,7 +9642,7 @@ l2arc_log_blk_read(l2arc_dev_t *dev, int err = 0; zio_cksum_t cksum; abd_t *abd = NULL; - uint64_t psize; + uint64_t asize; ASSERT(this_lbp != NULL && next_lbp != NULL); ASSERT(this_lb != NULL && next_lb != NULL); @@ -9616,9 +9680,12 @@ l2arc_log_blk_read(l2arc_dev_t *dev, goto cleanup; } - /* Make sure the buffer checks out */ - psize = L2BLK_GET_PSIZE((this_lbp)->lbp_prop); - fletcher_4_native(this_lb, psize, NULL, &cksum); + /* + * Make sure the buffer checks out. + * L2BLK_GET_PSIZE returns aligned size for log blocks. + */ + asize = L2BLK_GET_PSIZE((this_lbp)->lbp_prop); + fletcher_4_native(this_lb, asize, NULL, &cksum); if (!ZIO_CHECKSUM_EQUAL(cksum, this_lbp->lbp_cksum)) { ARCSTAT_BUMP(arcstat_l2_rebuild_abort_cksum_lb_errors); zfs_dbgmsg("L2ARC log block cksum failed, offset: %llu, " @@ -9634,11 +9701,11 @@ l2arc_log_blk_read(l2arc_dev_t *dev, case ZIO_COMPRESS_OFF: break; case ZIO_COMPRESS_LZ4: - abd = abd_alloc_for_io(psize, B_TRUE); - abd_copy_from_buf_off(abd, this_lb, 0, psize); + abd = abd_alloc_for_io(asize, B_TRUE); + abd_copy_from_buf_off(abd, this_lb, 0, asize); if ((err = zio_decompress_data( L2BLK_GET_COMPRESS((this_lbp)->lbp_prop), - abd, this_lb, psize, sizeof (*this_lb))) != 0) { + abd, this_lb, asize, sizeof (*this_lb))) != 0) { err = SET_ERROR(EINVAL); goto cleanup; } @@ -9672,10 +9739,10 @@ cleanup: */ static void l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb, - uint64_t lb_psize, uint64_t lb_daddr) + uint64_t lb_asize, uint64_t lb_daddr) { - uint64_t size = 0, psize = 0; - uint64_t log_entries = dev->l2ad_dev_hdr->dh_log_blk_ent; + uint64_t size = 0, asize = 0; + uint64_t log_entries = dev->l2ad_log_entries; for (int i = log_entries - 1; i >= 0; i--) { /* @@ -9692,27 +9759,28 @@ l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb, * ^ ^ * | | * | | - * l2arc_fill_thread l2arc_rebuild - * places new bufs here restores bufs here + * l2arc_feed_thread l2arc_rebuild + * will place new bufs here restores bufs here * - * This also works when the restored bufs get evicted at any - * point during the rebuild. + * During l2arc_rebuild() the device is not used by + * l2arc_feed_thread() as dev->l2ad_rebuild is set to true. */ size += L2BLK_GET_LSIZE((&lb->lb_entries[i])->le_prop); - psize += L2BLK_GET_PSIZE((&lb->lb_entries[i])->le_prop); + asize += vdev_psize_to_asize(dev->l2ad_vdev, + L2BLK_GET_PSIZE((&lb->lb_entries[i])->le_prop)); l2arc_hdr_restore(&lb->lb_entries[i], dev); } /* * Record rebuild stats: * size Logical size of restored buffers in the L2ARC - * psize Physical size of restored buffers in the L2ARC + * asize Aligned size of restored buffers in the L2ARC */ ARCSTAT_INCR(arcstat_l2_rebuild_size, size); - ARCSTAT_INCR(arcstat_l2_rebuild_psize, psize); + ARCSTAT_INCR(arcstat_l2_rebuild_asize, asize); ARCSTAT_INCR(arcstat_l2_rebuild_bufs, log_entries); - ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_size, lb_psize); - ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, psize / lb_psize); + ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, lb_asize); + ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, asize / lb_asize); ARCSTAT_BUMP(arcstat_l2_rebuild_log_blks); } @@ -9800,18 +9868,20 @@ static zio_t * l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp, l2arc_log_blk_phys_t *lb) { - uint32_t psize; + uint32_t asize; zio_t *pio; l2arc_read_callback_t *cb; - psize = L2BLK_GET_PSIZE((lbp)->lbp_prop); - ASSERT(psize <= sizeof (l2arc_log_blk_phys_t)); + /* L2BLK_GET_PSIZE returns aligned size for log blocks */ + asize = L2BLK_GET_PSIZE((lbp)->lbp_prop); + ASSERT(asize <= sizeof (l2arc_log_blk_phys_t)); + cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP); - cb->l2rcb_abd = abd_get_from_buf(lb, psize); + cb->l2rcb_abd = abd_get_from_buf(lb, asize); pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY); - (void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, psize, + (void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize, cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE)); @@ -9841,14 +9911,18 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev) abd_t *abd; int err; + VERIFY(spa_config_held(dev->l2ad_spa, SCL_STATE_ALL, RW_READER)); + l2dhdr->dh_magic = L2ARC_DEV_HDR_MAGIC; l2dhdr->dh_version = L2ARC_PERSISTENT_VERSION; l2dhdr->dh_spa_guid = spa_guid(dev->l2ad_vdev->vdev_spa); l2dhdr->dh_vdev_guid = dev->l2ad_vdev->vdev_guid; - l2dhdr->dh_log_blk_ent = dev->l2ad_log_entries; + l2dhdr->dh_log_entries = dev->l2ad_log_entries; l2dhdr->dh_evict = dev->l2ad_evict; l2dhdr->dh_start = dev->l2ad_start; l2dhdr->dh_end = dev->l2ad_end; + l2dhdr->dh_lb_asize = zfs_refcount_count(&dev->l2ad_lb_asize); + l2dhdr->dh_lb_count = zfs_refcount_count(&dev->l2ad_lb_count); l2dhdr->dh_flags = 0; if (dev->l2ad_first) l2dhdr->dh_flags |= L2ARC_DEV_HDR_EVICT_FIRST; @@ -9884,7 +9958,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) uint8_t *tmpbuf; l2arc_lb_ptr_buf_t *lb_ptr_buf; - VERIFY3S(dev->l2ad_log_ent_idx, ==, l2dhdr->dh_log_blk_ent); + VERIFY3S(dev->l2ad_log_ent_idx, ==, dev->l2ad_log_entries); tmpbuf = zio_buf_alloc(sizeof (*lb)); abd_buf = zio_buf_alloc(sizeof (*abd_buf)); @@ -9896,8 +9970,14 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) lb->lb_prev_lbp = l2dhdr->dh_start_lbps[1]; lb->lb_magic = L2ARC_LOG_BLK_MAGIC; - /* try to compress the buffer */ + /* + * l2arc_log_blk_commit() may be called multiple times during a single + * l2arc_write_buffers() call. Save the allocated abd buffers in a list + * so we can free them in l2arc_write_done() later on. + */ list_insert_tail(&cb->l2wcb_abd_list, abd_buf); + + /* try to compress the buffer */ psize = zio_compress_data(ZIO_COMPRESS_LZ4, abd_buf->abd, tmpbuf, sizeof (*lb)); @@ -9962,13 +10042,17 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) sizeof (l2arc_log_blkptr_t)); mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_lbptr_list, lb_ptr_buf); + ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize); + ARCSTAT_BUMP(arcstat_l2_log_blk_count); + zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf); + zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf); mutex_exit(&dev->l2ad_mtx); vdev_space_update(dev->l2ad_vdev, asize, 0, 0); /* bump the kstats */ ARCSTAT_INCR(arcstat_l2_write_bytes, asize); ARCSTAT_BUMP(arcstat_l2_log_blk_writes); - ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_size, asize); + ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, asize); ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, dev->l2ad_log_blk_payload_asize / asize); @@ -9985,8 +10069,9 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb) boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev, const l2arc_log_blkptr_t *lbp) { - uint64_t psize = L2BLK_GET_PSIZE((lbp)->lbp_prop); - uint64_t end = lbp->lbp_daddr + psize - 1; + /* L2BLK_GET_PSIZE returns aligned size for log blocks */ + uint64_t asize = L2BLK_GET_PSIZE((lbp)->lbp_prop); + uint64_t end = lbp->lbp_daddr + asize - 1; uint64_t start = lbp->lbp_payload_start; boolean_t evicted = B_FALSE; @@ -10017,7 +10102,7 @@ l2arc_log_blkptr_valid(l2arc_dev_t *dev, const l2arc_log_blkptr_t *lbp) l2arc_range_check_overlap(dev->l2ad_hand, dev->l2ad_evict, end); return (start >= dev->l2ad_start && end <= dev->l2ad_end && - psize > 0 && psize <= sizeof (l2arc_log_blk_phys_t) && + asize > 0 && asize <= sizeof (l2arc_log_blk_phys_t) && (!evicted || dev->l2ad_first)); } @@ -10032,14 +10117,13 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr) { l2arc_log_blk_phys_t *lb = &dev->l2ad_log_blk; l2arc_log_ent_phys_t *le; - l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr; - if (l2dhdr->dh_log_blk_ent == 0) + if (dev->l2ad_log_entries == 0) return (B_FALSE); int index = dev->l2ad_log_ent_idx++; - ASSERT3S(index, <, l2dhdr->dh_log_blk_ent); + ASSERT3S(index, <, dev->l2ad_log_entries); ASSERT(HDR_HAS_L2HDR(hdr)); le = &lb->lb_entries[index]; @@ -10059,7 +10143,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr) dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev, HDR_GET_PSIZE(hdr)); - return (dev->l2ad_log_ent_idx == l2dhdr->dh_log_blk_ent); + return (dev->l2ad_log_ent_idx == dev->l2ad_log_entries); } /* |