summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cmd/zdb/zdb.c 123
-rw-r--r-- include/sys/arc_impl.h 40
-rw-r--r-- man/man8/zdb.8 5
-rw-r--r-- module/zfs/arc.c 234
-rwxr-xr-x tests/zfs-tests/tests/functional/persist_l2arc/persist_l2arc_001_pos.ksh 2
5 files changed, 278 insertions, 126 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index f4b4b454b..00258799b 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -3493,12 +3493,13 @@ print_l2arc_log_blocks(void)
static void
dump_l2arc_log_entries(uint64_t log_entries,
- l2arc_log_ent_phys_t *le, int i)
+ l2arc_log_ent_phys_t *le, uint64_t i)
{
for (int j = 0; j < log_entries; j++) {
dva_t dva = le[j].le_dva;
- (void) printf("lb[%4d]\tle[%4d]\tDVA asize: %llu, "
- "vdev: %llu, offset: %llu\n", i, j + 1,
+ (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, "
+ "vdev: %llu, offset: %llu\n",
+ (u_longlong_t)i, j + 1,
(u_longlong_t)DVA_GET_ASIZE(&dva),
(u_longlong_t)DVA_GET_VDEV(&dva),
(u_longlong_t)DVA_GET_OFFSET(&dva));
@@ -3533,7 +3534,7 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
(u_longlong_t)lbps.lbp_payload_start);
(void) printf("|\t\tlsize: %llu\n",
(u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop));
- (void) printf("|\t\tpsize: %llu\n",
+ (void) printf("|\t\tasize: %llu\n",
(u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop));
(void) printf("|\t\tcompralgo: %llu\n",
(u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop));
@@ -3543,17 +3544,19 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps)
}
static void
-dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
+dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr,
+ l2arc_dev_hdr_phys_t *rebuild)
{
l2arc_log_blk_phys_t this_lb;
- uint64_t psize;
+ uint64_t asize;
l2arc_log_blkptr_t lbps[2];
abd_t *abd;
zio_cksum_t cksum;
- int i = 0, failed = 0;
+ int failed = 0;
l2arc_dev_t dev;
- print_l2arc_log_blocks();
+ if (!dump_opt['q'])
+ print_l2arc_log_blocks();
bcopy((&l2dhdr)->dh_start_lbps, lbps, sizeof (lbps));
dev.l2ad_evict = l2dhdr.dh_evict;
@@ -3562,8 +3565,10 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) {
/* no log blocks to read */
- (void) printf("No log blocks to read\n");
- (void) printf("\n");
+ if (!dump_opt['q']) {
+ (void) printf("No log blocks to read\n");
+ (void) printf("\n");
+ }
return;
} else {
dev.l2ad_hand = lbps[0].lbp_daddr +
@@ -3576,17 +3581,23 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
if (!l2arc_log_blkptr_valid(&dev, &lbps[0]))
break;
- psize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
- if (pread64(fd, &this_lb, psize, lbps[0].lbp_daddr) != psize) {
- (void) printf("Error while reading next log block\n\n");
+ /* L2BLK_GET_PSIZE returns aligned size for log blocks */
+ asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
+ if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) {
+ if (!dump_opt['q']) {
+ (void) printf("Error while reading next log "
+ "block\n\n");
+ }
break;
}
- fletcher_4_native_varsize(&this_lb, psize, &cksum);
+ fletcher_4_native_varsize(&this_lb, asize, &cksum);
if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
failed++;
- (void) printf("Invalid cksum\n");
- dump_l2arc_log_blkptr(lbps[0]);
+ if (!dump_opt['q']) {
+ (void) printf("Invalid cksum\n");
+ dump_l2arc_log_blkptr(lbps[0]);
+ }
break;
}
@@ -3594,11 +3605,11 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
case ZIO_COMPRESS_OFF:
break;
case ZIO_COMPRESS_LZ4:
- abd = abd_alloc_for_io(psize, B_TRUE);
- abd_copy_from_buf_off(abd, &this_lb, 0, psize);
+ abd = abd_alloc_for_io(asize, B_TRUE);
+ abd_copy_from_buf_off(abd, &this_lb, 0, asize);
zio_decompress_data(L2BLK_GET_COMPRESS(
(&lbps[0])->lbp_prop), abd, &this_lb,
- psize, sizeof (this_lb));
+ asize, sizeof (this_lb));
abd_free(abd);
break;
default:
@@ -3608,39 +3619,52 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
byteswap_uint64_array(&this_lb, sizeof (this_lb));
if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
- (void) printf("Invalid log block magic\n\n");
+ if (!dump_opt['q'])
+ (void) printf("Invalid log block magic\n\n");
break;
}
- i++;
- if (dump_opt['l'] > 1) {
- (void) printf("lb[%4d]\tmagic: %llu\n", i,
+ rebuild->dh_lb_count++;
+ rebuild->dh_lb_asize += asize;
+ if (dump_opt['l'] > 1 && !dump_opt['q']) {
+ (void) printf("lb[%4llu]\tmagic: %llu\n",
+ (u_longlong_t)rebuild->dh_lb_count,
(u_longlong_t)this_lb.lb_magic);
dump_l2arc_log_blkptr(lbps[0]);
}
- if (dump_opt['l'] > 2)
- dump_l2arc_log_entries(l2dhdr.dh_log_blk_ent,
- this_lb.lb_entries, i);
+ if (dump_opt['l'] > 2 && !dump_opt['q'])
+ dump_l2arc_log_entries(l2dhdr.dh_log_entries,
+ this_lb.lb_entries,
+ rebuild->dh_lb_count);
- if (l2arc_range_check_overlap(lbps[1].lbp_daddr,
- lbps[0].lbp_daddr, dev.l2ad_evict) && !dev.l2ad_first)
+ if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
+ lbps[0].lbp_payload_start, dev.l2ad_evict) &&
+ !dev.l2ad_first)
break;
lbps[0] = lbps[1];
lbps[1] = this_lb.lb_prev_lbp;
}
- (void) printf("log_blk_count:\t %d with valid cksum\n", i);
- (void) printf("\t\t %d with invalid cksum\n\n", failed);
+ if (!dump_opt['q']) {
+ (void) printf("log_blk_count:\t %llu with valid cksum\n",
+ (u_longlong_t)rebuild->dh_lb_count);
+ (void) printf("\t\t %d with invalid cksum\n", failed);
+ (void) printf("log_blk_asize:\t %llu\n\n",
+ (u_longlong_t)rebuild->dh_lb_asize);
+ }
}
-static void
+static int
dump_l2arc_header(int fd)
{
- l2arc_dev_hdr_phys_t l2dhdr;
+ l2arc_dev_hdr_phys_t l2dhdr, rebuild;
int error = B_FALSE;
+ bzero(&l2dhdr, sizeof (l2dhdr));
+ bzero(&rebuild, sizeof (rebuild));
+
if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) {
error = B_TRUE;
@@ -3654,6 +3678,8 @@ dump_l2arc_header(int fd)
if (error) {
(void) printf("L2ARC device header not found\n\n");
+ /* Do not return an error here for backward compatibility */
+ return (0);
} else if (!dump_opt['q']) {
print_l2arc_header();
@@ -3672,16 +3698,39 @@ dump_l2arc_header(int fd)
(u_longlong_t)
l2dhdr.dh_start_lbps[1].lbp_daddr);
(void) printf(" log_blk_ent: %llu\n",
- (u_longlong_t)l2dhdr.dh_log_blk_ent);
+ (u_longlong_t)l2dhdr.dh_log_entries);
(void) printf(" start: %llu\n",
(u_longlong_t)l2dhdr.dh_start);
(void) printf(" end: %llu\n",
(u_longlong_t)l2dhdr.dh_end);
- (void) printf(" evict: %llu\n\n",
+ (void) printf(" evict: %llu\n",
(u_longlong_t)l2dhdr.dh_evict);
-
- dump_l2arc_log_blocks(fd, l2dhdr);
+ (void) printf(" lb_asize_refcount: %llu\n",
+ (u_longlong_t)l2dhdr.dh_lb_asize);
+ (void) printf(" lb_count_refcount: %llu\n\n",
+ (u_longlong_t)l2dhdr.dh_lb_count);
}
+
+ dump_l2arc_log_blocks(fd, l2dhdr, &rebuild);
+ /*
+ * The total aligned size of log blocks and the number of log blocks
+ * reported in the header of the device may be less than what zdb
+ * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild().
+ * This happens because dump_l2arc_log_blocks() lacks the memory
+ * pressure valve that l2arc_rebuild() has. Thus, if we are on a system
+ * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize
+ * and dh_lb_count will be lower to begin with than what exists on the
+ * device. This is normal and zdb should not exit with an error. The
+ * opposite case should never happen though, the values reported in the
+ * header should never be higher than what dump_l2arc_log_blocks() and
+ * l2arc_rebuild() report. If this happens there is a leak in the
+ * accounting of log blocks.
+ */
+ if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize ||
+ l2dhdr.dh_lb_count > rebuild.dh_lb_count)
+ return (1);
+
+ return (0);
}
static void
@@ -4009,7 +4058,7 @@ dump_label(const char *dev)
* Dump the L2ARC header, if existent.
*/
if (read_l2arc_header)
- dump_l2arc_header(fd);
+ error |= dump_l2arc_header(fd);
cookie = NULL;
while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index 928b72325..e8c944ce8 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -200,7 +200,7 @@ typedef struct l2arc_log_blkptr {
/*
* lbp_prop has the following format:
* * logical size (in bytes)
- * * physical (compressed) size (in bytes)
+ * * aligned (after compression) size (in bytes)
* * compression algorithm (we always LZ4-compress l2arc logs)
* * checksum algorithm (used for lbp_cksum)
*/
@@ -221,22 +221,26 @@ typedef struct l2arc_dev_hdr_phys {
*/
uint64_t dh_spa_guid;
uint64_t dh_vdev_guid;
- uint64_t dh_log_blk_ent; /* entries per log blk */
+ uint64_t dh_log_entries; /* mirror of l2ad_log_entries */
uint64_t dh_evict; /* evicted offset in bytes */
uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */
/*
* Used in zdb.c for determining if a log block is valid, in the same
* way that l2arc_rebuild() does.
*/
- uint64_t dh_start;
- uint64_t dh_end;
-
+ uint64_t dh_start; /* mirror of l2ad_start */
+ uint64_t dh_end; /* mirror of l2ad_end */
/*
* Start of log block chain. [0] -> newest log, [1] -> one older (used
* for initiating prefetch).
*/
l2arc_log_blkptr_t dh_start_lbps[2];
- const uint64_t dh_pad[34]; /* pad to 512 bytes */
+ /*
+ * Aligned size of all log blocks as accounted by vdev_space_update().
+ */
+ uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
+ uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
+ const uint64_t dh_pad[32]; /* pad to 512 bytes */
zio_eck_t dh_tail;
} l2arc_dev_hdr_phys_t;
CTASSERT_GLOBAL(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
@@ -387,6 +391,14 @@ typedef struct l2arc_dev {
uint64_t l2ad_evict; /* evicted offset in bytes */
/* List of pointers to log blocks present in the L2ARC device */
list_t l2ad_lbptr_list;
+ /*
+ * Aligned size of all log blocks as accounted by vdev_space_update().
+ */
+ zfs_refcount_t l2ad_lb_asize;
+ /*
+ * Number of log blocks present on the device.
+ */
+ zfs_refcount_t l2ad_lb_count;
} l2arc_dev_t;
/*
@@ -738,14 +750,18 @@ typedef struct arc_stats {
*/
kstat_named_t arcstat_l2_log_blk_writes;
/*
- * Moving average of the physical size of the L2ARC log blocks, in
+ * Moving average of the aligned size of the L2ARC log blocks, in
* bytes. Updated during L2ARC rebuild and during writing of L2ARC
* log blocks.
*/
- kstat_named_t arcstat_l2_log_blk_avg_size;
+ kstat_named_t arcstat_l2_log_blk_avg_asize;
+ /* Aligned size of L2ARC log blocks on L2ARC devices. */
+ kstat_named_t arcstat_l2_log_blk_asize;
+ /* Number of L2ARC log blocks present on L2ARC devices. */
+ kstat_named_t arcstat_l2_log_blk_count;
/*
- * Moving average of the physical size of L2ARC restored data, in bytes,
- * to the physical size of their metadata in ARC, in bytes.
+ * Moving average of the aligned size of L2ARC restored data, in bytes,
+ * to the aligned size of their metadata in L2ARC, in bytes.
* Updated during L2ARC rebuild and during writing of L2ARC log blocks.
*/
kstat_named_t arcstat_l2_data_to_meta_ratio;
@@ -780,6 +796,8 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_rebuild_abort_lowmem;
/* Logical size of L2ARC restored data, in bytes. */
kstat_named_t arcstat_l2_rebuild_size;
+ /* Aligned size of L2ARC restored data, in bytes. */
+ kstat_named_t arcstat_l2_rebuild_asize;
/*
* Number of L2ARC log entries (buffers) that were successfully
* restored in ARC.
@@ -790,8 +808,6 @@ typedef struct arc_stats {
* were not restored again.
*/
kstat_named_t arcstat_l2_rebuild_bufs_precached;
- /* Physical size of L2ARC restored data, in bytes. */
- kstat_named_t arcstat_l2_rebuild_psize;
/*
* Number of L2ARC log blocks that were restored successfully. Each
* log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
diff --git a/man/man8/zdb.8 b/man/man8/zdb.8
index 3915be3f8..01ad95b3e 100644
--- a/man/man8/zdb.8
+++ b/man/man8/zdb.8
@@ -216,7 +216,10 @@ Read the vdev labels and L2ARC header from the specified device.
.Nm Fl l
will return 0 if valid label was found, 1 if error occurred, and 2 if no valid
labels were found. The presence of L2ARC header is indicated by a specific
-sequence (L2ARC_DEV_HDR_MAGIC). Each unique configuration is displayed only
+sequence (L2ARC_DEV_HDR_MAGIC). If there is an accounting error in the size
+or the number of L2ARC log blocks
+.Nm Fl l
+will return 1. Each unique configuration is displayed only
once.
.It Fl ll Ar device
In addition display label space usage stats. If a valid L2ARC header was found
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 5b34d4abd..a6b739ec3 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -530,7 +530,9 @@ arc_stats_t arc_stats = {
{ "l2_asize", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
{ "l2_log_blk_writes", KSTAT_DATA_UINT64 },
- { "l2_log_blk_avg_size", KSTAT_DATA_UINT64 },
+ { "l2_log_blk_avg_asize", KSTAT_DATA_UINT64 },
+ { "l2_log_blk_asize", KSTAT_DATA_UINT64 },
+ { "l2_log_blk_count", KSTAT_DATA_UINT64 },
{ "l2_data_to_meta_ratio", KSTAT_DATA_UINT64 },
{ "l2_rebuild_success", KSTAT_DATA_UINT64 },
{ "l2_rebuild_unsupported", KSTAT_DATA_UINT64 },
@@ -539,9 +541,9 @@ arc_stats_t arc_stats = {
{ "l2_rebuild_cksum_lb_errors", KSTAT_DATA_UINT64 },
{ "l2_rebuild_lowmem", KSTAT_DATA_UINT64 },
{ "l2_rebuild_size", KSTAT_DATA_UINT64 },
+ { "l2_rebuild_asize", KSTAT_DATA_UINT64 },
{ "l2_rebuild_bufs", KSTAT_DATA_UINT64 },
{ "l2_rebuild_bufs_precached", KSTAT_DATA_UINT64 },
- { "l2_rebuild_psize", KSTAT_DATA_UINT64 },
{ "l2_rebuild_log_blks", KSTAT_DATA_UINT64 },
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
{ "memory_direct_count", KSTAT_DATA_UINT64 },
@@ -895,7 +897,7 @@ static void l2arc_log_blk_fetch_abort(zio_t *zio);
/* L2ARC persistence block restoration routines. */
static void l2arc_log_blk_restore(l2arc_dev_t *dev,
- const l2arc_log_blk_phys_t *lb, uint64_t lb_psize, uint64_t lb_daddr);
+ const l2arc_log_blk_phys_t *lb, uint64_t lb_asize, uint64_t lb_daddr);
static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
l2arc_dev_t *dev);
@@ -7864,6 +7866,7 @@ l2arc_write_done(zio_t *zio)
l2arc_lb_abd_buf_t *abd_buf;
l2arc_lb_ptr_buf_t *lb_ptr_buf;
l2arc_dev_t *dev;
+ l2arc_dev_hdr_phys_t *l2dhdr;
list_t *buflist;
arc_buf_hdr_t *head, *hdr, *hdr_prev;
kmutex_t *hash_lock;
@@ -7872,6 +7875,7 @@ l2arc_write_done(zio_t *zio)
cb = zio->io_private;
ASSERT3P(cb, !=, NULL);
dev = cb->l2wcb_dev;
+ l2dhdr = dev->l2ad_dev_hdr;
ASSERT3P(dev, !=, NULL);
head = cb->l2wcb_head;
ASSERT3P(head, !=, NULL);
@@ -7975,8 +7979,18 @@ top:
zio_buf_free(abd_buf, sizeof (*abd_buf));
if (zio->io_error != 0) {
lb_ptr_buf = list_remove_head(&dev->l2ad_lbptr_list);
- bytes_dropped +=
+ /*
+ * L2BLK_GET_PSIZE returns aligned size for log
+ * blocks.
+ */
+ uint64_t asize =
L2BLK_GET_PSIZE((lb_ptr_buf->lb_ptr)->lbp_prop);
+ bytes_dropped += asize;
+ ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
+ ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
+ zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
+ lb_ptr_buf);
+ zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf);
kmem_free(lb_ptr_buf->lb_ptr,
sizeof (l2arc_log_blkptr_t));
kmem_free(lb_ptr_buf, sizeof (l2arc_lb_ptr_buf_t));
@@ -7984,6 +7998,17 @@ top:
}
list_destroy(&cb->l2wcb_abd_list);
+ if (zio->io_error != 0) {
+ /* restore the lbps array in the header to its previous state */
+ lb_ptr_buf = list_head(&dev->l2ad_lbptr_list);
+ for (int i = 0; i < 2; i++) {
+ bcopy(lb_ptr_buf->lb_ptr, &l2dhdr->dh_start_lbps[i],
+ sizeof (l2arc_log_blkptr_t));
+ lb_ptr_buf = list_next(&dev->l2ad_lbptr_list,
+ lb_ptr_buf);
+ }
+ }
+
atomic_inc_64(&l2arc_writes_done);
list_remove(buflist, head);
ASSERT(!HDR_HAS_L1HDR(head));
@@ -8277,21 +8302,21 @@ l2arc_sublist_lock(int list_num)
/*
* Calculates the maximum overhead of L2ARC metadata log blocks for a given
- * L2ARC write size. l2arc_evict and l2arc_write_buffers need to include this
+ * L2ARC write size. l2arc_evict and l2arc_write_size need to include this
* overhead in processing to make sure there is enough headroom available
* when writing buffers.
*/
static inline uint64_t
l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev)
{
- if (dev->l2ad_dev_hdr->dh_log_blk_ent == 0) {
+ if (dev->l2ad_log_entries == 0) {
return (0);
} else {
uint64_t log_entries = write_sz >> SPA_MINBLOCKSHIFT;
uint64_t log_blocks = (log_entries +
- dev->l2ad_dev_hdr->dh_log_blk_ent - 1) /
- dev->l2ad_dev_hdr->dh_log_blk_ent;
+ dev->l2ad_log_entries - 1) /
+ dev->l2ad_log_entries;
return (vdev_psize_to_asize(dev->l2ad_vdev,
sizeof (l2arc_log_blk_phys_t)) * log_blocks);
@@ -8373,17 +8398,24 @@ retry:
lb_ptr_buf_prev = list_prev(&dev->l2ad_lbptr_list, lb_ptr_buf);
+ /* L2BLK_GET_PSIZE returns aligned size for log blocks */
+ uint64_t asize = L2BLK_GET_PSIZE(
+ (lb_ptr_buf->lb_ptr)->lbp_prop);
+
/*
* We don't worry about log blocks left behind (ie
- * lbp_daddr + psize < l2ad_hand) because l2arc_write_buffers()
+ * lbp_payload_start < l2ad_hand) because l2arc_write_buffers()
* will never write more than l2arc_evict() evicts.
*/
if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) {
break;
} else {
- vdev_space_update(dev->l2ad_vdev,
- -L2BLK_GET_PSIZE(
- (lb_ptr_buf->lb_ptr)->lbp_prop), 0, 0);
+ vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
+ ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize);
+ ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count);
+ zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize,
+ lb_ptr_buf);
+ zfs_refcount_remove(&dev->l2ad_lb_count, lb_ptr_buf);
list_remove(&dev->l2ad_lbptr_list, lb_ptr_buf);
kmem_free(lb_ptr_buf->lb_ptr,
sizeof (l2arc_log_blkptr_t));
@@ -8475,6 +8507,10 @@ out:
dev->l2ad_first = B_FALSE;
goto top;
}
+
+ ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
+ if (!dev->l2ad_first)
+ ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
}
/*
@@ -8777,6 +8813,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
sizeof (l2arc_write_callback_t), KM_SLEEP);
cb->l2wcb_dev = dev;
cb->l2wcb_head = head;
+ /*
+ * Create a list to save allocated abd buffers
+ * for l2arc_log_blk_commit().
+ */
list_create(&cb->l2wcb_abd_list,
sizeof (l2arc_lb_abd_buf_t),
offsetof(l2arc_lb_abd_buf_t, node));
@@ -8846,6 +8886,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
return (0);
}
+ if (!dev->l2ad_first)
+ ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
+
ASSERT3U(write_asize, <=, target_sz);
ARCSTAT_BUMP(arcstat_l2_writes_sent);
ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
@@ -9036,6 +9079,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
zfs_refcount_create(&adddev->l2ad_alloc);
+ zfs_refcount_create(&adddev->l2ad_lb_asize);
+ zfs_refcount_create(&adddev->l2ad_lb_count);
/*
* Add device to global list
@@ -9059,7 +9104,7 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
uint64_t l2dhdr_asize;
spa_t *spa;
int err;
- boolean_t rebuild = B_TRUE;
+ boolean_t l2dhdr_valid = B_TRUE;
dev = l2arc_vdev_get(vd);
ASSERT3P(dev, !=, NULL);
@@ -9089,9 +9134,9 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
* Read the device header, if an error is returned do not rebuild L2ARC.
*/
if ((err = l2arc_dev_hdr_read(dev)) != 0)
- rebuild = B_FALSE;
+ l2dhdr_valid = B_FALSE;
- if (rebuild && l2dhdr->dh_log_blk_ent > 0) {
+ if (l2dhdr_valid && dev->l2ad_log_entries > 0) {
/*
* If we are onlining a cache device (vdev_reopen) that was
* still present (l2arc_vdev_present()) and rebuild is enabled,
@@ -9117,12 +9162,10 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
* async task which will call l2arc_spa_rebuild_start.
*/
dev->l2ad_rebuild = B_TRUE;
- } else if (!rebuild && spa_writeable(spa)) {
+ } else if (spa_writeable(spa)) {
/*
- * The boolean rebuild is false if reading the device header
- * returned an error. In this case create a new header. We
- * zero out the memory holding the header to reset
- * dh_start_lbps.
+ * In this case create a new header. We zero out the memory
+ * holding the header to reset dh_start_lbps.
*/
bzero(l2dhdr, l2dhdr_asize);
l2arc_dev_hdr_update(dev);
@@ -9172,6 +9215,8 @@ l2arc_remove_vdev(vdev_t *vd)
list_destroy(&remdev->l2ad_lbptr_list);
mutex_destroy(&remdev->l2ad_mtx);
zfs_refcount_destroy(&remdev->l2ad_alloc);
+ zfs_refcount_destroy(&remdev->l2ad_lb_asize);
+ zfs_refcount_destroy(&remdev->l2ad_lb_count);
kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize);
vmem_free(remdev, sizeof (l2arc_dev_t));
}
@@ -9309,7 +9354,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
{
vdev_t *vd = dev->l2ad_vdev;
spa_t *spa = vd->vdev_spa;
- int i = 0, err = 0;
+ int err = 0;
l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
l2arc_log_blk_phys_t *this_lb, *next_lb;
zio_t *this_io = NULL, *next_io = NULL;
@@ -9332,6 +9377,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
/*
* Retrieve the persistent L2ARC device state.
+ * L2BLK_GET_PSIZE returns aligned size for log blocks.
*/
dev->l2ad_evict = MAX(l2dhdr->dh_evict, dev->l2ad_start);
dev->l2ad_hand = MAX(l2dhdr->dh_start_lbps[0].lbp_daddr +
@@ -9381,11 +9427,10 @@ l2arc_rebuild(l2arc_dev_t *dev)
/*
* Now that we know that the next_lb checks out alright, we
* can start reconstruction from this log block.
+ * L2BLK_GET_PSIZE returns aligned size for log blocks.
*/
- l2arc_log_blk_restore(dev, this_lb,
- L2BLK_GET_PSIZE((&lbps[0])->lbp_prop),
- lbps[0].lbp_daddr);
- i++;
+ uint64_t asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
+ l2arc_log_blk_restore(dev, this_lb, asize, lbps[0].lbp_daddr);
/*
* log block restored, include its pointer in the list of
@@ -9398,9 +9443,12 @@ l2arc_rebuild(l2arc_dev_t *dev)
sizeof (l2arc_log_blkptr_t));
mutex_enter(&dev->l2ad_mtx);
list_insert_tail(&dev->l2ad_lbptr_list, lb_ptr_buf);
+ ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize);
+ ARCSTAT_BUMP(arcstat_l2_log_blk_count);
+ zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf);
+ zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf);
mutex_exit(&dev->l2ad_mtx);
- vdev_space_update(vd,
- L2BLK_GET_PSIZE((&lbps[0])->lbp_prop), 0, 0);
+ vdev_space_update(vd, asize, 0, 0);
/*
* Protection against loops of log blocks:
@@ -9417,13 +9465,16 @@ l2arc_rebuild(l2arc_dev_t *dev)
* l2arc_log_blkptr_valid() but the log block should not be
* restored as it is overwritten by the payload of log block
* (0). Only log blocks (0)-(3) should be restored. We check
- * whether l2ad_evict lies in between the next log block
- * offset (lbps[1].lbp_daddr) and the present log block offset
- * (lbps[0].lbp_daddr). If true and this isn't the first pass,
- * we are looping from the beginning and we should stop.
+ * whether l2ad_evict lies in between the payload starting
+ * offset of the next log block (lbps[1].lbp_payload_start)
+ * and the payload starting offset of the present log block
+ * (lbps[0].lbp_payload_start). If true and this isn't the
+ * first pass, we are looping from the beginning and we should
+ * stop.
*/
- if (l2arc_range_check_overlap(lbps[1].lbp_daddr,
- lbps[0].lbp_daddr, dev->l2ad_evict) && !dev->l2ad_first)
+ if (l2arc_range_check_overlap(lbps[1].lbp_payload_start,
+ lbps[0].lbp_payload_start, dev->l2ad_evict) &&
+ !dev->l2ad_first)
goto out;
for (;;) {
@@ -9470,14 +9521,27 @@ out:
vmem_free(next_lb, sizeof (*next_lb));
if (!l2arc_rebuild_enabled) {
- zfs_dbgmsg("L2ARC rebuild disabled");
- } else if (err == 0 && i > 0) {
+ spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+ "disabled");
+ } else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) > 0) {
ARCSTAT_BUMP(arcstat_l2_rebuild_success);
- zfs_dbgmsg("L2ARC successfully rebuilt, "
- "restored %d blocks", i);
+ spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+ "successful, restored %llu blocks",
+ (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
+ } else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) == 0) {
+ /*
+ * No error but also nothing restored, meaning the lbps array
+ * in the device header points to invalid/non-present log
+ * blocks. Reset the header.
+ */
+ spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+ "no valid log blocks");
+ bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
+ l2arc_dev_hdr_update(dev);
} else if (err != 0) {
- zfs_dbgmsg("L2ARC rebuild aborted, "
- "restored %d blocks", i);
+ spa_history_log_internal(spa, "L2ARC rebuild", NULL,
+ "aborted, restored %llu blocks",
+ (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count));
}
if (lock_held)
@@ -9527,7 +9591,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
l2dhdr->dh_spa_guid != guid ||
l2dhdr->dh_vdev_guid != dev->l2ad_vdev->vdev_guid ||
l2dhdr->dh_version != L2ARC_PERSISTENT_VERSION ||
- l2dhdr->dh_log_blk_ent != dev->l2ad_log_entries ||
+ l2dhdr->dh_log_entries != dev->l2ad_log_entries ||
l2dhdr->dh_end != dev->l2ad_end ||
!l2arc_range_check_overlap(dev->l2ad_start, dev->l2ad_end,
l2dhdr->dh_evict)) {
@@ -9578,7 +9642,7 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
int err = 0;
zio_cksum_t cksum;
abd_t *abd = NULL;
- uint64_t psize;
+ uint64_t asize;
ASSERT(this_lbp != NULL && next_lbp != NULL);
ASSERT(this_lb != NULL && next_lb != NULL);
@@ -9616,9 +9680,12 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
goto cleanup;
}
- /* Make sure the buffer checks out */
- psize = L2BLK_GET_PSIZE((this_lbp)->lbp_prop);
- fletcher_4_native(this_lb, psize, NULL, &cksum);
+ /*
+ * Make sure the buffer checks out.
+ * L2BLK_GET_PSIZE returns aligned size for log blocks.
+ */
+ asize = L2BLK_GET_PSIZE((this_lbp)->lbp_prop);
+ fletcher_4_native(this_lb, asize, NULL, &cksum);
if (!ZIO_CHECKSUM_EQUAL(cksum, this_lbp->lbp_cksum)) {
ARCSTAT_BUMP(arcstat_l2_rebuild_abort_cksum_lb_errors);
zfs_dbgmsg("L2ARC log block cksum failed, offset: %llu, "
@@ -9634,11 +9701,11 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
case ZIO_COMPRESS_OFF:
break;
case ZIO_COMPRESS_LZ4:
- abd = abd_alloc_for_io(psize, B_TRUE);
- abd_copy_from_buf_off(abd, this_lb, 0, psize);
+ abd = abd_alloc_for_io(asize, B_TRUE);
+ abd_copy_from_buf_off(abd, this_lb, 0, asize);
if ((err = zio_decompress_data(
L2BLK_GET_COMPRESS((this_lbp)->lbp_prop),
- abd, this_lb, psize, sizeof (*this_lb))) != 0) {
+ abd, this_lb, asize, sizeof (*this_lb))) != 0) {
err = SET_ERROR(EINVAL);
goto cleanup;
}
@@ -9672,10 +9739,10 @@ cleanup:
*/
static void
l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb,
- uint64_t lb_psize, uint64_t lb_daddr)
+ uint64_t lb_asize, uint64_t lb_daddr)
{
- uint64_t size = 0, psize = 0;
- uint64_t log_entries = dev->l2ad_dev_hdr->dh_log_blk_ent;
+ uint64_t size = 0, asize = 0;
+ uint64_t log_entries = dev->l2ad_log_entries;
for (int i = log_entries - 1; i >= 0; i--) {
/*
@@ -9692,27 +9759,28 @@ l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb,
* ^ ^
* | |
* | |
- * l2arc_fill_thread l2arc_rebuild
- * places new bufs here restores bufs here
+ * l2arc_feed_thread l2arc_rebuild
+ * will place new bufs here restores bufs here
*
- * This also works when the restored bufs get evicted at any
- * point during the rebuild.
+ * During l2arc_rebuild() the device is not used by
+ * l2arc_feed_thread() as dev->l2ad_rebuild is set to true.
*/
size += L2BLK_GET_LSIZE((&lb->lb_entries[i])->le_prop);
- psize += L2BLK_GET_PSIZE((&lb->lb_entries[i])->le_prop);
+ asize += vdev_psize_to_asize(dev->l2ad_vdev,
+ L2BLK_GET_PSIZE((&lb->lb_entries[i])->le_prop));
l2arc_hdr_restore(&lb->lb_entries[i], dev);
}
/*
* Record rebuild stats:
* size Logical size of restored buffers in the L2ARC
- * psize Physical size of restored buffers in the L2ARC
+ * asize Aligned size of restored buffers in the L2ARC
*/
ARCSTAT_INCR(arcstat_l2_rebuild_size, size);
- ARCSTAT_INCR(arcstat_l2_rebuild_psize, psize);
+ ARCSTAT_INCR(arcstat_l2_rebuild_asize, asize);
ARCSTAT_INCR(arcstat_l2_rebuild_bufs, log_entries);
- ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_size, lb_psize);
- ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, psize / lb_psize);
+ ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, lb_asize);
+ ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio, asize / lb_asize);
ARCSTAT_BUMP(arcstat_l2_rebuild_log_blks);
}
@@ -9800,18 +9868,20 @@ static zio_t *
l2arc_log_blk_fetch(vdev_t *vd, const l2arc_log_blkptr_t *lbp,
l2arc_log_blk_phys_t *lb)
{
- uint32_t psize;
+ uint32_t asize;
zio_t *pio;
l2arc_read_callback_t *cb;
- psize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
- ASSERT(psize <= sizeof (l2arc_log_blk_phys_t));
+ /* L2BLK_GET_PSIZE returns aligned size for log blocks */
+ asize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
+ ASSERT(asize <= sizeof (l2arc_log_blk_phys_t));
+
cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
- cb->l2rcb_abd = abd_get_from_buf(lb, psize);
+ cb->l2rcb_abd = abd_get_from_buf(lb, asize);
pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY);
- (void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, psize,
+ (void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
@@ -9841,14 +9911,18 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
abd_t *abd;
int err;
+ VERIFY(spa_config_held(dev->l2ad_spa, SCL_STATE_ALL, RW_READER));
+
l2dhdr->dh_magic = L2ARC_DEV_HDR_MAGIC;
l2dhdr->dh_version = L2ARC_PERSISTENT_VERSION;
l2dhdr->dh_spa_guid = spa_guid(dev->l2ad_vdev->vdev_spa);
l2dhdr->dh_vdev_guid = dev->l2ad_vdev->vdev_guid;
- l2dhdr->dh_log_blk_ent = dev->l2ad_log_entries;
+ l2dhdr->dh_log_entries = dev->l2ad_log_entries;
l2dhdr->dh_evict = dev->l2ad_evict;
l2dhdr->dh_start = dev->l2ad_start;
l2dhdr->dh_end = dev->l2ad_end;
+ l2dhdr->dh_lb_asize = zfs_refcount_count(&dev->l2ad_lb_asize);
+ l2dhdr->dh_lb_count = zfs_refcount_count(&dev->l2ad_lb_count);
l2dhdr->dh_flags = 0;
if (dev->l2ad_first)
l2dhdr->dh_flags |= L2ARC_DEV_HDR_EVICT_FIRST;
@@ -9884,7 +9958,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
uint8_t *tmpbuf;
l2arc_lb_ptr_buf_t *lb_ptr_buf;
- VERIFY3S(dev->l2ad_log_ent_idx, ==, l2dhdr->dh_log_blk_ent);
+ VERIFY3S(dev->l2ad_log_ent_idx, ==, dev->l2ad_log_entries);
tmpbuf = zio_buf_alloc(sizeof (*lb));
abd_buf = zio_buf_alloc(sizeof (*abd_buf));
@@ -9896,8 +9970,14 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
lb->lb_prev_lbp = l2dhdr->dh_start_lbps[1];
lb->lb_magic = L2ARC_LOG_BLK_MAGIC;
- /* try to compress the buffer */
+ /*
+ * l2arc_log_blk_commit() may be called multiple times during a single
+ * l2arc_write_buffers() call. Save the allocated abd buffers in a list
+ * so we can free them in l2arc_write_done() later on.
+ */
list_insert_tail(&cb->l2wcb_abd_list, abd_buf);
+
+ /* try to compress the buffer */
psize = zio_compress_data(ZIO_COMPRESS_LZ4,
abd_buf->abd, tmpbuf, sizeof (*lb));
@@ -9962,13 +10042,17 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
sizeof (l2arc_log_blkptr_t));
mutex_enter(&dev->l2ad_mtx);
list_insert_head(&dev->l2ad_lbptr_list, lb_ptr_buf);
+ ARCSTAT_INCR(arcstat_l2_log_blk_asize, asize);
+ ARCSTAT_BUMP(arcstat_l2_log_blk_count);
+ zfs_refcount_add_many(&dev->l2ad_lb_asize, asize, lb_ptr_buf);
+ zfs_refcount_add(&dev->l2ad_lb_count, lb_ptr_buf);
mutex_exit(&dev->l2ad_mtx);
vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
/* bump the kstats */
ARCSTAT_INCR(arcstat_l2_write_bytes, asize);
ARCSTAT_BUMP(arcstat_l2_log_blk_writes);
- ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_size, asize);
+ ARCSTAT_F_AVG(arcstat_l2_log_blk_avg_asize, asize);
ARCSTAT_F_AVG(arcstat_l2_data_to_meta_ratio,
dev->l2ad_log_blk_payload_asize / asize);
@@ -9985,8 +10069,9 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
boolean_t
l2arc_log_blkptr_valid(l2arc_dev_t *dev, const l2arc_log_blkptr_t *lbp)
{
- uint64_t psize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
- uint64_t end = lbp->lbp_daddr + psize - 1;
+ /* L2BLK_GET_PSIZE returns aligned size for log blocks */
+ uint64_t asize = L2BLK_GET_PSIZE((lbp)->lbp_prop);
+ uint64_t end = lbp->lbp_daddr + asize - 1;
uint64_t start = lbp->lbp_payload_start;
boolean_t evicted = B_FALSE;
@@ -10017,7 +10102,7 @@ l2arc_log_blkptr_valid(l2arc_dev_t *dev, const l2arc_log_blkptr_t *lbp)
l2arc_range_check_overlap(dev->l2ad_hand, dev->l2ad_evict, end);
return (start >= dev->l2ad_start && end <= dev->l2ad_end &&
- psize > 0 && psize <= sizeof (l2arc_log_blk_phys_t) &&
+ asize > 0 && asize <= sizeof (l2arc_log_blk_phys_t) &&
(!evicted || dev->l2ad_first));
}
@@ -10032,14 +10117,13 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
{
l2arc_log_blk_phys_t *lb = &dev->l2ad_log_blk;
l2arc_log_ent_phys_t *le;
- l2arc_dev_hdr_phys_t *l2dhdr = dev->l2ad_dev_hdr;
- if (l2dhdr->dh_log_blk_ent == 0)
+ if (dev->l2ad_log_entries == 0)
return (B_FALSE);
int index = dev->l2ad_log_ent_idx++;
- ASSERT3S(index, <, l2dhdr->dh_log_blk_ent);
+ ASSERT3S(index, <, dev->l2ad_log_entries);
ASSERT(HDR_HAS_L2HDR(hdr));
le = &lb->lb_entries[index];
@@ -10059,7 +10143,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev,
HDR_GET_PSIZE(hdr));
- return (dev->l2ad_log_ent_idx == l2dhdr->dh_log_blk_ent);
+ return (dev->l2ad_log_ent_idx == dev->l2ad_log_entries);
}
/*
diff --git a/tests/zfs-tests/tests/functional/persist_l2arc/persist_l2arc_001_pos.ksh b/tests/zfs-tests/tests/functional/persist_l2arc/persist_l2arc_001_pos.ksh
index b202fac40..f313923d1 100755
--- a/tests/zfs-tests/tests/functional/persist_l2arc/persist_l2arc_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/persist_l2arc/persist_l2arc_001_pos.ksh
@@ -99,7 +99,7 @@ typeset l2_rebuild_log_blk_end=$(get_arcstat l2_rebuild_log_blks)
log_must test $l2_dh_log_blk -eq $(( $l2_rebuild_log_blk_end - $l2_rebuild_log_blk_start ))
log_must test $l2_dh_log_blk -gt 0
-log_must zdb -lq $VDEV_CACHE
+log_must zdb -lll $VDEV_CACHE
log_must zpool destroy -f $TESTPOOL