diff options
author | George Amanakis <[email protected]> | 2020-05-07 19:34:03 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2020-05-07 16:34:03 -0700 |
commit | 657fd33bcff17e44ad55dffdf294d7c107b4bf5d (patch) | |
tree | 33abf04e8eeb45e5c63a7f20719ddad67aabeb65 /cmd | |
parent | 108a454a4604df6ea3be817f3cf076726df2c67a (diff) |
Improvements on persistent L2ARC
Functional changes:
We implement refcounts of log blocks and their aligned size on the
cache device along with two corresponding arcstats. The refcounts are
reflected in the header of the device and provide valuable information
as to whether log blocks are accounted for correctly. These are
dynamically adjusted as log blocks are committed/evicted. zdb also uses
this information in the device header and compares it to the
corresponding values as reported by dump_l2arc_log_blocks() which
emulates l2arc_rebuild(). If the refcounts saved in the device header
report higher values, zdb exits with an error. For this feature to work
correctly there should be no active writes on the device. This is also
employed in the tests of persistent L2ARC. We extend the structure of
the cache device header by adding the two new variables mirroring the
refcounts after the existing variables to preserve backward
compatibility in terms of persistent L2ARC.
1) a new arcstat "l2_log_blk_asize" and refcount "l2ad_lb_asize" which
reflect the total aligned size of log blocks on the device. This is
also reflected in the header of the cache device as "dh_lb_asize".
2) a new arcstat "l2_log_blk_count" and refcount "l2ad_lb_count"
which reflect the total number of L2ARC log blocks present on cache
devices. It is also reflected in the header of the cache device as
"dh_lb_count".
In l2arc_rebuild_vdev(), if the number of committed log entries in a log
block is 0 and the device header is valid, we update the device header.
This will facilitate trimming the whole device in this case once
TRIM for L2ARC is implemented.
Improve loop protection in l2arc_rebuild() by using the starting offset
of the payload of each log block instead of the starting offset of the
log block.
If the zio in l2arc_write_buffers() fails, restore the lbps array in the
header of the device to its previous state in l2arc_write_done().
If l2arc_rebuild() ends the rebuild process without restoring any L2ARC
log blocks in ARC and without any other error, this means that the lbps
array in the header is pointing to non-existent or invalid log blocks.
Reset the device header in this case.
In l2arc_rebuild(), replace the zfs_dbgmsg() messages with
spa_history_log_internal() calls, making them visible to users through
the `zpool history` command.
Non-functional changes:
Make the first test in persistent L2ARC use `zdb -lll` to increase
coverage in `zdb.c`.
Rename psize to asize when referring to log blocks, since
L2ARC_SET_PSIZE stores the vdev-aligned size for log blocks. Also
rename dh_log_blk_ent to dh_log_entries to make it clear that
it is a mirror of l2ad_log_entries. Added comments for both changes.
Fix inaccurate comments, for example in l2arc_log_blk_restore().
Add asserts at the end of l2arc_evict() and l2arc_write_buffers().
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: George Amanakis <[email protected]>
Closes #10228
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zdb/zdb.c | 123 |
1 files changed, 86 insertions, 37 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index f4b4b454b..00258799b 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -3493,12 +3493,13 @@ print_l2arc_log_blocks(void) static void dump_l2arc_log_entries(uint64_t log_entries, - l2arc_log_ent_phys_t *le, int i) + l2arc_log_ent_phys_t *le, uint64_t i) { for (int j = 0; j < log_entries; j++) { dva_t dva = le[j].le_dva; - (void) printf("lb[%4d]\tle[%4d]\tDVA asize: %llu, " - "vdev: %llu, offset: %llu\n", i, j + 1, + (void) printf("lb[%4llu]\tle[%4d]\tDVA asize: %llu, " + "vdev: %llu, offset: %llu\n", + (u_longlong_t)i, j + 1, (u_longlong_t)DVA_GET_ASIZE(&dva), (u_longlong_t)DVA_GET_VDEV(&dva), (u_longlong_t)DVA_GET_OFFSET(&dva)); @@ -3533,7 +3534,7 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps) (u_longlong_t)lbps.lbp_payload_start); (void) printf("|\t\tlsize: %llu\n", (u_longlong_t)L2BLK_GET_LSIZE((&lbps)->lbp_prop)); - (void) printf("|\t\tpsize: %llu\n", + (void) printf("|\t\tasize: %llu\n", (u_longlong_t)L2BLK_GET_PSIZE((&lbps)->lbp_prop)); (void) printf("|\t\tcompralgo: %llu\n", (u_longlong_t)L2BLK_GET_COMPRESS((&lbps)->lbp_prop)); @@ -3543,17 +3544,19 @@ dump_l2arc_log_blkptr(l2arc_log_blkptr_t lbps) } static void -dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr) +dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr, + l2arc_dev_hdr_phys_t *rebuild) { l2arc_log_blk_phys_t this_lb; - uint64_t psize; + uint64_t asize; l2arc_log_blkptr_t lbps[2]; abd_t *abd; zio_cksum_t cksum; - int i = 0, failed = 0; + int failed = 0; l2arc_dev_t dev; - print_l2arc_log_blocks(); + if (!dump_opt['q']) + print_l2arc_log_blocks(); bcopy((&l2dhdr)->dh_start_lbps, lbps, sizeof (lbps)); dev.l2ad_evict = l2dhdr.dh_evict; @@ -3562,8 +3565,10 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr) if (l2dhdr.dh_start_lbps[0].lbp_daddr == 0) { /* no log blocks to read */ - (void) printf("No log blocks to read\n"); - (void) printf("\n"); + if (!dump_opt['q']) { + (void) printf("No log blocks to read\n"); + (void) 
printf("\n"); + } return; } else { dev.l2ad_hand = lbps[0].lbp_daddr + @@ -3576,17 +3581,23 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr) if (!l2arc_log_blkptr_valid(&dev, &lbps[0])) break; - psize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); - if (pread64(fd, &this_lb, psize, lbps[0].lbp_daddr) != psize) { - (void) printf("Error while reading next log block\n\n"); + /* L2BLK_GET_PSIZE returns aligned size for log blocks */ + asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop); + if (pread64(fd, &this_lb, asize, lbps[0].lbp_daddr) != asize) { + if (!dump_opt['q']) { + (void) printf("Error while reading next log " + "block\n\n"); + } break; } - fletcher_4_native_varsize(&this_lb, psize, &cksum); + fletcher_4_native_varsize(&this_lb, asize, &cksum); if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) { failed++; - (void) printf("Invalid cksum\n"); - dump_l2arc_log_blkptr(lbps[0]); + if (!dump_opt['q']) { + (void) printf("Invalid cksum\n"); + dump_l2arc_log_blkptr(lbps[0]); + } break; } @@ -3594,11 +3605,11 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr) case ZIO_COMPRESS_OFF: break; case ZIO_COMPRESS_LZ4: - abd = abd_alloc_for_io(psize, B_TRUE); - abd_copy_from_buf_off(abd, &this_lb, 0, psize); + abd = abd_alloc_for_io(asize, B_TRUE); + abd_copy_from_buf_off(abd, &this_lb, 0, asize); zio_decompress_data(L2BLK_GET_COMPRESS( (&lbps[0])->lbp_prop), abd, &this_lb, - psize, sizeof (this_lb)); + asize, sizeof (this_lb)); abd_free(abd); break; default: @@ -3608,39 +3619,52 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr) if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC)) byteswap_uint64_array(&this_lb, sizeof (this_lb)); if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) { - (void) printf("Invalid log block magic\n\n"); + if (!dump_opt['q']) + (void) printf("Invalid log block magic\n\n"); break; } - i++; - if (dump_opt['l'] > 1) { - (void) printf("lb[%4d]\tmagic: %llu\n", i, + rebuild->dh_lb_count++; + rebuild->dh_lb_asize += asize; + if 
(dump_opt['l'] > 1 && !dump_opt['q']) { + (void) printf("lb[%4llu]\tmagic: %llu\n", + (u_longlong_t)rebuild->dh_lb_count, (u_longlong_t)this_lb.lb_magic); dump_l2arc_log_blkptr(lbps[0]); } - if (dump_opt['l'] > 2) - dump_l2arc_log_entries(l2dhdr.dh_log_blk_ent, - this_lb.lb_entries, i); + if (dump_opt['l'] > 2 && !dump_opt['q']) + dump_l2arc_log_entries(l2dhdr.dh_log_entries, + this_lb.lb_entries, + rebuild->dh_lb_count); - if (l2arc_range_check_overlap(lbps[1].lbp_daddr, - lbps[0].lbp_daddr, dev.l2ad_evict) && !dev.l2ad_first) + if (l2arc_range_check_overlap(lbps[1].lbp_payload_start, + lbps[0].lbp_payload_start, dev.l2ad_evict) && + !dev.l2ad_first) break; lbps[0] = lbps[1]; lbps[1] = this_lb.lb_prev_lbp; } - (void) printf("log_blk_count:\t %d with valid cksum\n", i); - (void) printf("\t\t %d with invalid cksum\n\n", failed); + if (!dump_opt['q']) { + (void) printf("log_blk_count:\t %llu with valid cksum\n", + (u_longlong_t)rebuild->dh_lb_count); + (void) printf("\t\t %d with invalid cksum\n", failed); + (void) printf("log_blk_asize:\t %llu\n\n", + (u_longlong_t)rebuild->dh_lb_asize); + } } -static void +static int dump_l2arc_header(int fd) { - l2arc_dev_hdr_phys_t l2dhdr; + l2arc_dev_hdr_phys_t l2dhdr, rebuild; int error = B_FALSE; + bzero(&l2dhdr, sizeof (l2dhdr)); + bzero(&rebuild, sizeof (rebuild)); + if (pread64(fd, &l2dhdr, sizeof (l2dhdr), VDEV_LABEL_START_SIZE) != sizeof (l2dhdr)) { error = B_TRUE; @@ -3654,6 +3678,8 @@ dump_l2arc_header(int fd) if (error) { (void) printf("L2ARC device header not found\n\n"); + /* Do not return an error here for backward compatibility */ + return (0); } else if (!dump_opt['q']) { print_l2arc_header(); @@ -3672,16 +3698,39 @@ dump_l2arc_header(int fd) (u_longlong_t) l2dhdr.dh_start_lbps[1].lbp_daddr); (void) printf(" log_blk_ent: %llu\n", - (u_longlong_t)l2dhdr.dh_log_blk_ent); + (u_longlong_t)l2dhdr.dh_log_entries); (void) printf(" start: %llu\n", (u_longlong_t)l2dhdr.dh_start); (void) printf(" end: %llu\n", 
(u_longlong_t)l2dhdr.dh_end); - (void) printf(" evict: %llu\n\n", + (void) printf(" evict: %llu\n", (u_longlong_t)l2dhdr.dh_evict); - - dump_l2arc_log_blocks(fd, l2dhdr); + (void) printf(" lb_asize_refcount: %llu\n", + (u_longlong_t)l2dhdr.dh_lb_asize); + (void) printf(" lb_count_refcount: %llu\n\n", + (u_longlong_t)l2dhdr.dh_lb_count); } + + dump_l2arc_log_blocks(fd, l2dhdr, &rebuild); + /* + * The total aligned size of log blocks and the number of log blocks + * reported in the header of the device may be less than what zdb + * reports by dump_l2arc_log_blocks() which emulates l2arc_rebuild(). + * This happens because dump_l2arc_log_blocks() lacks the memory + * pressure valve that l2arc_rebuild() has. Thus, if we are on a system + * with low memory, l2arc_rebuild will exit prematurely and dh_lb_asize + * and dh_lb_count will be lower to begin with than what exists on the + * device. This is normal and zdb should not exit with an error. The + * opposite case should never happen though, the values reported in the + * header should never be higher than what dump_l2arc_log_blocks() and + * l2arc_rebuild() report. If this happens there is a leak in the + * accounting of log blocks. + */ + if (l2dhdr.dh_lb_asize > rebuild.dh_lb_asize || + l2dhdr.dh_lb_count > rebuild.dh_lb_count) + return (1); + + return (0); } static void @@ -4009,7 +4058,7 @@ dump_label(const char *dev) * Dump the L2ARC header, if existent. */ if (read_l2arc_header) - dump_l2arc_header(fd); + error |= dump_l2arc_header(fd); cookie = NULL; while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL) |