diff options
author | Matthew Ahrens <[email protected]> | 2014-06-05 13:19:08 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-08-01 14:28:05 -0700 |
commit | 9b67f605601c77c814037613d8129562db642a29 (patch) | |
tree | 21a3270ed7eda24858e56a9584f64f6359f4b28f /cmd | |
parent | faf0f58c69607a15e2d1563567afb815842805de (diff) |
Illumos 4757, 4913
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Max Grossman <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/4757
https://www.illumos.org/issues/4913
https://github.com/illumos/illumos-gate/commit/5d7b4d4
Porting notes:
For compatibility with the fastpath code the zio_done() function
needed to be updated. Because embedded-data block pointers do
not require DVAs to be allocated the associated vdevs will not
be marked and therefore should not be unmarked.
Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2544
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zdb/zdb.c | 65 | ||||
-rw-r--r-- | cmd/zfs/zfs_main.c | 17 | ||||
-rw-r--r-- | cmd/zstreamdump/zstreamdump.c | 53 | ||||
-rw-r--r-- | cmd/ztest/ztest.c | 48 |
4 files changed, 142 insertions, 41 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 66b91cd97..dcb77c24d 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1047,6 +1047,16 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) return; } + if (BP_IS_EMBEDDED(bp)) { + (void) sprintf(blkbuf, + "EMBEDDED et=%u %llxL/%llxP B=%llu", + (int)BPE_GET_ETYPE(bp), + (u_longlong_t)BPE_GET_LSIZE(bp), + (u_longlong_t)BPE_GET_PSIZE(bp), + (u_longlong_t)bp->blk_birth); + return; + } + blkbuf[0] = '\0'; for (i = 0; i < ndvas; i++) @@ -1066,7 +1076,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) "%llxL/%llxP F=%llu B=%llu/%llu", (u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); } @@ -1079,8 +1089,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb, char blkbuf[BP_SPRINTF_LEN]; int l; - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + if (!BP_IS_EMBEDDED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); @@ -1134,10 +1146,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp, err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; - fill += cbp->blk_fill; + fill += BP_GET_FILL(cbp); } if (!err) - ASSERT3U(fill, ==, bp->blk_fill); + ASSERT3U(fill, ==, BP_GET_FILL(bp)); (void) arc_buf_remove_ref(buf, &buf); } @@ -1861,14 +1873,14 @@ dump_dir(objset_t *os) if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; - usedobjs = os->os_rootbp->blk_fill; + usedobjs = BP_GET_FILL(os->os_rootbp); refdbytes = os->os_spa->spa_dsl_pool-> dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } - ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); + ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); zdb_nicenum(refdbytes, numbuf); @@ -2171,6 +2183,9 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; + uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] + [BPE_PAYLOAD_SIZE]; uint64_t zcb_start; uint64_t zcb_lastprint; uint64_t zcb_totalasize; @@ -2204,6 +2219,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++; } + if (BP_IS_EMBEDDED(bp)) { + zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; + zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] + [BPE_GET_PSIZE(bp)]++; + return; + } + if (dump_opt['L']) return; @@ -2301,7 +2323,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); - if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { + if (!BP_IS_EMBEDDED(bp) && + (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; @@ -2497,8 +2520,9 @@ dump_block_stats(spa_t *spa) zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; - int leaks = 0; + boolean_t leaks = B_FALSE; int e; + bp_embedded_type_t i; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", @@ -2587,7 +2611,7 @@ dump_block_stats(spa_t *spa) (u_longlong_t)total_alloc, (dump_opt['L']) ? "unreachable" : "leaked", (longlong_t)(total_alloc - total_found)); - leaks = 1; + leaks = B_TRUE; } if (tzb->zb_count == 0) @@ -2617,6 +2641,23 @@ dump_block_stats(spa_t *spa) (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); + for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { + if (zcb.zcb_embedded_blocks[i] == 0) + continue; + (void) printf("\n"); + (void) printf("\tadditional, non-pointer bps of type %u: " + "%10llu\n", + i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); + + if (dump_opt['b'] >= 3) { + (void) printf("\t number of (compressed) bytes: " + "number of bps\n"); + dump_histogram(zcb.zcb_embedded_histogram[i], + sizeof (zcb.zcb_embedded_histogram[i]) / + sizeof (zcb.zcb_embedded_histogram[i][0]), 0); + } + } + if (dump_opt['b'] >= 2) { int l, t, level; (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" @@ -2718,14 +2759,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (BP_IS_HOLE(bp)) + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", (u_longlong_t)zb->zb_objset, - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), avl_numnodes(t)); } diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 521ce3c2b..84073435e 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -258,9 +258,9 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] <snapshot>\n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRrve] [-[iI] snapshot] " "<snapshot>\n" - "\tsend [-i snapshot|bookmark] " + "\tsend [-e] [-i snapshot|bookmark] " "<filesystem|volume|snapshot>\n")); case HELP_SET: return (gettext("\tset <property=value> " @@ -3338,6 +3338,8 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data) zfs_close(zhp); return (0); } + + /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. We reuse this same callback for the snapshot dependents - if @@ -3677,7 +3679,7 @@ zfs_do_send(int argc, char **argv) boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) { switch (c) { case 'i': if (fromname) @@ -3712,6 +3714,9 @@ zfs_do_send(int argc, char **argv) case 'n': flags.dryrun = B_TRUE; break; + case 'e': + flags.embed_data = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -3750,6 +3755,7 @@ zfs_do_send(int argc, char **argv) if (strchr(argv[0], '@') == NULL || (fromname && strchr(fromname, '#') != NULL)) { char frombuf[ZFS_MAXNAMELEN]; + enum lzc_send_flags lzc_flags = 0; if (flags.replicate || flags.doall || flags.props || flags.dedup || flags.dryrun || flags.verbose || @@ -3764,6 +3770,9 @@ zfs_do_send(int argc, char **argv) if (zhp == NULL) return (1); + if (flags.embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { /* @@ -3777,7 +3786,7 @@ zfs_do_send(int argc, char **argv) (void) strlcat(frombuf, fromname, sizeof (frombuf)); fromname = frombuf; } - err = zfs_send_one(zhp, fromname, STDOUT_FILENO); + err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags); zfs_close(zhp); return (err != 0); } diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index a4c451d53..dd8b31ccc 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -36,7 +36,6 @@ #include <sys/zfs_ioctl.h> #include <zfs_fletcher.h> -uint64_t drr_record_count[DRR_NUMTYPES]; uint64_t total_write_size = 0; uint64_t total_stream_len = 0; FILE *send_stream = 0; @@ -81,6 +80,8 @@ int main(int argc, char *argv[]) { char *buf = malloc(INITIAL_BUFLEN); + uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_records = 0; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -91,6 +92,7 @@ main(int argc, char *argv[]) struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; char c; boolean_t verbose = B_FALSE; boolean_t first = B_TRUE; @@ -170,6 +172,7 @@ main(int argc, char *argv[]) } drr_record_count[drr->drr_type]++; + total_records++; switch (drr->drr_type) { case DRR_BEGIN: @@ -286,8 +289,8 @@ main(int argc, char *argv[]) drro->drr_bonuslen); } if (drro->drr_bonuslen > 0) { - (void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen, - 8), &zc); + (void) ssread(buf, + P2ROUNDUP(drro->drr_bonuslen, 8), &zc); } break; @@ -397,6 +400,38 @@ main(int argc, char *argv[]) } (void) ssread(buf, drrs->drr_length, &zc); break; + case DRR_WRITE_EMBEDDED: + if (do_byteswap) { + drrwe->drr_object = + BSWAP_64(drrwe->drr_object); + drrwe->drr_offset = + BSWAP_64(drrwe->drr_offset); + drrwe->drr_length = + BSWAP_64(drrwe->drr_length); + drrwe->drr_toguid = + BSWAP_64(drrwe->drr_toguid); + drrwe->drr_lsize = + BSWAP_32(drrwe->drr_lsize); + drrwe->drr_psize = + BSWAP_32(drrwe->drr_psize); + } + if (verbose) { + (void) printf("WRITE_EMBEDDED object = %llu " + "offset = %llu length = %llu\n" + "toguid = %llx comp = %u etype = %u " + "lsize = %u psize = %u\n", + (u_longlong_t)drrwe->drr_object, + (u_longlong_t)drrwe->drr_offset, + (u_longlong_t)drrwe->drr_length, + (u_longlong_t)drrwe->drr_toguid, + drrwe->drr_compression, + drrwe->drr_etype, + drrwe->drr_lsize, + drrwe->drr_psize); + } + (void) ssread(buf, + P2ROUNDUP(drrwe->drr_psize, 8), &zc); + break; case DRR_NUMTYPES: /* should never be reached */ exit(1); @@ -418,18 +453,16 @@ main(int argc, char *argv[]) (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); (void) printf("\tTotal DRR_WRITE records = %lld\n", (u_longlong_t)drr_record_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n", + (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]); (void) printf("\tTotal DRR_FREE records = %lld\n", (u_longlong_t)drr_record_count[DRR_FREE]); (void) printf("\tTotal DRR_SPILL records = %lld\n", (u_longlong_t)drr_record_count[DRR_SPILL]); (void) printf("\tTotal records = %lld\n", - (u_longlong_t)(drr_record_count[DRR_BEGIN] + - drr_record_count[DRR_OBJECT] + - drr_record_count[DRR_FREEOBJECTS] + - drr_record_count[DRR_WRITE] + - drr_record_count[DRR_FREE] + - drr_record_count[DRR_SPILL] + - drr_record_count[DRR_END])); + (u_longlong_t)total_records); (void) printf("\tTotal write size = %lld (0x%llx)\n", (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); (void) printf("\tTotal stream length = %lld (0x%llx)\n", diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 0a0fa7f49..a087444c8 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -52,7 +52,7 @@ * At random times, the child self-immolates with a SIGKILL. * This is the software equivalent of pulling the power cord. * The parent then runs the test again, using the existing - * storage pool, as many times as desired. If backwards compatability + * storage pool, as many times as desired. If backwards compatibility * testing is enabled ztest will sometimes run the "older" version * of ztest after a SIGKILL. * @@ -1301,13 +1301,13 @@ static void ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { - ASSERT(bt->bt_magic == BT_MAGIC); - ASSERT(bt->bt_objset == dmu_objset_id(os)); - ASSERT(bt->bt_object == object); - ASSERT(bt->bt_offset == offset); - ASSERT(bt->bt_gen <= gen); - ASSERT(bt->bt_txg <= txg); - ASSERT(bt->bt_crtxg == crtxg); + ASSERT3U(bt->bt_magic, ==, BT_MAGIC); + ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); + ASSERT3U(bt->bt_object, ==, object); + ASSERT3U(bt->bt_offset, ==, offset); + ASSERT3U(bt->bt_gen, <=, gen); + ASSERT3U(bt->bt_txg, <=, txg); + ASSERT3U(bt->bt_crtxg, ==, crtxg); } static ztest_block_tag_t * @@ -3557,6 +3557,11 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) if (error) fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); + if (error == ENOSPC) { + dmu_objset_disown(os, FTAG); + ztest_record_enospc(FTAG); + goto out; + } if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); @@ -3739,11 +3744,19 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) return; } - dmu_object_set_checksum(os, bigobj, - (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); + enum zio_checksum cksum; + do { + cksum = (enum zio_checksum) + ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); + } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); + dmu_object_set_checksum(os, bigobj, cksum, tx); - dmu_object_set_compress(os, bigobj, - (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); + enum zio_compress comp; + do { + comp = (enum zio_compress) + ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); + } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); + dmu_object_set_compress(os, bigobj, comp, tx); /* * For each index from n to n + s, verify that the existing bufwad @@ -4867,8 +4880,13 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) error = dsl_dataset_user_hold(holds, 0, NULL); fnvlist_free(holds); - if (error) - fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); + if (error == ENOSPC) { + ztest_record_enospc("dsl_dataset_user_hold"); + goto out; + } else if (error) { + fatal(0, "dsl_dataset_user_hold(%s, %s) = %u", + fullname, tag, error); + } error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != EBUSY) { @@ -5336,7 +5354,7 @@ ztest_run_zdb(char *pool) } (void) sprintf(zdb, - "%s -bcc%s%s -U %s %s", + "%s -bcc%s%s -d -U %s %s", bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? "v" : "", |