From 9babb37438b58e77bad04e820d5702e15b79e6a6 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 2 Jul 2009 15:44:48 -0700 Subject: Rebase master to b117 --- cmd/zdb/zdb.c | 138 ++++++--- cmd/zdb/zdb_il.c | 10 +- cmd/zfs/zfs_iter.c | 79 +++++- cmd/zfs/zfs_iter.h | 5 +- cmd/zfs/zfs_main.c | 653 +++++++++++++++---------------------------- cmd/zinject/zinject.c | 11 +- cmd/zpool/zpool_main.c | 444 +++++++++++++++-------------- cmd/ztest/ztest.c | 737 +++++++++++++++++++++++++++++++++++++++++++++---- 8 files changed, 1331 insertions(+), 746 deletions(-) (limited to 'cmd') diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 0ced25865..a310fc38b 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -102,7 +102,9 @@ usage(void) (void) fprintf(stderr, " -C cached pool configuration\n"); (void) fprintf(stderr, " -i intent logs\n"); (void) fprintf(stderr, " -b block statistics\n"); - (void) fprintf(stderr, " -c checksum all data blocks\n"); + (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -c checksum all metadata (twice for " + "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); (void) fprintf(stderr, " -S : -- " "dump blkptr signatures\n"); @@ -125,6 +127,11 @@ usage(void) exit(1); } +/* + * Called for usage errors that are discovered after a call to spa_open(), + * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. + */ + static void fatal(const char *fmt, ...) { @@ -136,7 +143,7 @@ fatal(const char *fmt, ...) va_end(ap); (void) fprintf(stderr, "\n"); - abort(); + exit(1); } static void @@ -209,7 +216,7 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) size_t nvsize = *(uint64_t *)data; char *packed = umem_alloc(nvsize, UMEM_NOFAIL); - VERIFY(0 == dmu_read(os, object, 0, nvsize, packed)); + VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); @@ -435,7 +442,7 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) alloc = 0; for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { VERIFY(0 == dmu_read(os, smo->smo_object, offset, - sizeof (entry), &entry)); + sizeof (entry), &entry, DMU_READ_PREFETCH)); if (SM_DEBUG_DECODE(entry)) { (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n", (u_longlong_t)(offset / sizeof (entry)), @@ -466,6 +473,21 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) } } +static void +dump_metaslab_stats(metaslab_t *msp) +{ + char maxbuf[5]; + space_map_t *sm = &msp->ms_map; + avl_tree_t *t = sm->sm_pp_root; + int free_pct = sm->sm_space * 100 / sm->sm_size; + + nicenum(space_map_maxsize(sm), maxbuf); + + (void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n", + "segments", avl_numnodes(t), "maxsize", maxbuf, + "freepct", free_pct); +} + static void dump_metaslab(metaslab_t *msp) { @@ -476,22 +498,28 @@ dump_metaslab(metaslab_t *msp) nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); - if (dump_opt['d'] <= 5) { - (void) printf("\t%10llx %10llu %5s\n", - (u_longlong_t)msp->ms_map.sm_start, - (u_longlong_t)smo->smo_object, - freebuf); - return; - } - (void) printf( - "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", + "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n", (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, (u_longlong_t)smo->smo_object, freebuf); - ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); + if (dump_opt['m'] > 1) { + mutex_enter(&msp->ms_lock); + VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops, + SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0); + dump_metaslab_stats(msp); + space_map_unload(&msp->ms_map); + mutex_exit(&msp->ms_lock); + } + + if (dump_opt['d'] > 5 || dump_opt['m'] > 2) { + ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); + + mutex_enter(&msp->ms_lock); + dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); + mutex_exit(&msp->ms_lock); + } - dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); } static void @@ -506,14 +534,12 @@ dump_metaslabs(spa_t *spa) for (c = 0; c < rvd->vdev_children; c++) { vd = rvd->vdev_child[c]; - (void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id); + (void) printf("\t%-10s %-19s %-15s %-10s\n", + "vdev", "offset", "spacemap", "free"); + (void) printf("\t%10s %19s %15s %10s\n", + "----------", "-------------------", + "---------------", "-------------"); - if (dump_opt['d'] <= 5) { - (void) printf("\t%10s %10s %5s\n", - "offset", "spacemap", "free"); - (void) printf("\t%10s %10s %5s\n", - "------", "--------", "----"); - } for (m = 0; m < vd->vdev_ms_count; m++) dump_metaslab(vd->vdev_ms[m]); (void) printf("\n"); @@ -917,6 +943,7 @@ dump_uidgid(objset_t *os, znode_phys_t *zp) /* first find the fuid object. It lives in the master node */ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &fuid_obj) == 0); + zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); (void) zfs_fuid_table_load(os, fuid_obj, &idx_tree, &domain_tree); fuid_table_loaded = B_TRUE; @@ -1020,6 +1047,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = { dump_packed_nvlist, /* FUID nvlist size */ dump_zap, /* DSL dataset next clones */ dump_zap, /* DSL scrub queue */ + dump_zap, /* ZFS user/group used */ + dump_zap, /* ZFS user/group quota */ }; static void @@ -1083,6 +1112,14 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) } if (verbosity >= 4) { + (void) printf("\tdnode flags: %s%s\n", + (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? + "USED_BYTES " : "", + (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? + "USERUSED_ACCOUNTED " : ""); + (void) printf("\tdnode maxblkid: %llu\n", + (longlong_t)dn->dn_phys->dn_maxblkid); + object_viewer[doi.doi_bonus_type](os, object, bonus, bsize); object_viewer[doi.doi_type](os, object, NULL, 0); *print_header = 1; @@ -1137,7 +1174,7 @@ dump_dir(objset_t *os) uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; char numbuf[8]; - char blkbuf[BP_SPRINTF_LEN]; + char blkbuf[BP_SPRINTF_LEN + 20]; char osname[MAXNAMELEN]; char *type = "UNKNOWN"; int verbosity = dump_opt['d']; @@ -1163,8 +1200,8 @@ dump_dir(objset_t *os) nicenum(refdbytes, numbuf); if (verbosity >= 4) { - (void) strcpy(blkbuf, ", rootbp "); - sprintf_blkptr(blkbuf + strlen(blkbuf), + (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp "); + (void) sprintf_blkptr(blkbuf + strlen(blkbuf), BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp); } else { blkbuf[0] = '\0'; @@ -1199,7 +1236,12 @@ dump_dir(objset_t *os) } dump_object(os, 0, verbosity, &print_header); - object_count = 1; + object_count = 0; + if (os->os->os_userused_dnode && + os->os->os_userused_dnode->dn_type != 0) { + dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); + dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); + } object = 0; while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { @@ -1211,8 +1253,10 @@ dump_dir(objset_t *os) (void) printf("\n"); - if (error != ESRCH) - fatal("dmu_object_next() = %d", error); + if (error != ESRCH) { + (void) fprintf(stderr, "dmu_object_next() = %d\n", error); + abort(); + } } static void @@ -1395,7 +1439,8 @@ static space_map_ops_t zdb_space_map_ops = { zdb_space_map_unload, NULL, /* alloc */ zdb_space_map_claim, - NULL /* free */ + NULL, /* free */ + NULL /* maxsize */ }; static void @@ -1505,13 +1550,25 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, { zdb_cb_t *zcb = arg; char blkbuf[BP_SPRINTF_LEN]; + dmu_object_type_t type; + boolean_t is_l0_metadata; if (bp == NULL) return (0); - zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp)); + type = BP_GET_TYPE(bp); + + zdb_count_block(spa, zcb, bp, type); - if (dump_opt['c'] || dump_opt['S']) { + /* + * if we do metadata-only checksumming there's no need to checksum + * indirect blocks here because it is done during traverse + */ + is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES && + dmu_ot[type].ot_metadata); + + if (dump_opt['c'] > 1 || dump_opt['S'] || + (dump_opt['c'] && is_l0_metadata)) { int ioerr, size; void *data; @@ -1523,7 +1580,7 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, free(data); /* We expect io errors on intent log */ - if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) { + if (ioerr && type != DMU_OT_INTENT_LOG) { zcb->zcb_haderrors = 1; zcb->zcb_errors[ioerr]++; @@ -1571,8 +1628,9 @@ dump_block_stats(spa_t *spa) int c, e; if (!dump_opt['S']) { - (void) printf("\nTraversing all blocks %s%s%s%s...\n", + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", + (dump_opt['c'] == 1) ? "metadata " : "", dump_opt['c'] ? "checksums " : "", (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", !dump_opt['L'] ? "nothing leaked " : ""); @@ -1772,14 +1830,17 @@ dump_zpool(spa_t *spa) if (dump_opt['u']) dump_uberblock(&spa->spa_uberblock); - if (dump_opt['d'] || dump_opt['i']) { + if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) { dump_dir(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { dump_bplist(dp->dp_meta_objset, spa->spa_sync_bplist_obj, "Deferred frees"); dump_dtl(spa->spa_root_vdev, 0); - dump_metaslabs(spa); } + + if (dump_opt['d'] >= 3 || dump_opt['m']) + dump_metaslabs(spa); + (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } @@ -2255,13 +2316,14 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "udibcsvCLS:U:lRep:t:")) != -1) { + while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) { switch (c) { case 'u': case 'd': case 'i': case 'b': case 'c': + case 'm': case 's': case 'C': case 'l': @@ -2397,7 +2459,7 @@ main(int argc, char **argv) } if (error == 0) - error = spa_import_faulted(argv[0], + error = spa_import_verbatim(argv[0], exported_conf, nvl); nvlist_free(nvl); diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index 02d35a050..cc08ef514 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Print intent log header and statistics. */ @@ -345,8 +343,10 @@ dump_intent_log(zilog_t *zilog) if (zh->zh_log.blk_birth == 0 || verbose < 2) return; - (void) printf("\n ZIL header: claim_txg %llu, seq %llu\n", - (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_replay_seq); + (void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu", + (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq); + (void) printf(" replay_seq %llu, flags 0x%llx\n", + (u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags); if (verbose >= 4) print_log_bp(&zh->zh_log, "\n\tfirst block: "); diff --git a/cmd/zfs/zfs_iter.c b/cmd/zfs/zfs_iter.c index a22370a02..ca5c2b232 100644 --- a/cmd/zfs/zfs_iter.c +++ b/cmd/zfs/zfs_iter.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,11 +53,14 @@ typedef struct zfs_node { } zfs_node_t; typedef struct callback_data { - uu_avl_t *cb_avl; - int cb_flags; - zfs_type_t cb_types; - zfs_sort_column_t *cb_sortcol; - zprop_list_t **cb_proplist; + uu_avl_t *cb_avl; + int cb_flags; + zfs_type_t cb_types; + zfs_sort_column_t *cb_sortcol; + zprop_list_t **cb_proplist; + int cb_depth_limit; + int cb_depth; + uint8_t cb_props_table[ZFS_NUM_PROPS]; } callback_data_t; uu_avl_pool_t *avl_pool; @@ -98,10 +101,17 @@ zfs_callback(zfs_handle_t *zhp, void *data) uu_avl_node_init(node, &node->zn_avlnode, avl_pool); if (uu_avl_find(cb->cb_avl, node, cb->cb_sortcol, &idx) == NULL) { - if (cb->cb_proplist && - zfs_expand_proplist(zhp, cb->cb_proplist) != 0) { - free(node); - return (-1); + if (cb->cb_proplist) { + if ((*cb->cb_proplist) && + !(*cb->cb_proplist)->pl_all) + zfs_prune_proplist(zhp, + cb->cb_props_table); + + if (zfs_expand_proplist(zhp, cb->cb_proplist) + != 0) { + free(node); + return (-1); + } } uu_avl_insert(cb->cb_avl, node, idx); dontclose = 1; @@ -113,11 +123,15 @@ zfs_callback(zfs_handle_t *zhp, void *data) /* * Recurse if necessary. */ - if (cb->cb_flags & ZFS_ITER_RECURSE) { + if (cb->cb_flags & ZFS_ITER_RECURSE && + ((cb->cb_flags & ZFS_ITER_DEPTH_LIMIT) == 0 || + cb->cb_depth < cb->cb_depth_limit)) { + cb->cb_depth++; if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) (void) zfs_iter_filesystems(zhp, zfs_callback, data); if ((zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) && include_snaps) (void) zfs_iter_snapshots(zhp, zfs_callback, data); + cb->cb_depth--; } if (!dontclose) @@ -325,10 +339,10 @@ zfs_sort(const void *larg, const void *rarg, void *data) int zfs_for_each(int argc, char **argv, int flags, zfs_type_t types, - zfs_sort_column_t *sortcol, zprop_list_t **proplist, + zfs_sort_column_t *sortcol, zprop_list_t **proplist, int limit, zfs_iter_f callback, void *data) { - callback_data_t cb; + callback_data_t cb = {0}; int ret = 0; zfs_node_t *node; uu_avl_walk_t *walk; @@ -346,6 +360,45 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types, cb.cb_flags = flags; cb.cb_proplist = proplist; cb.cb_types = types; + cb.cb_depth_limit = limit; + /* + * If cb_proplist is provided then in the zfs_handles created we + * retain only those properties listed in cb_proplist and sortcol. + * The rest are pruned. So, the caller should make sure that no other + * properties other than those listed in cb_proplist/sortcol are + * accessed. + * + * If cb_proplist is NULL then we retain all the properties. We + * always retain the zoned property, which some other properties + * need (userquota & friends), and the createtxg property, which + * we need to sort snapshots. + */ + if (cb.cb_proplist && *cb.cb_proplist) { + zprop_list_t *p = *cb.cb_proplist; + + while (p) { + if (p->pl_prop >= ZFS_PROP_TYPE && + p->pl_prop < ZFS_NUM_PROPS) { + cb.cb_props_table[p->pl_prop] = B_TRUE; + } + p = p->pl_next; + } + + while (sortcol) { + if (sortcol->sc_prop >= ZFS_PROP_TYPE && + sortcol->sc_prop < ZFS_NUM_PROPS) { + cb.cb_props_table[sortcol->sc_prop] = B_TRUE; + } + sortcol = sortcol->sc_next; + } + + cb.cb_props_table[ZFS_PROP_ZONED] = B_TRUE; + cb.cb_props_table[ZFS_PROP_CREATETXG] = B_TRUE; + } else { + (void) memset(cb.cb_props_table, B_TRUE, + sizeof (cb.cb_props_table)); + } + if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); diff --git a/cmd/zfs/zfs_iter.h b/cmd/zfs/zfs_iter.h index 76a11085a..a0290775b 100644 --- a/cmd/zfs/zfs_iter.h +++ b/cmd/zfs/zfs_iter.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -41,9 +41,10 @@ typedef struct zfs_sort_column { #define ZFS_ITER_RECURSE (1 << 0) #define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1) #define ZFS_ITER_PROP_LISTSNAPS (1 << 2) +#define ZFS_ITER_DEPTH_LIMIT (1 << 3) int zfs_for_each(int, char **, int options, zfs_type_t, - zfs_sort_column_t *, zprop_list_t **, zfs_iter_f, void *); + zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *); int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t); void zfs_free_sort_columns(zfs_sort_column_t *); diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 1f7f47d08..0752a4772 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -39,12 +39,14 @@ #include #include #include +#include +#include #include #include #include #include #include -#include +#include #include #include @@ -56,6 +58,7 @@ libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; +const char *pypath = "/usr/lib/zfs/pyzfs.py"; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -75,8 +78,8 @@ static int zfs_do_unshare(int argc, char **argv); static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); -static int zfs_do_allow(int argc, char **argv); -static int zfs_do_unallow(int argc, char **argv); +static int zfs_do_userspace(int argc, char **argv); +static int zfs_do_python(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. @@ -116,7 +119,9 @@ typedef enum { HELP_UNMOUNT, HELP_UNSHARE, HELP_ALLOW, - HELP_UNALLOW + HELP_UNALLOW, + HELP_USERSPACE, + HELP_GROUPSPACE } zfs_help_t; typedef struct zfs_command { @@ -150,6 +155,8 @@ static zfs_command_t command_table[] = { { "get", zfs_do_get, HELP_GET }, { "inherit", zfs_do_inherit, HELP_INHERIT }, { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, + { "userspace", zfs_do_userspace, HELP_USERSPACE }, + { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, { NULL }, { "mount", zfs_do_mount, HELP_MOUNT }, { "unmount", zfs_do_unmount, HELP_UNMOUNT }, @@ -159,9 +166,9 @@ static zfs_command_t command_table[] = { { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, - { "allow", zfs_do_allow, HELP_ALLOW }, + { "allow", zfs_do_python, HELP_ALLOW }, { NULL }, - { "unallow", zfs_do_unallow, HELP_UNALLOW }, + { "unallow", zfs_do_python, HELP_UNALLOW }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -184,8 +191,8 @@ get_usage(zfs_help_t idx) return (gettext("\tdestroy [-rRf] " "\n")); case HELP_GET: - return (gettext("\tget [-rHp] [-o field[,...]] " - "[-s source[,...]]\n" + return (gettext("\tget [-rHp] [-d max] " + "[-o field[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot] ...\n")); case HELP_INHERIT: @@ -195,8 +202,8 @@ get_usage(zfs_help_t idx) return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: - return (gettext("\tlist [-rH] [-o property[,...]] " - "[-t type[,...]] [-s property] ...\n" + return (gettext("\tlist [-rH][-d max] " + "[-o property[,...]] [-t type[,...]] [-s property] ...\n" "\t [-S property] ... " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: @@ -232,7 +239,8 @@ get_usage(zfs_help_t idx) return (gettext("\tunshare [-f] " "<-a | filesystem|mountpoint>\n")); case HELP_ALLOW: - return (gettext("\tallow [-ldug] " + return (gettext("\tallow \n" + "\tallow [-ldug] " "<\"everyone\"|user|group>[,...] [,...]\n" "\t \n" "\tallow [-ld] -e [,...] " @@ -250,6 +258,14 @@ get_usage(zfs_help_t idx) "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); + case HELP_USERSPACE: + return (gettext("\tuserspace [-hniHp] [-o field[,...]] " + "[-sS field] ... [-t type[,...]]\n" + "\t \n")); + case HELP_GROUPSPACE: + return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] " + "[-sS field] ... [-t type[,...]]\n" + "\t \n")); } abort(); @@ -311,7 +327,6 @@ usage(boolean_t requested) { int i; boolean_t show_properties = B_FALSE; - boolean_t show_permissions = B_FALSE; FILE *fp = requested ? stdout : stderr; if (current_command == NULL) { @@ -342,13 +357,7 @@ usage(boolean_t requested) strcmp(current_command->name, "list") == 0)) show_properties = B_TRUE; - if (current_command != NULL && - (strcmp(current_command->name, "allow") == 0 || - strcmp(current_command->name, "unallow") == 0)) - show_permissions = B_TRUE; - if (show_properties) { - (void) fprintf(fp, gettext("\nThe following properties are supported:\n")); @@ -359,16 +368,26 @@ usage(boolean_t requested) (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_DATASET); + (void) fprintf(fp, "\t%-15s ", "userused@..."); + (void) fprintf(fp, " NO NO \n"); + (void) fprintf(fp, "\t%-15s ", "groupused@..."); + (void) fprintf(fp, " NO NO \n"); + (void) fprintf(fp, "\t%-15s ", "userquota@..."); + (void) fprintf(fp, "YES NO | none\n"); + (void) fprintf(fp, "\t%-15s ", "groupquota@..."); + (void) fprintf(fp, "YES NO | none\n"); + (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); (void) fprintf(fp, gettext("\nUser-defined properties can " "be specified by using a name containing a colon (:).\n")); - - } else if (show_permissions) { - (void) fprintf(fp, - gettext("\nThe following permissions are supported:\n")); - - zfs_deleg_permissions(); + (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ " + "properties must be appended with\n" + "a user or group specifier of one of these forms:\n" + " POSIX name (eg: \"matt\")\n" + " POSIX id (eg: \"126829\")\n" + " SMB name@domain (eg: \"matt@sun\")\n" + " SMB SID (eg: \"S-1-234-567-89\")\n")); } else { (void) fprintf(fp, gettext("\nFor the property list, run: %s\n"), @@ -415,6 +434,27 @@ parseprop(nvlist_t *props) return (0); } +static int +parse_depth(char *opt, int *flags) +{ + char *tmp; + int depth; + + depth = (int)strtol(opt, &tmp, 0); + if (*tmp) { + (void) fprintf(stderr, + gettext("%s is not an integer\n"), optarg); + usage(B_FALSE); + } + if (depth < 0) { + (void) fprintf(stderr, + gettext("Depth can not be negative.\n")); + usage(B_FALSE); + } + *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); + return (depth); +} + /* * zfs clone [-p] [-o prop=value] ... * @@ -1063,6 +1103,17 @@ get_callback(zfs_handle_t *zhp, void *data) zprop_print_one_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), buf, sourcetype, source); + } else if (zfs_prop_userquota(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; + + if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, + buf, sizeof (buf), cbp->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; + (void) strlcpy(buf, "-", sizeof (buf)); + } + + zprop_print_one_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source); } else { if (nvlist_lookup_nvlist(userprop, pl->pl_user_prop, &propval) != 0) { @@ -1102,6 +1153,7 @@ zfs_do_get(int argc, char **argv) int i, c, flags = 0; char *value, *fields; int ret; + int limit = 0; zprop_list_t fake_name = { 0 }; /* @@ -1115,11 +1167,14 @@ zfs_do_get(int argc, char **argv) cb.cb_type = ZFS_TYPE_DATASET; /* check options */ - while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) { + while ((c = getopt(argc, argv, ":d:o:s:rHp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; + case 'd': + limit = parse_depth(optarg, &flags); + break; case 'r': flags |= ZFS_ITER_RECURSE; break; @@ -1250,7 +1305,7 @@ zfs_do_get(int argc, char **argv) /* run for each object */ ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, - &cb.cb_proplist, get_callback, &cb); + &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); @@ -1363,10 +1418,10 @@ zfs_do_inherit(int argc, char **argv) if (flags & ZFS_ITER_RECURSE) { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, - NULL, NULL, inherit_recurse_cb, propname); + NULL, NULL, 0, inherit_recurse_cb, propname); } else { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, - NULL, NULL, inherit_cb, propname); + NULL, NULL, 0, inherit_cb, propname); } return (ret); @@ -1435,21 +1490,30 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - - if (cb->cb_version >= ZPL_VERSION_FUID) { - int spa_version; - - if (zfs_spa_version(zhp, &spa_version) < 0) - return (-1); - - if (spa_version < SPA_VERSION_FUID) { - /* can't upgrade */ - (void) printf(gettext("%s: can not be upgraded; " - "the pool version needs to first be upgraded\nto " - "version %d\n\n"), - zfs_get_name(zhp), SPA_VERSION_FUID); - cb->cb_numfailed++; - return (0); + int i; + static struct { int zplver; int spaver; } table[] = { + {ZPL_VERSION_FUID, SPA_VERSION_FUID}, + {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, + {0, 0} + }; + + + for (i = 0; table[i].zplver; i++) { + if (cb->cb_version >= table[i].zplver) { + int spa_version; + + if (zfs_spa_version(zhp, &spa_version) < 0) + return (-1); + + if (spa_version < table[i].spaver) { + /* can't upgrade */ + (void) printf(gettext("%s: can not be " + "upgraded; the pool version needs to first " + "be upgraded\nto version %d\n\n"), + zfs_get_name(zhp), table[i].spaver); + cb->cb_numfailed++; + return (0); + } } } @@ -1550,6 +1614,8 @@ zfs_do_upgrade(int argc, char **argv) (void) printf(gettext(" 2 Enhanced directory entries\n")); (void) printf(gettext(" 3 Case insensitive and File system " "unique identifer (FUID)\n")); + (void) printf(gettext(" 4 userquota, groupquota " + "properties\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases, see:\n\n")); (void) printf("http://www.opensolaris.org/os/community/zfs/" @@ -1561,7 +1627,7 @@ zfs_do_upgrade(int argc, char **argv) if (cb.cb_version == 0) cb.cb_version = ZPL_VERSION; ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, upgrade_set_callback, &cb); + NULL, NULL, 0, upgrade_set_callback, &cb); (void) printf(gettext("%llu filesystems upgraded\n"), cb.cb_numupgraded); if (cb.cb_numsamegraded) { @@ -1579,14 +1645,14 @@ zfs_do_upgrade(int argc, char **argv) flags |= ZFS_ITER_RECURSE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, upgrade_list_callback, &cb); + NULL, NULL, 0, upgrade_list_callback, &cb); found = cb.cb_foundone; cb.cb_foundone = B_FALSE; cb.cb_newer = B_TRUE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, - NULL, NULL, upgrade_list_callback, &cb); + NULL, NULL, 0, upgrade_list_callback, &cb); if (!cb.cb_foundone && !found) { (void) printf(gettext("All filesystems are " @@ -1598,11 +1664,90 @@ zfs_do_upgrade(int argc, char **argv) } /* - * list [-rH] [-o property[,property]...] [-t type[,type]...] + * zfs userspace + */ +static int +userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) +{ + zfs_userquota_prop_t *typep = arg; + zfs_userquota_prop_t p = *typep; + char *name = NULL; + char *ug, *propname; + char namebuf[32]; + char sizebuf[32]; + + if (domain == NULL || domain[0] == '\0') { + if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) { + struct group *g = getgrgid(rid); + if (g) + name = g->gr_name; + } else { + struct passwd *p = getpwuid(rid); + if (p) + name = p->pw_name; + } + } + + if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) + ug = "group"; + else + ug = "user"; + + if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED) + propname = "used"; + else + propname = "quota"; + + if (name == NULL) { + (void) snprintf(namebuf, sizeof (namebuf), + "%llu", (longlong_t)rid); + name = namebuf; + } + zfs_nicenum(space, sizebuf, sizeof (sizebuf)); + + (void) printf("%s %s %s%c%s %s\n", propname, ug, domain, + domain[0] ? '-' : ' ', name, sizebuf); + + return (0); +} + +static int +zfs_do_userspace(int argc, char **argv) +{ + zfs_handle_t *zhp; + zfs_userquota_prop_t p; + int error; + + /* + * Try the python version. If the execv fails, we'll continue + * and do a simplistic implementation. + */ + (void) execv(pypath, argv-1); + + (void) printf("internal error: %s not found\n" + "falling back on built-in implementation, " + "some features will not work\n", pypath); + + if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL) + return (1); + + (void) printf("PROP TYPE NAME VALUE\n"); + + for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { + error = zfs_userspace(zhp, p, userspace_cb, &p); + if (error) + break; + } + return (error); +} + +/* + * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...] * [-s property [-s property]...] [-S property [-S property]...] * ... * * -r Recurse over all children + * -d Limit recursion by depth. * -H Scripted mode; elide headers and separate columns by tabs * -o Control which fields to display. * -t Control which object types to display. @@ -1685,7 +1830,6 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) first = B_FALSE; } - right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, property, sizeof (property), NULL, NULL, 0, B_FALSE) != 0) @@ -1694,6 +1838,13 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); + } else if (zfs_prop_userquota(pl->pl_user_prop)) { + if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, + property, sizeof (property), B_FALSE) != 0) + propstr = "-"; + else + propstr = property; + right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) @@ -1701,6 +1852,7 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) else verify(nvlist_lookup_string(propval, ZPROP_VALUE, &propstr) == 0); + right_justify = B_FALSE; } width = pl->pl_width; @@ -1752,16 +1904,20 @@ zfs_do_list(int argc, char **argv) char *fields = NULL; list_cbdata_t cb = { 0 }; char *value; + int limit = 0; int ret; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ - while ((c = getopt(argc, argv, ":o:rt:Hs:S:")) != -1) { + while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) { switch (c) { case 'o': fields = optarg; break; + case 'd': + limit = parse_depth(optarg, &flags); + break; case 'r': flags |= ZFS_ITER_RECURSE; break; @@ -1852,7 +2008,7 @@ zfs_do_list(int argc, char **argv) cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, - list_callback, &cb); + limit, list_callback, &cb); zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); @@ -2235,7 +2391,7 @@ zfs_do_set(int argc, char **argv) } ret = zfs_for_each(argc - 2, argv + 2, NULL, - ZFS_TYPE_DATASET, NULL, NULL, set_callback, &cb); + ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb); return (ret); } @@ -2495,390 +2651,6 @@ zfs_do_receive(int argc, char **argv) return (err != 0); } -typedef struct allow_cb { - int a_permcnt; - size_t a_treeoffset; -} allow_cb_t; - -static void -zfs_print_perms(avl_tree_t *tree) -{ - zfs_perm_node_t *permnode; - - permnode = avl_first(tree); - while (permnode != NULL) { - (void) printf("%s", permnode->z_pname); - permnode = AVL_NEXT(tree, permnode); - if (permnode) - (void) printf(","); - else - (void) printf("\n"); - } -} - -/* - * Iterate over user/groups/everyone/... and the call perm_iter - * function to print actual permission when tree has >0 nodes. - */ -static void -zfs_iter_perms(avl_tree_t *tree, const char *banner, allow_cb_t *cb) -{ - zfs_allow_node_t *item; - avl_tree_t *ptree; - - item = avl_first(tree); - while (item) { - ptree = (void *)((char *)item + cb->a_treeoffset); - if (avl_numnodes(ptree)) { - if (cb->a_permcnt++ == 0) - (void) printf("%s\n", banner); - (void) printf("\t%s", item->z_key); - /* - * Avoid an extra space being printed - * for "everyone" which is keyed with a null - * string - */ - if (item->z_key[0] != '\0') - (void) printf(" "); - zfs_print_perms(ptree); - } - item = AVL_NEXT(tree, item); - } -} - -#define LINES "-------------------------------------------------------------\n" -static int -zfs_print_allows(char *ds) -{ - zfs_allow_t *curperms, *perms; - zfs_handle_t *zhp; - allow_cb_t allowcb = { 0 }; - char banner[MAXPATHLEN]; - - if (ds[0] == '-') - usage(B_FALSE); - - if (strrchr(ds, '@')) { - (void) fprintf(stderr, gettext("Snapshots don't have 'allow'" - " permissions\n")); - return (1); - } - if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL) - return (1); - - if (zfs_perm_get(zhp, &perms)) { - (void) fprintf(stderr, - gettext("Failed to retrieve 'allows' on %s\n"), ds); - zfs_close(zhp); - return (1); - } - - zfs_close(zhp); - - if (perms != NULL) - (void) printf("%s", LINES); - for (curperms = perms; curperms; curperms = curperms->z_next) { - - (void) snprintf(banner, sizeof (banner), - gettext("Permission sets on (%s)"), curperms->z_setpoint); - allowcb.a_treeoffset = - offsetof(zfs_allow_node_t, z_localdescend); - allowcb.a_permcnt = 0; - zfs_iter_perms(&curperms->z_sets, banner, &allowcb); - - (void) snprintf(banner, sizeof (banner), - gettext("Create time permissions on (%s)"), - curperms->z_setpoint); - allowcb.a_treeoffset = - offsetof(zfs_allow_node_t, z_localdescend); - allowcb.a_permcnt = 0; - zfs_iter_perms(&curperms->z_crperms, banner, &allowcb); - - - (void) snprintf(banner, sizeof (banner), - gettext("Local permissions on (%s)"), curperms->z_setpoint); - allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_local); - allowcb.a_permcnt = 0; - zfs_iter_perms(&curperms->z_user, banner, &allowcb); - zfs_iter_perms(&curperms->z_group, banner, &allowcb); - zfs_iter_perms(&curperms->z_everyone, banner, &allowcb); - - (void) snprintf(banner, sizeof (banner), - gettext("Descendent permissions on (%s)"), - curperms->z_setpoint); - allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_descend); - allowcb.a_permcnt = 0; - zfs_iter_perms(&curperms->z_user, banner, &allowcb); - zfs_iter_perms(&curperms->z_group, banner, &allowcb); - zfs_iter_perms(&curperms->z_everyone, banner, &allowcb); - - (void) snprintf(banner, sizeof (banner), - gettext("Local+Descendent permissions on (%s)"), - curperms->z_setpoint); - allowcb.a_treeoffset = - offsetof(zfs_allow_node_t, z_localdescend); - allowcb.a_permcnt = 0; - zfs_iter_perms(&curperms->z_user, banner, &allowcb); - zfs_iter_perms(&curperms->z_group, banner, &allowcb); - zfs_iter_perms(&curperms->z_everyone, banner, &allowcb); - - (void) printf("%s", LINES); - } - zfs_free_allows(perms); - return (0); -} - -#define ALLOWOPTIONS "ldcsu:g:e" -#define UNALLOWOPTIONS "ldcsu:g:er" - -/* - * Validate options, and build necessary datastructure to display/remove/add - * permissions. - * Returns 0 - If permissions should be added/removed - * Returns 1 - If permissions should be displayed. - * Returns -1 - on failure - */ -int -parse_allow_args(int *argc, char **argv[], boolean_t unallow, - char **ds, int *recurse, nvlist_t **zperms) -{ - int c; - char *options = unallow ? UNALLOWOPTIONS : ALLOWOPTIONS; - zfs_deleg_inherit_t deleg_type = ZFS_DELEG_NONE; - zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; - char *who = NULL; - char *perms = NULL; - zfs_handle_t *zhp; - - while ((c = getopt(*argc, *argv, options)) != -1) { - switch (c) { - case 'l': - if (who_type == ZFS_DELEG_CREATE || - who_type == ZFS_DELEG_NAMED_SET) - usage(B_FALSE); - - deleg_type |= ZFS_DELEG_PERM_LOCAL; - break; - case 'd': - if (who_type == ZFS_DELEG_CREATE || - who_type == ZFS_DELEG_NAMED_SET) - usage(B_FALSE); - - deleg_type |= ZFS_DELEG_PERM_DESCENDENT; - break; - case 'r': - *recurse = B_TRUE; - break; - case 'c': - if (who_type != ZFS_DELEG_WHO_UNKNOWN) - usage(B_FALSE); - if (deleg_type) - usage(B_FALSE); - who_type = ZFS_DELEG_CREATE; - break; - case 's': - if (who_type != ZFS_DELEG_WHO_UNKNOWN) - usage(B_FALSE); - if (deleg_type) - usage(B_FALSE); - who_type = ZFS_DELEG_NAMED_SET; - break; - case 'u': - if (who_type != ZFS_DELEG_WHO_UNKNOWN) - usage(B_FALSE); - who_type = ZFS_DELEG_USER; - who = optarg; - break; - case 'g': - if (who_type != ZFS_DELEG_WHO_UNKNOWN) - usage(B_FALSE); - who_type = ZFS_DELEG_GROUP; - who = optarg; - break; - case 'e': - if (who_type != ZFS_DELEG_WHO_UNKNOWN) - usage(B_FALSE); - who_type = ZFS_DELEG_EVERYONE; - break; - default: - usage(B_FALSE); - break; - } - } - - if (deleg_type == 0) - deleg_type = ZFS_DELEG_PERM_LOCALDESCENDENT; - - *argc -= optind; - *argv += optind; - - if (unallow == B_FALSE && *argc == 1) { - /* - * Only print permissions if no options were processed - */ - if (optind == 1) - return (1); - else - usage(B_FALSE); - } - - /* - * initialize variables for zfs_build_perms based on number - * of arguments. - * 3 arguments ==> zfs [un]allow joe perm,perm,perm or - * zfs [un]allow -s @set1 perm,perm - * 2 arguments ==> zfs [un]allow -c perm,perm or - * zfs [un]allow -u|-g perm or - * zfs [un]allow -e perm,perm - * zfs unallow joe - * zfs unallow -s @set1 - * 1 argument ==> zfs [un]allow -e or - * zfs [un]allow -c - */ - - switch (*argc) { - case 3: - perms = (*argv)[1]; - who = (*argv)[0]; - *ds = (*argv)[2]; - - /* - * advance argc/argv for do_allow cases. - * for do_allow case make sure who have a know who type - * and its not a permission set. - */ - if (unallow == B_TRUE) { - *argc -= 2; - *argv += 2; - } else if (who_type != ZFS_DELEG_WHO_UNKNOWN && - who_type != ZFS_DELEG_NAMED_SET) - usage(B_FALSE); - break; - - case 2: - if (unallow == B_TRUE && (who_type == ZFS_DELEG_EVERYONE || - who_type == ZFS_DELEG_CREATE || who != NULL)) { - perms = (*argv)[0]; - *ds = (*argv)[1]; - } else { - if (unallow == B_FALSE && - (who_type == ZFS_DELEG_WHO_UNKNOWN || - who_type == ZFS_DELEG_NAMED_SET)) - usage(B_FALSE); - else if (who_type == ZFS_DELEG_WHO_UNKNOWN || - who_type == ZFS_DELEG_NAMED_SET) - who = (*argv)[0]; - else if (who_type != ZFS_DELEG_NAMED_SET) - perms = (*argv)[0]; - *ds = (*argv)[1]; - } - if (unallow == B_TRUE) { - (*argc)--; - (*argv)++; - } - break; - - case 1: - if (unallow == B_FALSE) - usage(B_FALSE); - if (who == NULL && who_type != ZFS_DELEG_CREATE && - who_type != ZFS_DELEG_EVERYONE) - usage(B_FALSE); - *ds = (*argv)[0]; - break; - - default: - usage(B_FALSE); - } - - if (strrchr(*ds, '@')) { - (void) fprintf(stderr, - gettext("Can't set or remove 'allow' permissions " - "on snapshots.\n")); - return (-1); - } - - if ((zhp = zfs_open(g_zfs, *ds, ZFS_TYPE_DATASET)) == NULL) - return (-1); - - if ((zfs_build_perms(zhp, who, perms, - who_type, deleg_type, zperms)) != 0) { - zfs_close(zhp); - return (-1); - } - zfs_close(zhp); - return (0); -} - -static int -zfs_do_allow(int argc, char **argv) -{ - char *ds; - nvlist_t *zperms = NULL; - zfs_handle_t *zhp; - int unused; - int ret; - - if ((ret = parse_allow_args(&argc, &argv, B_FALSE, &ds, - &unused, &zperms)) == -1) - return (1); - - if (ret == 1) - return (zfs_print_allows(argv[0])); - - if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL) - return (1); - - if (zfs_perm_set(zhp, zperms)) { - zfs_close(zhp); - nvlist_free(zperms); - return (1); - } - nvlist_free(zperms); - zfs_close(zhp); - - return (0); -} - -static int -unallow_callback(zfs_handle_t *zhp, void *data) -{ - nvlist_t *nvp = (nvlist_t *)data; - int error; - - error = zfs_perm_remove(zhp, nvp); - if (error) { - (void) fprintf(stderr, gettext("Failed to remove permissions " - "on %s\n"), zfs_get_name(zhp)); - } - return (error); -} - -static int -zfs_do_unallow(int argc, char **argv) -{ - int recurse = B_FALSE; - char *ds; - int error; - nvlist_t *zperms = NULL; - int flags = 0; - - if (parse_allow_args(&argc, &argv, B_TRUE, - &ds, &recurse, &zperms) == -1) - return (1); - - if (recurse) - flags |= ZFS_ITER_RECURSE; - error = zfs_for_each(argc, argv, flags, - ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME, NULL, - NULL, unallow_callback, (void *)zperms); - - if (zperms) - nvlist_free(zperms); - - return (error); -} - typedef struct get_all_cbdata { zfs_handle_t **cb_handles; size_t cb_alloc; @@ -3944,6 +3716,15 @@ zfs_do_unshare(int argc, char **argv) return (unshare_unmount(OP_SHARE, argc, argv)); } +/* ARGSUSED */ +static int +zfs_do_python(int argc, char **argv) +{ + (void) execv(pypath, argv-1); + (void) printf("internal error: %s not found\n", pypath); + return (-1); +} + /* * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. @@ -4197,6 +3978,7 @@ main(int argc, char **argv) /* * Run the appropriate command. */ + libzfs_mnttab_cache(g_zfs, B_TRUE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, argv + 1); @@ -4209,6 +3991,7 @@ main(int argc, char **argv) "command '%s'\n"), cmdname); usage(B_FALSE); } + libzfs_mnttab_cache(g_zfs, B_FALSE); } (void) fclose(mnttab_file); diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index 02dc18b9c..09c377ef8 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ZFS Fault Injector * @@ -224,7 +222,7 @@ usage(void) "\t\tClear the particular record (if given a numeric ID), or\n" "\t\tall records if 'all' is specificed.\n" "\n" - "\tzinject -d device [-e errno] [-L ] pool\n" + "\tzinject -d device [-e errno] [-L ] [-F] pool\n" "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n" "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n" @@ -516,7 +514,7 @@ main(int argc, char **argv) return (0); } - while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) { + while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -553,6 +551,9 @@ main(int argc, char **argv) return (1); } break; + case 'F': + record.zi_failfast = B_TRUE; + break; case 'h': usage(); return (0); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 42c6aabb6..c8a33df4d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -376,12 +376,11 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props, } normnm = zpool_prop_to_name(prop); } else { - if ((fprop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { - (void) fprintf(stderr, gettext("property '%s' is " - "not a valid file system property\n"), propname); - return (2); + if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { + normnm = zfs_prop_to_name(fprop); + } else { + normnm = propname; } - normnm = zfs_prop_to_name(fprop); } if (nvlist_lookup_string(proplist, normnm, &strval) == 0 && @@ -979,14 +978,189 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) return (max); } +typedef struct spare_cbdata { + uint64_t cb_guid; + zpool_handle_t *cb_zhp; +} spare_cbdata_t; + +static boolean_t +find_vdev(nvlist_t *nv, uint64_t search) +{ + uint64_t guid; + nvlist_t **child; + uint_t c, children; + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && + search == guid) + return (B_TRUE); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (find_vdev(child[c], search)) + return (B_TRUE); + } + + return (B_FALSE); +} + +static int +find_spare(zpool_handle_t *zhp, void *data) +{ + spare_cbdata_t *cbp = data; + nvlist_t *config, *nvroot; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + if (find_vdev(nvroot, cbp->cb_guid)) { + cbp->cb_zhp = zhp; + return (1); + } + + zpool_close(zhp); + return (0); +} + +/* + * Print out configuration state as requested by status_callback. + */ +void +print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + int namewidth, int depth, boolean_t isspare) +{ + nvlist_t **child; + uint_t c, children; + vdev_stat_t *vs; + char rbuf[6], wbuf[6], cbuf[6], repaired[7]; + char *vname; + uint64_t notpresent; + spare_cbdata_t cb; + char *state; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + (uint64_t **)&vs, &c) == 0); + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + children = 0; + + state = zpool_state_to_name(vs->vs_state, vs->vs_aux); + if (isspare) { + /* + * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for + * online drives. + */ + if (vs->vs_aux == VDEV_AUX_SPARED) + state = "INUSE"; + else if (vs->vs_state == VDEV_STATE_HEALTHY) + state = "AVAIL"; + } + + (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth, + name, state); + + if (!isspare) { + zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); + zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); + zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); + (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); + } + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, + ¬present) == 0) { + char *path; + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + (void) printf(" was %s", path); + } else if (vs->vs_aux != 0) { + (void) printf(" "); + + switch (vs->vs_aux) { + case VDEV_AUX_OPEN_FAILED: + (void) printf(gettext("cannot open")); + break; + + case VDEV_AUX_BAD_GUID_SUM: + (void) printf(gettext("missing device")); + break; + + case VDEV_AUX_NO_REPLICAS: + (void) printf(gettext("insufficient replicas")); + break; + + case VDEV_AUX_VERSION_NEWER: + (void) printf(gettext("newer version")); + break; + + case VDEV_AUX_SPARED: + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &cb.cb_guid) == 0); + if (zpool_iter(g_zfs, find_spare, &cb) == 1) { + if (strcmp(zpool_get_name(cb.cb_zhp), + zpool_get_name(zhp)) == 0) + (void) printf(gettext("currently in " + "use")); + else + (void) printf(gettext("in use by " + "pool '%s'"), + zpool_get_name(cb.cb_zhp)); + zpool_close(cb.cb_zhp); + } else { + (void) printf(gettext("currently in use")); + } + break; + + case VDEV_AUX_ERR_EXCEEDED: + (void) printf(gettext("too many errors")); + break; + + case VDEV_AUX_IO_FAILURE: + (void) printf(gettext("experienced I/O failures")); + break; + + case VDEV_AUX_BAD_LOG: + (void) printf(gettext("bad intent log")); + break; + + default: + (void) printf(gettext("corrupted data")); + break; + } + } else if (vs->vs_scrub_repaired != 0 && children == 0) { + /* + * Report bytes resilvered/repaired on leaf devices. + */ + zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired)); + (void) printf(gettext(" %s %s"), repaired, + (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ? + "resilvered" : "repaired"); + } + + (void) printf("\n"); + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + + /* Don't print logs here */ + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (is_log) + continue; + vname = zpool_vdev_name(g_zfs, zhp, child[c]); + print_status_config(zhp, vname, child[c], + namewidth, depth + 2, isspare); + free(vname); + } +} + /* * Print the configuration of an exported pool. Iterate over all vdevs in the * pool, printing out the name and status for each one. */ void -print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth, - boolean_t print_logs) +print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) { nvlist_t **child; uint_t c, children; @@ -1043,12 +1217,11 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth, (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); - if ((is_log && !print_logs) || (!is_log && print_logs)) + if (is_log) continue; vname = zpool_vdev_name(g_zfs, NULL, child[c]); - print_import_config(vname, child[c], - namewidth, depth + 2, B_FALSE); + print_import_config(vname, child[c], namewidth, depth + 2); free(vname); } @@ -1073,6 +1246,43 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth, } } +/* + * Print log vdevs. + * Logs are recorded as top level vdevs in the main pool child array + * but with "is_log" set to 1. We use either print_status_config() or + * print_import_config() to print the top level logs then any log + * children (eg mirrored slogs) are printed recursively - which + * works because only the top level vdev is marked "is_log" + */ +static void +print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose) +{ + uint_t c, children; + nvlist_t **child; + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) + return; + + (void) printf(gettext("\tlogs\n")); + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE; + char *name; + + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + if (!is_log) + continue; + name = zpool_vdev_name(g_zfs, zhp, child[c]); + if (verbose) + print_status_config(zhp, name, child[c], namewidth, + 2, B_FALSE); + else + print_import_config(name, child[c], namewidth, 2); + free(name); + } +} /* * Display the status for the given pool. */ @@ -1241,11 +1451,9 @@ show_import(nvlist_t *config) if (namewidth < 10) namewidth = 10; - print_import_config(name, nvroot, namewidth, 0, B_FALSE); - if (num_logs(nvroot) > 0) { - (void) printf(gettext("\tlogs\n")); - print_import_config(name, nvroot, namewidth, 0, B_TRUE); - } + print_import_config(name, nvroot, namewidth, 0); + if (num_logs(nvroot) > 0) + print_logs(NULL, nvroot, namewidth, B_FALSE); if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { (void) printf(gettext("\n\tAdditional devices are known to " @@ -2427,10 +2635,14 @@ zpool_do_online(int argc, char **argv) zpool_handle_t *zhp; int ret = 0; vdev_state_t newstate; + int flags = 0; /* check options */ - while ((c = getopt(argc, argv, "t")) != -1) { + while ((c = getopt(argc, argv, "et")) != -1) { switch (c) { + case 'e': + flags |= ZFS_ONLINE_EXPAND; + break; case 't': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -2458,7 +2670,7 @@ zpool_do_online(int argc, char **argv) return (1); for (i = 1; i < argc; i++) { - if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) { + if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { if (newstate != VDEV_STATE_HEALTHY) { (void) printf(gettext("warning: device '%s' " "onlined, but remains in faulted state\n"), @@ -2715,181 +2927,6 @@ print_scrub_status(nvlist_t *nvroot) (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60)); } -typedef struct spare_cbdata { - uint64_t cb_guid; - zpool_handle_t *cb_zhp; -} spare_cbdata_t; - -static boolean_t -find_vdev(nvlist_t *nv, uint64_t search) -{ - uint64_t guid; - nvlist_t **child; - uint_t c, children; - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && - search == guid) - return (B_TRUE); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev(child[c], search)) - return (B_TRUE); - } - - return (B_FALSE); -} - -static int -find_spare(zpool_handle_t *zhp, void *data) -{ - spare_cbdata_t *cbp = data; - nvlist_t *config, *nvroot; - - config = zpool_get_config(zhp, NULL); - verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - - if (find_vdev(nvroot, cbp->cb_guid)) { - cbp->cb_zhp = zhp; - return (1); - } - - zpool_close(zhp); - return (0); -} - -/* - * Print out configuration state as requested by status_callback. - */ -void -print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, - int namewidth, int depth, boolean_t isspare, boolean_t print_logs) -{ - nvlist_t **child; - uint_t c, children; - vdev_stat_t *vs; - char rbuf[6], wbuf[6], cbuf[6], repaired[7]; - char *vname; - uint64_t notpresent; - spare_cbdata_t cb; - char *state; - - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, - (uint64_t **)&vs, &c) == 0); - - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) - children = 0; - - state = zpool_state_to_name(vs->vs_state, vs->vs_aux); - if (isspare) { - /* - * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for - * online drives. - */ - if (vs->vs_aux == VDEV_AUX_SPARED) - state = "INUSE"; - else if (vs->vs_state == VDEV_STATE_HEALTHY) - state = "AVAIL"; - } - - (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth, - name, state); - - if (!isspare) { - zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); - zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); - zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); - (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); - } - - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, - ¬present) == 0) { - char *path; - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - (void) printf(" was %s", path); - } else if (vs->vs_aux != 0) { - (void) printf(" "); - - switch (vs->vs_aux) { - case VDEV_AUX_OPEN_FAILED: - (void) printf(gettext("cannot open")); - break; - - case VDEV_AUX_BAD_GUID_SUM: - (void) printf(gettext("missing device")); - break; - - case VDEV_AUX_NO_REPLICAS: - (void) printf(gettext("insufficient replicas")); - break; - - case VDEV_AUX_VERSION_NEWER: - (void) printf(gettext("newer version")); - break; - - case VDEV_AUX_SPARED: - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, - &cb.cb_guid) == 0); - if (zpool_iter(g_zfs, find_spare, &cb) == 1) { - if (strcmp(zpool_get_name(cb.cb_zhp), - zpool_get_name(zhp)) == 0) - (void) printf(gettext("currently in " - "use")); - else - (void) printf(gettext("in use by " - "pool '%s'"), - zpool_get_name(cb.cb_zhp)); - zpool_close(cb.cb_zhp); - } else { - (void) printf(gettext("currently in use")); - } - break; - - case VDEV_AUX_ERR_EXCEEDED: - (void) printf(gettext("too many errors")); - break; - - case VDEV_AUX_IO_FAILURE: - (void) printf(gettext("experienced I/O failures")); - break; - - case VDEV_AUX_BAD_LOG: - (void) printf(gettext("bad intent log")); - break; - - default: - (void) printf(gettext("corrupted data")); - break; - } - } else if (vs->vs_scrub_repaired != 0 && children == 0) { - /* - * Report bytes resilvered/repaired on leaf devices. - */ - zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired)); - (void) printf(gettext(" %s %s"), repaired, - (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ? - "resilvered" : "repaired"); - } - - (void) printf("\n"); - - for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; - - (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if ((is_log && !print_logs) || (!is_log && print_logs)) - continue; - vname = zpool_vdev_name(g_zfs, zhp, child[c]); - print_status_config(zhp, vname, child[c], - namewidth, depth + 2, isspare, B_FALSE); - free(vname); - } -} - static void print_error_log(zpool_handle_t *zhp) { @@ -2940,7 +2977,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares, for (i = 0; i < nspares; i++) { name = zpool_vdev_name(g_zfs, zhp, spares[i]); print_status_config(zhp, name, spares[i], - namewidth, 2, B_TRUE, B_FALSE); + namewidth, 2, B_TRUE); free(name); } } @@ -2960,7 +2997,7 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache, for (i = 0; i < nl2cache; i++) { name = zpool_vdev_name(g_zfs, zhp, l2cache[i]); print_status_config(zhp, name, l2cache[i], - namewidth, 2, B_FALSE, B_FALSE); + namewidth, 2, B_FALSE); free(name); } } @@ -3190,11 +3227,10 @@ status_callback(zpool_handle_t *zhp, void *data) (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth, "NAME", "STATE", "READ", "WRITE", "CKSUM"); print_status_config(zhp, zpool_get_name(zhp), nvroot, - namewidth, 0, B_FALSE, B_FALSE); - if (num_logs(nvroot) > 0) - print_status_config(zhp, "logs", nvroot, namewidth, 0, - B_FALSE, B_TRUE); + namewidth, 0, B_FALSE); + if (num_logs(nvroot) > 0) + print_logs(zhp, nvroot, namewidth, B_TRUE); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) print_l2cache(zhp, l2cache, nl2cache, namewidth); @@ -3418,7 +3454,7 @@ zpool_do_upgrade(int argc, char **argv) /* check options */ - while ((c = getopt(argc, argv, "avV:")) != -1) { + while ((c = getopt(argc, argv, ":avV:")) != -1) { switch (c) { case 'a': cb.cb_all = B_TRUE; @@ -3435,6 +3471,11 @@ zpool_do_upgrade(int argc, char **argv) usage(B_FALSE); } break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -3495,8 +3536,9 @@ zpool_do_upgrade(int argc, char **argv) (void) printf(gettext(" 11 Improved scrub performance\n")); (void) printf(gettext(" 12 Snapshot properties\n")); (void) printf(gettext(" 13 snapused property\n")); - (void) printf(gettext(" 14 passthrough-x aclinherit " - "support\n")); + (void) printf(gettext(" 14 passthrough-x aclinherit\n")); + (void) printf(gettext(" 15 user/group space accounting\n")); + (void) printf(gettext(" 16 stmf property support\n")); (void) printf(gettext("For more information on a particular " "version, including supported releases, see:\n\n")); (void) printf("http://www.opensolaris.org/os/community/zfs/" diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 4503a3d02..746db0c07 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,7 @@ #include #include #include +#include #include #include #include @@ -162,6 +164,7 @@ typedef void ztest_func_t(ztest_args_t *); * Note: these aren't static because we want dladdr() to work. */ ztest_func_t ztest_dmu_read_write; +ztest_func_t ztest_dmu_read_write_zcopy; ztest_func_t ztest_dmu_write_parallel; ztest_func_t ztest_dmu_object_alloc_free; ztest_func_t ztest_zap; @@ -170,6 +173,7 @@ ztest_func_t ztest_traverse; ztest_func_t ztest_dsl_prop_get_set; ztest_func_t ztest_dmu_objset_create_destroy; ztest_func_t ztest_dmu_snapshot_create_destroy; +ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_spa_create_destroy; ztest_func_t ztest_fault_inject; ztest_func_t ztest_spa_rename; @@ -196,6 +200,7 @@ uint64_t zopt_rarely = 60; /* every 60 seconds */ ztest_info_t ztest_info[] = { { ztest_dmu_read_write, 1, &zopt_always }, + { ztest_dmu_read_write_zcopy, 1, &zopt_always }, { ztest_dmu_write_parallel, 30, &zopt_always }, { ztest_dmu_object_alloc_free, 1, &zopt_always }, { ztest_zap, 30, &zopt_always }, @@ -208,6 +213,7 @@ ztest_info_t ztest_info[] = { { ztest_spa_rename, 1, &zopt_rarely }, { ztest_vdev_attach_detach, 1, &zopt_rarely }, { ztest_vdev_LUN_growth, 1, &zopt_rarely }, + { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, { ztest_vdev_add_remove, 1, &zopt_vdevtime }, { ztest_vdev_aux_add_remove, 1, &zopt_vdevtime }, { ztest_scrub, 1, &zopt_vdevtime }, @@ -242,9 +248,11 @@ static ztest_shared_t *ztest_shared; static int ztest_random_fd; static int ztest_dump_core = 1; +static uint64_t metaslab_sz; static boolean_t ztest_exiting; extern uint64_t metaslab_gang_bang; +extern uint64_t metaslab_df_alloc_threshold; #define ZTEST_DIROBJ 1 #define ZTEST_MICROZAP_OBJ 2 @@ -946,7 +954,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za) * of devices that have pending state changes. */ if (ztest_random(2) == 0) - (void) vdev_online(spa, guid, B_FALSE, NULL); + (void) vdev_online(spa, guid, 0, NULL); error = spa_vdev_remove(spa, guid, B_FALSE); if (error != 0 && error != EBUSY) @@ -1024,7 +1032,7 @@ ztest_vdev_attach_detach(ztest_args_t *za) } oldguid = oldvd->vdev_guid; - oldsize = vdev_get_rsize(oldvd); + oldsize = vdev_get_min_asize(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; (void) strcpy(oldpath, oldvd->vdev_path); pvd = oldvd->vdev_parent; @@ -1060,7 +1068,7 @@ ztest_vdev_attach_detach(ztest_args_t *za) } if (newvd) { - newsize = vdev_get_rsize(newvd); + newsize = vdev_get_min_asize(newvd); } else { /* * Make newsize a little bigger or smaller than oldsize. @@ -1135,6 +1143,95 @@ ztest_vdev_attach_detach(ztest_args_t *za) (void) mutex_unlock(&ztest_shared->zs_vdev_lock); } +/* + * Callback function which expands the physical size of the vdev. + */ +vdev_t * +grow_vdev(vdev_t *vd, void *arg) +{ + spa_t *spa = vd->vdev_spa; + size_t *newsize = arg; + size_t fsize; + int fd; + + ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); + ASSERT(vd->vdev_ops->vdev_op_leaf); + + if ((fd = open(vd->vdev_path, O_RDWR)) == -1) + return (vd); + + fsize = lseek(fd, 0, SEEK_END); + (void) ftruncate(fd, *newsize); + + if (zopt_verbose >= 6) { + (void) printf("%s grew from %lu to %lu bytes\n", + vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); + } + (void) close(fd); + return (NULL); +} + +/* + * Callback function which expands a given vdev by calling vdev_online(). + */ +/* ARGSUSED */ +vdev_t * +online_vdev(vdev_t *vd, void *arg) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *tvd = vd->vdev_top; + vdev_t *pvd = vd->vdev_parent; + uint64_t guid = vd->vdev_guid; + + ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); + ASSERT(vd->vdev_ops->vdev_op_leaf); + + /* Calling vdev_online will initialize the new metaslabs */ + spa_config_exit(spa, SCL_STATE, spa); + (void) vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL); + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + + /* + * Since we dropped the lock we need to ensure that we're + * still talking to the original vdev. It's possible this + * vdev may have been detached/replaced while we were + * trying to online it. + */ + if (vd != vdev_lookup_by_guid(tvd, guid) || vd->vdev_parent != pvd) { + if (zopt_verbose >= 6) { + (void) printf("vdev %p has disappeared, was " + "guid %llu\n", (void *)vd, (u_longlong_t)guid); + } + return (vd); + } + return (NULL); +} + +/* + * Traverse the vdev tree calling the supplied function. + * We continue to walk the tree until we either have walked all + * children or we receive a non-NULL return from the callback. + * If a NULL callback is passed, then we just return back the first + * leaf vdev we encounter. + */ +vdev_t * +vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) +{ + if (vd->vdev_ops->vdev_op_leaf) { + if (func == NULL) + return (vd); + else + return (func(vd, arg)); + } + + for (uint_t c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) + return (cvd); + } + return (NULL); +} + /* * Verify that dynamic LUN growth works as expected. */ @@ -1142,43 +1239,107 @@ void ztest_vdev_LUN_growth(ztest_args_t *za) { spa_t *spa = za->za_spa; - char dev_name[MAXPATHLEN]; - uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; - uint64_t vdev; - size_t fsize; - int fd; + vdev_t *vd, *tvd = NULL; + size_t psize, newsize; + uint64_t spa_newsize, spa_cursize, ms_count; (void) mutex_lock(&ztest_shared->zs_vdev_lock); + mutex_enter(&spa_namespace_lock); + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + + while (tvd == NULL || tvd->vdev_islog) { + uint64_t vdev; + + vdev = ztest_random(spa->spa_root_vdev->vdev_children); + tvd = spa->spa_root_vdev->vdev_child[vdev]; + } /* - * Pick a random leaf vdev. + * Determine the size of the first leaf vdev associated with + * our top-level device. */ - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves); - spa_config_exit(spa, SCL_VDEV, FTAG); + vd = vdev_walk_tree(tvd, NULL, NULL); + ASSERT3P(vd, !=, NULL); + ASSERT(vd->vdev_ops->vdev_op_leaf); - (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); + psize = vd->vdev_psize; - if ((fd = open(dev_name, O_RDWR)) != -1) { - /* - * Determine the size. - */ - fsize = lseek(fd, 0, SEEK_END); + /* + * We only try to expand the vdev if it's less than 4x its + * original size and it has a valid psize. + */ + if (psize == 0 || psize >= 4 * zopt_vdev_size) { + spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + return; + } + ASSERT(psize > 0); + newsize = psize + psize / 8; + ASSERT3U(newsize, >, psize); - /* - * If it's less than 2x the original size, grow by around 3%. - */ - if (fsize < 2 * zopt_vdev_size) { - size_t newsize = fsize + ztest_random(fsize / 32); - (void) ftruncate(fd, newsize); - if (zopt_verbose >= 6) { - (void) printf("%s grew from %lu to %lu bytes\n", - dev_name, (ulong_t)fsize, (ulong_t)newsize); - } + if (zopt_verbose >= 6) { + (void) printf("Expanding vdev %s from %lu to %lu\n", + vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); + } + + spa_cursize = spa_get_space(spa); + ms_count = tvd->vdev_ms_count; + + /* + * Growing the vdev is a two step process: + * 1). expand the physical size (i.e. relabel) + * 2). online the vdev to create the new metaslabs + */ + if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || + vdev_walk_tree(tvd, online_vdev, NULL) != NULL || + tvd->vdev_state != VDEV_STATE_HEALTHY) { + if (zopt_verbose >= 5) { + (void) printf("Could not expand LUN because " + "some vdevs were not healthy\n"); } - (void) close(fd); + (void) spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + return; } + (void) spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + + /* + * Expanding the LUN will update the config asynchronously, + * thus we must wait for the async thread to complete any + * pending tasks before proceeding. + */ + mutex_enter(&spa->spa_async_lock); + while (spa->spa_async_thread != NULL || spa->spa_async_tasks) + cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); + mutex_exit(&spa->spa_async_lock); + + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + spa_newsize = spa_get_space(spa); + + /* + * Make sure we were able to grow the pool. + */ + if (ms_count >= tvd->vdev_ms_count || + spa_cursize >= spa_newsize) { + (void) printf("Top-level vdev metaslab count: " + "before %llu, after %llu\n", + (u_longlong_t)ms_count, + (u_longlong_t)tvd->vdev_ms_count); + fatal(0, "LUN expansion failed: before %llu, " + "after %llu\n", spa_cursize, spa_newsize); + } else if (zopt_verbose >= 5) { + char oldnumbuf[6], newnumbuf[6]; + + nicenum(spa_cursize, oldnumbuf); + nicenum(spa_newsize, newnumbuf); + (void) printf("%s grew from %s to %s\n", + spa->spa_name, oldnumbuf, newnumbuf); + } + spa_config_exit(spa, SCL_STATE, spa); (void) mutex_unlock(&ztest_shared->zs_vdev_lock); } @@ -1425,7 +1586,8 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za) error = dmu_objset_destroy(snapname); if (error != 0 && error != ENOENT) fatal(0, "dmu_objset_destroy() = %d", error); - error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE); + error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, + NULL, FALSE); if (error == ENOSPC) ztest_record_enospc("dmu_take_snapshot"); else if (error != 0 && error != EEXIST) @@ -1433,6 +1595,148 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za) (void) rw_unlock(&ztest_shared->zs_name_lock); } +/* + * Cleanup non-standard snapshots and clones. + */ +void +ztest_dsl_dataset_cleanup(char *osname, uint64_t curval) +{ + char snap1name[100]; + char clone1name[100]; + char snap2name[100]; + char clone2name[100]; + char snap3name[100]; + int error; + + (void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval); + (void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval); + (void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval); + (void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval); + (void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval); + + error = dmu_objset_destroy(clone2name); + if (error && error != ENOENT) + fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); + error = dmu_objset_destroy(snap3name); + if (error && error != ENOENT) + fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); + error = dmu_objset_destroy(snap2name); + if (error && error != ENOENT) + fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); + error = dmu_objset_destroy(clone1name); + if (error && error != ENOENT) + fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error); + error = dmu_objset_destroy(snap1name); + if (error && error != ENOENT) + fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); +} + +/* + * Verify dsl_dataset_promote handles EBUSY + */ +void +ztest_dsl_dataset_promote_busy(ztest_args_t *za) +{ + int error; + objset_t *os = za->za_os; + objset_t *clone; + dsl_dataset_t *ds; + char snap1name[100]; + char clone1name[100]; + char snap2name[100]; + char clone2name[100]; + char snap3name[100]; + char osname[MAXNAMELEN]; + uint64_t curval = za->za_instance; + + (void) rw_rdlock(&ztest_shared->zs_name_lock); + + dmu_objset_name(os, osname); + ztest_dsl_dataset_cleanup(osname, curval); + + (void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval); + (void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval); + (void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval); + (void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval); + (void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval); + + error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, + NULL, FALSE); + if (error && error != EEXIST) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_take_snapshot"); + goto out; + } + fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); + } + + error = dmu_objset_open(snap1name, DMU_OST_OTHER, + DS_MODE_USER | DS_MODE_READONLY, &clone); + if (error) + fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); + + error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0, + NULL, NULL); + dmu_objset_close(clone); + if (error) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_objset_create"); + goto out; + } + fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); + } + + error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, + NULL, FALSE); + if (error && error != EEXIST) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_take_snapshot"); + goto out; + } + fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); + } + + error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, + NULL, FALSE); + if (error && error != EEXIST) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_take_snapshot"); + goto out; + } + fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); + } + + error = dmu_objset_open(snap3name, DMU_OST_OTHER, + DS_MODE_USER | DS_MODE_READONLY, &clone); + if (error) + fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); + + error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0, + NULL, NULL); + dmu_objset_close(clone); + if (error) { + if (error == ENOSPC) { + ztest_record_enospc("dmu_objset_create"); + goto out; + } + fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); + } + + error = dsl_dataset_own(snap1name, DS_MODE_READONLY, FTAG, &ds); + if (error) + fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error); + error = dsl_dataset_promote(clone2name); + if (error != EBUSY) + fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, + error); + dsl_dataset_disown(ds, FTAG); + +out: + ztest_dsl_dataset_cleanup(osname, curval); + + (void) rw_unlock(&ztest_shared->zs_name_lock); +} + /* * Verify that dmu_object_{alloc,free} work as expected. */ @@ -1456,7 +1760,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za) * Create a batch object if necessary, and record it in the directory. */ VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff, - sizeof (uint64_t), &batchobj)); + sizeof (uint64_t), &batchobj, DMU_READ_PREFETCH)); if (batchobj == 0) { tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, @@ -1481,7 +1785,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za) */ for (b = 0; b < batchsize; b++) { VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t), - sizeof (uint64_t), &object)); + sizeof (uint64_t), &object, DMU_READ_PREFETCH)); if (object == 0) continue; /* @@ -1516,7 +1820,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za) * We expect the word at endoff to be our object number. */ VERIFY(0 == dmu_read(os, object, endoff, - sizeof (uint64_t), &temp)); + sizeof (uint64_t), &temp, DMU_READ_PREFETCH)); if (temp != object) { fatal(0, "bad data in %s, got %llu, expected %llu", @@ -1701,7 +2005,7 @@ ztest_dmu_read_write(ztest_args_t *za) * Read the directory info. If it's the first time, set things up. */ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff, - sizeof (dd), &dd)); + sizeof (dd), &dd, DMU_READ_PREFETCH)); if (dd.dd_chunk == 0) { ASSERT(dd.dd_packobj == 0); ASSERT(dd.dd_bigobj == 0); @@ -1763,9 +2067,11 @@ ztest_dmu_read_write(ztest_args_t *za) /* * Read the current contents of our objects. */ - error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf); + error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf, + DMU_READ_PREFETCH); ASSERT3U(error, ==, 0); - error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf); + error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, + DMU_READ_PREFETCH); ASSERT3U(error, ==, 0); /* @@ -1871,9 +2177,9 @@ ztest_dmu_read_write(ztest_args_t *za) void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff, - packsize, packcheck)); + packsize, packcheck, DMU_READ_PREFETCH)); VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff, - bigsize, bigcheck)); + bigsize, bigcheck, DMU_READ_PREFETCH)); ASSERT(bcmp(packbuf, packcheck, packsize) == 0); ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); @@ -1886,6 +2192,314 @@ ztest_dmu_read_write(ztest_args_t *za) umem_free(bigbuf, bigsize); } +void +compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, + uint64_t bigsize, uint64_t n, dmu_read_write_dir_t dd, uint64_t txg) +{ + uint64_t i; + bufwad_t *pack; + bufwad_t *bigH; + bufwad_t *bigT; + + /* + * For each index from n to n + s, verify that the existing bufwad + * in packobj matches the bufwads at the head and tail of the + * corresponding chunk in bigobj. Then update all three bufwads + * with the new values we want to write out. + */ + for (i = 0; i < s; i++) { + /* LINTED */ + pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); + /* LINTED */ + bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk); + /* LINTED */ + bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1; + + ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); + ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); + + if (pack->bw_txg > txg) + fatal(0, "future leak: got %llx, open txg is %llx", + pack->bw_txg, txg); + + if (pack->bw_data != 0 && pack->bw_index != n + i) + fatal(0, "wrong index: got %llx, wanted %llx+%llx", + pack->bw_index, n, i); + + if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) + fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); + + if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) + fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); + + pack->bw_index = n + i; + pack->bw_txg = txg; + pack->bw_data = 1 + ztest_random(-2ULL); + + *bigH = *pack; + *bigT = *pack; + } +} + +void +ztest_dmu_read_write_zcopy(ztest_args_t *za) +{ + objset_t *os = za->za_os; + dmu_read_write_dir_t dd; + dmu_tx_t *tx; + uint64_t i; + int error; + uint64_t n, s, txg; + bufwad_t *packbuf, *bigbuf; + uint64_t packoff, packsize, bigoff, bigsize; + uint64_t regions = 997; + uint64_t stride = 123456789ULL; + uint64_t width = 9; + dmu_buf_t *bonus_db; + arc_buf_t **bigbuf_arcbufs; + dmu_object_info_t *doi = &za->za_doi; + + /* + * This test uses two objects, packobj and bigobj, that are always + * updated together (i.e. in the same tx) so that their contents are + * in sync and can be compared. Their contents relate to each other + * in a simple way: packobj is a dense array of 'bufwad' structures, + * while bigobj is a sparse array of the same bufwads. Specifically, + * for any index n, there are three bufwads that should be identical: + * + * packobj, at offset n * sizeof (bufwad_t) + * bigobj, at the head of the nth chunk + * bigobj, at the tail of the nth chunk + * + * The chunk size is set equal to bigobj block size so that + * dmu_assign_arcbuf() can be tested for object updates. + */ + + /* + * Read the directory info. If it's the first time, set things up. + */ + VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff, + sizeof (dd), &dd, DMU_READ_PREFETCH)); + if (dd.dd_chunk == 0) { + ASSERT(dd.dd_packobj == 0); + ASSERT(dd.dd_bigobj == 0); + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd)); + dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + ztest_record_enospc("create r/w directory"); + dmu_tx_abort(tx); + return; + } + + dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, + DMU_OT_NONE, 0, tx); + ztest_set_random_blocksize(os, dd.dd_packobj, tx); + ztest_set_random_blocksize(os, dd.dd_bigobj, tx); + + VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0); + ASSERT(doi->doi_data_block_size >= 2 * sizeof (bufwad_t)); + ASSERT(ISP2(doi->doi_data_block_size)); + dd.dd_chunk = doi->doi_data_block_size; + + dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd, + tx); + dmu_tx_commit(tx); + } else { + VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0); + VERIFY(ISP2(doi->doi_data_block_size)); + VERIFY(dd.dd_chunk == doi->doi_data_block_size); + VERIFY(dd.dd_chunk >= 2 * sizeof (bufwad_t)); + } + + /* + * Pick a random index and compute the offsets into packobj and bigobj. + */ + n = ztest_random(regions) * stride + ztest_random(width); + s = 1 + ztest_random(width - 1); + + packoff = n * sizeof (bufwad_t); + packsize = s * sizeof (bufwad_t); + + bigoff = n * dd.dd_chunk; + bigsize = s * dd.dd_chunk; + + packbuf = umem_zalloc(packsize, UMEM_NOFAIL); + bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); + + VERIFY(dmu_bonus_hold(os, dd.dd_bigobj, FTAG, &bonus_db) == 0); + + bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); + + /* + * Iteration 0 test zcopy for DB_UNCACHED dbufs. + * Iteration 1 test zcopy to already referenced dbufs. + * Iteration 2 test zcopy to dirty dbuf in the same txg. + * Iteration 3 test zcopy to dbuf dirty in previous txg. + * Iteration 4 test zcopy when dbuf is no longer dirty. + * Iteration 5 test zcopy when it can't be done. + * Iteration 6 one more zcopy write. + */ + for (i = 0; i < 7; i++) { + uint64_t j; + uint64_t off; + + /* + * In iteration 5 (i == 5) use arcbufs + * that don't match bigobj blksz to test + * dmu_assign_arcbuf() when it can't directly + * assign an arcbuf to a dbuf. + */ + for (j = 0; j < s; j++) { + if (i != 5) { + bigbuf_arcbufs[j] = + dmu_request_arcbuf(bonus_db, + dd.dd_chunk); + } else { + bigbuf_arcbufs[2 * j] = + dmu_request_arcbuf(bonus_db, + dd.dd_chunk / 2); + bigbuf_arcbufs[2 * j + 1] = + dmu_request_arcbuf(bonus_db, + dd.dd_chunk / 2); + } + } + + /* + * Get a tx for the mods to both packobj and bigobj. + */ + tx = dmu_tx_create(os); + + dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize); + dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize); + + if (ztest_random(100) == 0) { + error = -1; + } else { + error = dmu_tx_assign(tx, TXG_WAIT); + } + + if (error) { + if (error != -1) { + ztest_record_enospc("dmu r/w range"); + } + dmu_tx_abort(tx); + umem_free(packbuf, packsize); + umem_free(bigbuf, bigsize); + for (j = 0; j < s; j++) { + if (i != 5) { + dmu_return_arcbuf(bigbuf_arcbufs[j]); + } else { + dmu_return_arcbuf( + bigbuf_arcbufs[2 * j]); + dmu_return_arcbuf( + bigbuf_arcbufs[2 * j + 1]); + } + } + umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); + dmu_buf_rele(bonus_db, FTAG); + return; + } + + txg = dmu_tx_get_txg(tx); + + /* + * 50% of the time don't read objects in the 1st iteration to + * test dmu_assign_arcbuf() for the case when there're no + * existing dbufs for the specified offsets. + */ + if (i != 0 || ztest_random(2) != 0) { + error = dmu_read(os, dd.dd_packobj, packoff, + packsize, packbuf, DMU_READ_PREFETCH); + ASSERT3U(error, ==, 0); + error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, + bigbuf, DMU_READ_PREFETCH); + ASSERT3U(error, ==, 0); + } + compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, + n, dd, txg); + + /* + * We've verified all the old bufwads, and made new ones. + * Now write them out. + */ + dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx); + if (zopt_verbose >= 6) { + (void) printf("writing offset %llx size %llx" + " txg %llx\n", + (u_longlong_t)bigoff, + (u_longlong_t)bigsize, + (u_longlong_t)txg); + } + for (off = bigoff, j = 0; j < s; j++, off += dd.dd_chunk) { + dmu_buf_t *dbt; + if (i != 5) { + bcopy((caddr_t)bigbuf + (off - bigoff), + bigbuf_arcbufs[j]->b_data, dd.dd_chunk); + } else { + bcopy((caddr_t)bigbuf + (off - bigoff), + bigbuf_arcbufs[2 * j]->b_data, + dd.dd_chunk / 2); + bcopy((caddr_t)bigbuf + (off - bigoff) + + dd.dd_chunk / 2, + bigbuf_arcbufs[2 * j + 1]->b_data, + dd.dd_chunk / 2); + } + + if (i == 1) { + VERIFY(dmu_buf_hold(os, dd.dd_bigobj, off, + FTAG, &dbt) == 0); + } + if (i != 5) { + dmu_assign_arcbuf(bonus_db, off, + bigbuf_arcbufs[j], tx); + } else { + dmu_assign_arcbuf(bonus_db, off, + bigbuf_arcbufs[2 * j], tx); + dmu_assign_arcbuf(bonus_db, + off + dd.dd_chunk / 2, + bigbuf_arcbufs[2 * j + 1], tx); + } + if (i == 1) { + dmu_buf_rele(dbt, FTAG); + } + } + dmu_tx_commit(tx); + + /* + * Sanity check the stuff we just wrote. + */ + { + void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); + void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); + + VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff, + packsize, packcheck, DMU_READ_PREFETCH)); + VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff, + bigsize, bigcheck, DMU_READ_PREFETCH)); + + ASSERT(bcmp(packbuf, packcheck, packsize) == 0); + ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); + + umem_free(packcheck, packsize); + umem_free(bigcheck, bigsize); + } + if (i == 2) { + txg_wait_open(dmu_objset_pool(os), 0); + } else if (i == 3) { + txg_wait_synced(dmu_objset_pool(os), 0); + } + } + + dmu_buf_rele(bonus_db, FTAG); + umem_free(packbuf, packsize); + umem_free(bigbuf, bigsize); + umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); +} + void ztest_dmu_check_future_leak(ztest_args_t *za) { @@ -1935,6 +2549,8 @@ ztest_dmu_write_parallel(ztest_args_t *za) uint64_t blkoff; zbookmark_t zb; dmu_tx_t *tx = dmu_tx_create(os); + dmu_buf_t *bonus_db; + arc_buf_t *abuf = NULL; dmu_objset_name(os, osname); @@ -1963,6 +2579,12 @@ ztest_dmu_write_parallel(ztest_args_t *za) } } + if (off != -1ULL && P2PHASE(off, bs) == 0 && !do_free && + ztest_random(8) == 0) { + VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &bonus_db) == 0); + abuf = dmu_request_arcbuf(bonus_db, bs); + } + txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT; error = dmu_tx_assign(tx, txg_how); if (error) { @@ -1973,6 +2595,10 @@ ztest_dmu_write_parallel(ztest_args_t *za) ztest_record_enospc("dmu write parallel"); } dmu_tx_abort(tx); + if (abuf != NULL) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus_db, FTAG); + } return; } txg = dmu_tx_get_txg(tx); @@ -2027,8 +2653,12 @@ ztest_dmu_write_parallel(ztest_args_t *za) za->za_dbuf = NULL; } else if (do_free) { VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0); - } else { + } else if (abuf == NULL) { dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx); + } else { + bcopy(wbt, abuf->b_data, btsize); + dmu_assign_arcbuf(bonus_db, off, abuf, tx); + dmu_buf_rele(bonus_db, FTAG); } (void) mutex_unlock(lp); @@ -2064,16 +2694,20 @@ ztest_dmu_write_parallel(ztest_args_t *za) dmu_buf_rele(db, FTAG); za->za_dbuf = NULL; - (void) mutex_unlock(lp); - - if (error) + if (error) { + (void) mutex_unlock(lp); return; + } - if (blk.blk_birth == 0) /* concurrent free */ + if (blk.blk_birth == 0) { /* concurrent free */ + (void) mutex_unlock(lp); return; + } txg_suspend(dmu_objset_pool(os)); + (void) mutex_unlock(lp); + ASSERT(blk.blk_fill == 1); ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER); ASSERT3U(BP_GET_LEVEL(&blk), ==, 0); @@ -2146,7 +2780,7 @@ ztest_zap(ztest_args_t *za) * Create a new object if necessary, and record it in the directory. */ VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff, - sizeof (uint64_t), &object)); + sizeof (uint64_t), &object, DMU_READ_PREFETCH)); if (object == 0) { tx = dmu_tx_create(os); @@ -2799,7 +3433,7 @@ ztest_verify_blocks(char *pool) isa = strdup(isa); /* LINTED */ (void) sprintf(bin, - "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s", + "/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s", isalen, isa, zopt_verbose >= 3 ? "s" : "", @@ -2944,7 +3578,7 @@ ztest_resume(spa_t *spa) spa_vdev_state_enter(spa); vdev_clear(spa, NULL); (void) spa_vdev_state_exit(spa, NULL, 0); - zio_resume(spa); + (void) zio_resume(spa); } } @@ -3216,6 +3850,10 @@ ztest_run(char *pool) (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d); if (zopt_verbose >= 3) (void) printf("Destroying %s to free up space\n", name); + + /* Cleanup any non-standard clones and snapshots */ + ztest_dsl_dataset_cleanup(name, za[d].za_instance); + (void) dmu_objset_find(name, ztest_destroy_cb, &za[d], DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); (void) rw_unlock(&ztest_shared->zs_name_lock); @@ -3296,6 +3934,8 @@ ztest_init(char *pool) if (error) fatal(0, "spa_open() = %d", error); + metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + if (zopt_verbose >= 3) show_pool_stats(spa); @@ -3387,6 +4027,9 @@ main(int argc, char **argv) zi->zi_call_time = 0; } + /* Set the allocation switch size */ + metaslab_df_alloc_threshold = ztest_random(metaslab_sz / 4) + 1; + pid = fork(); if (pid == -1) -- cgit v1.2.3