diff options
author | Brian Behlendorf <[email protected]> | 2011-08-01 12:10:54 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-08-01 12:10:58 -0700 |
commit | 77999e804fff35782ab4b578d2cecf064c54a841 (patch) | |
tree | 20c50c86e64ab200b8d3ac8035bff2a9d086489c | |
parent | bfb73f92773dc3471e3a6274970c02e0700b6c7d (diff) | |
parent | cddafdcbc55a38cdbdd3dc8c58f447b22bd847ee (diff) |
Merge branch 'illumos'
Merge in ten upstream fixes which have already been made to both
the Illumos and FreeBSD ZFS implementations. This brings us up
to date with the latest ZFS changes in Illumos.
Credit goes to Martin Matuska of the FreeBSD project for posting
an excellent summary of the upstream patches we were missing.
Illumos #1313: Integer overflow in txg_delay()
Illumos #278: get rid zfs of python and pyzfs dependencies
Illumos #1043: Recursive zfs snapshot destroy fails
Illumos #883: ZIL reuse during remount corruption
Illumos #1092: zfs refratio property
Illumos #1051: zfs should handle
Illumos #510: 'zfs get' enhancement - mountpoint as an argument
Illumos #175: zfs vdev cache consumes excessive memory
Illumos #764: panic in zfs:dbuf_sync_list
Illumos #xxx: zdb -vvv broken after zfs diff integration
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #340
-rw-r--r-- | cmd/zfs/zfs_main.c | 2274 | ||||
-rw-r--r-- | cmd/ztest/ztest.c | 38 | ||||
-rw-r--r-- | include/libzfs.h | 9 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 2 | ||||
-rw-r--r-- | include/sys/metaslab.h | 3 | ||||
-rw-r--r-- | include/sys/metaslab_impl.h | 2 | ||||
-rw-r--r-- | include/sys/spa.h | 8 | ||||
-rw-r--r-- | include/sys/spa_impl.h | 2 | ||||
-rw-r--r-- | include/zfs_deleg.h | 2 | ||||
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 191 | ||||
-rw-r--r-- | man/man8/zfs.8 | 17 | ||||
-rw-r--r-- | module/zcommon/zfs_deleg.c | 3 | ||||
-rw-r--r-- | module/zcommon/zfs_prop.c | 4 | ||||
-rw-r--r-- | module/zfs/dbuf.c | 20 | ||||
-rw-r--r-- | module/zfs/dsl_dataset.c | 13 | ||||
-rw-r--r-- | module/zfs/metaslab.c | 105 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 7 | ||||
-rw-r--r-- | module/zfs/txg.c | 2 | ||||
-rw-r--r-- | module/zfs/vdev_cache.c | 9 | ||||
-rw-r--r-- | module/zfs/zfs_ioctl.c | 5 | ||||
-rw-r--r-- | module/zfs/zfs_znode.c | 28 | ||||
-rw-r--r-- | module/zfs/zil.c | 40 | ||||
-rw-r--r-- | module/zfs/zio.c | 22 |
23 files changed, 2700 insertions, 106 deletions
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index b6dc6d49d..54d057b1e 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include <assert.h> @@ -41,6 +42,7 @@ #include <grp.h> #include <pwd.h> #include <signal.h> +#include <sys/list.h> #include <sys/mkdev.h> #include <sys/mntent.h> #include <sys/mnttab.h> @@ -51,7 +53,13 @@ #include <time.h> #include <libzfs.h> +#include <zfs_prop.h> +#include <zfs_deleg.h> #include <libuutil.h> +#ifdef HAVE_IDMAP +#include <aclutils.h> +#include <directory.h> +#endif /* HAVE_IDMAP */ #include "zfs_iter.h" #include "zfs_util.h" @@ -61,7 +69,6 @@ libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; -const char *pypath = "/usr/lib/zfs/pyzfs.py"; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -82,8 +89,10 @@ static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); -static int zfs_do_python(int argc, char **argv); +static int zfs_do_allow(int argc, char **argv); +static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); +static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); @@ -176,12 +185,12 @@ static zfs_command_t command_table[] = { { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, - { "allow", zfs_do_python, HELP_ALLOW }, + { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, - { "unallow", zfs_do_python, HELP_UNALLOW }, + { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, - { "holds", zfs_do_python, HELP_HOLDS }, + { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, }; @@ -1274,7 +1283,7 @@ static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; - int i, c, flags = 0; + int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; char *value, *fields; int ret; int limit = 0; @@ -1824,71 +1833,731 @@ zfs_do_upgrade(int argc, char **argv) return (ret); } +#define USTYPE_USR_BIT (0) +#define USTYPE_GRP_BIT (1) +#define USTYPE_PSX_BIT (2) +#define USTYPE_SMB_BIT (3) + +#define USTYPE_USR (1 << USTYPE_USR_BIT) +#define USTYPE_GRP (1 << USTYPE_GRP_BIT) + +#define USTYPE_PSX (1 << USTYPE_PSX_BIT) +#define USTYPE_SMB (1 << USTYPE_SMB_BIT) + +#define USTYPE_PSX_USR (USTYPE_PSX | USTYPE_USR) +#define USTYPE_SMB_USR (USTYPE_SMB | USTYPE_USR) +#define USTYPE_PSX_GRP (USTYPE_PSX | USTYPE_GRP) +#define USTYPE_SMB_GRP (USTYPE_SMB | USTYPE_GRP) +#define USTYPE_ALL (USTYPE_PSX_USR | USTYPE_SMB_USR \ + | USTYPE_PSX_GRP | USTYPE_SMB_GRP) + + +#define USPROP_USED_BIT (0) +#define USPROP_QUOTA_BIT (1) + +#define USPROP_USED (1 << USPROP_USED_BIT) +#define USPROP_QUOTA (1 << USPROP_QUOTA_BIT) + +typedef struct us_node { + nvlist_t *usn_nvl; + uu_avl_node_t usn_avlnode; + uu_list_node_t usn_listnode; +} us_node_t; + +typedef struct us_cbdata { + nvlist_t **cb_nvlp; + uu_avl_pool_t *cb_avl_pool; + uu_avl_t *cb_avl; + boolean_t cb_numname; + boolean_t cb_nicenum; + boolean_t cb_sid2posix; + zfs_userquota_prop_t cb_prop; + zfs_sort_column_t *cb_sortcol; + size_t cb_max_typelen; + size_t cb_max_namelen; + size_t cb_max_usedlen; + size_t cb_max_quotalen; +} us_cbdata_t; + +typedef struct { + zfs_sort_column_t *si_sortcol; + boolean_t si_num_name; + boolean_t si_parsable; +} us_sort_info_t; + +static int +us_compare(const void *larg, const void *rarg, void *unused) +{ + const us_node_t *l = larg; + const us_node_t *r = rarg; + int rc = 0; + us_sort_info_t *si = (us_sort_info_t *)unused; + zfs_sort_column_t *sortcol = si->si_sortcol; + boolean_t num_name = si->si_num_name; + nvlist_t *lnvl = l->usn_nvl; + nvlist_t *rnvl = r->usn_nvl; + + for (; sortcol != NULL; sortcol = sortcol->sc_next) { + char *lvstr = ""; + char *rvstr = ""; + uint32_t lv32 = 0; + uint32_t rv32 = 0; + uint64_t lv64 = 0; + uint64_t rv64 = 0; + zfs_prop_t prop = sortcol->sc_prop; + const char *propname = NULL; + boolean_t reverse = sortcol->sc_reverse; + + switch (prop) { + case ZFS_PROP_TYPE: + propname = "type"; + (void) nvlist_lookup_uint32(lnvl, propname, &lv32); + (void) nvlist_lookup_uint32(rnvl, propname, &rv32); + if (rv32 != lv32) + rc = (rv32 > lv32) ? 1 : -1; + break; + case ZFS_PROP_NAME: + propname = "name"; + if (num_name) { + (void) nvlist_lookup_uint32(lnvl, propname, + &lv32); + (void) nvlist_lookup_uint32(rnvl, propname, + &rv32); + if (rv32 != lv32) + rc = (rv32 > lv32) ? 1 : -1; + } else { + (void) nvlist_lookup_string(lnvl, propname, + &lvstr); + (void) nvlist_lookup_string(rnvl, propname, + &rvstr); + rc = strcmp(lvstr, rvstr); + } + break; + + case ZFS_PROP_USED: + case ZFS_PROP_QUOTA: + if (ZFS_PROP_USED == prop) + propname = "used"; + else + propname = "quota"; + (void) nvlist_lookup_uint64(lnvl, propname, &lv64); + (void) nvlist_lookup_uint64(rnvl, propname, &rv64); + if (rv64 != lv64) + rc = (rv64 > lv64) ? 1 : -1; + default: + break; + } + + if (rc) { + if (rc < 0) + return (reverse ? 1 : -1); + else + return (reverse ? -1 : 1); + } + } + + return (rc); +} + +static inline const char * +us_type2str(unsigned field_type) +{ + switch (field_type) { + case USTYPE_PSX_USR: + return ("POSIX User"); + case USTYPE_PSX_GRP: + return ("POSIX Group"); + case USTYPE_SMB_USR: + return ("SMB User"); + case USTYPE_SMB_GRP: + return ("SMB Group"); + default: + return ("Undefined"); + } +} + /* * zfs userspace */ static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { - zfs_userquota_prop_t *typep = arg; - zfs_userquota_prop_t p = *typep; + us_cbdata_t *cb = (us_cbdata_t *)arg; + zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; - char *ug, *propname; + char *propname; char namebuf[32]; char sizebuf[32]; + us_node_t *node; + uu_avl_pool_t *avl_pool = cb->cb_avl_pool; + uu_avl_t *avl = cb->cb_avl; + uu_avl_index_t idx; + nvlist_t *props; + us_node_t *n; + zfs_sort_column_t *sortcol = cb->cb_sortcol; + unsigned type; + const char *typestr; + size_t namelen; + size_t typelen; + size_t sizelen; + us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; if (domain == NULL || domain[0] == '\0') { - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) { + /* POSIX */ + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_PSX_GRP; struct group *g = getgrgid(rid); if (g) name = g->gr_name; } else { + type = USTYPE_PSX_USR; struct passwd *p = getpwuid(rid); if (p) name = p->pw_name; } + } else { +#ifdef HAVE_IDMAP + char sid[ZFS_MAXNAMELEN+32]; + uid_t id; + uint64_t classes; + int err; + directory_error_t e; + + (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); + /* SMB */ + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_SMB_GRP; + err = sid_to_id(sid, B_FALSE, &id); + } else { + type = USTYPE_SMB_USR; + err = sid_to_id(sid, B_TRUE, &id); + } + + if (err == 0) { + rid = id; + + e = directory_name_from_sid(NULL, sid, &name, &classes); + if (e != NULL) { + directory_error_free(e); + return (NULL); + } + + if (name == NULL) + name = sid; + } +#else + return (-1); +#endif /* HAVE_IDMAP */ } - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) - ug = "group"; - else - ug = "user"; +/* + * if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) + * ug = "group"; + * else + * ug = "user"; + */ - if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED) + if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) propname = "used"; else propname = "quota"; - if (name == NULL) { - (void) snprintf(namebuf, sizeof (namebuf), - "%llu", (longlong_t)rid); + (void) snprintf(namebuf, sizeof (namebuf), "%u", rid); + if (name == NULL) name = namebuf; + + if (cb->cb_nicenum) + zfs_nicenum(space, sizebuf, sizeof (sizebuf)); + else + (void) sprintf(sizebuf, "%llu", (u_longlong_t)space); + + node = safe_malloc(sizeof (us_node_t)); + uu_avl_node_init(node, &node->usn_avlnode, avl_pool); + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { + free(node); + return (-1); + } + + if (nvlist_add_uint32(props, "type", type) != 0) + nomem(); + + if (cb->cb_numname) { + if (nvlist_add_uint32(props, "name", rid) != 0) + nomem(); + namelen = strlen(namebuf); + } else { + if (nvlist_add_string(props, "name", name) != 0) + nomem(); + namelen = strlen(name); + } + + typestr = us_type2str(type); + typelen = strlen(gettext(typestr)); + if (typelen > cb->cb_max_typelen) + cb->cb_max_typelen = typelen; + + if (namelen > cb->cb_max_namelen) + cb->cb_max_namelen = namelen; + + sizelen = strlen(sizebuf); + if (0 == strcmp(propname, "used")) { + if (sizelen > cb->cb_max_usedlen) + cb->cb_max_usedlen = sizelen; + } else { + if (sizelen > cb->cb_max_quotalen) + cb->cb_max_quotalen = sizelen; + } + + node->usn_nvl = props; + + n = uu_avl_find(avl, node, &sortinfo, &idx); + if (n == NULL) + uu_avl_insert(avl, node, idx); + else { + nvlist_free(props); + free(node); + node = n; + props = node->usn_nvl; } - zfs_nicenum(space, sizebuf, sizeof (sizebuf)); - (void) printf("%s %s %s%c%s %s\n", propname, ug, domain, - domain[0] ? '-' : ' ', name, sizebuf); + if (nvlist_add_uint64(props, propname, space) != 0) + nomem(); return (0); } +static inline boolean_t +usprop_check(zfs_userquota_prop_t p, unsigned types, unsigned props) +{ + unsigned type; + unsigned prop; + + switch (p) { + case ZFS_PROP_USERUSED: + type = USTYPE_USR; + prop = USPROP_USED; + break; + case ZFS_PROP_USERQUOTA: + type = USTYPE_USR; + prop = USPROP_QUOTA; + break; + case ZFS_PROP_GROUPUSED: + type = USTYPE_GRP; + prop = USPROP_USED; + break; + case ZFS_PROP_GROUPQUOTA: + type = USTYPE_GRP; + prop = USPROP_QUOTA; + break; + default: /* ALL */ + return (B_TRUE); + }; + + return (type & types && prop & props); +} + +#define USFIELD_TYPE (1 << 0) +#define USFIELD_NAME (1 << 1) +#define USFIELD_USED (1 << 2) +#define USFIELD_QUOTA (1 << 3) +#define USFIELD_ALL (USFIELD_TYPE | USFIELD_NAME | USFIELD_USED | USFIELD_QUOTA) + +static int +parsefields(unsigned *fieldsp, char **names, unsigned *bits, size_t len) +{ + char *field = optarg; + char *delim; + + do { + int i; + boolean_t found = B_FALSE; + delim = strchr(field, ','); + if (delim != NULL) + *delim = '\0'; + + for (i = 0; i < len; i++) + if (0 == strcmp(field, names[i])) { + found = B_TRUE; + *fieldsp |= bits[i]; + break; + } + + if (!found) { + (void) fprintf(stderr, gettext("invalid type '%s'" + "for -t option\n"), field); + return (-1); + } + + field = delim + 1; + } while (delim); + + return (0); +} + + +static char *type_names[] = { "posixuser", "smbuser", "posixgroup", "smbgroup", + "all" }; +static unsigned type_bits[] = { + USTYPE_PSX_USR, + USTYPE_SMB_USR, + USTYPE_PSX_GRP, + USTYPE_SMB_GRP, + USTYPE_ALL +}; + +static char *us_field_names[] = { "type", "name", "used", "quota" }; +static unsigned us_field_bits[] = { + USFIELD_TYPE, + USFIELD_NAME, + USFIELD_USED, + USFIELD_QUOTA +}; + +static void +print_us_node(boolean_t scripted, boolean_t parseable, unsigned fields, + size_t type_width, size_t name_width, size_t used_width, + size_t quota_width, us_node_t *node) +{ + nvlist_t *nvl = node->usn_nvl; + nvpair_t *nvp = NULL; + char valstr[ZFS_MAXNAMELEN]; + boolean_t first = B_TRUE; + boolean_t quota_found = B_FALSE; + + if (fields & USFIELD_QUOTA && !nvlist_exists(nvl, "quota")) + if (nvlist_add_string(nvl, "quota", "none") != 0) + nomem(); + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + char *pname = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + uint32_t val32 = 0; + uint64_t val64 = 0; + char *strval = NULL; + unsigned field = 0; + unsigned width = 0; + int i; + for (i = 0; i < 4; i++) { + if (0 == strcmp(pname, us_field_names[i])) { + field = us_field_bits[i]; + break; + } + } + + if (!(field & fields)) + continue; + + switch (type) { + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &val32); + break; + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &val64); + break; + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &strval); + break; + default: + (void) fprintf(stderr, "Invalid data type\n"); + } + + if (!first) { + if (scripted) + (void) printf("\t"); + else + (void) printf(" "); + } + + switch (field) { + case USFIELD_TYPE: + strval = (char *)us_type2str(val32); + width = type_width; + break; + case USFIELD_NAME: + if (type == DATA_TYPE_UINT64) { + (void) sprintf(valstr, "%llu", + (u_longlong_t) val64); + strval = valstr; + } + width = name_width; + break; + case USFIELD_USED: + case USFIELD_QUOTA: + if (type == DATA_TYPE_UINT64) { + (void) nvpair_value_uint64(nvp, &val64); + if (parseable) + (void) sprintf(valstr, "%llu", + (u_longlong_t) val64); + else + zfs_nicenum(val64, valstr, + sizeof (valstr)); + strval = valstr; + } + + if (field == USFIELD_USED) + width = used_width; + else { + quota_found = B_FALSE; + width = quota_width; + } + + break; + } + + if (field == USFIELD_QUOTA && !quota_found) + (void) printf("%*s", width, strval); + else { + if (type == DATA_TYPE_STRING) + (void) printf("%-*s", width, strval); + else + (void) printf("%*s", width, strval); + } + + first = B_FALSE; + + } + + (void) printf("\n"); +} + +static void +print_us(boolean_t scripted, boolean_t parsable, unsigned fields, + unsigned type_width, unsigned name_width, unsigned used_width, + unsigned quota_width, boolean_t rmnode, uu_avl_t *avl) +{ + static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; + us_node_t *node; + const char *col; + int i; + int width[4] = { type_width, name_width, used_width, quota_width }; + + if (!scripted) { + boolean_t first = B_TRUE; + for (i = 0; i < 4; i++) { + unsigned field = us_field_bits[i]; + if (!(field & fields)) + continue; + + col = gettext(us_field_hdr[i]); + if (field == USFIELD_TYPE || field == USFIELD_NAME) + (void) printf(first?"%-*s":" %-*s", width[i], + col); + else + (void) printf(first?"%*s":" %*s", width[i], + col); + first = B_FALSE; + } + (void) printf("\n"); + } + + for (node = uu_avl_first(avl); node != NULL; + node = uu_avl_next(avl, node)) { + print_us_node(scripted, parsable, fields, type_width, + name_width, used_width, used_width, node); + if (rmnode) + nvlist_free(node->usn_nvl); + } +} + static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; + uu_avl_pool_t *avl_pool; + uu_avl_t *avl_tree; + uu_avl_walk_t *walk; + + char *cmd; + boolean_t scripted = B_FALSE; + boolean_t prtnum = B_FALSE; + boolean_t parseable = B_FALSE; + boolean_t sid2posix = B_FALSE; int error; + int c; + zfs_sort_column_t *default_sortcol = NULL; + zfs_sort_column_t *sortcol = NULL; + unsigned types = USTYPE_PSX_USR | USTYPE_SMB_USR; + unsigned fields = 0; + unsigned props = USPROP_USED | USPROP_QUOTA; + us_cbdata_t cb; + us_node_t *node; + boolean_t resort_avl = B_FALSE; + + if (argc < 2) + usage(B_FALSE); + + cmd = argv[0]; + if (0 == strcmp(cmd, "groupspace")) + /* toggle default group types */ + types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; + + /* check options */ + while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { + switch (c) { + case 'n': + prtnum = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case 'p': + parseable = B_TRUE; + break; + case 'o': + if (parsefields(&fields, us_field_names, us_field_bits, + 4) != 0) + return (1); + break; + case 's': + if (zfs_add_sort_column(&sortcol, optarg, + B_FALSE) != 0) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case 'S': + if (zfs_add_sort_column(&sortcol, optarg, + B_TRUE) != 0) { + (void) fprintf(stderr, + gettext("invalid property '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case 't': + if (parsefields(&types, type_names, type_bits, 5)) + return (1); + break; + case 'i': + sid2posix = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* ok, now we have sorted by default colums (type,name) avl tree */ + if (sortcol) { + zfs_sort_column_t *sc; + for (sc = sortcol; sc; sc = sc->sc_next) { + if (sc->sc_prop == ZFS_PROP_QUOTA) { + resort_avl = B_TRUE; + break; + } + } + } + + if (!fields) + fields = USFIELD_ALL; if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL) return (1); - (void) printf("PROP TYPE NAME VALUE\n"); + if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), + offsetof(us_node_t, usn_avlnode), + us_compare, UU_DEFAULT)) == NULL) + nomem(); + if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) + nomem(); + + if (sortcol && !resort_avl) + cb.cb_sortcol = sortcol; + else { + (void) zfs_add_sort_column(&default_sortcol, "type", B_FALSE); + (void) zfs_add_sort_column(&default_sortcol, "name", B_FALSE); + cb.cb_sortcol = default_sortcol; + } + cb.cb_numname = prtnum; + cb.cb_nicenum = !parseable; + cb.cb_avl_pool = avl_pool; + cb.cb_avl = avl_tree; + cb.cb_sid2posix = sid2posix; + cb.cb_max_typelen = strlen(gettext("TYPE")); + cb.cb_max_namelen = strlen(gettext("NAME")); + cb.cb_max_usedlen = strlen(gettext("USED")); + cb.cb_max_quotalen = strlen(gettext("QUOTA")); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { - error = zfs_userspace(zhp, p, userspace_cb, &p); + if (!usprop_check(p, types, props)) + continue; + + cb.cb_prop = p; + error = zfs_userspace(zhp, p, userspace_cb, &cb); if (error) break; } + + + if (resort_avl) { + us_node_t *node; + us_node_t *rmnode; + uu_list_pool_t *listpool; + uu_list_t *list; + uu_avl_index_t idx = 0; + uu_list_index_t idx2 = 0; + listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), + offsetof(us_node_t, usn_listnode), NULL, + UU_DEFAULT); + list = uu_list_create(listpool, NULL, UU_DEFAULT); + + node = uu_avl_first(avl_tree); + uu_list_node_init(node, &node->usn_listnode, listpool); + while (node != NULL) { + rmnode = node; + node = uu_avl_next(avl_tree, node); + uu_avl_remove(avl_tree, rmnode); + if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) { + uu_list_insert(list, rmnode, idx2); + } + } + + for (node = uu_list_first(list); node != NULL; + node = uu_list_next(list, node)) { + us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; + if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == + NULL) + uu_avl_insert(avl_tree, node, idx); + } + + uu_list_destroy(list); + } + + /* print & free node`s nvlist memory */ + print_us(scripted, parseable, fields, cb.cb_max_typelen, + cb.cb_max_namelen, cb.cb_max_usedlen, + cb.cb_max_quotalen, B_TRUE, cb.cb_avl); + + if (sortcol) + zfs_free_sort_columns(sortcol); + zfs_free_sort_columns(default_sortcol); + + /* + * Finally, clean up the AVL tree. + */ + if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(cb.cb_avl, node); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(avl_tree); + uu_avl_pool_destroy(avl_pool); + return (error); } @@ -2820,6 +3489,1362 @@ zfs_do_receive(int argc, char **argv) return (err != 0); } +/* + * allow/unallow stuff + */ +/* copied from zfs/sys/dsl_deleg.h */ +#define ZFS_DELEG_PERM_CREATE "create" +#define ZFS_DELEG_PERM_DESTROY "destroy" +#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" +#define ZFS_DELEG_PERM_ROLLBACK "rollback" +#define ZFS_DELEG_PERM_CLONE "clone" +#define ZFS_DELEG_PERM_PROMOTE "promote" +#define ZFS_DELEG_PERM_RENAME "rename" +#define ZFS_DELEG_PERM_MOUNT "mount" +#define ZFS_DELEG_PERM_SHARE "share" +#define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_RECEIVE "receive" +#define ZFS_DELEG_PERM_ALLOW "allow" +#define ZFS_DELEG_PERM_USERPROP "userprop" +#define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? */ +#define ZFS_DELEG_PERM_USERQUOTA "userquota" +#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" +#define ZFS_DELEG_PERM_USERUSED "userused" +#define ZFS_DELEG_PERM_GROUPUSED "groupused" +#define ZFS_DELEG_PERM_HOLD "hold" +#define ZFS_DELEG_PERM_RELEASE "release" +#define ZFS_DELEG_PERM_DIFF "diff" + +#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE + +static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { + { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, + { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, + { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, + { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, + { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, + { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, + { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, + { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, + { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, + { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, + { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, + { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, + { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, + { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, + + { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, + { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, + { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, + { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, + { NULL, ZFS_DELEG_NOTE_NONE } +}; + +/* permission structure */ +typedef struct deleg_perm { + zfs_deleg_who_type_t dp_who_type; + const char *dp_name; + boolean_t dp_local; + boolean_t dp_descend; +} deleg_perm_t; + +/* */ +typedef struct deleg_perm_node { + deleg_perm_t dpn_perm; + + uu_avl_node_t dpn_avl_node; +} deleg_perm_node_t; + +typedef struct fs_perm fs_perm_t; + +/* permissions set */ +typedef struct who_perm { + zfs_deleg_who_type_t who_type; + const char *who_name; /* id */ + char who_ug_name[256]; /* user/group name */ + fs_perm_t *who_fsperm; /* uplink */ + + uu_avl_t *who_deleg_perm_avl; /* permissions */ +} who_perm_t; + +/* */ +typedef struct who_perm_node { + who_perm_t who_perm; + uu_avl_node_t who_avl_node; +} who_perm_node_t; + +typedef struct fs_perm_set fs_perm_set_t; +/* fs permissions */ +struct fs_perm { + const char *fsp_name; + + uu_avl_t *fsp_sc_avl; /* sets,create */ + uu_avl_t *fsp_uge_avl; /* user,group,everyone */ + + fs_perm_set_t *fsp_set; /* uplink */ +}; + +/* */ +typedef struct fs_perm_node { + fs_perm_t fspn_fsperm; + uu_avl_t *fspn_avl; + + uu_list_node_t fspn_list_node; +} fs_perm_node_t; + +/* top level structure */ +struct fs_perm_set { + uu_list_pool_t *fsps_list_pool; + uu_list_t *fsps_list; /* list of fs_perms */ + + uu_avl_pool_t *fsps_named_set_avl_pool; + uu_avl_pool_t *fsps_who_perm_avl_pool; + uu_avl_pool_t *fsps_deleg_perm_avl_pool; +}; + +static inline const char * +deleg_perm_type(zfs_deleg_note_t note) +{ + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + case ZFS_DELEG_NOTE_GROUPUSED: + case ZFS_DELEG_NOTE_USERPROP: + case ZFS_DELEG_NOTE_USERQUOTA: + case ZFS_DELEG_NOTE_USERUSED: + /* other */ + return (gettext("other")); + default: + return (gettext("subcommand")); + } +} + +static int inline +who_type2weight(zfs_deleg_who_type_t who_type) +{ + int res; + switch (who_type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + res = 0; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + res = 1; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + res = 2; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + res = 3; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + res = 4; + break; + default: + res = -1; + } + + return (res); +} + +/* ARGSUSED */ +static int +who_perm_compare(const void *larg, const void *rarg, void *unused) +{ + const who_perm_node_t *l = larg; + const who_perm_node_t *r = rarg; + zfs_deleg_who_type_t ltype = l->who_perm.who_type; + zfs_deleg_who_type_t rtype = r->who_perm.who_type; + int lweight = who_type2weight(ltype); + int rweight = who_type2weight(rtype); + int res = lweight - rweight; + if (res == 0) + res = strncmp(l->who_perm.who_name, r->who_perm.who_name, + ZFS_MAX_DELEG_NAME-1); + + if (res == 0) + return (0); + if (res > 0) + return (1); + else + return (-1); +} + +/* ARGSUSED */ +static int +deleg_perm_compare(const void *larg, const void *rarg, void *unused) +{ + const deleg_perm_node_t *l = larg; + const deleg_perm_node_t *r = rarg; + int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, + ZFS_MAX_DELEG_NAME-1); + + if (res == 0) + return (0); + + if (res > 0) + return (1); + else + return (-1); +} + +static inline void +fs_perm_set_init(fs_perm_set_t *fspset) +{ + bzero(fspset, sizeof (fs_perm_set_t)); + + if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", + sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), + NULL, UU_DEFAULT)) == NULL) + nomem(); + if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( + "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( + "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( + "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( + deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) + == NULL) + nomem(); +} + +static inline void fs_perm_fini(fs_perm_t *); +static inline void who_perm_fini(who_perm_t *); + +static inline void +fs_perm_set_fini(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = uu_list_first(fspset->fsps_list); + + while (node != NULL) { + fs_perm_node_t *next_node = + uu_list_next(fspset->fsps_list, node); + fs_perm_t *fsperm = &node->fspn_fsperm; + fs_perm_fini(fsperm); + uu_list_remove(fspset->fsps_list, node); + free(node); + node = next_node; + } + + uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); + uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); + uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); +} + +static inline void +deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, + const char *name) +{ + deleg_perm->dp_who_type = type; + deleg_perm->dp_name = name; +} + +static inline void +who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, + zfs_deleg_who_type_t type, const char *name) +{ + uu_avl_pool_t *pool; + pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; + + bzero(who_perm, sizeof (who_perm_t)); + + if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + who_perm->who_type = type; + who_perm->who_name = name; + who_perm->who_fsperm = fsperm; +} + +static inline void +who_perm_fini(who_perm_t *who_perm) +{ + deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); + + while (node != NULL) { + deleg_perm_node_t *next_node = + uu_avl_next(who_perm->who_deleg_perm_avl, node); + + uu_avl_remove(who_perm->who_deleg_perm_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(who_perm->who_deleg_perm_avl); +} + +static inline void +fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) +{ + uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; + uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; + + bzero(fsperm, sizeof (fs_perm_t)); + + if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + fsperm->fsp_set = fspset; + fsperm->fsp_name = fsname; +} + +static inline void +fs_perm_fini(fs_perm_t *fsperm) +{ + who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_sc_avl, node); + free(node); + node = next_node; + } + + node = uu_avl_first(fsperm->fsp_uge_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_uge_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(fsperm->fsp_sc_avl); + uu_avl_destroy(fsperm->fsp_uge_avl); +} + +static void inline +set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, + zfs_deleg_who_type_t who_type, const char *name, char locality) +{ + uu_avl_index_t idx = 0; + + deleg_perm_node_t *found_node = NULL; + deleg_perm_t *deleg_perm = &node->dpn_perm; + + deleg_perm_init(deleg_perm, who_type, name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) + uu_avl_insert(avl, node, idx); + else { + node = found_node; + deleg_perm = &node->dpn_perm; + } + + + switch (locality) { + case ZFS_DELEG_LOCAL: + deleg_perm->dp_local = B_TRUE; + break; + case ZFS_DELEG_DESCENDENT: + deleg_perm->dp_descend = B_TRUE; + break; + case ZFS_DELEG_NA: + break; + default: + assert(B_FALSE); /* invalid locality */ + } +} + +static inline int +parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; + uu_avl_t *avl = who_perm->who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_perm->who_type; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *name = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; + deleg_perm_node_t *node = + safe_malloc(sizeof (deleg_perm_node_t)); + + VERIFY(type == DATA_TYPE_BOOLEAN); + + uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); + set_deleg_perm_node(avl, node, who_type, name, locality); + } + + return (0); +} + +static inline int +parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = fsperm->fsp_set; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *name = nvpair_name(nvp); + uu_avl_t *avl = NULL; + uu_avl_pool_t *avl_pool = NULL; + zfs_deleg_who_type_t perm_type = name[0]; + char perm_locality = name[1]; + const char *perm_name = name + 3; + boolean_t is_set = B_TRUE; + who_perm_t *who_perm = NULL; + + assert('$' == name[2]); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + switch (perm_type) { + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_NAMED_SET: + case ZFS_DELEG_NAMED_SET_SETS: + avl_pool = fspset->fsps_named_set_avl_pool; + avl = fsperm->fsp_sc_avl; + break; + case ZFS_DELEG_USER: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP: + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + avl_pool = fspset->fsps_who_perm_avl_pool; + avl = fsperm->fsp_uge_avl; + break; + default: + break; + } + + if (is_set) { + who_perm_node_t *found_node = NULL; + who_perm_node_t *node = safe_malloc( + sizeof (who_perm_node_t)); + who_perm = &node->who_perm; + uu_avl_index_t idx = 0; + + uu_avl_node_init(node, &node->who_avl_node, avl_pool); + who_perm_init(who_perm, fsperm, perm_type, perm_name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) { + if (avl == fsperm->fsp_uge_avl) { + uid_t rid = 0; + struct passwd *p = NULL; + struct group *g = NULL; + const char *nice_name = NULL; + + switch (perm_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + rid = atoi(perm_name); + p = getpwuid(rid); + if (p) + nice_name = p->pw_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + rid = atoi(perm_name); + g = getgrgid(rid); + if (g) + nice_name = g->gr_name; + break; + default: + break; + } + + if (nice_name != NULL) + (void) strlcpy( + node->who_perm.who_ug_name, + nice_name, 256); + } + + uu_avl_insert(avl, node, idx); + } else { + node = found_node; + who_perm = &node->who_perm; + } + } + + (void) parse_who_perm(who_perm, nvl2, perm_locality); + } + + return (0); +} + +static inline int +parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + uu_avl_index_t idx = 0; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *fsname = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + fs_perm_t *fsperm = NULL; + fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); + if (node == NULL) + nomem(); + + fsperm = &node->fspn_fsperm; + + VERIFY(DATA_TYPE_NVLIST == type); + + uu_list_node_init(node, &node->fspn_list_node, + fspset->fsps_list_pool); + + idx = uu_list_numnodes(fspset->fsps_list); + fs_perm_init(fsperm, fspset, fsname); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + (void) parse_fs_perm(fsperm, nvl2); + + uu_list_insert(fspset->fsps_list, node, idx); + } + + return (0); +} + +static inline const char * +deleg_perm_comment(zfs_deleg_note_t note) +{ + const char *str = ""; + + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + case ZFS_DELEG_NOTE_ALLOW: + str = gettext("Must also have the permission that is being" + "\n\t\t\t\tallowed"); + break; + case ZFS_DELEG_NOTE_CLONE: + str = gettext("Must also have the 'create' ability and 'mount'" + "\n\t\t\t\tability in the origin file system"); + break; + case ZFS_DELEG_NOTE_CREATE: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DESTROY: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DIFF: + str = gettext("Allows lookup of paths within a dataset;" + "\n\t\t\t\tgiven an object number. Ordinary users need this" + "\n\t\t\t\tin order to use zfs diff"); + break; + case ZFS_DELEG_NOTE_HOLD: + str = gettext("Allows adding a user hold to a snapshot"); + break; + case ZFS_DELEG_NOTE_MOUNT: + str = gettext("Allows mount/umount of ZFS datasets"); + break; + case ZFS_DELEG_NOTE_PROMOTE: + str = gettext("Must also have the 'mount'\n\t\t\t\tand" + " 'promote' ability in the origin file system"); + break; + case ZFS_DELEG_NOTE_RECEIVE: + str = gettext("Must also have the 'mount' and 'create'" + " ability"); + break; + case ZFS_DELEG_NOTE_RELEASE: + str = gettext("Allows releasing a user hold which\n\t\t\t\t" + "might destroy the snapshot"); + break; + case ZFS_DELEG_NOTE_RENAME: + str = gettext("Must also have the 'mount' and 'create'" + "\n\t\t\t\tability in the new parent"); + break; + case ZFS_DELEG_NOTE_ROLLBACK: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SEND: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SHARE: + str = gettext("Allows sharing file systems over NFS or SMB" + "\n\t\t\t\tprotocols"); + break; + case ZFS_DELEG_NOTE_SNAPSHOT: + str = gettext(""); + break; +/* + * case ZFS_DELEG_NOTE_VSCAN: + * str = gettext(""); + * break; + */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + str = gettext("Allows accessing any groupquota@... property"); + break; + case ZFS_DELEG_NOTE_GROUPUSED: + str = gettext("Allows reading any groupused@... property"); + break; + case ZFS_DELEG_NOTE_USERPROP: + str = gettext("Allows changing any user property"); + break; + case ZFS_DELEG_NOTE_USERQUOTA: + str = gettext("Allows accessing any userquota@... property"); + break; + case ZFS_DELEG_NOTE_USERUSED: + str = gettext("Allows reading any userused@... property"); + break; + /* other */ + default: + str = ""; + } + + return (str); +} + +struct allow_opts { + boolean_t local; + boolean_t descend; + boolean_t user; + boolean_t group; + boolean_t everyone; + boolean_t create; + boolean_t set; + boolean_t recursive; /* unallow only */ + boolean_t prt_usage; + + boolean_t prt_perms; + char *who; + char *perms; + const char *dataset; +}; + +static inline int +prop_cmp(const void *a, const void *b) +{ + const char *str1 = *(const char **)a; + const char *str2 = *(const char **)b; + return (strcmp(str1, str2)); +} + +static void +allow_usage(boolean_t un, boolean_t requested, const char *msg) +{ + const char *opt_desc[] = { + "-h", gettext("show this help message and exit"), + "-l", gettext("set permission locally"), + "-d", gettext("set permission for descents"), + "-u", gettext("set permission for user"), + "-g", gettext("set permission for group"), + "-e", gettext("set permission for everyone"), + "-c", gettext("set create time permission"), + "-s", gettext("define permission set"), + /* unallow only */ + "-r", gettext("remove permissions recursively"), + }; + size_t unallow_size = sizeof (opt_desc) / sizeof (char *); + size_t allow_size = unallow_size - 2; + const char *props[ZFS_NUM_PROPS]; + int i; + size_t count = 0; + FILE *fp = requested ? stdout : stderr; + zprop_desc_t *pdtbl = zfs_prop_get_table(); + const char *fmt = gettext("%-16s %-14s\t%s\n"); + + (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : + HELP_ALLOW)); + (void) fprintf(fp, gettext("Options:\n")); + for (i = 0; i < (un ? unallow_size : allow_size); i++) { + const char *opt = opt_desc[i++]; + const char *optdsc = opt_desc[i]; + (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); + } + + (void) fprintf(fp, gettext("\nThe following permissions are " + "supported:\n\n")); + (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), + gettext("NOTES")); + for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { + const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; + zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; + const char *perm_type = deleg_perm_type(perm_note); + const char *perm_comment = deleg_perm_comment(perm_note); + (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); + } + + for (i = 0; i < ZFS_NUM_PROPS; i++) { + zprop_desc_t *pd = &pdtbl[i]; + if (pd->pd_visible != B_TRUE) + continue; + + if (pd->pd_attr == PROP_READONLY) + continue; + + props[count++] = pd->pd_name; + } + props[count] = NULL; + + qsort(props, count, sizeof (char *), prop_cmp); + + for (i = 0; i < count; i++) + (void) fprintf(fp, fmt, props[i], gettext("property"), ""); + + if (msg != NULL) + (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); + + exit(requested ? 0 : 2); +} + +static inline const char * +munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, + char **permsp) +{ + if (un && argc == expected_argc - 1) + *permsp = NULL; + else if (argc == expected_argc) + *permsp = argv[argc - 2]; + else + allow_usage(un, B_FALSE, + gettext("wrong number of parameters\n")); + + return (argv[argc - 1]); +} + +static void +parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) +{ + int uge_sum = opts->user + opts->group + opts->everyone; + int csuge_sum = opts->create + opts->set + uge_sum; + int ldcsuge_sum = csuge_sum + opts->local + opts->descend; + int all_sum = un ? ldcsuge_sum + opts->recursive : ldcsuge_sum; + + if (uge_sum > 1) + allow_usage(un, B_FALSE, + gettext("-u, -g, and -e are mutually exclusive\n")); + + if (opts->prt_usage) { + if (argc == 0 && all_sum == 0) + allow_usage(un, B_TRUE, NULL); + else + usage(B_FALSE); + } + + if (opts->set) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -s\n")); + + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + if (argv[0][0] != '@') + allow_usage(un, B_FALSE, + gettext("invalid set name: missing '@' prefix\n")); + opts->who = argv[0]; + } else if (opts->create) { + if (ldcsuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -c\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (opts->everyone) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -e\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") + == 0) { + opts->everyone = B_TRUE; + argc--; + argv++; + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (argc == 1) { + opts->prt_perms = B_TRUE; + opts->dataset = argv[argc-1]; + } else { + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + opts->who = argv[0]; + } + + if (!opts->local && !opts->descend) { + opts->local = B_TRUE; + opts->descend = B_TRUE; + } +} + +static void +store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, + const char *who, char *perms, nvlist_t *top_nvl) +{ + int i; + char ld[2] = { '\0', '\0' }; + char who_buf[ZFS_MAXNAMELEN+32]; + char base_type = ZFS_DELEG_WHO_UNKNOWN; + char set_type = ZFS_DELEG_WHO_UNKNOWN; + nvlist_t *base_nvl = NULL; + nvlist_t *set_nvl = NULL; + nvlist_t *nvl; + + if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + switch (type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + set_type = ZFS_DELEG_NAMED_SET_SETS; + base_type = ZFS_DELEG_NAMED_SET; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + set_type = ZFS_DELEG_CREATE_SETS; + base_type = ZFS_DELEG_CREATE; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + set_type = ZFS_DELEG_USER_SETS; + base_type = ZFS_DELEG_USER; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + set_type = ZFS_DELEG_GROUP_SETS; + base_type = ZFS_DELEG_GROUP; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + set_type = ZFS_DELEG_EVERYONE_SETS; + base_type = ZFS_DELEG_EVERYONE; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + default: + break; + } + + if (perms != NULL) { + char *curr = perms; + char *end = curr + strlen(perms); + + while (curr < end) { + char *delim = strchr(curr, ','); + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + if (curr[0] == '@') + nvl = set_nvl; + else + nvl = base_nvl; + + (void) nvlist_add_boolean(nvl, curr); + if (delim != end) + *delim = ','; + curr = delim + 1; + } + + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (!nvlist_empty(base_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + base_type, locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + base_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + base_nvl); + } + + + if (!nvlist_empty(set_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + set_type, locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + set_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + set_nvl); + } + } + } else { + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", base_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", base_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", set_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", set_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + } + } +} + +static int +construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) +{ + if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + if (opts->set) { + store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, + opts->descend, opts->who, opts->perms, *nvlp); + } else if (opts->create) { + store_allow_perm(ZFS_DELEG_CREATE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else if (opts->everyone) { + store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else { + char *curr = opts->who; + char *end = curr + strlen(curr); + + while (curr < end) { + const char *who; + zfs_deleg_who_type_t who_type; + char *endch; + char *delim = strchr(curr, ','); + char errbuf[256]; + char id[64]; + struct passwd *p = NULL; + struct group *g = NULL; + + uid_t rid; + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + rid = (uid_t)strtol(curr, &endch, 0); + if (opts->user) { + who_type = ZFS_DELEG_USER; + if (*endch != '\0') + p = getpwnam(curr); + else + p = getpwuid(rid); + + if (p != NULL) + rid = p->pw_uid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid user %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else if (opts->group) { + who_type = ZFS_DELEG_GROUP; + if (*endch != '\0') + g = getgrnam(curr); + else + g = getgrgid(rid); + + if (g != NULL) + rid = g->gr_gid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else { + if (*endch != '\0') { + p = getpwnam(curr); + } else { + p = getpwuid(rid); + } + + if (p == NULL) { + if (*endch != '\0') { + g = getgrnam(curr); + } else { + g = getgrgid(rid); + } + } + + if (p != NULL) { + who_type = ZFS_DELEG_USER; + rid = p->pw_uid; + } else if (g != NULL) { + who_type = ZFS_DELEG_GROUP; + rid = g->gr_gid; + } else { + (void) snprintf(errbuf, 256, gettext( + "invalid user/group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } + + (void) sprintf(id, "%u", rid); + who = id; + + store_allow_perm(who_type, opts->local, + opts->descend, who, opts->perms, *nvlp); + curr = delim + 1; + } + } + + return (0); +} + +static void +print_set_creat_perms(uu_avl_t *who_avl) +{ + const char *sc_title[] = { + gettext("Permission sets:\n"), + gettext("Create time permissions:\n"), + NULL + }; + const char **title_ptr = sc_title; + who_perm_node_t *who_node = NULL; + int prev_weight = -1; + + for (who_node = uu_avl_first(who_avl); who_node != NULL; + who_node = uu_avl_next(who_avl, who_node)) { + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + const char *who_name = who_node->who_perm.who_name; + int weight = who_type2weight(who_type); + boolean_t first = B_TRUE; + deleg_perm_node_t *deleg_node; + + if (prev_weight != weight) { + (void) printf(*title_ptr++); + prev_weight = weight; + } + + if (who_name == NULL || strnlen(who_name, 1) == 0) + (void) printf("\t"); + else + (void) printf("\t%s ", who_name); + + for (deleg_node = uu_avl_first(avl); deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (first) { + (void) printf("%s", + deleg_node->dpn_perm.dp_name); + first = B_FALSE; + } else + (void) printf(",%s", + deleg_node->dpn_perm.dp_name); + } + + (void) printf("\n"); + } +} + +static void inline +print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, + const char *title) +{ + who_perm_node_t *who_node = NULL; + boolean_t prt_title = B_TRUE; + uu_avl_walk_t *walk; + + if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((who_node = uu_avl_walk_next(walk)) != NULL) { + const char *who_name = who_node->who_perm.who_name; + const char *nice_who_name = who_node->who_perm.who_ug_name; + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + char delim = ' '; + deleg_perm_node_t *deleg_node; + boolean_t prt_who = B_TRUE; + + for (deleg_node = uu_avl_first(avl); + deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (local != deleg_node->dpn_perm.dp_local || + descend != deleg_node->dpn_perm.dp_descend) + continue; + + if (prt_who) { + const char *who = NULL; + if (prt_title) { + prt_title = B_FALSE; + (void) printf(title); + } + + switch (who_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + who = gettext("user"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + who = gettext("group"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + who = gettext("everyone"); + who_name = NULL; + default: + break; + } + + prt_who = B_FALSE; + if (who_name == NULL) + (void) printf("\t%s", who); + else + (void) printf("\t%s %s", who, who_name); + } + + (void) printf("%c%s", delim, + deleg_node->dpn_perm.dp_name); + delim = ','; + } + + if (!prt_who) + (void) printf("\n"); + } + + uu_avl_walk_end(walk); +} + +static void +print_fs_perms(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = NULL; + char buf[ZFS_MAXNAMELEN+32]; + const char *dsname = buf; + + for (node = uu_list_first(fspset->fsps_list); node != NULL; + node = uu_list_next(fspset->fsps_list, node)) { + uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; + uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; + int left = 0; + + (void) snprintf(buf, ZFS_MAXNAMELEN+32, + gettext("---- Permissions on %s "), + node->fspn_fsperm.fsp_name); + (void) printf(dsname); + left = 70 - strlen(buf); + while (left-- > 0) + (void) printf("-"); + (void) printf("\n"); + + print_set_creat_perms(sc_avl); + print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, + gettext("Local permissions:\n")); + print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, + gettext("Descendent permissions:\n")); + print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, + gettext("Local+Descendent permissions:\n")); + } +} + +static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; + +struct deleg_perms { + boolean_t un; + nvlist_t *nvl; +}; + +static int +set_deleg_perms(zfs_handle_t *zhp, void *data) +{ + struct deleg_perms *perms = (struct deleg_perms *)data; + zfs_type_t zfs_type = zfs_get_type(zhp); + + if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) + return (0); + + return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); +} + +static int +zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) +{ + zfs_handle_t *zhp; + nvlist_t *perm_nvl = NULL; + nvlist_t *update_perm_nvl = NULL; + int error = 1; + int c; + struct allow_opts opts = { 0 }; + + const char *optstr = un ? "ldugecsrh" : "ldugecsh"; + + /* check opts */ + while ((c = getopt(argc, argv, optstr)) != -1) { + switch (c) { + case 'l': + opts.local = B_TRUE; + break; + case 'd': + opts.descend = B_TRUE; + break; + case 'u': + opts.user = B_TRUE; + break; + case 'g': + opts.group = B_TRUE; + break; + case 'e': + opts.everyone = B_TRUE; + break; + case 's': + opts.set = B_TRUE; + break; + case 'c': + opts.create = B_TRUE; + break; + case 'r': + opts.recursive = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case 'h': + opts.prt_usage = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + parse_allow_args(argc, argv, un, &opts); + + /* try to open the dataset */ + if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM)) + == NULL) { + (void) fprintf(stderr, "Failed to open Dataset *%s*\n", + opts.dataset); + return (-1); + } + + if (zfs_get_fsacl(zhp, &perm_nvl) != 0) + goto cleanup2; + + fs_perm_set_init(&fs_perm_set); + if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { + (void) fprintf(stderr, "Failed to parse fsacl permissionsn"); + goto cleanup1; + } + + if (opts.prt_perms) + print_fs_perms(&fs_perm_set); + else { + (void) construct_fsacl_list(un, &opts, &update_perm_nvl); + if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) + goto cleanup0; + + if (un && opts.recursive) { + struct deleg_perms data = { un, update_perm_nvl }; + if (zfs_iter_filesystems(zhp, set_deleg_perms, + &data) != 0) + goto cleanup0; + } + } + + error = 0; + +cleanup0: + nvlist_free(perm_nvl); + if (update_perm_nvl != NULL) + nvlist_free(update_perm_nvl); +cleanup1: + fs_perm_set_fini(&fs_perm_set); +cleanup2: + zfs_close(zhp); + + return (error); +} + +/* + * zfs allow [-r] [-t] <tag> <snap> ... + * + * -r Recursively hold + * -t Temporary hold (hidden option) + * + * Apply a user-hold with the given tag to the list of snapshots. + */ +static int +zfs_do_allow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); +} + +/* + * zfs unallow [-r] [-t] <tag> <snap> ... + * + * -r Recursively hold + * -t Temporary hold (hidden option) + * + * Apply a user-hold with the given tag to the list of snapshots. + */ +static int +zfs_do_unallow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); +} + static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { @@ -2927,6 +4952,200 @@ zfs_do_release(int argc, char **argv) return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } +typedef struct holds_cbdata { + boolean_t cb_recursive; + const char *cb_snapname; + nvlist_t **cb_nvlp; + size_t cb_max_namelen; + size_t cb_max_taglen; +} holds_cbdata_t; + +#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" +#define DATETIME_BUF_LEN (32) +/* + * + */ +static void +print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl) +{ + int i; + nvpair_t *nvp = NULL; + char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; + const char *col; + + if (!scripted) { + for (i = 0; i < 3; i++) { + col = gettext(hdr_cols[i]); + if (i < 2) + (void) printf("%-*s ", i ? tagwidth : nwidth, + col); + else + (void) printf("%s\n", col); + } + } + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + char *zname = nvpair_name(nvp); + nvlist_t *nvl2; + nvpair_t *nvp2 = NULL; + (void) nvpair_value_nvlist(nvp, &nvl2); + while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { + char tsbuf[DATETIME_BUF_LEN]; + char *tagname = nvpair_name(nvp2); + uint64_t val = 0; + time_t time; + struct tm t; + char sep = scripted ? '\t' : ' '; + int sepnum = scripted ? 1 : 2; + + (void) nvpair_value_uint64(nvp2, &val); + time = (time_t)val; + (void) localtime_r(&time, &t); + (void) strftime(tsbuf, DATETIME_BUF_LEN, + gettext(STRFTIME_FMT_STR), &t); + + (void) printf("%-*s%*c%-*s%*c%s\n", nwidth, zname, + sepnum, sep, tagwidth, tagname, sepnum, sep, tsbuf); + } + } +} + +/* + * Generic callback function to list a dataset or snapshot. + */ +static int +holds_callback(zfs_handle_t *zhp, void *data) +{ + holds_cbdata_t *cbp = data; + nvlist_t *top_nvl = *cbp->cb_nvlp; + nvlist_t *nvl = NULL; + nvpair_t *nvp = NULL; + const char *zname = zfs_get_name(zhp); + size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN); + + if (cbp->cb_recursive) { + const char *snapname; + char *delim = strchr(zname, '@'); + if (delim == NULL) + return (0); + + snapname = delim + 1; + if (strcmp(cbp->cb_snapname, snapname)) + return (0); + } + + if (zfs_get_holds(zhp, &nvl) != 0) + return (-1); + + if (znamelen > cbp->cb_max_namelen) + cbp->cb_max_namelen = znamelen; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *tag = nvpair_name(nvp); + size_t taglen = strnlen(tag, MAXNAMELEN); + if (taglen > cbp->cb_max_taglen) + cbp->cb_max_taglen = taglen; + } + + return (nvlist_add_nvlist(top_nvl, zname, nvl)); +} + +/* + * zfs holds [-r] <snap> ... + * + * -r Recursively hold + */ +static int +zfs_do_holds(int argc, char **argv) +{ + int errors = 0; + int c; + int i; + boolean_t scripted = B_FALSE; + boolean_t recursive = B_FALSE; + const char *opts = "rH"; + nvlist_t *nvl; + + int types = ZFS_TYPE_SNAPSHOT; + holds_cbdata_t cb = { 0 }; + + int limit = 0; + int ret; + int flags = 0; + + /* check options */ + while ((c = getopt(argc, argv, opts)) != -1) { + switch (c) { + case 'r': + recursive = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + if (recursive) { + types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; + flags |= ZFS_ITER_RECURSE; + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) + usage(B_FALSE); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + for (i = 0; i < argc; ++i) { + char *snapshot = argv[i]; + const char *delim; + const char *snapname; + + delim = strchr(snapshot, '@'); + if (delim == NULL) { + (void) fprintf(stderr, + gettext("'%s' is not a snapshot\n"), snapshot); + ++errors; + continue; + } + snapname = delim + 1; + if (recursive) + snapshot[delim - snapshot] = '\0'; + + cb.cb_recursive = recursive; + cb.cb_snapname = snapname; + cb.cb_nvlp = &nvl; + + /* + * 1. collect holds data, set format options + */ + ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, + holds_callback, &cb); + if (ret != 0) + ++errors; + } + + /* + * 2. print holds data + */ + print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl); + + if (nvlist_empty(nvl)) + (void) printf(gettext("no datasets available\n")); + + nvlist_free(nvl); + + return (0 != errors); +} + #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ @@ -3808,15 +6027,6 @@ zfs_do_unshare(int argc, char **argv) return (unshare_unmount(OP_SHARE, argc, argv)); } -/* ARGSUSED */ -static int -zfs_do_python(int argc, char **argv) -{ - (void) execv(pypath, argv-1); - (void) printf("internal error: %s not found\n", pypath); - return (-1); -} - static int find_command_idx(char *command, int *idx) { diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 6acba5290..715815859 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* @@ -205,6 +206,7 @@ typedef struct ztest_od { */ typedef struct ztest_ds { objset_t *zd_os; + krwlock_t zd_zilog_lock; zilog_t *zd_zilog; uint64_t zd_seq; ztest_od_t *zd_od; /* debugging aid */ @@ -238,6 +240,7 @@ ztest_func_t ztest_dmu_commit_callbacks; ztest_func_t ztest_zap; ztest_func_t ztest_zap_parallel; ztest_func_t ztest_zil_commit; +ztest_func_t ztest_zil_remount; ztest_func_t ztest_dmu_read_write_zcopy; ztest_func_t ztest_dmu_objset_create_destroy; ztest_func_t ztest_dmu_prealloc; @@ -273,6 +276,7 @@ ztest_info_t ztest_info[] = { { ztest_zap_parallel, 100, &zopt_always }, { ztest_split_pool, 1, &zopt_always }, { ztest_zil_commit, 1, &zopt_incessant }, + { ztest_zil_remount, 1, &zopt_sometimes }, { ztest_dmu_read_write_zcopy, 1, &zopt_often }, { ztest_dmu_objset_create_destroy, 1, &zopt_often }, { ztest_dsl_prop_get_set, 1, &zopt_often }, @@ -1006,6 +1010,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os) dmu_objset_name(os, zd->zd_name); int l; + rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) @@ -1021,6 +1026,7 @@ ztest_zd_fini(ztest_ds_t *zd) int l; mutex_destroy(&zd->zd_dirobj_lock); + rw_destroy(&zd->zd_zilog_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); @@ -1992,6 +1998,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) if (ztest_random(2) == 0) io_type = ZTEST_IO_WRITE_TAG; + (void) rw_enter(&zd->zd_zilog_lock, RW_READER); + switch (io_type) { case ZTEST_IO_WRITE_TAG: @@ -2029,6 +2037,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) break; } + (void) rw_exit(&zd->zd_zilog_lock); + umem_free(data, blocksize); } @@ -2083,6 +2093,8 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) { zilog_t *zilog = zd->zd_zilog; + (void) rw_enter(&zd->zd_zilog_lock, RW_READER); + zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); /* @@ -2094,6 +2106,31 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id) ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq); zd->zd_seq = zilog->zl_commit_lr_seq; mutex_exit(&zilog->zl_lock); + + (void) rw_exit(&zd->zd_zilog_lock); +} + +/* + * This function is designed to simulate the operations that occur during a + * mount/unmount operation. We hold the dataset across these operations in an + * attempt to expose any implicit assumptions about ZIL management. + */ +/* ARGSUSED */ +void +ztest_zil_remount(ztest_ds_t *zd, uint64_t id) +{ + objset_t *os = zd->zd_os; + + (void) rw_enter(&zd->zd_zilog_lock, RW_WRITER); + + /* zfsvfs_teardown() */ + zil_close(zd->zd_zilog); + + /* zfsvfs_setup() */ + VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); + zil_replay(os, zd, ztest_replay_vector); + + (void) rw_exit(&zd->zd_zilog_lock); } /* @@ -5300,6 +5337,7 @@ ztest_run(ztest_shared_t *zs) */ kernel_init(FREAD | FWRITE); VERIFY(spa_open(zs->zs_pool, &spa, FTAG) == 0); + spa->spa_debug = B_TRUE; zs->zs_spa = spa; spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; diff --git a/include/libzfs.h b/include/libzfs.h index 23422b2c9..26b1ce302 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #ifndef _LIBZFS_H @@ -572,13 +573,17 @@ extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, boolean_t, boolean_t, int, uint64_t, uint64_t); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); +extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); -extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, - zfs_userspace_cb_t func, void *arg); +extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, + zfs_userspace_cb_t, void *); + +extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); +extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index a2b68eddf..2ac84f645 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -122,6 +123,7 @@ typedef enum { ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, + ZFS_PROP_REFRATIO, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 583d6303b..2cf4d2b48 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_H @@ -47,6 +48,8 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg); #define METASLAB_HINTBP_FAVOR 0x0 #define METASLAB_HINTBP_AVOID 0x1 #define METASLAB_GANG_HEADER 0x2 +#define METASLAB_GANG_CHILD 0x4 +#define METASLAB_GANG_AVOID 0x8 extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags); diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 07988dd51..6c670a162 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -52,6 +53,7 @@ struct metaslab_group { avl_tree_t mg_metaslab_tree; uint64_t mg_aliquot; uint64_t mg_bonus_area; + uint64_t mg_alloc_failures; int64_t mg_bias; int64_t mg_activation_count; metaslab_class_t *mg_class; diff --git a/include/sys/spa.h b/include/sys/spa.h index 52737ebc2..c9028fb09 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_SPA_H @@ -698,6 +699,13 @@ _NOTE(CONSTCOND) } while (0) #define dprintf_bp(bp, fmt, ...) #endif +extern boolean_t spa_debug_enabled(spa_t *spa); +#define spa_dbgmsg(spa, ...) \ +{ \ + if (spa_debug_enabled(spa)) \ + zfs_dbgmsg(__VA_ARGS__); \ +} + extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */ #ifdef __cplusplus diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 1c34873b6..3f5cd9a73 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #ifndef _SYS_SPA_IMPL_H @@ -196,6 +197,7 @@ struct spa { kcondvar_t spa_suspend_cv; /* notification of resume */ uint8_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ + boolean_t spa_debug; /* debug enabled? */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ int spa_mode; /* FREAD | FWRITE */ diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h index b4cb8e2b4..9997dffae 100644 --- a/include/zfs_deleg.h +++ b/include/zfs_deleg.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #ifndef _ZFS_DELEG_H @@ -51,6 +52,7 @@ typedef enum { ZFS_DELEG_NOTE_CLONE, ZFS_DELEG_NOTE_PROMOTE, ZFS_DELEG_NOTE_RENAME, + ZFS_DELEG_NOTE_SEND, ZFS_DELEG_NOTE_RECEIVE, ZFS_DELEG_NOTE_ALLOW, ZFS_DELEG_NOTE_USERPROP, diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 5f8847a93..74015139a 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -21,6 +21,8 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <ctype.h> @@ -94,6 +96,7 @@ zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, namecheck_err_t why; char what; + (void) zfs_prop_get_table(); if (dataset_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { @@ -2025,6 +2028,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, } break; + case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); @@ -4311,6 +4315,193 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, return (0); } +int +zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int nvsz = 2048; + void *nvbuf; + int err = 0; + char errbuf[ZFS_MAXNAMELEN+32]; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + +tryagain: + + nvbuf = malloc(nvsz); + if (nvbuf == NULL) { + err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); + goto out; + } + + zc.zc_nvlist_dst_size = nvsz; + zc.zc_nvlist_dst = (uintptr_t)nvbuf; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + + if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOMEM: + free(nvbuf); + nvsz = zc.zc_nvlist_dst_size; + goto tryagain; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } else { + /* success */ + int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); + if (rc) { + (void) snprintf(errbuf, sizeof (errbuf), dgettext( + TEXT_DOMAIN, "cannot get permissions on '%s'"), + zc.zc_name); + err = zfs_standard_error_fmt(hdl, rc, errbuf); + } + } + + free(nvbuf); +out: + return (err); +} + +int +zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + char *nvbuf; + char errbuf[ZFS_MAXNAMELEN+32]; + size_t nvsz; + int err; + + assert(zhp->zfs_type == ZFS_TYPE_VOLUME || + zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); + assert(err == 0); + + nvbuf = malloc(nvsz); + + err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); + assert(err == 0); + + zc.zc_nvlist_src_size = nvsz; + zc.zc_nvlist_src = (uintptr_t)nvbuf; + zc.zc_perm_action = un; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), + zc.zc_name); + switch (errno) { + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } + + free(nvbuf); + + return (err); +} + +int +zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + libzfs_handle_t *hdl = zhp->zfs_hdl; + int nvsz = 2048; + void *nvbuf; + int err = 0; + char errbuf[ZFS_MAXNAMELEN+32]; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + +tryagain: + + nvbuf = malloc(nvsz); + if (nvbuf == NULL) { + err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); + goto out; + } + + zc.zc_nvlist_dst_size = nvsz; + zc.zc_nvlist_dst = (uintptr_t)nvbuf; + + (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); + + if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), + zc.zc_name); + switch (errno) { + case ENOMEM: + free(nvbuf); + nvsz = zc.zc_nvlist_dst_size; + goto tryagain; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded")); + err = zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EINVAL: + err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + break; + case ENOENT: + err = zfs_error(hdl, EZFS_NOENT, errbuf); + break; + default: + err = zfs_standard_error_fmt(hdl, errno, errbuf); + break; + } + } else { + /* success */ + int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); + if (rc) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), + zc.zc_name); + err = zfs_standard_error_fmt(hdl, rc, errbuf); + } + } + + free(nvbuf); +out: + return (err); +} + uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) { diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 3da2e44a7..26dc6cadc 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -360,7 +360,7 @@ This property can also be referred to by its shortened column name, \fBavail\fR. .ad .sp .6 .RS 4n -The compression ratio achieved for this dataset, expressed as a multiplier. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. +For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. .RE .sp @@ -424,6 +424,19 @@ This property can also be referred to by its shortened column name, \fBrefer\fR. .ne 2 .mk .na +\fB\fBrefcompressratio\fR\fR +.ad +.sp .6 +.RS 4n +The compression ratio achieved for the \fBreferenced\fR space of this +dataset, expressed as a multiplier. See also the \fBcompressratio\fR +property. +.RE + +.sp +.ne 2 +.mk +.na \fB\fBtype\fR\fR .ad .sp .6 @@ -1235,7 +1248,7 @@ Recursively destroy all dependents, including cloned file systems outside the ta Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems. .RE -Extreme care should be taken when applying either the \fB-r\fR or the \fB-f\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. +Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .RE .sp diff --git a/module/zcommon/zfs_deleg.c b/module/zcommon/zfs_deleg.c index 6754ab84b..9de61790c 100644 --- a/module/zcommon/zfs_deleg.c +++ b/module/zcommon/zfs_deleg.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. */ #if defined(_KERNEL) @@ -60,7 +61,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, {ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, - {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, + {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, {ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, {ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index ce03a498c..9d65e35de 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -311,6 +312,9 @@ zfs_prop_init(void) zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "RATIO"); + zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, + PROP_READONLY, ZFS_TYPE_DATASET, + "<1.00x or higher if compressed>", "REFRATIO"); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index e166c81df..34ce2f62b 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/zfs_context.h> @@ -1347,13 +1348,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * it, since one of the current holders may be in the * middle of an update. Note that users of dbuf_undirty() * should not place a hold on the dbuf before the call. + * Also note: we can get here with a spill block, so + * test for that similar to how dbuf_dirty does. */ if (refcount_count(&db->db_holds) > db->db_dirtycnt) { mutex_exit(&db->db_mtx); /* Make sure we don't toss this buffer at sync phase */ - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); + if (db->db_blkid != DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); + } DB_DNODE_EXIT(db); return (0); } @@ -1366,11 +1371,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) *drp = dr->dr_next; + /* + * Note that there are three places in dbuf_dirty() + * where this dirty record may be put on a list. + * Make sure to do a list_remove corresponding to + * every one of those list_insert calls. + */ if (dr->dr_parent) { mutex_enter(&dr->dr_parent->dt.di.dr_mtx); list_remove(&dr->dr_parent->dt.di.dr_children, dr); mutex_exit(&dr->dr_parent->dt.di.dr_mtx); - } else if (db->db_level+1 == dn->dn_nlevels) { + } else if (db->db_blkid == DMU_SPILL_BLKID || + db->db_level+1 == dn->dn_nlevels) { ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); mutex_enter(&dn->dn_mtx); list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index c34ac2a76..26362c95c 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/dmu_objset.h> @@ -2153,7 +2154,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { - uint64_t refd, avail, uobjs, aobjs; + uint64_t refd, avail, uobjs, aobjs, ratio; dsl_dir_stats(ds->ds_dir, nv); @@ -2180,6 +2181,11 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : + (ds->ds_phys->ds_uncompressed_bytes * 100 / + ds->ds_phys->ds_compressed_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + if (ds->ds_phys->ds_next_snap_obj) { /* * This is a snapshot; override the dd's space used with @@ -2187,10 +2193,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) */ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ds->ds_phys->ds_unique_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, - ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); } } diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 56c46100d..b089f1eac 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -30,12 +31,31 @@ #include <sys/vdev_impl.h> #include <sys/zio.h> -#define WITH_NDF_BLOCK_ALLOCATOR +#define WITH_DF_BLOCK_ALLOCATOR + +/* + * Allow allocations to switch to gang blocks quickly. We do this to + * avoid having to load lots of space_maps in a given txg. There are, + * however, some cases where we want to avoid "fast" ganging and instead + * we want to do an exhaustive search of all metaslabs on this device. + * Currently we don't allow any gang or dump device related allocations + * to "fast" gang. + */ +#define CAN_FASTGANG(flags) \ + (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \ + METASLAB_GANG_AVOID))) uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ /* + * This value defines the number of allowed allocation failures per vdev. + * If a device reaches this threshold in a given txg then we consider skipping + * allocations on that device. + */ +int zfs_mg_alloc_failures; + +/* * Metaslab debugging: when set, keeps all space maps in core to verify frees. */ static int metaslab_debug = 0; @@ -865,7 +885,7 @@ metaslab_prefetch(metaslab_group_t *mg) } static int -metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) +metaslab_activate(metaslab_t *msp, uint64_t activation_weight) { metaslab_group_t *mg = msp->ms_group; space_map_t *sm = &msp->ms_map; @@ -899,13 +919,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) mutex_exit(&mg->mg_lock); } - /* - * If we were able to load the map then make sure - * that this map is still able to satisfy our request. - */ - if (msp->ms_weight < size) - return (ENOSPC); - metaslab_group_sort(msp->ms_group, msp, msp->ms_weight | activation_weight); } @@ -1123,6 +1136,7 @@ void metaslab_sync_reassess(metaslab_group_t *mg) { vdev_t *vd = mg->mg_vd; + int64_t failures = mg->mg_alloc_failures; int m; /* @@ -1140,6 +1154,8 @@ metaslab_sync_reassess(metaslab_group_t *mg) mutex_exit(&msp->ms_lock); } + atomic_add_64(&mg->mg_alloc_failures, -failures); + /* * Prefetch the next potential metaslabs */ @@ -1164,9 +1180,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva) } static uint64_t -metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, - uint64_t min_distance, dva_t *dva, int d) +metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, + uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) { + spa_t *spa = mg->mg_vd->vdev_spa; metaslab_t *msp = NULL; uint64_t offset = -1ULL; avl_tree_t *t = &mg->mg_metaslab_tree; @@ -1187,11 +1204,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, mutex_enter(&mg->mg_lock); for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { - if (msp->ms_weight < size) { + if (msp->ms_weight < asize) { + spa_dbgmsg(spa, "%s: failed to meet weight " + "requirement: vdev %llu, txg %llu, mg %p, " + "msp %p, psize %llu, asize %llu, " + "failures %llu, weight %llu", + spa_name(spa), mg->mg_vd->vdev_id, txg, + mg, msp, psize, asize, + mg->mg_alloc_failures, msp->ms_weight); mutex_exit(&mg->mg_lock); return (-1ULL); } - was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -1210,6 +1233,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp == NULL) return (-1ULL); + /* + * If we've already reached the allowable number of failed + * allocation attempts on this metaslab group then we + * consider skipping it. We skip it only if we're allowed + * to "fast" gang, the physical size is larger than + * a gang block, and we're attempting to allocate from + * the primary metaslab. + */ + if (mg->mg_alloc_failures > zfs_mg_alloc_failures && + CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && + activation_weight == METASLAB_WEIGHT_PRIMARY) { + spa_dbgmsg(spa, "%s: skipping metaslab group: " + "vdev %llu, txg %llu, mg %p, psize %llu, " + "asize %llu, failures %llu", spa_name(spa), + mg->mg_vd->vdev_id, txg, mg, psize, asize, + mg->mg_alloc_failures); + return (-1ULL); + } + mutex_enter(&msp->ms_lock); /* @@ -1218,7 +1260,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, * another thread may have changed the weight while we * were blocked on the metaslab lock. */ - if (msp->ms_weight < size || (was_active && + if (msp->ms_weight < asize || (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK) && activation_weight == METASLAB_WEIGHT_PRIMARY)) { mutex_exit(&msp->ms_lock); @@ -1233,14 +1275,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, continue; } - if (metaslab_activate(msp, activation_weight, size) != 0) { + if (metaslab_activate(msp, activation_weight) != 0) { mutex_exit(&msp->ms_lock); continue; } - if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL) + if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL) break; + atomic_inc_64(&mg->mg_alloc_failures); + metaslab_passivate(msp, space_map_maxsize(&msp->ms_map)); mutex_exit(&msp->ms_lock); @@ -1249,7 +1293,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0) vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); - space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size); + space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize); mutex_exit(&msp->ms_lock); @@ -1376,7 +1420,8 @@ top: asize = vdev_psize_to_asize(vd, psize); ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); - offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d); + offset = metaslab_group_alloc(mg, psize, asize, txg, distance, + dva, d, flags); if (offset != -1ULL) { /* * If we've just selected this metaslab group, @@ -1388,18 +1433,24 @@ top: vdev_stat_t *vs = &vd->vdev_stat; int64_t vu, cu; - /* - * Determine percent used in units of 0..1024. - * (This is just to avoid floating point.) - */ - vu = (vs->vs_alloc << 10) / (vs->vs_space + 1); - cu = (mc->mc_alloc << 10) / (mc->mc_space + 1); + vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); + cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); /* - * Bias by at most +/- 25% of the aliquot. + * Calculate how much more or less we should + * try to allocate from this device during + * this iteration around the rotor. + * For example, if a device is 80% full + * and the pool is 20% full then we should + * reduce allocations by 60% on this device. + * + * mg_bias = (20 - 80) * 512K / 100 = -307K + * + * This reduces allocations by 307K for this + * iteration. */ mg->mg_bias = ((cu - vu) * - (int64_t)mg->mg_aliquot) / (1024 * 4); + (int64_t)mg->mg_aliquot) / 100; } if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= @@ -1513,7 +1564,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded) - error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); if (error == 0 && !space_map_contains(&msp->ms_map, offset, size)) error = ENOENT; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 487a76d71..e4e0c35f0 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -1680,6 +1681,12 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) return (0); } +boolean_t +spa_debug_enabled(spa_t *spa) +{ + return (spa->spa_debug); +} + #if defined(_KERNEL) && defined(HAVE_SPL) /* Namespace manipulation */ EXPORT_SYMBOL(spa_lookup); diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 340c42ae8..d0d2b1716 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -506,7 +506,7 @@ void txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) { tx_state_t *tx = &dp->dp_tx; - int timeout = ddi_get_lbolt() + ticks; + clock_t timeout = ddi_get_lbolt() + ticks; /* don't delay if this txg could transition to quiesing immediately */ if (tx->tx_open_txg > txg || diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index 0d1fe7d2c..e2f8040d1 100644 --- a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -71,9 +71,16 @@ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software * track buffer). At most zfs_vdev_cache_size bytes will be kept in each * vdev's vdev_cache. + * + * TODO: Note that with the current ZFS code, it turns out that the + * vdev cache is not helpful, and in some cases actually harmful. It + * is better if we disable this. Once some time has passed, we should + * actually remove this to simplify the code. For now we just disable + * it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11 + * has made these same changes. */ int zfs_vdev_cache_max = 1<<14; /* 16KB */ -int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */ +int zfs_vdev_cache_size = 0; int zfs_vdev_cache_bshift = 16; #define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */ diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 693ffc0c8..088c64b27 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -701,6 +701,9 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) * and destroying snapshots requires descendent permissions, a successfull * check of the top level snapshot applies to snapshots of all descendent * datasets as well. + * + * The target snapshot may not exist when doing a recursive destroy. + * In this case fallback to permissions of the parent dataset. */ static int zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) @@ -711,6 +714,8 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr) dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value); error = zfs_secpolicy_destroy_perms(dsname, cr); + if (error == ENOENT) + error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); strfree(dsname); return (error); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index ea8b4c505..dfbe11aca 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1560,12 +1560,12 @@ zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db) + dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; - if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0) + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); @@ -1573,13 +1573,13 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, doi.doi_bonus_type != DMU_OT_ZNODE) || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (ENOTSUP); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { - sa_buf_rele(*db, FTAG); + sa_buf_rele(*db, tag); return (error); } @@ -1587,10 +1587,10 @@ zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, } void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db) +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); - sa_buf_rele(db, FTAG); + sa_buf_rele(db, tag); } /* @@ -1667,7 +1667,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, int is_xattrdir; if (prevdb) - zfs_release_sa_handle(prevhdl, prevdb); + zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) @@ -1699,7 +1699,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, prevhdl = sa_hdl; prevdb = sa_db; } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db); + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); if (error != 0) { sa_hdl = prevhdl; sa_db = prevdb; @@ -1709,7 +1709,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, if (sa_hdl != NULL && sa_hdl != hdl) { ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } if (error == 0) @@ -1730,13 +1730,13 @@ zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } @@ -1756,19 +1756,19 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, if (error != 0) return (error); - error = zfs_grab_sa_handle(osp, obj, &hdl, &db); + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_stats_impl(hdl, sa_table, sb); if (error != 0) { - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - zfs_release_sa_handle(hdl, db); + zfs_release_sa_handle(hdl, db, FTAG); return (error); } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 8aa811db2..5296b38be 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -562,7 +563,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first) if (!list_is_empty(&zilog->zl_lwb_list)) { ASSERT(zh->zh_claim_txg == 0); - ASSERT(!keep_first); + VERIFY(!keep_first); while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { list_remove(&zilog->zl_lwb_list, lwb); if (lwb->lwb_buf != NULL) @@ -1665,21 +1666,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) void zil_free(zilog_t *zilog) { - lwb_t *head_lwb; int i; zilog->zl_stop_sync = 1; - /* - * After zil_close() there should only be one lwb with a buffer. - */ - head_lwb = list_head(&zilog->zl_lwb_list); - if (head_lwb) { - ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list)); - list_remove(&zilog->zl_lwb_list, head_lwb); - zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz); - kmem_cache_free(zil_lwb_cache, head_lwb); - } + ASSERT(list_is_empty(&zilog->zl_lwb_list)); list_destroy(&zilog->zl_lwb_list); avl_destroy(&zilog->zl_vdev_tree); @@ -1719,6 +1710,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data) { zilog_t *zilog = dmu_objset_zil(os); + ASSERT(zilog->zl_clean_taskq == NULL); + ASSERT(zilog->zl_get_data == NULL); + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + zilog->zl_get_data = get_data; zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri, 2, 2, TASKQ_PREPOPULATE); @@ -1732,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data) void zil_close(zilog_t *zilog) { - lwb_t *tail_lwb; + lwb_t *lwb; uint64_t txg = 0; zil_commit(zilog, 0); /* commit all itx */ @@ -1744,9 +1739,9 @@ zil_close(zilog_t *zilog) * destroy the zl_clean_taskq. */ mutex_enter(&zilog->zl_lock); - tail_lwb = list_tail(&zilog->zl_lwb_list); - if (tail_lwb != NULL) - txg = tail_lwb->lwb_max_txg; + lwb = list_tail(&zilog->zl_lwb_list); + if (lwb != NULL) + txg = lwb->lwb_max_txg; mutex_exit(&zilog->zl_lock); if (txg) txg_wait_synced(zilog->zl_dmu_pool, txg); @@ -1754,6 +1749,19 @@ zil_close(zilog_t *zilog) taskq_destroy(zilog->zl_clean_taskq); zilog->zl_clean_taskq = NULL; zilog->zl_get_data = NULL; + + /* + * We should have only one LWB left on the list; remove it now. + */ + mutex_enter(&zilog->zl_lock); + lwb = list_head(&zilog->zl_lwb_list); + if (lwb != NULL) { + ASSERT(lwb == list_tail(&zilog->zl_lwb_list)); + list_remove(&zilog->zl_lwb_list, lwb); + zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); + kmem_cache_free(zil_lwb_cache, lwb); + } + mutex_exit(&zilog->zl_lock); } /* diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 0fa823687..0022c64cc 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -79,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; #endif +extern int zfs_mg_alloc_failures; /* * An allocating zio is one that either currently has the DVA allocate @@ -158,6 +160,12 @@ zio_init(void) zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; } + /* + * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs + * to fail 3 times per txg or 8 failures, whichever is greater. + */ + zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); + zio_inject_init(); } @@ -2151,6 +2159,7 @@ zio_dva_allocate(zio_t *zio) metaslab_class_t *mc = spa_normal_class(spa); blkptr_t *bp = zio->io_bp; int error; + int flags = 0; if (zio->io_gang_leader == NULL) { ASSERT(zio->io_child_type > ZIO_CHILD_GANG); @@ -2163,10 +2172,21 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); + /* + * The dump device does not support gang blocks so allocation on + * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid + * the "fast" gang feature. + */ + flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; + flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? + METASLAB_GANG_CHILD : 0; error = metaslab_alloc(spa, mc, zio->io_size, bp, - zio->io_prop.zp_copies, zio->io_txg, NULL, 0); + zio->io_prop.zp_copies, zio->io_txg, NULL, flags); if (error) { + spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " + "size %llu, error %d", spa_name(spa), zio, zio->io_size, + error); if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) return (zio_write_gang_block(zio)); zio->io_error = error; |