diff options
author | Brian Behlendorf <[email protected]> | 2010-05-28 13:45:14 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2010-05-28 13:45:14 -0700 |
commit | 428870ff734fdaccc342b33fc53cf94724409a46 (patch) | |
tree | 164e83c0ceda52a843795ed7cd9e95637d02c177 /lib | |
parent | 6119cb885a976e175a6e827894accf657ff1984f (diff) |
Update core ZFS code from build 121 to build 141.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libnvpair/include/libnvpair.h | 5 | ||||
-rw-r--r-- | lib/libnvpair/libnvpair.c | 155 | ||||
-rw-r--r-- | lib/libzfs/include/libzfs.h | 162 | ||||
-rw-r--r-- | lib/libzfs/include/libzfs_impl.h | 26 | ||||
-rw-r--r-- | lib/libzfs/libzfs_changelist.c | 58 | ||||
-rw-r--r-- | lib/libzfs/libzfs_config.c | 22 | ||||
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 1181 | ||||
-rw-r--r-- | lib/libzfs/libzfs_fru.c | 452 | ||||
-rw-r--r-- | lib/libzfs/libzfs_import.c | 515 | ||||
-rw-r--r-- | lib/libzfs/libzfs_mount.c | 158 | ||||
-rw-r--r-- | lib/libzfs/libzfs_pool.c | 941 | ||||
-rw-r--r-- | lib/libzfs/libzfs_sendrecv.c | 1225 | ||||
-rw-r--r-- | lib/libzfs/libzfs_status.c | 92 | ||||
-rw-r--r-- | lib/libzfs/libzfs_util.c | 75 | ||||
-rw-r--r-- | lib/libzpool/include/sys/zfs_context.h | 66 | ||||
-rw-r--r-- | lib/libzpool/kernel.c | 66 | ||||
-rw-r--r-- | lib/libzpool/taskq.c | 49 | ||||
-rw-r--r-- | lib/libzpool/util.c | 5 |
18 files changed, 3794 insertions, 1459 deletions
diff --git a/lib/libnvpair/include/libnvpair.h b/lib/libnvpair/include/libnvpair.h index e655e0d40..15c1c7816 100644 --- a/lib/libnvpair/include/libnvpair.h +++ b/lib/libnvpair/include/libnvpair.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _LIBNVPAIR_H #define _LIBNVPAIR_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/nvpair.h> #include <stdlib.h> #include <stdio.h> @@ -40,6 +38,7 @@ extern "C" { void nvlist_print(FILE *, nvlist_t *); int nvpair_value_match(nvpair_t *, int, char *, char **); int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **); +void dump_nvlist(nvlist_t *, int); #ifdef __cplusplus } diff --git a/lib/libnvpair/libnvpair.c b/lib/libnvpair/libnvpair.c index 0845cb08c..57915cd73 100644 --- a/lib/libnvpair/libnvpair.c +++ b/lib/libnvpair/libnvpair.c @@ -19,14 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <unistd.h> #include <strings.h> +#include <libintl.h> #include <sys/types.h> #include <sys/inttypes.h> #include "libnvpair.h" @@ -272,6 +271,156 @@ nvlist_print(FILE *fp, nvlist_t *nvl) nvlist_print_with_indent(fp, nvl, 0); } + +#define NVP(elem, type, vtype, ptype, format) { \ + vtype value; \ +\ + (void) nvpair_value_##type(elem, &value); \ + (void) printf("%*s%s: " format "\n", indent, "", \ + nvpair_name(elem), (ptype)value); \ +} + +#define NVPA(elem, type, vtype, ptype, format) { \ + uint_t i, count; \ + vtype *value; \ +\ + (void) nvpair_value_##type(elem, &value, &count); \ + for (i = 0; i < count; i++) { \ + (void) printf("%*s%s[%d]: " format "\n", indent, "", \ + nvpair_name(elem), i, (ptype)value[i]); \ + } \ +} + +/* + * Similar to nvlist_print() but handles arrays slightly differently. + */ +void +dump_nvlist(nvlist_t *list, int indent) +{ + nvpair_t *elem = NULL; + boolean_t bool_value; + nvlist_t *nvlist_value; + nvlist_t **nvlist_array_value; + uint_t i, count; + + if (list == NULL) { + return; + } + + while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(elem, &bool_value); + (void) printf("%*s%s: %s\n", indent, "", + nvpair_name(elem), bool_value ? "true" : "false"); + break; + + case DATA_TYPE_BYTE: + NVP(elem, byte, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8: + NVP(elem, int8, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8: + NVP(elem, uint8, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16: + NVP(elem, int16, int16_t, int, "%d"); + break; + + case DATA_TYPE_UINT16: + NVP(elem, uint16, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32: + NVP(elem, int32, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32: + NVP(elem, uint32, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64: + NVP(elem, int64, int64_t, longlong_t, "%lld"); + break; + + case DATA_TYPE_UINT64: + NVP(elem, uint64, uint64_t, u_longlong_t, "%llu"); + break; + + case DATA_TYPE_STRING: + NVP(elem, string, char *, char *, "'%s'"); + break; + + case DATA_TYPE_BYTE_ARRAY: + NVPA(elem, byte_array, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8_ARRAY: + NVPA(elem, int8_array, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8_ARRAY: + NVPA(elem, uint8_array, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16_ARRAY: + NVPA(elem, int16_array, int16_t, int, "%d"); + break; + + case DATA_TYPE_UINT16_ARRAY: + NVPA(elem, uint16_array, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32_ARRAY: + NVPA(elem, int32_array, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32_ARRAY: + NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64_ARRAY: + NVPA(elem, int64_array, int64_t, longlong_t, "%lld"); + break; + + case DATA_TYPE_UINT64_ARRAY: + NVPA(elem, uint64_array, uint64_t, u_longlong_t, + "%llu"); + break; + + case DATA_TYPE_STRING_ARRAY: + NVPA(elem, string_array, char *, char *, "'%s'"); + break; + + case DATA_TYPE_NVLIST: + (void) nvpair_value_nvlist(elem, &nvlist_value); + (void) printf("%*s%s:\n", indent, "", + nvpair_name(elem)); + dump_nvlist(nvlist_value, indent + 4); + break; + + case DATA_TYPE_NVLIST_ARRAY: + (void) nvpair_value_nvlist_array(elem, + &nvlist_array_value, &count); + for (i = 0; i < count; i++) { + (void) printf("%*s%s[%u]:\n", indent, "", + nvpair_name(elem), i); + dump_nvlist(nvlist_array_value[i], indent + 4); + } + break; + + default: + (void) printf(dgettext(TEXT_DOMAIN, "bad config type " + "%d for %s\n"), nvpair_type(elem), + nvpair_name(elem)); + } + } +} + /* * Determine if string 'value' matches 'nvp' value. The 'value' string is * converted, depending on the type of 'nvp', prior to match. For numeric diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index f19e398f6..6f7fed62c 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _LIBZFS_H @@ -66,7 +65,6 @@ enum { EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ - EZFS_VOLHASDATA, /* volume already contains data */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ @@ -85,17 +83,15 @@ enum { EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ EZFS_SHARENFSFAILED, /* share(1M) failed */ - EZFS_DEVLINKS, /* failed to create zvol links */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ + EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ - EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */ - EZFS_SHAREISCSIFAILED, /* iscsitgtd failed request to share */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ @@ -103,7 +99,6 @@ enum { EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ - EZFS_ISCSISVCUNAVAIL, /* iscsi service unavailable */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ @@ -119,6 +114,12 @@ enum { EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ + EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ + EZFS_PIPEFAILED, /* pipe create failed */ + EZFS_THREADCREATEFAILED, /* thread create failed */ + EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ + EZFS_SCRUBBING, /* currently scrubbing */ + EZFS_NO_SCRUB, /* no active scrub */ EZFS_UNKNOWN }; @@ -213,11 +214,19 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, extern int zpool_destroy(zpool_handle_t *); extern int zpool_add(zpool_handle_t *, nvlist_t *); +typedef struct splitflags { + /* do not split, but return the config that would be split off */ + int dryrun : 1; + + /* after splitting, import the pool */ + int import : 1; +} splitflags_t; + /* * Functions to manipulate pool and vdev state */ -extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t); -extern int zpool_clear(zpool_handle_t *, const char *); +extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); +extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); @@ -226,9 +235,11 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); +extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, + splitflags_t); -extern int zpool_vdev_fault(zpool_handle_t *, uint64_t); -extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t); +extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); +extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, @@ -298,6 +309,7 @@ typedef enum { extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); +extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions. @@ -319,23 +331,38 @@ extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, /* * Search for pools to import */ + +typedef struct importargs { + char **path; /* a list of paths to search */ + int paths; /* number of paths to search */ + char *poolname; /* name of a pool to find */ + uint64_t guid; /* guid of a pool to find */ + char *cachefile; /* cachefile to use for import */ + int can_be_active : 1; /* can the pool be active? */ + int unique : 1; /* does 'poolname' already exist? */ + int exists : 1; /* set on return if pool already exists */ +} importargs_t; + +extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); + +/* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, char *, uint64_t); -extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **, - char *); -extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **, - uint64_t); -extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **); /* * Miscellaneous pool functions */ struct zfs_cmd; -extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *); +extern const char *zfs_history_event_names[LOG_END]; + +extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, + boolean_t verbose); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); +extern int zpool_history_unpack(char *, uint64_t, uint64_t *, + nvlist_t ***, uint_t *); extern void zpool_set_history_str(const char *subcommand, int argc, char **argv, char *history_str); extern int zpool_stage_history(libzfs_handle_t *, const char *); @@ -343,6 +370,8 @@ extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); +extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, + nvlist_t *); /* * Basic handle manipulations. These functions do not create or destroy the @@ -374,6 +403,8 @@ extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); +extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, + boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, @@ -381,10 +412,11 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); -extern int zfs_prop_inherit(zfs_handle_t *, const char *); +extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); +extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); typedef struct zprop_list { int pl_prop; @@ -392,10 +424,11 @@ typedef struct zprop_list { struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; + size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; -extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **); +extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" @@ -419,13 +452,24 @@ extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); +#define ZFS_GET_NCOLS 5 + +typedef enum { + GET_COL_NONE, + GET_COL_NAME, + GET_COL_PROPERTY, + GET_COL_VALUE, + GET_COL_RECVD, + GET_COL_SOURCE +} zfs_get_column_t; + /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; - int cb_columns[4]; - int cb_colwidths[5]; + zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; + int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; @@ -434,12 +478,8 @@ typedef struct zprop_get_cbdata { } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, - const char *, const char *, zprop_source_t, const char *); - -#define GET_COL_NAME 1 -#define GET_COL_PROPERTY 2 -#define GET_COL_VALUE 3 -#define GET_COL_SOURCE 4 + const char *, const char *, zprop_source_t, const char *, + const char *); /* * Iterator functions. @@ -450,6 +490,7 @@ extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *); +extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); /* * Functions to create and destroy datasets. @@ -463,11 +504,42 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); extern int zfs_rename(zfs_handle_t *, const char *, boolean_t); -extern int zfs_send(zfs_handle_t *, const char *, const char *, - boolean_t, boolean_t, boolean_t, boolean_t, int); + +typedef struct sendflags { + /* print informational messages (ie, -v was specified) */ + int verbose : 1; + + /* recursive send (ie, -R) */ + int replicate : 1; + + /* for incrementals, do all intermediate snapshots */ + int doall : 1; /* (ie, -I) */ + + /* if dataset is a clone, do incremental from its origin */ + int fromorigin : 1; + + /* do deduplication */ + int dedup : 1; + + /* send properties (ie, -p) */ + int props : 1; +} sendflags_t; + +typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); + +extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, + sendflags_t flags, int outfd, snapfilter_cb_t filter_func, + void *cb_arg, nvlist_t **debugnvp); + extern int zfs_promote(zfs_handle_t *); -extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t); +extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, + boolean_t, boolean_t); +extern int zfs_hold_range(zfs_handle_t *, const char *, const char *, + const char *, boolean_t, boolean_t, snapfilter_cb_t, void *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); +extern int zfs_release_range(zfs_handle_t *, const char *, const char *, + const char *, boolean_t); +extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); @@ -482,6 +554,12 @@ typedef struct recvflags { /* the destination is a prefix, not the exact fs (ie, -d) */ int isprefix : 1; + /* + * Only the tail of the sent snapshot path is appended to the + * destination to determine the received snapshot name (ie, -e). + */ + int istail : 1; + /* do not actually do the recv, just check if it would work (ie, -n) */ int dryrun : 1; @@ -542,10 +620,6 @@ extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); -extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *); -extern int zfs_share_iscsi(zfs_handle_t *); -extern int zfs_unshare_iscsi(zfs_handle_t *); -extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); @@ -571,15 +645,10 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* - * ftyp special. Read the label from a given device. + * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); - -/* - * Create and remove zvol /dev links. - */ -extern int zpool_create_zvol_links(zpool_handle_t *); -extern int zpool_remove_zvol_links(zpool_handle_t *); +extern int zpool_clear_label(int); /* is this zvol valid for use as a dump device? */ extern int zvol_check_dump_config(char *); @@ -600,6 +669,17 @@ int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); +/* + * Mappings between vdev and FRU. + */ +extern void libzfs_fru_refresh(libzfs_handle_t *); +extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); +extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); +extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, + const char *); +extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); +extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); + #ifdef __cplusplus } #endif diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h index 70a1d1cbf..89c48c1c0 100644 --- a/lib/libzfs/include/libzfs_impl.h +++ b/lib/libzfs/include/libzfs_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,7 +30,6 @@ #include <sys/dmu.h> #include <sys/fs/zfs.h> #include <sys/zfs_ioctl.h> -#include <sys/zfs_acl.h> #include <sys/spa.h> #include <sys/nvpair.h> @@ -38,6 +37,8 @@ #include <libzfs.h> #include <libshare.h> +#include <fm/libtopo.h> + #ifdef __cplusplus extern "C" { #endif @@ -47,6 +48,13 @@ extern "C" { #endif #define VERIFY verify +typedef struct libzfs_fru { + char *zf_device; + char *zf_fru; + struct libzfs_fru *zf_chain; + struct libzfs_fru *zf_next; +} libzfs_fru_t; + struct libzfs_handle { int libzfs_error; int libzfs_fd; @@ -65,7 +73,13 @@ struct libzfs_handle { uint_t libzfs_shareflags; boolean_t libzfs_mnttab_enable; avl_tree_t libzfs_mnttab_cache; + int libzfs_pool_iter; + topo_hdl_t *libzfs_topo_hdl; + libzfs_fru_t **libzfs_fru_hash; + libzfs_fru_t *libzfs_fru_list; + char libzfs_chassis_id[256]; }; + #define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ struct zfs_handle { @@ -77,6 +91,7 @@ struct zfs_handle { dmu_objset_stats_t zfs_dmustats; nvlist_t *zfs_props; nvlist_t *zfs_user_props; + nvlist_t *zfs_recvd_props; boolean_t zfs_mntcheck; char *zfs_mntopts; uint8_t *zfs_props_table; @@ -112,7 +127,6 @@ typedef enum { */ typedef enum { SHARED_NOT_SHARED = 0x0, - SHARED_ISCSI = 0x1, SHARED_NFS = 0x2, SHARED_SMB = 0x4 } zfs_share_type_t; @@ -172,9 +186,6 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); -int zvol_create_link(libzfs_handle_t *, const char *); -int zvol_remove_link(libzfs_handle_t *, const char *); -int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *); boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); void namespace_clear(libzfs_handle_t *); @@ -189,6 +200,9 @@ extern int zfs_parse_options(char *, zfs_share_proto_t); extern int zfs_unshare_proto(zfs_handle_t *, const char *, zfs_share_proto_t *); + +extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); + #ifdef __cplusplus } #endif diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index ff438b3d7..4328d38a2 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Portions Copyright 2007 Ramprakash Jelari @@ -116,32 +116,7 @@ changelist_prefix(prop_changelist_t *clp) if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned) continue; - if (ZFS_IS_VOLUME(cn->cn_handle)) { - switch (clp->cl_realprop) { - case ZFS_PROP_NAME: - /* - * If this was a rename, unshare the zvol, and - * remove the /dev/zvol links. - */ - (void) zfs_unshare_iscsi(cn->cn_handle); - - if (zvol_remove_link(cn->cn_handle->zfs_hdl, - cn->cn_handle->zfs_name) != 0) { - ret = -1; - cn->cn_needpost = B_FALSE; - (void) zfs_share_iscsi(cn->cn_handle); - } - break; - - case ZFS_PROP_VOLSIZE: - /* - * If this was a change to the volume size, we - * need to unshare and reshare the volume. - */ - (void) zfs_unshare_iscsi(cn->cn_handle); - break; - } - } else { + if (!ZFS_IS_VOLUME(cn->cn_handle)) { /* * Do the property specific processing. */ @@ -234,32 +209,8 @@ changelist_postfix(prop_changelist_t *clp) zfs_refresh_properties(cn->cn_handle); - if (ZFS_IS_VOLUME(cn->cn_handle)) { - /* - * If we're doing a rename, recreate the /dev/zvol - * links. - */ - if (clp->cl_realprop == ZFS_PROP_NAME && - zvol_create_link(cn->cn_handle->zfs_hdl, - cn->cn_handle->zfs_name) != 0) { - errors++; - } else if (cn->cn_shared || - clp->cl_prop == ZFS_PROP_SHAREISCSI) { - if (zfs_prop_get(cn->cn_handle, - ZFS_PROP_SHAREISCSI, shareopts, - sizeof (shareopts), NULL, NULL, 0, - B_FALSE) == 0 && - strcmp(shareopts, "off") == 0) { - errors += - zfs_unshare_iscsi(cn->cn_handle); - } else { - errors += - zfs_share_iscsi(cn->cn_handle); - } - } - + if (ZFS_IS_VOLUME(cn->cn_handle)) continue; - } /* * Remount if previously mounted or mountpoint was legacy, @@ -658,8 +609,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags, if (clp->cl_prop != ZFS_PROP_MOUNTPOINT && clp->cl_prop != ZFS_PROP_SHARENFS && - clp->cl_prop != ZFS_PROP_SHARESMB && - clp->cl_prop != ZFS_PROP_SHAREISCSI) + clp->cl_prop != ZFS_PROP_SHARESMB) return (clp); /* diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c index 94640d1b1..dc27238c9 100644 --- a/lib/libzfs/libzfs_config.c +++ b/lib/libzfs/libzfs_config.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * The pool configuration repository is stored in /etc/zfs/zpool.cache as a * single packed nvlist. While it would be nice to just read in this @@ -313,21 +311,33 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) zpool_handle_t *zhp; int ret; - if (namespace_reload(hdl) != 0) + /* + * If someone makes a recursive call to zpool_iter(), we want to avoid + * refreshing the namespace because that will invalidate the parent + * context. We allow recursive calls, but simply re-use the same + * namespace AVL tree. + */ + if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0) return (-1); + hdl->libzfs_pool_iter++; for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { - if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) + if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) { + hdl->libzfs_pool_iter--; return (-1); + } if (zhp == NULL) continue; - if ((ret = func(zhp, data)) != 0) + if ((ret = func(zhp, data)) != 0) { + hdl->libzfs_pool_iter--; return (ret); + } } + hdl->libzfs_pool_iter--; return (0); } diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index ab9ba6b80..a3f5a7d0f 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -20,14 +20,12 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#include <assert.h> #include <ctype.h> #include <errno.h> -#include <libdevinfo.h> #include <libintl.h> #include <math.h> #include <stdio.h> @@ -39,7 +37,6 @@ #include <fcntl.h> #include <sys/mntent.h> #include <sys/mount.h> -#include <sys/avl.h> #include <priv.h> #include <pwd.h> #include <grp.h> @@ -49,6 +46,7 @@ #include <aclutils.h> #include <directory.h> +#include <sys/dnode.h> #include <sys/spa.h> #include <sys/zap.h> #include <libzfs.h> @@ -58,7 +56,6 @@ #include "libzfs_impl.h" #include "zfs_deleg.h" -static int zvol_create_link_common(libzfs_handle_t *, const char *, int); static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); @@ -340,6 +337,44 @@ get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) return (0); } +/* + * Utility function to get the received properties of the given object. + */ +static int +get_recvd_props_ioctl(zfs_handle_t *zhp) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + nvlist_t *recvdprops; + zfs_cmd_t zc = { 0 }; + int err; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) + return (-1); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { + if (errno == ENOMEM) { + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + return (-1); + } + } else { + zcmd_free_nvlists(&zc); + return (-1); + } + } + + err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); + zcmd_free_nvlists(&zc); + if (err != 0) + return (-1); + + nvlist_free(zhp->zfs_recvd_props); + zhp->zfs_recvd_props = recvdprops; + + return (0); +} + static int put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) { @@ -401,70 +436,8 @@ zfs_refresh_properties(zfs_handle_t *zhp) static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { - char *logstr; - libzfs_handle_t *hdl = zhp->zfs_hdl; - - /* - * Preserve history log string. - * any changes performed here will be - * logged as an internal event. - */ - logstr = zhp->zfs_hdl->libzfs_log_str; - zhp->zfs_hdl->libzfs_log_str = NULL; - -top: - if (put_stats_zhdl(zhp, zc) != 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; + if (put_stats_zhdl(zhp, zc) != 0) return (-1); - } - - - if (zhp->zfs_dmustats.dds_inconsistent) { - zfs_cmd_t zc2 = { 0 }; - - /* - * If it is dds_inconsistent, then we've caught it in - * the middle of a 'zfs receive' or 'zfs destroy', and - * it is inconsistent from the ZPL's point of view, so - * can't be mounted. However, it could also be that we - * have crashed in the middle of one of those - * operations, in which case we need to get rid of the - * inconsistent state. We do that by either rolling - * back to the previous snapshot (which will fail if - * there is none), or destroying the filesystem. Note - * that if we are still in the middle of an active - * 'receive' or 'destroy', then the rollback and destroy - * will fail with EBUSY and we will drive on as usual. - */ - - (void) strlcpy(zc2.zc_name, zhp->zfs_name, - sizeof (zc2.zc_name)); - - if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) { - (void) zvol_remove_link(hdl, zhp->zfs_name); - zc2.zc_objset_type = DMU_OST_ZVOL; - } else { - zc2.zc_objset_type = DMU_OST_ZFS; - } - - /* - * If we can successfully destroy it, pretend that it - * never existed. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; - errno = ENOENT; - return (-1); - } - /* If we can successfully roll it back, reset the stats */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) { - if (get_stats_ioctl(zhp, zc) != 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; - return (-1); - } - goto top; - } - } /* * We've managed to open the dataset and gather statistics. Determine @@ -486,8 +459,9 @@ top: else abort(); /* we should never see any other types */ - zhp->zfs_hdl->libzfs_log_str = logstr; - zhp->zpool_hdl = zpool_handle(zhp); + if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) + return (-1); + return (0); } @@ -589,6 +563,7 @@ zfs_close(zfs_handle_t *zhp) free(zhp->zfs_mntopts); nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); + nvlist_free(zhp->zfs_recvd_props); free(zhp); } @@ -882,9 +857,14 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, goto error; } + /* + * Encode the prop name as + * userquota@<hex-rid>-domain, to make it easy + * for the kernel to decode. + */ (void) snprintf(newpropname, sizeof (newpropname), - "%s%s", zfs_userquota_prop_prefixes[uqtype], - domain); + "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], + (longlong_t)rid, domain); valary[0] = uqtype; valary[1] = rid; valary[2] = intval; @@ -960,19 +940,60 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } break; - case ZFS_PROP_SHAREISCSI: - if (strcmp(strval, "off") != 0 && - strcmp(strval, "on") != 0 && - strcmp(strval, "type=disk") != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be 'on', 'off', or 'type=disk'"), - propname); - (void) zfs_error(hdl, EZFS_BADPROP, errbuf); - goto error; + case ZFS_PROP_MLSLABEL: + { + /* + * Verify the mlslabel string and convert to + * internal hex label string. + */ + + m_label_t *new_sl; + char *hex = NULL; /* internal label string */ + + /* Default value is already OK. */ + if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) + break; + + /* Verify the label can be converted to binary form */ + if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || + (str_to_label(strval, &new_sl, MAC_LABEL, + L_NO_CORRECTION, NULL) == -1)) { + goto badlabel; } + /* Now translate to hex internal label string */ + if (label_to_str(new_sl, &hex, M_INTERNAL, + DEF_NAMES) != 0) { + if (hex) + free(hex); + goto badlabel; + } + m_label_free(new_sl); + + /* If string is already in internal form, we're done. */ + if (strcmp(strval, hex) == 0) { + free(hex); + break; + } + + /* Replace the label string with the internal form. */ + (void) nvlist_remove(ret, zfs_prop_to_name(prop), + DATA_TYPE_STRING); + verify(nvlist_add_string(ret, zfs_prop_to_name(prop), + hex) == 0); + free(hex); + break; +badlabel: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid mlslabel '%s'"), strval); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + m_label_free(new_sl); /* OK if null */ + goto error; + + } + case ZFS_PROP_MOUNTPOINT: { namecheck_err_t why; @@ -1226,6 +1247,90 @@ error: return (NULL); } +void +zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, + char *errbuf) +{ + switch (err) { + + case ENOSPC: + /* + * For quotas and reservations, ENOSPC indicates + * something different; setting a quota or reservation + * doesn't use any disk space. + */ + switch (prop) { + case ZFS_PROP_QUOTA: + case ZFS_PROP_REFQUOTA: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is less than current used or " + "reserved space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); + break; + + case ZFS_PROP_RESERVATION: + case ZFS_PROP_REFRESERVATION: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "size is greater than available space")); + (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); + break; + + default: + (void) zfs_standard_error(hdl, err, errbuf); + break; + } + break; + + case EBUSY: + (void) zfs_standard_error(hdl, EBUSY, errbuf); + break; + + case EROFS: + (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); + break; + + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool and or dataset must be upgraded to set this " + "property or value")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + + case ERANGE: + if (prop == ZFS_PROP_COMPRESSION) { + (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property setting is not allowed on " + "bootable datasets")); + (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + + case EINVAL: + if (prop == ZPROP_INVAL) { + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + + case EOVERFLOW: + /* + * This platform can't address a volume this big. + */ +#ifdef _ILP32 + if (prop == ZFS_PROP_VOLSIZE) { + (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); + break; + } +#endif + /* FALLTHROUGH */ + default: + (void) zfs_standard_error(hdl, err, errbuf); + } +} + /* * Given a property name and value, set the property for the given dataset. */ @@ -1294,79 +1399,7 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); if (ret != 0) { - switch (errno) { - - case ENOSPC: - /* - * For quotas and reservations, ENOSPC indicates - * something different; setting a quota or reservation - * doesn't use any disk space. - */ - switch (prop) { - case ZFS_PROP_QUOTA: - case ZFS_PROP_REFQUOTA: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "size is less than current used or " - "reserved space")); - (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); - break; - - case ZFS_PROP_RESERVATION: - case ZFS_PROP_REFRESERVATION: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "size is greater than available space")); - (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); - break; - - default: - (void) zfs_standard_error(hdl, errno, errbuf); - break; - } - break; - - case EBUSY: - if (prop == ZFS_PROP_VOLBLOCKSIZE) - (void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf); - else - (void) zfs_standard_error(hdl, EBUSY, errbuf); - break; - - case EROFS: - (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); - break; - - case ENOTSUP: - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "pool and or dataset must be upgraded to set this " - "property or value")); - (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); - break; - - case ERANGE: - if (prop == ZFS_PROP_COMPRESSION) { - (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "property setting is not allowed on " - "bootable datasets")); - (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); - } else { - (void) zfs_standard_error(hdl, errno, errbuf); - } - break; - - case EOVERFLOW: - /* - * This platform can't address a volume this big. - */ -#ifdef _ILP32 - if (prop == ZFS_PROP_VOLSIZE) { - (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); - break; - } -#endif - /* FALLTHROUGH */ - default: - (void) zfs_standard_error(hdl, errno, errbuf); - } + zfs_setprop_error(hdl, prop, errno, errbuf); } else { if (do_prefix) ret = changelist_postfix(cl); @@ -1388,10 +1421,11 @@ error: } /* - * Given a property, inherit the value from the parent dataset. + * Given a property, inherit the value from the parent dataset, or if received + * is TRUE, revert to the received value, if any. */ int -zfs_prop_inherit(zfs_handle_t *zhp, const char *propname) +zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { zfs_cmd_t zc = { 0 }; int ret; @@ -1403,6 +1437,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname) (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s'"), propname, zhp->zfs_name); + zc.zc_cookie = received; if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { /* * For user properties, the amount of work we have to do is very @@ -1429,7 +1464,7 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname) if (zfs_prop_readonly(prop)) return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); - if (!zfs_prop_inheritable(prop)) + if (!zfs_prop_inheritable(prop) && !received) return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* @@ -1534,6 +1569,26 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) return (value); } +static boolean_t +zfs_is_recvd_props_mode(zfs_handle_t *zhp) +{ + return (zhp->zfs_props == zhp->zfs_recvd_props); +} + +static void +zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) +{ + *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; + zhp->zfs_props = zhp->zfs_recvd_props; +} + +static void +zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) +{ + zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; + *cookie = 0; +} + /* * Internal function for getting a numeric property. Both zfs_prop_get() and * zfs_prop_get_int() are built using this interface. @@ -1552,6 +1607,7 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, struct mnttab mnt; char *mntopt_on = NULL; char *mntopt_off = NULL; + boolean_t received = zfs_is_recvd_props_mode(zhp); *source = NULL; @@ -1627,6 +1683,9 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, case ZFS_PROP_NBMAND: *val = getprop_uint64(zhp, prop, source); + if (received) + break; + if (hasmntopt(&mnt, mntopt_on) && !*val) { *val = B_TRUE; if (src) @@ -1639,22 +1698,17 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, break; case ZFS_PROP_CANMOUNT: - *val = getprop_uint64(zhp, prop, source); - if (*val != ZFS_CANMOUNT_ON) - *source = zhp->zfs_name; - else - *source = ""; /* default */ - break; - + case ZFS_PROP_VOLSIZE: case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: *val = getprop_uint64(zhp, prop, source); - if (*val == 0) - *source = ""; /* default */ - else + + if (*source == NULL) { + /* not default, must be local */ *source = zhp->zfs_name; + } break; case ZFS_PROP_MOUNTED: @@ -1696,11 +1750,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, /* * If we tried to use a default value for a * readonly property, it means that it was not - * present; return an error. + * present. */ if (zfs_prop_readonly(prop) && - *source && (*source)[0] == '\0') { - return (-1); + *source != NULL && (*source)[0] == '\0') { + *source = NULL; } break; @@ -1730,6 +1784,8 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, *srctype = ZPROP_SRC_NONE; } else if (source[0] == '\0') { *srctype = ZPROP_SRC_DEFAULT; + } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { + *srctype = ZPROP_SRC_RECEIVED; } else { if (strcmp(source, zhp->zfs_name) == 0) { *srctype = ZPROP_SRC_LOCAL; @@ -1741,6 +1797,43 @@ get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, } +int +zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, + size_t proplen, boolean_t literal) +{ + zfs_prop_t prop; + int err = 0; + + if (zhp->zfs_recvd_props == NULL) + if (get_recvd_props_ioctl(zhp) != 0) + return (-1); + + prop = zfs_name_to_prop(propname); + + if (prop != ZPROP_INVAL) { + uint64_t cookie; + if (!nvlist_exists(zhp->zfs_recvd_props, propname)) + return (-1); + zfs_set_recvd_props_mode(zhp, &cookie); + err = zfs_prop_get(zhp, prop, propbuf, proplen, + NULL, NULL, 0, literal); + zfs_unset_recvd_props_mode(zhp, &cookie); + } else if (zfs_prop_userquota(propname)) { + return (-1); + } else { + nvlist_t *propval; + char *recvdval; + if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, + propname, &propval) != 0) + return (-1); + verify(nvlist_lookup_string(propval, ZPROP_VALUE, + &recvdval) == 0); + (void) strlcpy(propbuf, recvdval, proplen); + } + + return (err == 0 ? 0 : -1); +} + /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a @@ -1756,6 +1849,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, uint64_t val; char *str; const char *strval; + boolean_t received = zfs_is_recvd_props_mode(zhp); /* * Check to see if this property applies to our object @@ -1763,6 +1857,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) return (-1); + if (received && zfs_prop_readonly(prop)) + return (-1); + if (src) *src = ZPROP_SRC_NONE; @@ -1802,10 +1899,22 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, if (str[0] == '/') { char buf[MAXPATHLEN]; char *root = buf; - const char *relpath = zhp->zfs_name + strlen(source); + const char *relpath; - if (relpath[0] == '/') - relpath++; + /* + * If we inherit the mountpoint, even from a dataset + * with a received value, the source will be the path of + * the dataset we inherit from. If source is + * ZPROP_SOURCE_VAL_RECVD, the received value is not + * inherited. + */ + if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { + relpath = ""; + } else { + relpath = zhp->zfs_name + strlen(source); + if (relpath[0] == '/') + relpath++; + } if ((zpool_get_prop(zhp->zpool_hdl, ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) || @@ -1884,8 +1993,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); - (void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t) - val / 100, (longlong_t)val % 100); + (void) snprintf(propbuf, proplen, "%llu.%02llux", + (u_longlong_t)(val / 100), + (u_longlong_t)(val % 100)); break; case ZFS_PROP_TYPE: @@ -1930,6 +2040,44 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, (void) strlcpy(propbuf, zhp->zfs_name, proplen); break; + case ZFS_PROP_MLSLABEL: + { + m_label_t *new_sl = NULL; + char *ascii = NULL; /* human readable label */ + + (void) strlcpy(propbuf, + getprop_string(zhp, prop, &source), proplen); + + if (literal || (strcasecmp(propbuf, + ZFS_MLSLABEL_DEFAULT) == 0)) + break; + + /* + * Try to translate the internal hex string to + * human-readable output. If there are any + * problems just use the hex string. + */ + + if (str_to_label(propbuf, &new_sl, MAC_LABEL, + L_NO_CORRECTION, NULL) == -1) { + m_label_free(new_sl); + break; + } + + if (label_to_str(new_sl, &ascii, M_LABEL, + DEF_NAMES) != 0) { + if (ascii) + free(ascii); + m_label_free(new_sl); + break; + } + m_label_free(new_sl); + + (void) strlcpy(propbuf, ascii, proplen); + free(ascii); + } + break; + default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: @@ -2371,6 +2519,27 @@ zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) } /* + * Is one dataset name a child dataset of another? + * + * Needs to handle these cases: + * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" + * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" + * Descendant? No. No. No. Yes. + */ +static boolean_t +is_descendant(const char *ds1, const char *ds2) +{ + size_t d1len = strlen(ds1); + + /* ds2 can't be a descendant if it's smaller */ + if (strlen(ds2) < d1len) + return (B_FALSE); + + /* otherwise, compare strings and verify that there's a '/' char */ + return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); +} + +/* * Given a complete name, return just the portion that refers to the parent. * Can return NULL if this is a pool. */ @@ -2405,6 +2574,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, char *slash; zfs_handle_t *zhp; char errbuf[1024]; + uint64_t is_zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); @@ -2447,9 +2617,12 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, return (zfs_standard_error(hdl, errno, errbuf)); } - *zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); + is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); + if (zoned != NULL) + *zoned = is_zoned; + /* we are in a non-global zone, but parent is in the global zone */ - if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) { + if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); @@ -2581,11 +2754,10 @@ int zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) { int prefix; - uint64_t zoned; char *path_copy; int rc; - if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0) + if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) return (-1); if ((path_copy = strdup(path)) != NULL) { @@ -2699,18 +2871,6 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, /* create the dataset */ ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); - if (ret == 0 && type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(hdl, path); - if (ret) { - (void) zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, - "Volume successfully created, but device links " - "were not created")); - zcmd_free_nvlists(&zc); - return (-1); - } - } - zcmd_free_nvlists(&zc); /* check for failure */ @@ -2773,18 +2933,6 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer) (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) { - /* - * If user doesn't have permissions to unshare volume, then - * abort the request. This would only happen for a - * non-privileged user. - */ - if (zfs_unshare_iscsi(zhp) != 0) { - return (-1); - } - - if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) - return (-1); - zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; @@ -2809,13 +2957,13 @@ struct destroydata { }; static int -zfs_remove_link_cb(zfs_handle_t *zhp, void *arg) +zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; boolean_t closezhp = dd->closezhp; - int rv; + int rv = 0; (void) strlcpy(name, zhp->zfs_name, sizeof (name)); (void) strlcat(name, "@", sizeof (name)); @@ -2827,17 +2975,9 @@ zfs_remove_link_cb(zfs_handle_t *zhp, void *arg) zfs_close(szhp); } - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - (void) zvol_remove_link(zhp->zfs_hdl, name); - /* - * NB: this is simply a best-effort. We don't want to - * return an error, because then we wouldn't visit all - * the volumes. - */ - } - dd->closezhp = B_TRUE; - rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg); + if (!dd->gotone) + rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg); if (closezhp) zfs_close(zhp); return (rv); @@ -2854,7 +2994,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) struct destroydata dd = { 0 }; dd.snapname = snapname; - (void) zfs_remove_link_cb(zhp, &dd); + (void) zfs_check_snap_cb(zhp, &dd); if (!dd.gotone) { return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, @@ -2972,70 +3112,11 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } - } else if (ZFS_IS_VOLUME(zhp)) { - ret = zvol_create_link(zhp->zfs_hdl, target); } return (ret); } -typedef struct promote_data { - char cb_mountpoint[MAXPATHLEN]; - const char *cb_target; - const char *cb_errbuf; - uint64_t cb_pivot_txg; -} promote_data_t; - -static int -promote_snap_cb(zfs_handle_t *zhp, void *data) -{ - promote_data_t *pd = data; - zfs_handle_t *szhp; - char snapname[MAXPATHLEN]; - int rv = 0; - - /* We don't care about snapshots after the pivot point */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) { - zfs_close(zhp); - return (0); - } - - /* Remove the device link if it's a zvol. */ - if (ZFS_IS_VOLUME(zhp)) - (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); - - /* Check for conflicting names */ - (void) strlcpy(snapname, pd->cb_target, sizeof (snapname)); - (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname)); - szhp = make_dataset_handle(zhp->zfs_hdl, snapname); - if (szhp != NULL) { - zfs_close(szhp); - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "snapshot name '%s' from origin \n" - "conflicts with '%s' from target"), - zhp->zfs_name, snapname); - rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf); - } - zfs_close(zhp); - return (rv); -} - -static int -promote_snap_done_cb(zfs_handle_t *zhp, void *data) -{ - promote_data_t *pd = data; - - /* We don't care about snapshots after the pivot point */ - if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) { - /* Create the device link if it's a zvol. */ - if (ZFS_IS_VOLUME(zhp)) - (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); - } - - zfs_close(zhp); - return (0); -} - /* * Promotes the given clone fs to be the clone parent. */ @@ -3045,10 +3126,7 @@ zfs_promote(zfs_handle_t *zhp) libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_cmd_t zc = { 0 }; char parent[MAXPATHLEN]; - char *cp; int ret; - zfs_handle_t *pzhp; - promote_data_t pd; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -3066,29 +3144,7 @@ zfs_promote(zfs_handle_t *zhp) "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } - cp = strchr(parent, '@'); - *cp = '\0'; - - /* Walk the snapshots we will be moving */ - pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); - if (pzhp == NULL) - return (-1); - pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); - zfs_close(pzhp); - pd.cb_target = zhp->zfs_name; - pd.cb_errbuf = errbuf; - pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET); - if (pzhp == NULL) - return (-1); - (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, - sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); - ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd); - if (ret != 0) { - zfs_close(pzhp); - return (-1); - } - /* issue the ioctl */ (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -3097,62 +3153,18 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { int save_errno = errno; - (void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd); - zfs_close(pzhp); - switch (save_errno) { case EEXIST: - /* - * There is a conflicting snapshot name. We - * should have caught this above, but they could - * have renamed something in the mean time. - */ + /* There is a conflicting snapshot name. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "conflicting snapshot name from parent '%s'"), - parent); + "conflicting snapshot '%s' from parent '%s'"), + zc.zc_string, parent); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(hdl, save_errno, errbuf)); } - } else { - (void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd); - } - - zfs_close(pzhp); - return (ret); -} - -struct createdata { - const char *cd_snapname; - int cd_ifexists; -}; - -static int -zfs_create_link_cb(zfs_handle_t *zhp, void *arg) -{ - struct createdata *cd = arg; - int ret; - - if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - char name[MAXPATHLEN]; - - (void) strlcpy(name, zhp->zfs_name, sizeof (name)); - (void) strlcat(name, "@", sizeof (name)); - (void) strlcat(name, cd->cd_snapname, sizeof (name)); - (void) zvol_create_link_common(zhp->zfs_hdl, name, - cd->cd_ifexists); - /* - * NB: this is simply a best-effort. We don't want to - * return an error, because then we wouldn't visit all - * the volumes. - */ } - - ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd); - - zfs_close(zhp); - return (ret); } @@ -3216,31 +3228,11 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, * if it was recursive, the one that actually failed will be in * zc.zc_name. */ - if (ret != 0) + if (ret != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); - - if (ret == 0 && recursive) { - struct createdata cd; - - cd.cd_snapname = delim + 1; - cd.cd_ifexists = B_FALSE; - (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd); - } - if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(zhp->zfs_hdl, path); - if (ret != 0) { - (void) zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, - "Volume successfully snapshotted, but device links " - "were not created")); - zfs_close(zhp); - return (-1); - } - } - - if (ret != 0) (void) zfs_standard_error(hdl, errno, errbuf); + } zfs_close(zhp); @@ -3343,8 +3335,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { - if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) - return (-1); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); @@ -3382,10 +3372,6 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { - if (err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name)) { - zfs_close(zhp); - return (err); - } if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) @@ -3500,14 +3486,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); - uint64_t unused; /* validate parents */ - if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0) + if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) return (-1); - (void) parent_name(target, parent, sizeof (parent)); - /* make sure we're in the same pool */ verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || @@ -3518,10 +3501,9 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) } /* new name cannot be a child of the current dataset name */ - if (strncmp(parent, zhp->zfs_name, - strlen(zhp->zfs_name)) == 0) { + if (is_descendant(zhp->zfs_name, target)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "New dataset name cannot be a descendent of " + "New dataset name cannot be a descendant of " "current dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } @@ -3538,7 +3520,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) } if (recursive) { - struct destroydata dd; parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { @@ -3553,15 +3534,6 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) goto error; } - dd.snapname = delim + 1; - dd.gotone = B_FALSE; - dd.closezhp = B_TRUE; - - /* We remove any zvol links prior to renaming them */ - ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd); - if (ret) { - goto error; - } } else { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL) return (-1); @@ -3609,27 +3581,10 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ - if (recursive) { - struct createdata cd; - - /* only create links for datasets that had existed */ - cd.cd_snapname = delim + 1; - cd.cd_ifexists = B_TRUE; - (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb, - &cd); - } else { + if (!recursive) (void) changelist_postfix(cl); - } } else { - if (recursive) { - struct createdata cd; - - /* only create links for datasets that had existed */ - cd.cd_snapname = strchr(target, '@') + 1; - cd.cd_ifexists = B_TRUE; - ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb, - &cd); - } else { + if (!recursive) { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } @@ -3648,143 +3603,19 @@ error: return (ret); } -/* - * Given a zvol dataset, issue the ioctl to create the appropriate minor node, - * poke devfsadm to create the /dev link, and then wait for the link to appear. - */ -int -zvol_create_link(libzfs_handle_t *hdl, const char *dataset) -{ - return (zvol_create_link_common(hdl, dataset, B_FALSE)); -} - -static int -zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) -{ - zfs_cmd_t zc = { 0 }; - di_devlink_handle_t dhdl; - priv_set_t *priv_effective; - int privileged; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - - /* - * Issue the appropriate ioctl. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { - switch (errno) { - case EEXIST: - /* - * Silently ignore the case where the link already - * exists. This allows 'zfs volinit' to be run multiple - * times without errors. - */ - return (0); - - case ENOENT: - /* - * Dataset does not exist in the kernel. If we - * don't care (see zfs_rename), then ignore the - * error quietly. - */ - if (ifexists) { - return (0); - } - - /* FALLTHROUGH */ - - default: - return (zfs_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot create device links " - "for '%s'"), dataset)); - } - } - - /* - * If privileged call devfsadm and wait for the links to - * magically appear. - * Otherwise, print out an informational message. - */ - - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - - if (privileged) { - if ((dhdl = di_devlink_init(ZFS_DRIVER, - DI_MAKE_LINK)) == NULL) { - zfs_error_aux(hdl, strerror(errno)); - (void) zfs_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot create device links " - "for '%s'"), dataset); - (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); - return (-1); - } else { - (void) di_devlink_fini(&dhdl); - } - } else { - char pathname[MAXPATHLEN]; - struct stat64 statbuf; - int i; - -#define MAX_WAIT 10 - - /* - * This is the poor mans way of waiting for the link - * to show up. If after 10 seconds we still don't - * have it, then print out a message. - */ - (void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s", - dataset); - - for (i = 0; i != MAX_WAIT; i++) { - if (stat64(pathname, &statbuf) == 0) - break; - (void) sleep(1); - } - if (i == MAX_WAIT) - (void) printf(gettext("%s may not be immediately " - "available\n"), pathname); - } - - return (0); -} - -/* - * Remove a minor node for the given zvol and the associated /dev links. - */ -int -zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) +nvlist_t * +zfs_get_user_props(zfs_handle_t *zhp) { - zfs_cmd_t zc = { 0 }; - - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - - if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) { - switch (errno) { - case ENXIO: - /* - * Silently ignore the case where the link no longer - * exists, so that 'zfs volfini' can be run multiple - * times without errors. - */ - return (0); - - default: - return (zfs_standard_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot remove device " - "links for '%s'"), dataset)); - } - } - - return (0); + return (zhp->zfs_user_props); } nvlist_t * -zfs_get_user_props(zfs_handle_t *zhp) +zfs_get_recvd_props(zfs_handle_t *zhp) { - return (zhp->zfs_user_props); + if (zhp->zfs_recvd_props == NULL) + if (get_recvd_props_ioctl(zhp) != 0) + return (NULL); + return (zhp->zfs_recvd_props); } /* @@ -3796,10 +3627,12 @@ zfs_get_user_props(zfs_handle_t *zhp) * for new unique user properties and add them to the list. * * - For non fixed-width properties, keep track of the maximum width seen - * so that we can size the column appropriately. + * so that we can size the column appropriately. If the user has + * requested received property values, we also need to compute the width + * of the RECEIVED column. */ int -zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp) +zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; @@ -3870,12 +3703,24 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp) if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } - } else if (nvlist_lookup_nvlist(userprops, - entry->pl_user_prop, &propval) == 0) { - verify(nvlist_lookup_string(propval, - ZPROP_VALUE, &strval) == 0); - if (strlen(strval) > entry->pl_width) - entry->pl_width = strlen(strval); + if (received && zfs_prop_get_recvd(zhp, + zfs_prop_to_name(entry->pl_prop), + buf, sizeof (buf), B_FALSE) == 0) + if (strlen(buf) > entry->pl_recvd_width) + entry->pl_recvd_width = strlen(buf); + } else { + if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, + &propval) == 0) { + verify(nvlist_lookup_string(propval, + ZPROP_VALUE, &strval) == 0); + if (strlen(strval) > entry->pl_width) + entry->pl_width = strlen(strval); + } + if (received && zfs_prop_get_recvd(zhp, + entry->pl_user_prop, + buf, sizeof (buf), B_FALSE) == 0) + if (strlen(buf) > entry->pl_recvd_width) + entry->pl_recvd_width = strlen(buf); } } @@ -3883,52 +3728,6 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp) } int -zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred) -{ - zfs_cmd_t zc = { 0 }; - nvlist_t *nvp; - gid_t gid; - uid_t uid; - const gid_t *groups; - int group_cnt; - int error; - - if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0) - return (no_memory(hdl)); - - uid = ucred_geteuid(cred); - gid = ucred_getegid(cred); - group_cnt = ucred_getgroups(cred, &groups); - - if (uid == (uid_t)-1 || gid == (uid_t)-1 || group_cnt == (uid_t)-1) - return (1); - - if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) { - nvlist_free(nvp); - return (1); - } - - if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) { - nvlist_free(nvp); - return (1); - } - - if (nvlist_add_uint32_array(nvp, - ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) { - nvlist_free(nvp); - return (1); - } - (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); - - if (zcmd_write_src_nvlist(hdl, &zc, nvp)) - return (-1); - - error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc); - nvlist_free(nvp); - return (error); -} - -int zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, char *resource, void *export, void *sharetab, int sharemax, zfs_share_op_t operation) @@ -3966,9 +3765,11 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr); /* - * We leave user:props in the nvlist, so there will be - * some ZPROP_INVAL. To be extra safe, don't prune - * those. + * User properties will result in ZPROP_INVAL, and since we + * only know how to prune standard ZFS properties, we always + * leave these in the list. This can also happen if we + * encounter an unknown DSL property (when running older + * software, for example). */ if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE) (void) nvlist_remove(zhp->zfs_props, @@ -4097,15 +3898,18 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, - boolean_t recursive) + boolean_t recursive, boolean_t temphold, boolean_t enoent_ok) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - (void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)); + if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) + >= sizeof (zc.zc_string)) + return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; + zc.zc_temphold = temphold; if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; @@ -4117,6 +3921,14 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold '%s@%s'"), zc.zc_name, snapname); switch (errno) { + case E2BIG: + /* + * Temporary tags wind up having the ds object id + * prepended. So even if we passed the length check + * above, it's still possible for the tag to wind + * up being slightly too long. + */ + return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); @@ -4125,6 +3937,10 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EEXIST: return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); + case ENOENT: + if (enoent_ok) + return (0); + /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); } @@ -4133,6 +3949,102 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, return (0); } +struct hold_range_arg { + zfs_handle_t *origin; + const char *fromsnap; + const char *tosnap; + char lastsnapheld[ZFS_MAXNAMELEN]; + const char *tag; + boolean_t temphold; + boolean_t seento; + boolean_t seenfrom; + boolean_t holding; + boolean_t recursive; + snapfilter_cb_t *filter_cb; + void *filter_cb_arg; +}; + +static int +zfs_hold_range_one(zfs_handle_t *zhp, void *arg) +{ + struct hold_range_arg *hra = arg; + const char *thissnap; + int error; + + thissnap = strchr(zfs_get_name(zhp), '@') + 1; + + if (hra->fromsnap && !hra->seenfrom && + strcmp(hra->fromsnap, thissnap) == 0) + hra->seenfrom = B_TRUE; + + /* snap is older or newer than the desired range, ignore it */ + if (hra->seento || !hra->seenfrom) { + zfs_close(zhp); + return (0); + } + + if (!hra->seento && strcmp(hra->tosnap, thissnap) == 0) + hra->seento = B_TRUE; + + if (hra->filter_cb != NULL && + hra->filter_cb(zhp, hra->filter_cb_arg) == B_FALSE) { + zfs_close(zhp); + return (0); + } + + if (hra->holding) { + /* We could be racing with destroy, so ignore ENOENT. */ + error = zfs_hold(hra->origin, thissnap, hra->tag, + hra->recursive, hra->temphold, B_TRUE); + if (error == 0) { + (void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp), + sizeof (hra->lastsnapheld)); + } + } else { + error = zfs_release(hra->origin, thissnap, hra->tag, + hra->recursive); + } + + zfs_close(zhp); + return (error); +} + +/* + * Add a user hold on the set of snapshots starting with fromsnap up to + * and including tosnap. If we're unable to to acquire a particular hold, + * undo any holds up to that point. + */ +int +zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, + const char *tag, boolean_t recursive, boolean_t temphold, + snapfilter_cb_t filter_cb, void *cbarg) +{ + struct hold_range_arg arg = { 0 }; + int error; + + arg.origin = zhp; + arg.fromsnap = fromsnap; + arg.tosnap = tosnap; + arg.tag = tag; + arg.temphold = temphold; + arg.holding = B_TRUE; + arg.recursive = recursive; + arg.seenfrom = (fromsnap == NULL); + arg.filter_cb = filter_cb; + arg.filter_cb_arg = cbarg; + + error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg); + + /* + * Make sure we either hold the entire range or none. + */ + if (error && arg.lastsnapheld[0] != '\0') { + (void) zfs_release_range(zhp, fromsnap, + (const char *)arg.lastsnapheld, tag, recursive); + } + return (error); +} + int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) @@ -4142,7 +4054,9 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); - (void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)); + if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) + >= sizeof (zc.zc_string)) + return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) { @@ -4153,7 +4067,8 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, * zc.zc_name. */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot release '%s@%s'"), zc.zc_name, snapname); + "cannot release '%s' from '%s@%s'"), tag, zc.zc_name, + snapname); switch (errno) { case ESRCH: return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf)); @@ -4170,3 +4085,61 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, return (0); } + +/* + * Release a user hold from the set of snapshots starting with fromsnap + * up to and including tosnap. + */ +int +zfs_release_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, + const char *tag, boolean_t recursive) +{ + struct hold_range_arg arg = { 0 }; + + arg.origin = zhp; + arg.fromsnap = fromsnap; + arg.tosnap = tosnap; + arg.tag = tag; + arg.recursive = recursive; + arg.seenfrom = (fromsnap == NULL); + + return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg)); +} + +uint64_t +zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) +{ + uint64_t numdb; + uint64_t nblocks, volblocksize; + int ncopies; + char *strval; + + if (nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) + ncopies = atoi(strval); + else + ncopies = 1; + if (nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize) != 0) + volblocksize = ZVOL_DEFAULT_BLOCKSIZE; + nblocks = volsize/volblocksize; + /* start with metadnode L0-L6 */ + numdb = 7; + /* calculate number of indirects */ + while (nblocks > 1) { + nblocks += DNODES_PER_LEVEL - 1; + nblocks /= DNODES_PER_LEVEL; + numdb += nblocks; + } + numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); + volsize *= ncopies; + /* + * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't + * compressed, but in practice they compress down to about + * 1100 bytes + */ + numdb *= 1ULL << DN_MAX_INDBLKSHIFT; + volsize += numdb; + return (volsize); +} diff --git a/lib/libzfs/libzfs_fru.c b/lib/libzfs/libzfs_fru.c new file mode 100644 index 000000000..788fa2cfb --- /dev/null +++ b/lib/libzfs/libzfs_fru.c @@ -0,0 +1,452 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <dlfcn.h> +#include <errno.h> +#include <libintl.h> +#include <link.h> +#include <pthread.h> +#include <strings.h> +#include <unistd.h> + +#include <libzfs.h> + +#include <fm/libtopo.h> +#include <sys/fm/protocol.h> +#include <sys/systeminfo.h> + +#include "libzfs_impl.h" + +/* + * This file is responsible for determining the relationship between I/O + * devices paths and physical locations. In the world of MPxIO and external + * enclosures, the device path is not synonymous with the physical location. + * If you remove a drive and insert it into a different slot, it will end up + * with the same path under MPxIO. If you recable storage enclosures, the + * device paths may change. All of this makes it difficult to implement the + * 'autoreplace' property, which is supposed to automatically manage disk + * replacement based on physical slot. + * + * In order to work around these limitations, we have a per-vdev FRU property + * that is the libtopo path (minus disk-specific authority information) to the + * physical location of the device on the system. This is an optional + * property, and is only needed when using the 'autoreplace' property or when + * generating FMA faults against vdevs. + */ + +/* + * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case + * it is not present. We only need this once per library instance, so it is + * not part of the libzfs handle. + */ +static void *_topo_dlhandle; +static topo_hdl_t *(*_topo_open)(int, const char *, int *); +static void (*_topo_close)(topo_hdl_t *); +static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *); +static void (*_topo_snap_release)(topo_hdl_t *); +static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *, + topo_walk_cb_t, void *, int *); +static int (*_topo_walk_step)(topo_walk_t *, int); +static void (*_topo_walk_fini)(topo_walk_t *); +static void (*_topo_hdl_strfree)(topo_hdl_t *, char *); +static char *(*_topo_node_name)(tnode_t *); +static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *, + char **, int *); +static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *); +static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *); +static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *, + const char *); + +#define ZFS_FRU_HASH_SIZE 257 + +static size_t +fru_strhash(const char *key) +{ + ulong_t g, h = 0; + const char *p; + + for (p = key; *p != '\0'; p++) { + h = (h << 4) + *p; + + if ((g = (h & 0xf0000000)) != 0) { + h ^= (g >> 24); + h ^= g; + } + } + + return (h % ZFS_FRU_HASH_SIZE); +} + +static int +libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg) +{ + libzfs_handle_t *hdl = arg; + nvlist_t *fru; + char *devpath, *frustr; + int err; + libzfs_fru_t *frup; + size_t idx; + + /* + * If this is the chassis node, and we don't yet have the system + * chassis ID, then fill in this value now. + */ + if (hdl->libzfs_chassis_id[0] == '\0' && + strcmp(_topo_node_name(tn), "chassis") == 0) { + if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY, + FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) + (void) strlcpy(hdl->libzfs_chassis_id, devpath, + sizeof (hdl->libzfs_chassis_id)); + } + + /* + * Skip non-disk nodes. + */ + if (strcmp(_topo_node_name(tn), "disk") != 0) + return (TOPO_WALK_NEXT); + + /* + * Get the devfs path and FRU. + */ + if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0) + return (TOPO_WALK_NEXT); + + if (libzfs_fru_lookup(hdl, devpath) != NULL) { + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + if (_topo_node_fru(tn, &fru, NULL, &err) != 0) { + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + /* + * Convert the FRU into a string. + */ + if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) { + nvlist_free(fru); + _topo_hdl_strfree(thp, devpath); + return (TOPO_WALK_NEXT); + } + + nvlist_free(fru); + + /* + * Finally, we have a FRU string and device path. Add it to the hash. + */ + if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) { + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + return (TOPO_WALK_NEXT); + } + + if ((frup->zf_device = strdup(devpath)) == NULL || + (frup->zf_fru = strdup(frustr)) == NULL) { + free(frup->zf_device); + free(frup); + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + return (TOPO_WALK_NEXT); + } + + _topo_hdl_strfree(thp, devpath); + _topo_hdl_strfree(thp, frustr); + + idx = fru_strhash(frup->zf_device); + frup->zf_chain = hdl->libzfs_fru_hash[idx]; + hdl->libzfs_fru_hash[idx] = frup; + frup->zf_next = hdl->libzfs_fru_list; + hdl->libzfs_fru_list = frup; + + return (TOPO_WALK_NEXT); +} + +/* + * Called during initialization to setup the dynamic libtopo connection. + */ +#pragma init(libzfs_init_fru) +static void +libzfs_init_fru(void) +{ + char path[MAXPATHLEN]; + char isa[257]; + +#if defined(_LP64) + if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0) + isa[0] = '\0'; +#else + isa[0] = '\0'; +#endif + (void) snprintf(path, sizeof (path), + "/usr/lib/fm/%s/libtopo.so", isa); + + if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL) + return; + + _topo_open = (topo_hdl_t *(*)()) + dlsym(_topo_dlhandle, "topo_open"); + _topo_close = (void (*)()) + dlsym(_topo_dlhandle, "topo_close"); + _topo_snap_hold = (char *(*)()) + dlsym(_topo_dlhandle, "topo_snap_hold"); + _topo_snap_release = (void (*)()) + dlsym(_topo_dlhandle, "topo_snap_release"); + _topo_walk_init = (topo_walk_t *(*)()) + dlsym(_topo_dlhandle, "topo_walk_init"); + _topo_walk_step = (int (*)()) + dlsym(_topo_dlhandle, "topo_walk_step"); + _topo_walk_fini = (void (*)()) + dlsym(_topo_dlhandle, "topo_walk_fini"); + _topo_hdl_strfree = (void (*)()) + dlsym(_topo_dlhandle, "topo_hdl_strfree"); + _topo_node_name = (char *(*)()) + dlsym(_topo_dlhandle, "topo_node_name"); + _topo_prop_get_string = (int (*)()) + dlsym(_topo_dlhandle, "topo_prop_get_string"); + _topo_node_fru = (int (*)()) + dlsym(_topo_dlhandle, "topo_node_fru"); + _topo_fmri_nvl2str = (int (*)()) + dlsym(_topo_dlhandle, "topo_fmri_nvl2str"); + _topo_fmri_strcmp_noauth = (int (*)()) + dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth"); + + if (_topo_open == NULL || _topo_close == NULL || + _topo_snap_hold == NULL || _topo_snap_release == NULL || + _topo_walk_init == NULL || _topo_walk_step == NULL || + _topo_walk_fini == NULL || _topo_hdl_strfree == NULL || + _topo_node_name == NULL || _topo_prop_get_string == NULL || + _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL || + _topo_fmri_strcmp_noauth == NULL) { + (void) dlclose(_topo_dlhandle); + _topo_dlhandle = NULL; + } +} + +/* + * Refresh the mappings from device path -> FMRI. We do this by walking the + * hc topology looking for disk nodes, and recording the io/devfs-path and FRU. + * Note that we strip out the disk-specific authority information (serial, + * part, revision, etc) so that we are left with only the identifying + * characteristics of the slot (hc path and chassis-id). + */ +void +libzfs_fru_refresh(libzfs_handle_t *hdl) +{ + int err; + char *uuid; + topo_hdl_t *thp; + topo_walk_t *twp; + + if (_topo_dlhandle == NULL) + return; + + /* + * Clear the FRU hash and initialize our basic structures. + */ + libzfs_fru_clear(hdl, B_FALSE); + + if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION, + NULL, &err)) == NULL) + return; + + thp = hdl->libzfs_topo_hdl; + + if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL) + return; + + _topo_hdl_strfree(thp, uuid); + + if (hdl->libzfs_fru_hash == NULL && + (hdl->libzfs_fru_hash = + calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL) + return; + + /* + * We now have a topo snapshot, so iterate over the hc topology looking + * for disks to add to the hash. + */ + twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC, + libzfs_fru_gather, hdl, &err); + if (twp != NULL) { + (void) _topo_walk_step(twp, TOPO_WALK_CHILD); + _topo_walk_fini(twp); + } +} + +/* + * Given a devfs path, return the FRU for the device, if known. This will + * automatically call libzfs_fru_refresh() if it hasn't already been called by + * the consumer. The string returned is valid until the next call to + * libzfs_fru_refresh(). + */ +const char * +libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath) +{ + size_t idx = fru_strhash(devpath); + libzfs_fru_t *frup; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (NULL); + + for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; + frup = frup->zf_chain) { + if (strcmp(devpath, frup->zf_device) == 0) + return (frup->zf_fru); + } + + return (NULL); +} + +/* + * Given a fru path, return the device path. This will automatically call + * libzfs_fru_refresh() if it hasn't already been called by the consumer. The + * string returned is valid until the next call to libzfs_fru_refresh(). + */ +const char * +libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) +{ + libzfs_fru_t *frup; + size_t idx; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (NULL); + + for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) { + for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; + frup = frup->zf_next) { + if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, + fru, frup->zf_fru)) + return (frup->zf_device); + } + } + + return (NULL); +} + +/* + * Change the stored FRU for the given vdev. + */ +int +zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) +{ + zfs_cmd_t zc = { 0 }; + + (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); + zc.zc_guid = vdev_guid; + + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0) + return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, + dgettext(TEXT_DOMAIN, "cannot set FRU"))); + + return (0); +} + +/* + * Compare to two FRUs, ignoring any authority information. + */ +boolean_t +libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b) +{ + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_fru_hash == NULL) + return (strcmp(a, b) == 0); + + return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b)); +} + +/* + * This special function checks to see whether the FRU indicates it's supposed + * to be in the system chassis, but the chassis-id doesn't match. This can + * happen in a clustered case, where both head nodes have the same logical + * disk, but opening the device on the other head node is meaningless. + */ +boolean_t +libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru) +{ + const char *chassisid; + size_t len; + + if (hdl->libzfs_fru_hash == NULL) + libzfs_fru_refresh(hdl); + + if (hdl->libzfs_chassis_id[0] == '\0') + return (B_FALSE); + + if (strstr(fru, "/chassis=0/") == NULL) + return (B_FALSE); + + if ((chassisid = strstr(fru, ":chassis-id=")) == NULL) + return (B_FALSE); + + chassisid += 12; + len = strlen(hdl->libzfs_chassis_id); + if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 && + (chassisid[len] == '/' || chassisid[len] == ':')) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Clear memory associated with the FRU hash. + */ +void +libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) +{ + libzfs_fru_t *frup; + + while ((frup = hdl->libzfs_fru_list) != NULL) { + hdl->libzfs_fru_list = frup->zf_next; + free(frup->zf_device); + free(frup->zf_fru); + free(frup); + } + + hdl->libzfs_fru_list = NULL; + + if (hdl->libzfs_topo_hdl != NULL) { + _topo_snap_release(hdl->libzfs_topo_hdl); + _topo_close(hdl->libzfs_topo_hdl); + hdl->libzfs_topo_hdl = NULL; + } + + if (final) { + free(hdl->libzfs_fru_hash); + } else if (hdl->libzfs_fru_hash != NULL) { + bzero(hdl->libzfs_fru_hash, + ZFS_FRU_HASH_SIZE * sizeof (void *)); + } +} diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index d67776889..fd3044b1d 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Pool import support functions. * @@ -41,15 +39,21 @@ * using our derived config, and record the results. */ +#include <ctype.h> #include <devid.h> #include <dirent.h> #include <errno.h> #include <libintl.h> +#include <stddef.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #include <unistd.h> #include <fcntl.h> +#include <sys/vtoc.h> +#include <sys/dktp/fdisk.h> +#include <sys/efi_partition.h> +#include <thread_pool.h> #include <sys/vdev_impl.h> @@ -388,8 +392,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config) } if (err) { - (void) zpool_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot discover pools")); zcmd_free_nvlists(&zc); return (NULL); } @@ -404,6 +406,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config) } /* + * Determine if the vdev id is a hole in the namespace. + */ +boolean_t +vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) +{ + for (int c = 0; c < holes; c++) { + + /* Top-level is a hole */ + if (hole_array[c] == id) + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Convert our list of pools into the definitive set of configurations. We * start by picking the best config for each toplevel vdev. Once that's done, * we assemble the toplevel vdevs into a full config for the pool. We make a @@ -425,17 +442,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) uint64_t version, guid; uint_t children = 0; nvlist_t **child = NULL; + uint_t holes; + uint64_t *hole_array, max_id; uint_t c; boolean_t isactive; uint64_t hostid; nvlist_t *nvl; boolean_t found_one = B_FALSE; + boolean_t valid_top_config = B_FALSE; if (nvlist_alloc(&ret, 0, 0) != 0) goto nomem; for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { - uint64_t id; + uint64_t id, max_txg = 0; if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) goto nomem; @@ -463,6 +483,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } } + /* + * We rely on the fact that the max txg for the + * pool will contain the most up-to-date information + * about the valid top-levels in the vdev namespace. + */ + if (best_txg > max_txg) { + (void) nvlist_remove(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, + ZPOOL_CONFIG_HOLE_ARRAY, + DATA_TYPE_UINT64_ARRAY); + + max_txg = best_txg; + hole_array = NULL; + holes = 0; + max_id = 0; + valid_top_config = B_FALSE; + + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { + verify(nvlist_add_uint64(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + max_id) == 0); + valid_top_config = B_TRUE; + } + + if (nvlist_lookup_uint64_array(tmp, + ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, + &holes) == 0) { + verify(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_HOLE_ARRAY, + hole_array, holes) == 0); + } + } + if (!config_seen) { /* * Copy the relevant pieces of data to the pool @@ -522,6 +578,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, &id) == 0); + if (id >= children) { nvlist_t **newchild; @@ -542,17 +599,82 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } + /* + * If we have information about all the top-levels then + * clean up the nvlist which we've constructed. This + * means removing any extraneous devices that are + * beyond the valid range or adding devices to the end + * of our array which appear to be missing. + */ + if (valid_top_config) { + if (max_id < children) { + for (c = max_id; c < children; c++) + nvlist_free(child[c]); + children = max_id; + } else if (max_id > children) { + nvlist_t **newchild; + + newchild = zfs_alloc(hdl, (max_id) * + sizeof (nvlist_t *)); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + free(child); + child = newchild; + children = max_id; + } + } + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); /* + * The vdev namespace may contain holes as a result of + * device removal. We must add them back into the vdev + * tree before we process any missing devices. + */ + if (holes > 0) { + ASSERT(valid_top_config); + + for (c = 0; c < children; c++) { + nvlist_t *holey; + + if (child[c] != NULL || + !vdev_is_hole(hole_array, holes, c)) + continue; + + if (nvlist_alloc(&holey, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + + /* + * Holes in the namespace are treated as + * "hole" top-level vdevs and have a + * special flag set on them. + */ + if (nvlist_add_string(holey, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_GUID, 0ULL) != 0) + goto nomem; + child[c] = holey; + } + } + + /* * Look for any missing top-level vdevs. If this is the case, * create a faked up 'missing' vdev as a placeholder. We cannot * simply compress the child array, because the kernel performs * certain checks to make sure the vdev IDs match their location * in the configuration. */ - for (c = 0; c < children; c++) + for (c = 0; c < children; c++) { if (child[c] == NULL) { nvlist_t *missing; if (nvlist_alloc(&missing, NV_UNIQUE_NAME, @@ -570,6 +692,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } child[c] = missing; } + } /* * Put all of this pool's top-level vdevs into a root vdev. @@ -636,8 +759,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) continue; } - if ((nvl = refresh_config(hdl, config)) == NULL) - goto error; + if ((nvl = refresh_config(hdl, config)) == NULL) { + nvlist_free(config); + config = NULL; + continue; + } nvlist_free(config); config = nvl; @@ -777,6 +903,212 @@ zpool_read_label(int fd, nvlist_t **config) return (0); } +typedef struct rdsk_node { + char *rn_name; + int rn_dfd; + libzfs_handle_t *rn_hdl; + nvlist_t *rn_config; + avl_tree_t *rn_avl; + avl_node_t rn_node; + boolean_t rn_nozpool; +} rdsk_node_t; + +static int +slice_cache_compare(const void *arg1, const void *arg2) +{ + const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; + const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; + char *nm1slice, *nm2slice; + int rv; + + /* + * slices zero and two are the most likely to provide results, + * so put those first + */ + nm1slice = strstr(nm1, "s0"); + nm2slice = strstr(nm2, "s0"); + if (nm1slice && !nm2slice) { + return (-1); + } + if (!nm1slice && nm2slice) { + return (1); + } + nm1slice = strstr(nm1, "s2"); + nm2slice = strstr(nm2, "s2"); + if (nm1slice && !nm2slice) { + return (-1); + } + if (!nm1slice && nm2slice) { + return (1); + } + + rv = strcmp(nm1, nm2); + if (rv == 0) + return (0); + return (rv > 0 ? 1 : -1); +} + +static void +check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, + diskaddr_t size, uint_t blksz) +{ + rdsk_node_t tmpnode; + rdsk_node_t *node; + char sname[MAXNAMELEN]; + + tmpnode.rn_name = &sname[0]; + (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", + diskname, partno); + /* + * protect against division by zero for disk labels that + * contain a bogus sector size + */ + if (blksz == 0) + blksz = DEV_BSIZE; + /* too small to contain a zpool? */ + if ((size < (SPA_MINDEVSIZE / blksz)) && + (node = avl_find(r, &tmpnode, NULL))) + node->rn_nozpool = B_TRUE; +} + +static void +nozpool_all_slices(avl_tree_t *r, const char *sname) +{ + char diskname[MAXNAMELEN]; + char *ptr; + int i; + + (void) strncpy(diskname, sname, MAXNAMELEN); + if (((ptr = strrchr(diskname, 's')) == NULL) && + ((ptr = strrchr(diskname, 'p')) == NULL)) + return; + ptr[0] = 's'; + ptr[1] = '\0'; + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, 0, 1); + ptr[0] = 'p'; + for (i = 0; i <= FD_NUMPART; i++) + check_one_slice(r, diskname, i, 0, 1); +} + +static void +check_slices(avl_tree_t *r, int fd, const char *sname) +{ + struct extvtoc vtoc; + struct dk_gpt *gpt; + char diskname[MAXNAMELEN]; + char *ptr; + int i; + + (void) strncpy(diskname, sname, MAXNAMELEN); + if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) + return; + ptr[1] = '\0'; + + if (read_extvtoc(fd, &vtoc) >= 0) { + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, + vtoc.v_part[i].p_size, vtoc.v_sectorsz); + } else if (efi_alloc_and_read(fd, &gpt) >= 0) { + /* + * on x86 we'll still have leftover links that point + * to slices s[9-15], so use NDKMAP instead + */ + for (i = 0; i < NDKMAP; i++) + check_one_slice(r, diskname, i, + gpt->efi_parts[i].p_size, gpt->efi_lbasize); + /* nodes p[1-4] are never used with EFI labels */ + ptr[0] = 'p'; + for (i = 1; i <= FD_NUMPART; i++) + check_one_slice(r, diskname, i, 0, 1); + efi_free(gpt); + } +} + +static void +zpool_open_func(void *arg) +{ + rdsk_node_t *rn = arg; + struct stat64 statbuf; + nvlist_t *config; + int fd; + + if (rn->rn_nozpool) + return; + if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { + /* symlink to a device that's no longer there */ + if (errno == ENOENT) + nozpool_all_slices(rn->rn_avl, rn->rn_name); + return; + } + /* + * Ignore failed stats. We only want regular + * files, character devs and block devs. + */ + if (fstat64(fd, &statbuf) != 0 || + (!S_ISREG(statbuf.st_mode) && + !S_ISCHR(statbuf.st_mode) && + !S_ISBLK(statbuf.st_mode))) { + (void) close(fd); + return; + } + /* this file is too small to hold a zpool */ + if (S_ISREG(statbuf.st_mode) && + statbuf.st_size < SPA_MINDEVSIZE) { + (void) close(fd); + return; + } else if (!S_ISREG(statbuf.st_mode)) { + /* + * Try to read the disk label first so we don't have to + * open a bunch of minor nodes that can't have a zpool. + */ + check_slices(rn->rn_avl, fd, rn->rn_name); + } + + if ((zpool_read_label(fd, &config)) != 0) { + (void) close(fd); + (void) no_memory(rn->rn_hdl); + return; + } + (void) close(fd); + + + rn->rn_config = config; + if (config != NULL) { + assert(rn->rn_nozpool == B_FALSE); + } +} + +/* + * Given a file descriptor, clear (zero) the label information. This function + * is currently only used in the appliance stack as part of the ZFS sysevent + * module. + */ +int +zpool_clear_label(int fd) +{ + struct stat64 statbuf; + int l; + vdev_label_t *label; + uint64_t size; + + if (fstat64(fd, &statbuf) == -1) + return (0); + size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); + + if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) + return (-1); + + for (l = 0; l < VDEV_LABELS; l++) { + if (pwrite64(fd, label, sizeof (vdev_label_t), + label_offset(size, l)) != sizeof (vdev_label_t)) + return (-1); + } + + free(label); + return (0); +} + /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. If no args are @@ -785,30 +1117,28 @@ zpool_read_label(int fd, nvlist_t **config) * to import a specific pool. */ static nvlist_t * -zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, - boolean_t active_ok, char *poolname, uint64_t guid) +zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) { - int i; + int i, dirs = iarg->paths; DIR *dirp = NULL; struct dirent64 *dp; char path[MAXPATHLEN]; - char *end; + char *end, **dir = iarg->path; size_t pathleft; - struct stat64 statbuf; - nvlist_t *ret = NULL, *config; + nvlist_t *ret = NULL; static char *default_dir = "/dev/dsk"; - int fd; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; + avl_tree_t slice_cache; + rdsk_node_t *slice; + void *cookie; - verify(poolname == NULL || guid == 0); - - if (argc == 0) { - argc = 1; - argv = &default_dir; + if (dirs == 0) { + dirs = 1; + dir = &default_dir; } /* @@ -816,15 +1146,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, * possible device, organizing the information according to pool GUID * and toplevel GUID. */ - for (i = 0; i < argc; i++) { + for (i = 0; i < dirs; i++) { + tpool_t *t; char *rdsk; int dfd; /* use realpath to normalize the path */ - if (realpath(argv[i], path) == 0) { + if (realpath(dir[i], path) == 0) { (void) zfs_error_fmt(hdl, EZFS_BADPATH, - dgettext(TEXT_DOMAIN, "cannot open '%s'"), - argv[i]); + dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); goto error; } end = &path[strlen(path)]; @@ -851,6 +1181,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, goto error; } + avl_create(&slice_cache, slice_cache_compare, + sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); /* * This is not MT-safe, but we have no MT consumers of libzfs */ @@ -860,46 +1192,53 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; - if ((fd = openat64(dfd, name, O_RDONLY)) < 0) - continue; - - /* - * Ignore failed stats. We only want regular - * files, character devs and block devs. - */ - if (fstat64(fd, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && - !S_ISCHR(statbuf.st_mode) && - !S_ISBLK(statbuf.st_mode))) { - (void) close(fd); - continue; - } - - if ((zpool_read_label(fd, &config)) != 0) { - (void) close(fd); - (void) no_memory(hdl); - goto error; - } - - (void) close(fd); - - if (config != NULL) { + slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); + slice->rn_name = zfs_strdup(hdl, name); + slice->rn_avl = &slice_cache; + slice->rn_dfd = dfd; + slice->rn_hdl = hdl; + slice->rn_nozpool = B_FALSE; + avl_add(&slice_cache, slice); + } + /* + * create a thread pool to do all of this in parallel; + * rn_nozpool is not protected, so this is racy in that + * multiple tasks could decide that the same slice can + * not hold a zpool, which is benign. Also choose + * double the number of processors; we hold a lot of + * locks in the kernel, so going beyond this doesn't + * buy us much. + */ + t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), + 0, NULL); + for (slice = avl_first(&slice_cache); slice; + (slice = avl_walk(&slice_cache, slice, + AVL_AFTER))) + (void) tpool_dispatch(t, zpool_open_func, slice); + tpool_wait(t); + tpool_destroy(t); + + cookie = NULL; + while ((slice = avl_destroy_nodes(&slice_cache, + &cookie)) != NULL) { + if (slice->rn_config != NULL) { + nvlist_t *config = slice->rn_config; boolean_t matched = B_TRUE; - if (poolname != NULL) { + if (iarg->poolname != NULL) { char *pname; matched = nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0 && - strcmp(poolname, pname) == 0; - } else if (guid != 0) { + strcmp(iarg->poolname, pname) == 0; + } else if (iarg->guid != 0) { uint64_t this_guid; matched = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 && - guid == this_guid; + iarg->guid == this_guid; } if (!matched) { nvlist_free(config); @@ -907,17 +1246,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv, continue; } /* use the non-raw path for the config */ - (void) strlcpy(end, name, pathleft); + (void) strlcpy(end, slice->rn_name, pathleft); if (add_config(hdl, &pools, path, config) != 0) goto error; } + free(slice->rn_name); + free(slice); } + avl_destroy(&slice_cache); (void) closedir(dirp); dirp = NULL; } - ret = get_configs(hdl, &pools, active_ok); + ret = get_configs(hdl, &pools, iarg->can_be_active); error: for (pe = pools.pools; pe != NULL; pe = penext) { @@ -951,27 +1293,12 @@ error: nvlist_t * zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) { - return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0)); -} + importargs_t iarg = { 0 }; -nvlist_t * -zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv, - char *pool) -{ - return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0)); -} + iarg.paths = argc; + iarg.path = argv; -nvlist_t * -zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv, - uint64_t guid) -{ - return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid)); -} - -nvlist_t * -zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv) -{ - return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0)); + return (zpool_find_import_impl(hdl, &iarg)); } /* @@ -1093,6 +1420,46 @@ zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, return (pools); } +static int +name_or_guid_exists(zpool_handle_t *zhp, void *data) +{ + importargs_t *import = data; + int found = 0; + + if (import->poolname != NULL) { + char *pool_name; + + verify(nvlist_lookup_string(zhp->zpool_config, + ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); + if (strcmp(pool_name, import->poolname) == 0) + found = 1; + } else { + uint64_t pool_guid; + + verify(nvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); + if (pool_guid == import->guid) + found = 1; + } + + zpool_close(zhp); + return (found); +} + +nvlist_t * +zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) +{ + verify(import->poolname == NULL || import->guid == 0); + + if (import->unique) + import->exists = zpool_iter(hdl, name_or_guid_exists, import); + + if (import->cachefile != NULL) + return (zpool_find_import_cached(hdl, import->cachefile, + import->poolname, import->guid)); + + return (zpool_find_import_impl(hdl, import)); +} boolean_t find_guid(nvlist_t *nv, uint64_t guid) diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 7810e5d68..0675ec229 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -44,17 +43,14 @@ * * zfs_is_shared_nfs() * zfs_is_shared_smb() - * zfs_is_shared_iscsi() * zfs_share_proto() * zfs_shareall(); - * zfs_share_iscsi() * zfs_unshare_nfs() * zfs_unshare_smb() * zfs_unshareall_nfs() * zfs_unshareall_smb() * zfs_unshareall() * zfs_unshareall_bypath() - * zfs_unshare_iscsi() * * The following functions are available for pool consumers, and will * mount/unmount and share/unshare all datasets within pool: @@ -89,11 +85,6 @@ static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); -static int (*iscsitgt_zfs_share)(const char *); -static int (*iscsitgt_zfs_unshare)(const char *); -static int (*iscsitgt_zfs_is_shared)(const char *); -static int (*iscsitgt_svc_online)(); - /* * The share protocols table must be in the same order as the zfs_share_prot_t * enum in libzfs_impl.h @@ -125,29 +116,6 @@ zfs_share_proto_t share_all_proto[] = { PROTO_END }; -#pragma init(zfs_iscsi_init) -static void -zfs_iscsi_init(void) -{ - void *libiscsitgt; - - if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1", - RTLD_LAZY | RTLD_GLOBAL)) == NULL || - (iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt, - "iscsitgt_zfs_share")) == NULL || - (iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt, - "iscsitgt_zfs_unshare")) == NULL || - (iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt, - "iscsitgt_zfs_is_shared")) == NULL || - (iscsitgt_svc_online = (int (*)(const char *))dlsym(libiscsitgt, - "iscsitgt_svc_online")) == NULL) { - iscsitgt_zfs_share = NULL; - iscsitgt_zfs_unshare = NULL; - iscsitgt_zfs_is_shared = NULL; - iscsitgt_svc_online = NULL; - } -} - /* * Search the sharetab for the given mountpoint and protocol, returning * a zfs_share_type_t value. @@ -345,6 +313,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) } else if (errno == EPERM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Insufficient privileges")); + } else if (errno == ENOTSUP) { + char buf[256]; + int spa_version; + + VERIFY(zfs_spa_version(zhp, &spa_version) == 0); + (void) snprintf(buf, sizeof (buf), + dgettext(TEXT_DOMAIN, "Can't mount a version %lld " + "file system on a version %d pool. Pool must be" + " upgraded to mount this file system."), + (u_longlong_t)zfs_prop_get_int(zhp, + ZFS_PROP_VERSION), spa_version); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf)); } else { zfs_error_aux(hdl, strerror(errno)); } @@ -445,7 +425,7 @@ zfs_is_shared(zfs_handle_t *zhp) zfs_share_proto_t *curr_proto; if (ZFS_IS_VOLUME(zhp)) - return (zfs_is_shared_iscsi(zhp)); + return (B_FALSE); for (curr_proto = share_all_proto; *curr_proto != PROTO_END; curr_proto++) @@ -458,7 +438,7 @@ int zfs_share(zfs_handle_t *zhp) { if (ZFS_IS_VOLUME(zhp)) - return (zfs_share_iscsi(zhp)); + return (0); return (zfs_share_proto(zhp, share_all_proto)); } @@ -467,7 +447,7 @@ int zfs_unshare(zfs_handle_t *zhp) { if (ZFS_IS_VOLUME(zhp)) - return (zfs_unshare_iscsi(zhp)); + return (0); return (zfs_unshareall(zhp)); } @@ -999,81 +979,6 @@ remove_mountpoint(zfs_handle_t *zhp) } } -boolean_t -zfs_is_shared_iscsi(zfs_handle_t *zhp) -{ - - /* - * If iscsi deamon isn't running then we aren't shared - */ - if (iscsitgt_svc_online && iscsitgt_svc_online() == 1) - return (B_FALSE); - else - return (iscsitgt_zfs_is_shared != NULL && - iscsitgt_zfs_is_shared(zhp->zfs_name) != 0); -} - -int -zfs_share_iscsi(zfs_handle_t *zhp) -{ - char shareopts[ZFS_MAXPROPLEN]; - const char *dataset = zhp->zfs_name; - libzfs_handle_t *hdl = zhp->zfs_hdl; - - /* - * Return success if there are no share options. - */ - if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts, - sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 || - strcmp(shareopts, "off") == 0) - return (0); - - if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0) { - int error = EZFS_SHAREISCSIFAILED; - - /* - * If service isn't availabele and EPERM was - * returned then use special error. - */ - if (iscsitgt_svc_online && errno == EPERM && - (iscsitgt_svc_online() != 0)) - error = EZFS_ISCSISVCUNAVAIL; - - return (zfs_error_fmt(hdl, error, - dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset)); - } - - return (0); -} - -int -zfs_unshare_iscsi(zfs_handle_t *zhp) -{ - const char *dataset = zfs_get_name(zhp); - libzfs_handle_t *hdl = zhp->zfs_hdl; - - /* - * Return if the volume is not shared - */ - if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI) - return (0); - - /* - * If this fails with ENODEV it indicates that zvol wasn't shared so - * we should return success in that case. - */ - if (iscsitgt_zfs_unshare == NULL || - (iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV)) { - if (errno == EPERM) - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "Insufficient privileges to unshare iscsi")); - return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED, - dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset)); - } - - return (0); -} - typedef struct mount_cbdata { zfs_handle_t **cb_datasets; int cb_used; @@ -1215,28 +1120,6 @@ out: return (ret); } - -static int -zvol_cb(const char *dataset, void *data) -{ - libzfs_handle_t *hdl = data; - zfs_handle_t *zhp; - - /* - * Ignore snapshots and ignore failures from non-existant datasets. - */ - if (strchr(dataset, '@') != NULL || - (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL) - return (0); - - if (zfs_unshare_iscsi(zhp) != 0) - return (-1); - - zfs_close(zhp); - - return (0); -} - static int mountpoint_compare(const void *a, const void *b) { @@ -1246,6 +1129,8 @@ mountpoint_compare(const void *a, const void *b) return (strcmp(mountb, mounta)); } +/* alias for 2002/240 */ +#pragma weak zpool_unmount_datasets = zpool_disable_datasets /* * Unshare and unmount all datasets within the given pool. We don't want to * rely on traversing the DSL to discover the filesystems within the pool, @@ -1253,7 +1138,6 @@ mountpoint_compare(const void *a, const void *b) * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and * gather all the filesystems that are currently mounted. */ -#pragma weak zpool_unmount_datasets = zpool_disable_datasets int zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) { @@ -1267,12 +1151,6 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) int ret = -1; int flags = (force ? MS_FORCE : 0); - /* - * First unshare all zvols. - */ - if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0) - return (-1); - namelen = strlen(zhp->zpool_name); rewind(hdl->libzfs_mnttab); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index fd734d8b4..7836e5873 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -20,33 +20,27 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#include <alloca.h> -#include <assert.h> #include <ctype.h> #include <errno.h> #include <devid.h> -#include <dirent.h> #include <fcntl.h> #include <libintl.h> #include <stdio.h> #include <stdlib.h> #include <strings.h> #include <unistd.h> -#include <zone.h> #include <sys/efi_partition.h> #include <sys/vtoc.h> #include <sys/zfs_ioctl.h> -#include <sys/zio.h> -#include <strings.h> #include <dlfcn.h> #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" +#include "zfs_comutil.h" static int read_efi_label(nvlist_t *config, diskaddr_t *sb); @@ -193,6 +187,8 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) case VDEV_STATE_CANT_OPEN: if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) return (gettext("FAULTED")); + else if (aux == VDEV_AUX_SPLIT_POOL) + return (gettext("SPLIT")); else return (gettext("UNAVAIL")); case VDEV_STATE_FAULTED: @@ -273,8 +269,8 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, switch (prop) { case ZPOOL_PROP_SIZE: - case ZPOOL_PROP_USED: - case ZPOOL_PROP_AVAILABLE: + case ZPOOL_PROP_ALLOCATED: + case ZPOOL_PROP_FREE: (void) zfs_nicenum(intval, buf, len); break; @@ -283,11 +279,18 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, (u_longlong_t)intval); break; + case ZPOOL_PROP_DEDUPRATIO: + (void) snprintf(buf, len, "%llu.%02llux", + (u_longlong_t)(intval / 100), + (u_longlong_t)(intval % 100)); + break; + case ZPOOL_PROP_HEALTH: verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); verify(nvlist_lookup_uint64_array(nvroot, - ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0); + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) + == 0); (void) strlcpy(buf, zpool_state_to_name(intval, vs->vs_aux), len); @@ -1004,9 +1007,6 @@ zpool_destroy(zpool_handle_t *zhp) ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); - if (zpool_remove_zvol_links(zhp) != 0) - return (-1); - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { @@ -1072,7 +1072,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device '%s' contains an EFI label and " "cannot be used on root pools."), - zpool_vdev_name(hdl, NULL, spares[s])); + zpool_vdev_name(hdl, NULL, spares[s], + B_FALSE)); return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); } } @@ -1167,9 +1168,6 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) zfs_cmd_t zc = { 0 }; char msg[1024]; - if (zpool_remove_zvol_links(zhp) != 0) - return (-1); - (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot export '%s'"), zhp->zpool_name); @@ -1208,6 +1206,127 @@ zpool_export_force(zpool_handle_t *zhp) return (zpool_export_common(zhp, B_TRUE, B_TRUE)); } +static void +zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, + nvlist_t *rbi) +{ + uint64_t rewindto; + int64_t loss = -1; + struct tm t; + char timestr[128]; + + if (!hdl->libzfs_printerr || rbi == NULL) + return; + + if (nvlist_lookup_uint64(rbi, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) + return; + (void) nvlist_lookup_int64(rbi, ZPOOL_CONFIG_REWIND_TIME, &loss); + + if (localtime_r((time_t *)&rewindto, &t) != NULL && + strftime(timestr, 128, 0, &t) != 0) { + if (dryrun) { + (void) printf(dgettext(TEXT_DOMAIN, + "Would be able to return %s " + "to its state as of %s.\n"), + name, timestr); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "Pool %s returned to its state as of %s.\n"), + name, timestr); + } + if (loss > 120) { + (void) printf(dgettext(TEXT_DOMAIN, + "%s approximately %lld "), + dryrun ? "Would discard" : "Discarded", + (loss + 30) / 60); + (void) printf(dgettext(TEXT_DOMAIN, + "minutes of transactions.\n")); + } else if (loss > 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "%s approximately %lld "), + dryrun ? "Would discard" : "Discarded", loss); + (void) printf(dgettext(TEXT_DOMAIN, + "seconds of transactions.\n")); + } + } +} + +void +zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, + nvlist_t *config) +{ + int64_t loss = -1; + uint64_t edata = UINT64_MAX; + uint64_t rewindto; + struct tm t; + char timestr[128]; + + if (!hdl->libzfs_printerr) + return; + + if (reason >= 0) + (void) printf(dgettext(TEXT_DOMAIN, "action: ")); + else + (void) printf(dgettext(TEXT_DOMAIN, "\t")); + + /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ + if (nvlist_lookup_uint64(config, + ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) + goto no_info; + + (void) nvlist_lookup_int64(config, ZPOOL_CONFIG_REWIND_TIME, &loss); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_LOAD_DATA_ERRORS, + &edata); + + (void) printf(dgettext(TEXT_DOMAIN, + "Recovery is possible, but will result in some data loss.\n")); + + if (localtime_r((time_t *)&rewindto, &t) != NULL && + strftime(timestr, 128, 0, &t) != 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "\tReturning the pool to its state as of %s\n" + "\tshould correct the problem. "), + timestr); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "\tReverting the pool to an earlier state " + "should correct the problem.\n\t")); + } + + if (loss > 120) { + (void) printf(dgettext(TEXT_DOMAIN, + "Approximately %lld minutes of data\n" + "\tmust be discarded, irreversibly. "), (loss + 30) / 60); + } else if (loss > 0) { + (void) printf(dgettext(TEXT_DOMAIN, + "Approximately %lld seconds of data\n" + "\tmust be discarded, irreversibly. "), loss); + } + if (edata != 0 && edata != UINT64_MAX) { + if (edata == 1) { + (void) printf(dgettext(TEXT_DOMAIN, + "After rewind, at least\n" + "\tone persistent user-data error will remain. ")); + } else { + (void) printf(dgettext(TEXT_DOMAIN, + "After rewind, several\n" + "\tpersistent user-data errors will remain. ")); + } + } + (void) printf(dgettext(TEXT_DOMAIN, + "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), + reason >= 0 ? "clear" : "import", name); + + (void) printf(dgettext(TEXT_DOMAIN, + "A scrub of the pool\n" + "\tis strongly recommended after recovery.\n")); + return; + +no_info: + (void) printf(dgettext(TEXT_DOMAIN, + "Destroy and re-create the pool from\n\ta backup source.\n")); +} + /* * zpool_import() is a contracted interface. Should be kept the same * if possible. @@ -1257,8 +1376,11 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_t *props, boolean_t importfaulted) { zfs_cmd_t zc = { 0 }; + zpool_rewind_policy_t policy; + nvlist_t *nvi = NULL; char *thename; char *origname; + uint64_t returned_size; int ret; char errbuf[1024]; @@ -1302,11 +1424,30 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_free(props); return (-1); } + returned_size = zc.zc_nvlist_conf_size + 512; + if (zcmd_alloc_dst_nvlist(hdl, &zc, returned_size) != 0) { + nvlist_free(props); + return (-1); + } zc.zc_cookie = (uint64_t)importfaulted; ret = 0; if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) { char desc[1024]; + + (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); + zpool_get_rewind_policy(config, &policy); + /* + * Dry-run failed, but we print out what success + * looks like if we found a best txg + */ + if ((policy.zrp_request & ZPOOL_TRY_REWIND) && nvi) { + zpool_rewind_exclaim(hdl, newname ? origname : thename, + B_TRUE, nvi); + nvlist_free(nvi); + return (-1); + } + if (newname == NULL) (void) snprintf(desc, sizeof (desc), dgettext(TEXT_DOMAIN, "cannot import '%s'"), @@ -1328,8 +1469,19 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); break; + case EROFS: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "one or more devices is read only")); + (void) zfs_error(hdl, EZFS_BADDEV, desc); + break; + default: + (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); (void) zpool_standard_error(hdl, errno, desc); + zpool_explain_recover(hdl, + newname ? origname : thename, -errno, nvi); + nvlist_free(nvi); + break; } ret = -1; @@ -1339,13 +1491,20 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, /* * This should never fail, but play it safe anyway. */ - if (zpool_open_silent(hdl, thename, &zhp) != 0) { + if (zpool_open_silent(hdl, thename, &zhp) != 0) ret = -1; - } else if (zhp != NULL) { - ret = zpool_create_zvol_links(zhp); + else if (zhp != NULL) zpool_close(zhp); + (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); + zpool_get_rewind_policy(config, &policy); + if (policy.zrp_request & + (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { + zpool_rewind_exclaim(hdl, newname ? origname : thename, + ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), + nvi); } - + nvlist_free(nvi); + return (0); } zcmd_free_nvlists(&zc); @@ -1355,28 +1514,83 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, } /* - * Scrub the pool. + * Scan the pool. */ int -zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type) +zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) { zfs_cmd_t zc = { 0 }; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = type; + zc.zc_cookie = func; - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0) + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || + (errno == ENOENT && func != POOL_SCAN_NONE)) return (0); - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); + if (func == POOL_SCAN_SCRUB) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); + } else if (func == POOL_SCAN_NONE) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), + zc.zc_name); + } else { + assert(!"unexpected result"); + } - if (errno == EBUSY) - return (zfs_error(hdl, EZFS_RESILVERING, msg)); - else + if (errno == EBUSY) { + nvlist_t *nvroot; + pool_scan_stat_t *ps = NULL; + uint_t psc; + + verify(nvlist_lookup_nvlist(zhp->zpool_config, + ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); + if (ps && ps->pss_func == POOL_SCAN_SCRUB) + return (zfs_error(hdl, EZFS_SCRUBBING, msg)); + else + return (zfs_error(hdl, EZFS_RESILVERING, msg)); + } else if (errno == ENOENT) { + return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); + } else { return (zpool_standard_error(hdl, errno, msg)); + } +} + +/* + * This provides a very minimal check whether a given string is likely a + * c#t#d# style string. Users of this are expected to do their own + * verification of the s# part. + */ +#define CTD_CHECK(str) (str && str[0] == 'c' && isdigit(str[1])) + +/* + * More elaborate version for ones which may start with "/dev/dsk/" + * and the like. + */ +static int +ctd_check_path(char *str) { + /* + * If it starts with a slash, check the last component. + */ + if (str && str[0] == '/') { + char *tmp = strrchr(str, '/'); + + /* + * If it ends in "/old", check the second-to-last + * component of the string instead. + */ + if (tmp != str && strcmp(tmp, "/old") == 0) { + for (tmp--; *tmp != '/'; tmp--) + ; + } + str = tmp + 1; + } + return (CTD_CHECK(str)); } /* @@ -1433,25 +1647,99 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, break; /* - * Search for the requested value. We special case the search - * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise, - * all other searches are simple string compares. + * Search for the requested value. Special cases: + * + * - ZPOOL_CONFIG_PATH for whole disk entries. These end in + * "s0" or "s0/old". The "s0" part is hidden from the user, + * but included in the string, so this matches around it. + * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). + * + * Otherwise, all other searches are simple string compares. */ - if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) { + if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && + ctd_check_path(val)) { uint64_t wholedisk = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); if (wholedisk) { + int slen = strlen(srchval); + int vlen = strlen(val); + + if (slen != vlen - 2) + break; + + /* + * make_leaf_vdev() should only set + * wholedisk for ZPOOL_CONFIG_PATHs which + * will include "/dev/dsk/", giving plenty of + * room for the indices used next. + */ + ASSERT(vlen >= 6); + /* - * For whole disks, the internal path has 's0', - * but the path passed in by the user doesn't. + * strings identical except trailing "s0" */ - if (strlen(srchval) == strlen(val) - 2 && - strncmp(srchval, val, strlen(srchval)) == 0) + if (strcmp(&val[vlen - 2], "s0") == 0 && + strncmp(srchval, val, slen) == 0) return (nv); + + /* + * strings identical except trailing "s0/old" + */ + if (strcmp(&val[vlen - 6], "s0/old") == 0 && + strcmp(&srchval[slen - 4], "/old") == 0 && + strncmp(srchval, val, slen - 4) == 0) + return (nv); + break; } + } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { + char *type, *idx, *end, *p; + uint64_t id, vdev_id; + + /* + * Determine our vdev type, keeping in mind + * that the srchval is composed of a type and + * vdev id pair (i.e. mirror-4). + */ + if ((type = strdup(srchval)) == NULL) + return (NULL); + + if ((p = strrchr(type, '-')) == NULL) { + free(type); + break; + } + idx = p + 1; + *p = '\0'; + + /* + * If the types don't match then keep looking. + */ + if (strncmp(val, type, strlen(val)) != 0) { + free(type); + break; + } + + verify(strncmp(type, VDEV_TYPE_RAIDZ, + strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(type, VDEV_TYPE_MIRROR, + strlen(VDEV_TYPE_MIRROR)) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + + errno = 0; + vdev_id = strtoull(idx, &end, 10); + + free(type); + if (errno != 0) + return (NULL); + + /* + * Now verify that we have the correct vdev id. + */ + if (vdev_id == id) + return (nv); } /* @@ -1537,6 +1825,18 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, return (ret); } +/* + * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). + */ +boolean_t +zpool_vdev_is_interior(const char *name) +{ + if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) + return (B_TRUE); + return (B_FALSE); +} + nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) @@ -1551,6 +1851,8 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, guid = strtoull(path, &end, 10); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); + } else if (zpool_vdev_is_interior(path)) { + verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); } else if (path[0] != '/') { (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path); verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); @@ -1721,34 +2023,6 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) } /* - * Returns TRUE if the given guid corresponds to the given type. - * This is used to check for hot spares (INUSE or not), and level 2 cache - * devices. - */ -static boolean_t -is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type) -{ - uint64_t target_guid; - nvlist_t *nvroot; - nvlist_t **list; - uint_t count; - int i; - - verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) == 0); - if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) { - for (i = 0; i < count; i++) { - verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID, - &target_guid) == 0); - if (guid == target_guid) - return (B_TRUE); - } - } - - return (B_FALSE); -} - -/* * If the device has being dynamically expanded then we need to relabel * the disk to use the new unallocated space. */ @@ -1816,8 +2090,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - if (avail_spare || - is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE) + if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if (flags & ZFS_ONLINE_EXPAND || @@ -1848,8 +2121,15 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, zc.zc_cookie = VDEV_STATE_ONLINE; zc.zc_obj = flags; - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) + if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { + if (errno == EINVAL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " + "from this pool into a new one. Use '%s' " + "instead"), "zpool detach"); + return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg)); + } return (zpool_standard_error(hdl, errno, msg)); + } *newstate = zc.zc_cookie; return (0); @@ -1877,8 +2157,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); - if (avail_spare || - is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE) + if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); zc.zc_cookie = VDEV_STATE_OFFLINE; @@ -1910,7 +2189,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) * Mark the given vdev faulted. */ int -zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid) +zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { zfs_cmd_t zc = { 0 }; char msg[1024]; @@ -1922,6 +2201,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_FAULTED; + zc.zc_obj = aux; if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); @@ -1944,7 +2224,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid) * Mark the given vdev degraded. */ int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid) +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { zfs_cmd_t zc = { 0 }; char msg[1024]; @@ -1956,6 +2236,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_obj = aux; if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); @@ -2053,7 +2334,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); - if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL) + if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL) return (-1); /* @@ -2241,6 +2522,257 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path) } /* + * Find a mirror vdev in the source nvlist. + * + * The mchild array contains a list of disks in one of the top-level mirrors + * of the source pool. The schild array contains a list of disks that the + * user specified on the command line. We loop over the mchild array to + * see if any entry in the schild array matches. + * + * If a disk in the mchild array is found in the schild array, we return + * the index of that entry. Otherwise we return -1. + */ +static int +find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren, + nvlist_t **schild, uint_t schildren) +{ + uint_t mc; + + for (mc = 0; mc < mchildren; mc++) { + uint_t sc; + char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp, + mchild[mc], B_FALSE); + + for (sc = 0; sc < schildren; sc++) { + char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp, + schild[sc], B_FALSE); + boolean_t result = (strcmp(mpath, spath) == 0); + + free(spath); + if (result) { + free(mpath); + return (mc); + } + } + + free(mpath); + } + + return (-1); +} + +/* + * Split a mirror pool. If newroot points to null, then a new nvlist + * is generated and it is the responsibility of the caller to free it. + */ +int +zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, + nvlist_t *props, splitflags_t flags) +{ + zfs_cmd_t zc = { 0 }; + char msg[1024]; + nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; + nvlist_t **varray = NULL, *zc_props = NULL; + uint_t c, children, newchildren, lastlog = 0, vcount, found = 0; + libzfs_handle_t *hdl = zhp->zpool_hdl; + uint64_t vers; + boolean_t freelist = B_FALSE, memory_err = B_TRUE; + int retval = 0; + + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name); + + if (!zpool_name_valid(hdl, B_FALSE, newname)) + return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); + + if ((config = zpool_get_config(zhp, NULL)) == NULL) { + (void) fprintf(stderr, gettext("Internal error: unable to " + "retrieve pool configuration\n")); + return (-1); + } + + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) + == 0); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0); + + if (props) { + if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name, + props, vers, B_TRUE, msg)) == NULL) + return (-1); + } + + if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, + &children) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Source pool is missing vdev tree")); + if (zc_props) + nvlist_free(zc_props); + return (-1); + } + + varray = zfs_alloc(hdl, children * sizeof (nvlist_t *)); + vcount = 0; + + if (*newroot == NULL || + nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, + &newchild, &newchildren) != 0) + newchildren = 0; + + for (c = 0; c < children; c++) { + uint64_t is_log = B_FALSE, is_hole = B_FALSE; + char *type; + nvlist_t **mchild, *vdev; + uint_t mchildren; + int entry; + + /* + * Unlike cache & spares, slogs are stored in the + * ZPOOL_CONFIG_CHILDREN array. We filter them out here. + */ + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &is_log); + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &is_hole); + if (is_log || is_hole) { + /* + * Create a hole vdev and put it in the config. + */ + if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0) + goto out; + if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0) + goto out; + if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE, + 1) != 0) + goto out; + if (lastlog == 0) + lastlog = vcount; + varray[vcount++] = vdev; + continue; + } + lastlog = 0; + verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) + == 0); + if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Source pool must be composed only of mirrors\n")); + retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); + goto out; + } + + verify(nvlist_lookup_nvlist_array(child[c], + ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); + + /* find or add an entry for this top-level vdev */ + if (newchildren > 0 && + (entry = find_vdev_entry(zhp, mchild, mchildren, + newchild, newchildren)) >= 0) { + /* We found a disk that the user specified. */ + vdev = mchild[entry]; + ++found; + } else { + /* User didn't specify a disk for this vdev. */ + vdev = mchild[mchildren - 1]; + } + + if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) + goto out; + } + + /* did we find every disk the user specified? */ + if (found != newchildren) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must " + "include at most one disk from each mirror")); + retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); + goto out; + } + + /* Prepare the nvlist for populating. */ + if (*newroot == NULL) { + if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0) + goto out; + freelist = B_TRUE; + if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE, + VDEV_TYPE_ROOT) != 0) + goto out; + } else { + verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0); + } + + /* Add all the children we found */ + if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray, + lastlog == 0 ? vcount : lastlog) != 0) + goto out; + + /* + * If we're just doing a dry run, exit now with success. + */ + if (flags.dryrun) { + memory_err = B_FALSE; + freelist = B_FALSE; + goto out; + } + + /* now build up the config list & call the ioctl */ + if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0) + goto out; + + if (nvlist_add_nvlist(newconfig, + ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 || + nvlist_add_string(newconfig, + ZPOOL_CONFIG_POOL_NAME, newname) != 0 || + nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0) + goto out; + + /* + * The new pool is automatically part of the namespace unless we + * explicitly export it. + */ + if (!flags.import) + zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT; + (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string)); + if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0) + goto out; + if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) + goto out; + + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) { + retval = zpool_standard_error(hdl, errno, msg); + goto out; + } + + freelist = B_FALSE; + memory_err = B_FALSE; + +out: + if (varray != NULL) { + int v; + + for (v = 0; v < vcount; v++) + nvlist_free(varray[v]); + free(varray); + } + zcmd_free_nvlists(&zc); + if (zc_props) + nvlist_free(zc_props); + if (newconfig) + nvlist_free(newconfig); + if (freelist) { + nvlist_free(*newroot); + *newroot = NULL; + } + + if (retval != 0) + return (retval); + + if (memory_err) + return (no_memory(hdl)); + + return (0); +} + +/* * Remove the given device. Currently, this is supported only for hot spares * and level 2 cache devices. */ @@ -2250,24 +2782,34 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path) zfs_cmd_t zc = { 0 }; char msg[1024]; nvlist_t *tgt; - boolean_t avail_spare, l2cache; + boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; + uint64_t version; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot remove %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - NULL)) == 0) + &islog)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - if (!avail_spare && !l2cache) { + /* + * XXX - this should just go away. + */ + if (!avail_spare && !l2cache && !islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "only inactive hot spares or cache devices " - "can be removed")); + "only inactive hot spares, cache, top-level, " + "or log devices can be removed")); return (zfs_error(hdl, EZFS_NODEVICE, msg)); } + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if (islog && version < SPA_VERSION_HOLES) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgrade to support log removal")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) @@ -2280,13 +2822,15 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path) * Clear the errors for the pool, or the particular device if specified. */ int -zpool_clear(zpool_handle_t *zhp, const char *path) +zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) { zfs_cmd_t zc = { 0 }; char msg[1024]; nvlist_t *tgt; + zpool_rewind_policy_t policy; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; + nvlist_t *nvi = NULL; if (path) (void) snprintf(msg, sizeof (msg), @@ -2314,9 +2858,31 @@ zpool_clear(zpool_handle_t *zhp, const char *path) &zc.zc_guid) == 0); } - if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0) + zpool_get_rewind_policy(rewindnvl, &policy); + zc.zc_cookie = policy.zrp_request; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, 8192) != 0) + return (-1); + + if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, rewindnvl) != 0) + return (-1); + + if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0 || + ((policy.zrp_request & ZPOOL_TRY_REWIND) && + errno != EPERM && errno != EACCES)) { + if (policy.zrp_request & + (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { + (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); + zpool_rewind_exclaim(hdl, zc.zc_name, + ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), + nvi); + nvlist_free(nvi); + } + zcmd_free_nvlists(&zc); return (0); + } + zcmd_free_nvlists(&zc); return (zpool_standard_error(hdl, errno, msg)); } @@ -2336,6 +2902,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; + zc.zc_cookie = ZPOOL_NO_REWIND; if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0) return (0); @@ -2344,173 +2911,6 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) } /* - * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool> - * hierarchy. - */ -int -zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *), - void *data) -{ - libzfs_handle_t *hdl = zhp->zpool_hdl; - char (*paths)[MAXPATHLEN]; - size_t size = 4; - int curr, fd, base, ret = 0; - DIR *dirp; - struct dirent *dp; - struct stat st; - - if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0) - return (errno == ENOENT ? 0 : -1); - - if (fstatat(base, zhp->zpool_name, &st, 0) != 0) { - int err = errno; - (void) close(base); - return (err == ENOENT ? 0 : -1); - } - - /* - * Oddly this wasn't a directory -- ignore that failure since we - * know there are no links lower in the (non-existant) hierarchy. - */ - if (!S_ISDIR(st.st_mode)) { - (void) close(base); - return (0); - } - - if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) { - (void) close(base); - return (-1); - } - - (void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0])); - curr = 0; - - while (curr >= 0) { - if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0) - goto err; - - if (S_ISDIR(st.st_mode)) { - if ((fd = openat(base, paths[curr], O_RDONLY)) < 0) - goto err; - - if ((dirp = fdopendir(fd)) == NULL) { - (void) close(fd); - goto err; - } - - while ((dp = readdir(dirp)) != NULL) { - if (dp->d_name[0] == '.') - continue; - - if (curr + 1 == size) { - paths = zfs_realloc(hdl, paths, - size * sizeof (paths[0]), - size * 2 * sizeof (paths[0])); - if (paths == NULL) { - (void) closedir(dirp); - (void) close(fd); - goto err; - } - - size *= 2; - } - - (void) strlcpy(paths[curr + 1], paths[curr], - sizeof (paths[curr + 1])); - (void) strlcat(paths[curr], "/", - sizeof (paths[curr])); - (void) strlcat(paths[curr], dp->d_name, - sizeof (paths[curr])); - curr++; - } - - (void) closedir(dirp); - - } else { - if ((ret = cb(paths[curr], data)) != 0) - break; - } - - curr--; - } - - free(paths); - (void) close(base); - - return (ret); - -err: - free(paths); - (void) close(base); - return (-1); -} - -typedef struct zvol_cb { - zpool_handle_t *zcb_pool; - boolean_t zcb_create; -} zvol_cb_t; - -/*ARGSUSED*/ -static int -do_zvol_create(zfs_handle_t *zhp, void *data) -{ - int ret = 0; - - if (ZFS_IS_VOLUME(zhp)) { - (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); - ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL); - } - - if (ret == 0) - ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL); - - zfs_close(zhp); - - return (ret); -} - -/* - * Iterate over all zvols in the pool and make any necessary minor nodes. - */ -int -zpool_create_zvol_links(zpool_handle_t *zhp) -{ - zfs_handle_t *zfp; - int ret; - - /* - * If the pool is unavailable, just return success. - */ - if ((zfp = make_dataset_handle(zhp->zpool_hdl, - zhp->zpool_name)) == NULL) - return (0); - - ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL); - - zfs_close(zfp); - return (ret); -} - -static int -do_zvol_remove(const char *dataset, void *data) -{ - zpool_handle_t *zhp = data; - - return (zvol_remove_link(zhp->zpool_hdl, dataset)); -} - -/* - * Iterate over all zvols in the pool and remove any minor nodes. We iterate - * by examining the /dev links so that a corrupted pool doesn't impede this - * operation. - */ -int -zpool_remove_zvol_links(zpool_handle_t *zhp) -{ - return (zpool_iter_zvol(zhp, do_zvol_remove, zhp)); -} - -/* * Convert from a devid string to a path. */ static char * @@ -2602,7 +3002,8 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) * of these checks. */ char * -zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) +zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, + boolean_t verbose) { char *path, *devid; uint64_t value; @@ -2625,7 +3026,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) * open a misbehaving device, which can have undesirable * effects. */ - if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) != 0 || vs->vs_state >= VDEV_STATE_DEGRADED) && zhp != NULL && @@ -2662,10 +3063,23 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value) { + int pathlen = strlen(path); char *tmp = zfs_strdup(hdl, path); - if (tmp == NULL) - return (NULL); - tmp[strlen(path) - 2] = '\0'; + + /* + * If it starts with c#, and ends with "s0", chop + * the "s0" off, or if it ends with "s0/old", remove + * the "s0" from the middle. + */ + if (CTD_CHECK(tmp)) { + if (strcmp(&tmp[pathlen - 2], "s0") == 0) { + tmp[pathlen - 2] = '\0'; + } else if (pathlen > 6 && + strcmp(&tmp[pathlen - 6], "s0/old") == 0) { + (void) strcpy(&tmp[pathlen - 6], + "/old"); + } + } return (tmp); } } else { @@ -2681,6 +3095,20 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) (u_longlong_t)value); path = buf; } + + /* + * We identify each top-level vdev by using a <type-id> + * naming convention. + */ + if (verbose) { + uint64_t id; + + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + (void) snprintf(buf, sizeof (buf), "%s-%llu", path, + (u_longlong_t)id); + path = buf; + } } return (zfs_strdup(hdl, path)); @@ -2899,7 +3327,7 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) * into 'records'. 'leftover' is set to the number of bytes that weren't * processed as there wasn't a complete record. */ -static int +int zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, nvlist_t ***records, uint_t *numrecords) { @@ -3218,6 +3646,7 @@ supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || strcmp(type, VDEV_TYPE_FILE) == 0 || strcmp(type, VDEV_TYPE_LOG) == 0 || + strcmp(type, VDEV_TYPE_HOLE) == 0 || strcmp(type, VDEV_TYPE_MISSING) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "vdev type '%s' is not supported"), type); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 1ffb6294c..672e004ef 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -20,14 +20,12 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <assert.h> #include <ctype.h> #include <errno.h> -#include <libdevinfo.h> #include <libintl.h> #include <stdio.h> #include <stdlib.h> @@ -36,21 +34,394 @@ #include <stddef.h> #include <fcntl.h> #include <sys/mount.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/avl.h> -#include <stddef.h> +#include <pthread.h> +#include <umem.h> #include <libzfs.h> #include "zfs_namecheck.h" #include "zfs_prop.h" +#include "zfs_fletcher.h" #include "libzfs_impl.h" +#include <sha2.h> +#include <sys/zio_checksum.h> +#include <sys/ddt.h> -#include <fletcher.c> /* XXX */ +/* in libzfs_dataset.c */ +extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, - int, avl_tree_t *, char **); + int, const char *, nvlist_t *, avl_tree_t *, char **); + +static const zio_cksum_t zero_cksum = { 0 }; + +typedef struct dedup_arg { + int inputfd; + int outputfd; + libzfs_handle_t *dedup_hdl; +} dedup_arg_t; + +typedef struct dataref { + uint64_t ref_guid; + uint64_t ref_object; + uint64_t ref_offset; +} dataref_t; + +typedef struct dedup_entry { + struct dedup_entry *dde_next; + zio_cksum_t dde_chksum; + uint64_t dde_prop; + dataref_t dde_ref; +} dedup_entry_t; + +#define MAX_DDT_PHYSMEM_PERCENT 20 +#define SMALLEST_POSSIBLE_MAX_DDT_MB 128 + +typedef struct dedup_table { + dedup_entry_t **dedup_hash_array; + umem_cache_t *ddecache; + uint64_t max_ddt_size; /* max dedup table size in bytes */ + uint64_t cur_ddt_size; /* current dedup table size in bytes */ + uint64_t ddt_count; + int numhashbits; + boolean_t ddt_full; +} dedup_table_t; + +static int +high_order_bit(uint64_t n) +{ + int count; + + for (count = 0; n != 0; count++) + n >>= 1; + return (count); +} + +static size_t +ssread(void *buf, size_t len, FILE *stream) +{ + size_t outlen; + + if ((outlen = fread(buf, len, 1, stream)) == 0) + return (0); + + return (outlen); +} + +static void +ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, + zio_cksum_t *cs, uint64_t prop, dataref_t *dr) +{ + dedup_entry_t *dde; + + if (ddt->cur_ddt_size >= ddt->max_ddt_size) { + if (ddt->ddt_full == B_FALSE) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Dedup table full. Deduplication will continue " + "with existing table entries")); + ddt->ddt_full = B_TRUE; + } + return; + } + + if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) + != NULL) { + assert(*ddepp == NULL); + dde->dde_next = NULL; + dde->dde_chksum = *cs; + dde->dde_prop = prop; + dde->dde_ref = *dr; + *ddepp = dde; + ddt->cur_ddt_size += sizeof (dedup_entry_t); + ddt->ddt_count++; + } +} + +/* + * Using the specified dedup table, do a lookup for an entry with + * the checksum cs. If found, return the block's reference info + * in *dr. Otherwise, insert a new entry in the dedup table, using + * the reference information specified by *dr. + * + * return value: true - entry was found + * false - entry was not found + */ +static boolean_t +ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, + uint64_t prop, dataref_t *dr) +{ + uint32_t hashcode; + dedup_entry_t **ddepp; + + hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); + + for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; + ddepp = &((*ddepp)->dde_next)) { + if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && + (*ddepp)->dde_prop == prop) { + *dr = (*ddepp)->dde_ref; + return (B_TRUE); + } + } + ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); + return (B_FALSE); +} + +static int +cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) +{ + fletcher_4_incremental_native(buf, len, zc); + return (write(outfd, buf, len)); +} + +/* + * This function is started in a separate thread when the dedup option + * has been requested. The main send thread determines the list of + * snapshots to be included in the send stream and makes the ioctl calls + * for each one. But instead of having the ioctl send the output to the + * the output fd specified by the caller of zfs_send()), the + * ioctl is told to direct the output to a pipe, which is read by the + * alternate thread running THIS function. This function does the + * dedup'ing by: + * 1. building a dedup table (the DDT) + * 2. doing checksums on each data block and inserting a record in the DDT + * 3. looking for matching checksums, and + * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever + * a duplicate block is found. + * The output of this function then goes to the output fd requested + * by the caller of zfs_send(). + */ +static void * +cksummer(void *arg) +{ + dedup_arg_t *dda = arg; + char *buf = malloc(1<<20); + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + struct drr_begin *drrb = &thedrr.drr_u.drr_begin; + struct drr_end *drre = &thedrr.drr_u.drr_end; + struct drr_object *drro = &thedrr.drr_u.drr_object; + struct drr_write *drrw = &thedrr.drr_u.drr_write; + struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + FILE *ofp; + int outfd; + dmu_replay_record_t wbr_drr = {0}; + struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; + dedup_table_t ddt; + zio_cksum_t stream_cksum; + uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); + uint64_t numbuckets; + + ddt.max_ddt_size = + MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, + SMALLEST_POSSIBLE_MAX_DDT_MB<<20); + + numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); + + /* + * numbuckets must be a power of 2. Increase number to + * a power of 2 if necessary. + */ + if (!ISP2(numbuckets)) + numbuckets = 1 << high_order_bit(numbuckets); + + ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); + ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, + NULL, NULL, NULL, NULL, NULL, 0); + ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); + ddt.numhashbits = high_order_bit(numbuckets) - 1; + ddt.ddt_full = B_FALSE; + + /* Initialize the write-by-reference block. */ + wbr_drr.drr_type = DRR_WRITE_BYREF; + wbr_drr.drr_payloadlen = 0; + + outfd = dda->outputfd; + ofp = fdopen(dda->inputfd, "r"); + while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { + + switch (drr->drr_type) { + case DRR_BEGIN: + { + int fflags; + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + + /* set the DEDUP feature flag for this stream */ + fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + fflags |= (DMU_BACKUP_FEATURE_DEDUP | + DMU_BACKUP_FEATURE_DEDUPPROPS); + DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); + + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { + int sz = drr->drr_payloadlen; + + if (sz > 1<<20) { + free(buf); + buf = malloc(sz); + } + (void) ssread(buf, sz, ofp); + if (ferror(stdin)) + perror("fread"); + if (cksum_and_write(buf, sz, &stream_cksum, + outfd) == -1) + goto out; + } + break; + } + + case DRR_END: + { + /* use the recalculated checksum */ + ZIO_SET_CHECKSUM(&drre->drr_checksum, + stream_cksum.zc_word[0], stream_cksum.zc_word[1], + stream_cksum.zc_word[2], stream_cksum.zc_word[3]); + if ((write(outfd, drr, + sizeof (dmu_replay_record_t))) == -1) + goto out; + break; + } + + case DRR_OBJECT: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + if (drro->drr_bonuslen > 0) { + (void) ssread(buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + ofp); + if (cksum_and_write(buf, + P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + &stream_cksum, outfd) == -1) + goto out; + } + break; + } + + case DRR_SPILL: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + (void) ssread(buf, drrs->drr_length, ofp); + if (cksum_and_write(buf, drrs->drr_length, + &stream_cksum, outfd) == -1) + goto out; + break; + } + + case DRR_FREEOBJECTS: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + break; + } + + case DRR_WRITE: + { + dataref_t dataref; + + (void) ssread(buf, drrw->drr_length, ofp); + + /* + * Use the existing checksum if it's dedup-capable, + * else calculate a SHA256 checksum for it. + */ + + if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, + zero_cksum) || + !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + SHA256_CTX ctx; + zio_cksum_t tmpsha256; + + SHA256Init(&ctx); + SHA256Update(&ctx, buf, drrw->drr_length); + SHA256Final(&tmpsha256, &ctx); + drrw->drr_key.ddk_cksum.zc_word[0] = + BE_64(tmpsha256.zc_word[0]); + drrw->drr_key.ddk_cksum.zc_word[1] = + BE_64(tmpsha256.zc_word[1]); + drrw->drr_key.ddk_cksum.zc_word[2] = + BE_64(tmpsha256.zc_word[2]); + drrw->drr_key.ddk_cksum.zc_word[3] = + BE_64(tmpsha256.zc_word[3]); + drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; + drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; + } + + dataref.ref_guid = drrw->drr_toguid; + dataref.ref_object = drrw->drr_object; + dataref.ref_offset = drrw->drr_offset; + + if (ddt_update(dda->dedup_hdl, &ddt, + &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, + &dataref)) { + /* block already present in stream */ + wbr_drrr->drr_object = drrw->drr_object; + wbr_drrr->drr_offset = drrw->drr_offset; + wbr_drrr->drr_length = drrw->drr_length; + wbr_drrr->drr_toguid = drrw->drr_toguid; + wbr_drrr->drr_refguid = dataref.ref_guid; + wbr_drrr->drr_refobject = + dataref.ref_object; + wbr_drrr->drr_refoffset = + dataref.ref_offset; + + wbr_drrr->drr_checksumtype = + drrw->drr_checksumtype; + wbr_drrr->drr_checksumflags = + drrw->drr_checksumtype; + wbr_drrr->drr_key.ddk_cksum = + drrw->drr_key.ddk_cksum; + wbr_drrr->drr_key.ddk_prop = + drrw->drr_key.ddk_prop; + + if (cksum_and_write(&wbr_drr, + sizeof (dmu_replay_record_t), &stream_cksum, + outfd) == -1) + goto out; + } else { + /* block not previously seen */ + if (cksum_and_write(drr, + sizeof (dmu_replay_record_t), &stream_cksum, + outfd) == -1) + goto out; + if (cksum_and_write(buf, + drrw->drr_length, + &stream_cksum, outfd) == -1) + goto out; + } + break; + } + + case DRR_FREE: + { + if (cksum_and_write(drr, sizeof (dmu_replay_record_t), + &stream_cksum, outfd) == -1) + goto out; + break; + } + + default: + (void) printf("INVALID record type 0x%x\n", + drr->drr_type); + /* should never happen, so assert */ + assert(B_FALSE); + } + } +out: + umem_cache_destroy(ddt.ddecache); + free(ddt.dedup_hash_array); + free(buf); + (void) fclose(ofp); + + return (NULL); +} /* * Routines for dealing with the AVL tree of fs-nvlists @@ -173,6 +544,7 @@ typedef struct send_data { nvlist_t *snapprops; const char *fromsnap; const char *tosnap; + boolean_t recursive; /* * The header nvlist is of the following format: @@ -240,27 +612,50 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) zfs_prop_t prop = zfs_name_to_prop(propname); nvlist_t *propnv; - assert(zfs_prop_user(propname) || prop != ZPROP_INVAL); + if (!zfs_prop_user(propname)) { + /* + * Realistically, this should never happen. However, + * we want the ability to add DSL properties without + * needing to make incompatible version changes. We + * need to ignore unknown properties to allow older + * software to still send datasets containing these + * properties, with the unknown properties elided. + */ + if (prop == ZPROP_INVAL) + continue; - if (!zfs_prop_user(propname) && zfs_prop_readonly(prop)) - continue; + if (zfs_prop_readonly(prop)) + continue; + } verify(nvpair_value_nvlist(elem, &propnv) == 0); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { - /* these guys are modifyable, but have no source */ + char *source; uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; + /* + * May have no source before SPA_VERSION_RECVD_PROPS, + * but is still modifiable. + */ + if (nvlist_lookup_string(propnv, + ZPROP_SOURCE, &source) == 0) { + if ((strcmp(source, zhp->zfs_name) != 0) && + (strcmp(source, + ZPROP_SOURCE_VAL_RECVD) != 0)) + continue; + } } else { char *source; if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) != 0) continue; - if (strcmp(source, zhp->zfs_name) != 0) + if ((strcmp(source, zhp->zfs_name) != 0) && + (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } @@ -289,7 +684,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; nvlist_t *nvfs, *nv; - int rv; + int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t guid = zhp->zfs_dmustats.dds_guid; char guidstring[64]; @@ -331,7 +726,8 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) nvlist_free(nvfs); /* iterate over children */ - rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); + if (sd->recursive) + rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); sd->parent_fromsnap_guid = parent_fromsnap_guid_save; @@ -341,7 +737,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, - const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp) + const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; send_data_t sd = { 0 }; @@ -354,6 +750,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); sd.fromsnap = fromsnap; sd.tosnap = tosnap; + sd.recursive = recursive; if ((error = send_iterate_fs(zhp, &sd)) != 0) { nvlist_free(sd.fss); @@ -415,7 +812,7 @@ zfs_snapshot_compare(const void *larg, const void *rarg) return (0); } -static int +int zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) { int ret = 0; @@ -446,13 +843,16 @@ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; - char lastsnap[ZFS_MAXNAMELEN]; + char prevsnap[ZFS_MAXNAMELEN]; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose; int outfd; boolean_t err; nvlist_t *fss; avl_tree_t *fsavl; + snapfilter_cb_t *filter_cb; + void *filter_cb_arg; + nvlist_t *debugnv; } send_dump_data_t; /* @@ -461,10 +861,11 @@ typedef struct send_dump_data { */ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, - int outfd) + int outfd, boolean_t enoent_ok, boolean_t *got_enoent, nvlist_t *debugnv) { zfs_cmd_t zc = { 0 }; libzfs_handle_t *hdl = zhp->zfs_hdl; + nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin); @@ -475,11 +876,26 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, zc.zc_cookie = outfd; zc.zc_obj = fromorigin; + *got_enoent = B_FALSE; + + VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); + if (fromsnap && fromsnap[0] != '\0') { + VERIFY(0 == nvlist_add_string(thisdbg, + "fromsnap", fromsnap)); + } + if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); + VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); + if (debugnv) { + VERIFY(0 == nvlist_add_nvlist(debugnv, + zhp->zfs_name, thisdbg)); + } + nvlist_free(thisdbg); + switch (errno) { case EXDEV: @@ -488,6 +904,10 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: + if (enoent_ok) { + *got_enoent = B_TRUE; + return (0); + } if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -515,6 +935,10 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin, } } + if (debugnv) + VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); + nvlist_free(thisdbg); + return (0); } @@ -524,13 +948,17 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) send_dump_data_t *sdd = arg; const char *thissnap; int err; + boolean_t got_enoent; + boolean_t isfromsnap, istosnap; + boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; + isfromsnap = (sdd->fromsnap != NULL && + strcmp(sdd->fromsnap, thissnap) == 0); - if (sdd->fromsnap && !sdd->seenfrom && - strcmp(sdd->fromsnap, thissnap) == 0) { + if (!sdd->seenfrom && isfromsnap) { sdd->seenfrom = B_TRUE; - (void) strcpy(sdd->lastsnap, thissnap); + (void) strcpy(sdd->prevsnap, thissnap); zfs_close(zhp); return (0); } @@ -540,20 +968,63 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) return (0); } + istosnap = (strcmp(sdd->tosnap, thissnap) == 0); + if (istosnap) + sdd->seento = B_TRUE; + + if (!sdd->doall && !isfromsnap && !istosnap) { + if (sdd->replicate) { + char *snapname; + nvlist_t *snapprops; + /* + * Filter out all intermediate snapshots except origin + * snapshots needed to replicate clones. + */ + nvlist_t *nvfs = fsavl_find(sdd->fsavl, + zhp->zfs_dmustats.dds_guid, &snapname); + + VERIFY(0 == nvlist_lookup_nvlist(nvfs, + "snapprops", &snapprops)); + VERIFY(0 == nvlist_lookup_nvlist(snapprops, + thissnap, &snapprops)); + exclude = !nvlist_exists(snapprops, "is_clone_origin"); + } else { + exclude = B_TRUE; + } + } + + /* + * If a filter function exists, call it to determine whether + * this snapshot will be sent. + */ + if (exclude || (sdd->filter_cb != NULL && + sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { + /* + * This snapshot is filtered out. Don't send it, and don't + * set prevsnap, so it will be as if this snapshot didn't + * exist, and the next accepted snapshot will be sent as + * an incremental from the last accepted one, or as the + * first (and full) snapshot in the case of a replication, + * non-incremental send. + */ + zfs_close(zhp); + return (0); + } + /* send it */ if (sdd->verbose) { (void) fprintf(stderr, "sending from @%s to %s\n", - sdd->lastsnap, zhp->zfs_name); + sdd->prevsnap, zhp->zfs_name); } - err = dump_ioctl(zhp, sdd->lastsnap, - sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), - sdd->outfd); + err = dump_ioctl(zhp, sdd->prevsnap, + sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate), + sdd->outfd, B_TRUE, &got_enoent, sdd->debugnv); - if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0) - sdd->seento = B_TRUE; - - (void) strcpy(sdd->lastsnap, thissnap); + if (got_enoent) + err = 0; + else + (void) strcpy(sdd->prevsnap, thissnap); zfs_close(zhp); return (err); } @@ -592,51 +1063,32 @@ dump_filesystem(zfs_handle_t *zhp, void *arg) } } - if (sdd->doall) { - sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0; - if (sdd->fromsnap == NULL || missingfrom) - sdd->seenfrom = B_TRUE; + sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; + if (sdd->fromsnap == NULL || missingfrom) + sdd->seenfrom = B_TRUE; - rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); - if (!sdd->seenfrom) { + rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); + if (!sdd->seenfrom) { + (void) fprintf(stderr, + "WARNING: could not send %s@%s:\n" + "incremental source (%s@%s) does not exist\n", + zhp->zfs_name, sdd->tosnap, + zhp->zfs_name, sdd->fromsnap); + sdd->err = B_TRUE; + } else if (!sdd->seento) { + if (sdd->fromsnap) { (void) fprintf(stderr, "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) does not exist\n", + "incremental source (%s@%s) " + "is not earlier than it\n", zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); - sdd->err = B_TRUE; - } else if (!sdd->seento) { - if (sdd->fromsnap) { - (void) fprintf(stderr, - "WARNING: could not send %s@%s:\n" - "incremental source (%s@%s) " - "is not earlier than it\n", - zhp->zfs_name, sdd->tosnap, - zhp->zfs_name, sdd->fromsnap); - } else { - (void) fprintf(stderr, "WARNING: " - "could not send %s@%s: does not exist\n", - zhp->zfs_name, sdd->tosnap); - } - sdd->err = B_TRUE; - } - } else { - zfs_handle_t *snapzhp; - char snapname[ZFS_MAXNAMELEN]; - - (void) snprintf(snapname, sizeof (snapname), "%s@%s", - zfs_get_name(zhp), sdd->tosnap); - snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT); - if (snapzhp == NULL) { - rv = -1; } else { - rv = dump_ioctl(snapzhp, - missingfrom ? NULL : sdd->fromsnap, - sdd->fromorigin || missingfrom, - sdd->outfd); - sdd->seento = B_TRUE; - zfs_close(snapzhp); + (void) fprintf(stderr, "WARNING: " + "could not send %s@%s: does not exist\n", + zhp->zfs_name, sdd->tosnap); } + sdd->err = B_TRUE; } return (rv); @@ -652,6 +1104,29 @@ dump_filesystems(zfs_handle_t *rzhp, void *arg) if (!sdd->replicate) return (dump_filesystem(rzhp, sdd)); + /* Mark the clone origin snapshots. */ + for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; + fspair = nvlist_next_nvpair(sdd->fss, fspair)) { + nvlist_t *nvfs; + uint64_t origin_guid = 0; + + VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); + (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); + if (origin_guid != 0) { + char *snapname; + nvlist_t *origin_nv = fsavl_find(sdd->fsavl, + origin_guid, &snapname); + if (origin_nv != NULL) { + nvlist_t *snapprops; + VERIFY(0 == nvlist_lookup_nvlist(origin_nv, + "snapprops", &snapprops)); + VERIFY(0 == nvlist_lookup_nvlist(snapprops, + snapname, &snapprops)); + VERIFY(0 == nvlist_add_boolean( + snapprops, "is_clone_origin")); + } + } + } again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; @@ -661,7 +1136,6 @@ again: zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; - nvlist_t *origin_nv; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) @@ -670,15 +1144,19 @@ again: VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); - origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); - if (origin_nv && - nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) { - /* - * origin has not been sent yet; - * skip this clone. - */ - needagain = B_TRUE; - continue; + if (origin_guid != 0) { + nvlist_t *origin_nv = fsavl_find(sdd->fsavl, + origin_guid, NULL); + if (origin_nv != NULL && + nvlist_lookup_boolean(origin_nv, + "sent") == ENOENT) { + /* + * origin has not been sent yet; + * skip this clone. + */ + needagain = B_TRUE; + continue; + } } zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); @@ -709,21 +1187,37 @@ again: * is TRUE. * * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and - * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION) + * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) * if "replicate" is set. If "doall" is set, dump all the intermediate - * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall" - * case too. + * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" + * case too. If "props" is set, send properties. */ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, - boolean_t replicate, boolean_t doall, boolean_t fromorigin, - boolean_t verbose, int outfd) + sendflags_t flags, int outfd, snapfilter_cb_t filter_func, + void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; int err; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; + char holdtag[128]; + static uint64_t holdseq; + int spa_version; + boolean_t holdsnaps = B_FALSE; + pthread_t tid; + int pipefd[2]; + dedup_arg_t dda = { 0 }; + int featureflags = 0; + + if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { + uint64_t version; + version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); + if (version >= ZPL_VERSION_SA) { + featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; + } + } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); @@ -734,15 +1228,48 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } - if (replicate || doall) { + if (zfs_spa_version(zhp, &spa_version) == 0 && + spa_version >= SPA_VERSION_USERREFS) + holdsnaps = B_TRUE; + + if (flags.dedup) { + featureflags |= (DMU_BACKUP_FEATURE_DEDUP | + DMU_BACKUP_FEATURE_DEDUPPROPS); + if (err = pipe(pipefd)) { + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, + errbuf)); + } + dda.outputfd = outfd; + dda.inputfd = pipefd[1]; + dda.dedup_hdl = zhp->zfs_hdl; + if (err = pthread_create(&tid, NULL, cksummer, &dda)) { + (void) close(pipefd[0]); + (void) close(pipefd[1]); + zfs_error_aux(zhp->zfs_hdl, strerror(errno)); + return (zfs_error(zhp->zfs_hdl, + EZFS_THREADCREATEFAILED, errbuf)); + } + } + + if (flags.replicate || flags.doall || flags.props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { 0 }; - assert(fromsnap || doall); + if (holdsnaps) { + (void) snprintf(holdtag, sizeof (holdtag), + ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); + ++holdseq; + err = zfs_hold_range(zhp, fromsnap, tosnap, + holdtag, flags.replicate, B_TRUE, filter_func, + cb_arg); + if (err) + goto err_out; + } - if (replicate) { + if (flags.replicate || flags.props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); @@ -751,45 +1278,65 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, "fromsnap", fromsnap)); } VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); + if (!flags.replicate) { + VERIFY(0 == nvlist_add_boolean(hdrnv, + "not_recursive")); + } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, &fss, &fsavl); - if (err) - return (err); + fromsnap, tosnap, flags.replicate, &fss, &fsavl); + if (err) { + if (holdsnaps) { + (void) zfs_release_range(zhp, fromsnap, + tosnap, holdtag, flags.replicate); + } + goto err_out; + } VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); - nvlist_free(hdrnv); + if (debugnvp) + *debugnvp = hdrnv; + else + nvlist_free(hdrnv); if (err) { fsavl_destroy(fsavl); nvlist_free(fss); - return (zfs_standard_error(zhp->zfs_hdl, - err, errbuf)); + if (holdsnaps) { + (void) zfs_release_range(zhp, fromsnap, + tosnap, holdtag, flags.replicate); + } + goto stderr_out; } } /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; - drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION; + DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo, + DMU_COMPOUNDSTREAM); + DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo, + featureflags); (void) snprintf(drr.drr_u.drr_begin.drr_toname, sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; - fletcher_4_incremental_native(&drr, sizeof (drr), &zc); - err = write(outfd, &drr, sizeof (drr)); + err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); /* write header nvlist */ - if (err != -1) { - fletcher_4_incremental_native(packbuf, buflen, &zc); - err = write(outfd, packbuf, buflen); + if (err != -1 && packbuf != NULL) { + err = cksum_and_write(packbuf, buflen, &zc, outfd); } free(packbuf); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); - return (zfs_standard_error(zhp->zfs_hdl, - errno, errbuf)); + if (holdsnaps) { + (void) zfs_release_range(zhp, fromsnap, tosnap, + holdtag, flags.replicate); + } + err = errno; + goto stderr_out; } /* write end record */ @@ -801,8 +1348,12 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); - return (zfs_standard_error(zhp->zfs_hdl, - errno, errbuf)); + err = errno; + if (holdsnaps) { + (void) zfs_release_range(zhp, fromsnap, + tosnap, holdtag, flags.replicate); + } + goto stderr_out; } } } @@ -810,18 +1361,30 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; - sdd.outfd = outfd; - sdd.replicate = replicate; - sdd.doall = doall; - sdd.fromorigin = fromorigin; + if (flags.dedup) + sdd.outfd = pipefd[0]; + else + sdd.outfd = outfd; + sdd.replicate = flags.replicate; + sdd.doall = flags.doall; + sdd.fromorigin = flags.fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; - sdd.verbose = verbose; + sdd.verbose = flags.verbose; + sdd.filter_cb = filter_func; + sdd.filter_cb_arg = cb_arg; + if (debugnvp) + sdd.debugnv = *debugnvp; err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); - if (replicate || doall) { + if (flags.dedup) { + (void) close(pipefd[0]); + (void) pthread_join(tid, NULL); + } + + if (flags.replicate || flags.doall || flags.props) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally @@ -829,6 +1392,10 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, */ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; + if (holdsnaps) { + (void) zfs_release_range(zhp, fromsnap, tosnap, + holdtag, flags.replicate); + } if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); @@ -836,6 +1403,16 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } return (err || sdd.err); + +stderr_out: + err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); +err_out: + if (flags.dedup) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); + } + return (err); } /* @@ -1017,12 +1594,13 @@ recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, changelist_free(clp); /* - * Deferred destroy should always succeed. Since we can't tell - * if it destroyed the dataset or just marked it for deferred - * destroy, always do the rename just in case. + * Deferred destroy might destroy the snapshot or only mark it to be + * destroyed later, and it returns success in either case. */ - if (err != 0 || defer) + if (err != 0 || (defer && zfs_dataset_exists(hdl, name, + ZFS_TYPE_SNAPSHOT))) { err = recv_rename(hdl, name, NULL, baselen, newname, flags); + } return (err); } @@ -1040,6 +1618,7 @@ guid_to_name_cb(zfs_handle_t *zhp, void *arg) if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); + zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); @@ -1130,18 +1709,22 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl, static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, - recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl) + recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, + nvlist_t *renamed) { nvlist_t *local_nv; avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; - char *tosnap, *fromsnap; + char *fromsnap; char newname[ZFS_MAXNAMELEN]; int error; - boolean_t needagain, progress; + boolean_t needagain, progress, recursive; + char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); - VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap)); + + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); if (flags.dryrun) return (0); @@ -1150,7 +1733,7 @@ again: needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, - &local_nv, &local_avl)) != 0) + recursive, &local_nv, &local_avl)) != 0) return (error); /* @@ -1273,7 +1856,7 @@ again: stream_snapname, &props)) { zfs_cmd_t zc = { 0 }; - zc.zc_cookie = B_TRUE; /* clear current props */ + zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", fsname, nvpair_name(snapelem)); if (zcmd_write_src_nvlist(hdl, &zc, @@ -1321,10 +1904,13 @@ again: continue; } - if (fromguid == 0 && flags.verbose) { - (void) printf("local fs %s does not have fromsnap " - "(%s in stream); must have been deleted locally; " - "ignoring\n", fsname, fromsnap); + if (fromguid == 0) { + if (flags.verbose) { + (void) printf("local fs %s does not have " + "fromsnap (%s in stream); must have " + "been deleted locally; ignoring\n", + fsname, fromsnap); + } continue; } @@ -1333,11 +1919,19 @@ again: VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); - /* check for rename */ + s1 = strrchr(fsname, '/'); + s2 = strrchr(stream_fsname, '/'); + + /* + * Check for rename. If the exact receive path is specified, it + * does not count as a rename, but we still need to check the + * datasets beneath it. + */ if ((stream_parent_fromsnap_guid != 0 && + parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || - strcmp(strrchr(fsname, '/'), - strrchr(stream_fsname, '/')) != 0) { + ((flags.isprefix || strcmp(tofs, fsname) != 0) && + (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN]; @@ -1365,8 +1959,16 @@ again: } } + newname[0] = '\0'; + error = recv_rename(hdl, fsname, tryname, strlen(tofs)+1, newname, flags); + + if (renamed != NULL && newname[0] != '\0') { + VERIFY(0 == nvlist_add_boolean(renamed, + newname)); + } + if (error) needagain = B_TRUE; else @@ -1395,37 +1997,28 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; + char *cp; char tofs[ZFS_MAXNAMELEN]; + char sendfs[ZFS_MAXNAMELEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; + boolean_t recursive; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); - if (strchr(destname, '@')) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "can not specify snapshot name for multi-snapshot stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - assert(drr->drr_type == DRR_BEGIN); assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); - assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION); + assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == + DMU_COMPOUNDSTREAM); /* * Read in the nvlist from the stream. */ if (drr->drr_payloadlen != 0) { - if (!flags.isprefix) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "must use -d to receive replication " - "(send -R) stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); - } - error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, &stream_nv, flags.byteswap, zc); if (error) { @@ -1434,6 +2027,16 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, } } + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); + + if (recursive && strchr(destname, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot specify snapshot name for multi-snapshot stream")); + error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; + } + /* * Read in the end record and verify checksum. */ @@ -1477,21 +2080,73 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, } if (fromsnap != NULL) { + nvlist_t *renamed = NULL; + nvpair_t *pair = NULL; + (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); if (flags.isprefix) { - int i = strcspn(drr->drr_u.drr_begin.drr_toname, - "/@"); + struct drr_begin *drrb = &drr->drr_u.drr_begin; + int i; + + if (flags.istail) { + cp = strrchr(drrb->drr_toname, '/'); + if (cp == NULL) { + (void) strlcat(tofs, "/", + ZFS_MAXNAMELEN); + i = 0; + } else { + i = (cp - drrb->drr_toname); + } + } else { + i = strcspn(drrb->drr_toname, "/@"); + } /* zfs_receive_one() will create_parents() */ - (void) strlcat(tofs, - &drr->drr_u.drr_begin.drr_toname[i], + (void) strlcat(tofs, &drrb->drr_toname[i], ZFS_MAXNAMELEN); *strchr(tofs, '@') = '\0'; } - softerr = recv_incremental_replication(hdl, tofs, - flags, stream_nv, stream_avl); + + if (recursive && !flags.dryrun && !flags.nomount) { + VERIFY(0 == nvlist_alloc(&renamed, + NV_UNIQUE_NAME, 0)); + } + + softerr = recv_incremental_replication(hdl, tofs, flags, + stream_nv, stream_avl, renamed); + + /* Unmount renamed filesystems before receiving. */ + while ((pair = nvlist_next_nvpair(renamed, + pair)) != NULL) { + zfs_handle_t *zhp; + prop_changelist_t *clp = NULL; + + zhp = zfs_open(hdl, nvpair_name(pair), + ZFS_TYPE_FILESYSTEM); + if (zhp != NULL) { + clp = changelist_gather(zhp, + ZFS_PROP_MOUNTPOINT, 0, 0); + zfs_close(zhp); + if (clp != NULL) { + softerr |= + changelist_prefix(clp); + changelist_free(clp); + } + } + } + + nvlist_free(renamed); } } + /* + * Get the fs specified by the first path in the stream (the top level + * specified by 'zfs send') and pass it to each invocation of + * zfs_receive_one(). + */ + (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, + ZFS_MAXNAMELEN); + if ((cp = strchr(sendfs, '@')) != NULL) + *cp = '\0'; /* Finally, receive each contained stream */ do { @@ -1503,7 +2158,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, * recv_skip() and return 0). */ error = zfs_receive_impl(hdl, destname, flags, fd, - stream_avl, top_zfs); + sendfs, stream_nv, stream_avl, top_zfs); if (error == ENODATA) { error = 0; break; @@ -1517,7 +2172,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, * renames again. */ softerr = recv_incremental_replication(hdl, tofs, flags, - stream_nv, stream_avl); + stream_nv, stream_avl, NULL); } out: @@ -1531,11 +2186,28 @@ out: return (error); } +static void +trunc_prop_errs(int truncated) +{ + ASSERT(truncated != 0); + + if (truncated == 1) + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "1 more property could not be set\n")); + else + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "%d more properties could not be set\n"), truncated); +} + static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = malloc(1<<20); + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot receive:")); /* XXX would be great to use lseek if possible... */ drr = buf; @@ -1548,7 +2220,11 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) switch (drr->drr_type) { case DRR_BEGIN: /* NB: not to be used on v2 stream packages */ - assert(drr->drr_payloadlen == 0); + if (drr->drr_payloadlen != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid substream header")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } break; case DRR_END: @@ -1574,13 +2250,23 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) (void) recv_read(hdl, fd, buf, drr->drr_u.drr_write.drr_length, B_FALSE, NULL); break; - + case DRR_SPILL: + if (byteswap) { + drr->drr_u.drr_write.drr_length = + BSWAP_64(drr->drr_u.drr_spill.drr_length); + } + (void) recv_read(hdl, fd, buf, + drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); + break; + case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: break; default: - assert(!"invalid record type"); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid record type")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } @@ -1594,27 +2280,33 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, recvflags_t flags, dmu_replay_record_t *drr, - dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl, - char **top_zfs) + dmu_replay_record_t *drr_noswap, const char *sendfs, + nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs) { zfs_cmd_t zc = { 0 }; time_t begin_time; - int ioctl_err, ioctl_errno, err, choplen; + int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; - char chopprefix[ZFS_MAXNAMELEN]; + char prop_errbuf[1024]; + const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; + zprop_errflags_t prop_errflags; + boolean_t recursive; begin_time = time(NULL); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); + recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == + ENOENT); + if (stream_avl != NULL) { char *snapname; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, @@ -1645,6 +2337,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (-1); } + cp = NULL; + /* * Determine how much of the snapshot name stored in the stream * we are going to tack on to the name they specified on the @@ -1653,38 +2347,77 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * If they specified a snapshot, chop the entire name stored in * the stream. */ - (void) strcpy(chopprefix, drrb->drr_toname); - if (flags.isprefix) { + if (flags.istail) { + /* + * A filesystem was specified with -e. We want to tack on only + * the tail of the sent snapshot path. + */ + if (strchr(tosnap, '@')) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " + "argument - snapshot not allowed with -e")); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + } + + chopprefix = strrchr(sendfs, '/'); + + if (chopprefix == NULL) { + /* + * The tail is the poolname, so we need to + * prepend a path separator. + */ + int len = strlen(drrb->drr_toname); + cp = malloc(len + 2); + cp[0] = '/'; + (void) strcpy(&cp[1], drrb->drr_toname); + chopprefix = cp; + } else { + chopprefix = drrb->drr_toname + (chopprefix - sendfs); + } + } else if (flags.isprefix) { /* - * They specified a fs with -d, we want to tack on - * everything but the pool name stored in the stream + * A filesystem was specified with -d. We want to tack on + * everything but the first element of the sent snapshot path + * (all but the pool name). */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } - cp = strchr(chopprefix, '/'); - if (cp == NULL) - cp = strchr(chopprefix, '@'); - *cp = '\0'; + + chopprefix = strchr(drrb->drr_toname, '/'); + if (chopprefix == NULL) + chopprefix = strchr(drrb->drr_toname, '@'); } else if (strchr(tosnap, '@') == NULL) { /* - * If they specified a filesystem without -d, we want to - * tack on everything after the fs specified in the - * first name from the stream. + * If a filesystem was specified without -d or -e, we want to + * tack on everything after the fs specified by 'zfs send'. */ - cp = strchr(chopprefix, '@'); - *cp = '\0'; + chopprefix = drrb->drr_toname + strlen(sendfs); + } else { + /* A snapshot was specified as an exact path (no -d or -e). */ + if (recursive) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot specify snapshot name for multi-snapshot " + "stream")); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } - choplen = strlen(chopprefix); + + ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); + ASSERT(chopprefix > drrb->drr_toname); + ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); + ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || + chopprefix[0] == '\0'); /* * Determine name of destination snapshot, store in zc_value. */ + (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); - (void) strncat(zc.zc_value, drrb->drr_toname+choplen, - sizeof (zc.zc_value)); + (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); + free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); @@ -1742,7 +2475,14 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; - if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + /* + * If the exact receive path was specified and this is the + * topmost path in the stream, then if the fs does not exist we + * should look no further. + */ + if ((flags.isprefix || (*(chopprefix = drrb->drr_toname + + strlen(sendfs)) != '\0' && *chopprefix != '@')) && + !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); if (guid_to_name(hdl, tosnap, drrb->drr_fromguid, @@ -1758,6 +2498,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; + /* * Destination fs exists. Therefore this should either * be an incremental, or the stream specifies a new fs @@ -1765,7 +2506,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * away (and have therefore specified -F and removed any * snapshots). */ - if (stream_wantsnewfs) { if (!flags.force) { zcmd_free_nvlists(&zc); @@ -1819,12 +2559,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (-1); } } - if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && - zvol_remove_link(hdl, zhp->zfs_name) != 0) { - zfs_close(zhp); - zcmd_free_nvlists(&zc); - return (-1); - } zfs_close(zhp); } else { /* @@ -1848,7 +2582,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, */ *cp = '\0'; - if (flags.isprefix && !flags.dryrun && + if (flags.isprefix && !flags.istail && !flags.dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); @@ -1873,21 +2607,59 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (recv_skip(hdl, infd, flags.byteswap)); } + zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; + zc.zc_nvlist_dst_size = sizeof (prop_errbuf); + err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; + prop_errflags = (zprop_errflags_t)zc.zc_obj; + + if (err == 0) { + nvlist_t *prop_errors; + VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size, &prop_errors, 0)); + + nvpair_t *prop_err = NULL; + + while ((prop_err = nvlist_next_nvpair(prop_errors, + prop_err)) != NULL) { + char tbuf[1024]; + zfs_prop_t prop; + int intval; + + prop = zfs_name_to_prop(nvpair_name(prop_err)); + (void) nvpair_value_int32(prop_err, &intval); + if (strcmp(nvpair_name(prop_err), + ZPROP_N_MORE_ERRORS) == 0) { + trunc_prop_errs(intval); + break; + } else { + (void) snprintf(tbuf, sizeof (tbuf), + dgettext(TEXT_DOMAIN, + "cannot receive %s property on %s"), + nvpair_name(prop_err), zc.zc_name); + zfs_setprop_error(hdl, prop, intval, tbuf); + } + } + nvlist_free(prop_errors); + } + + zc.zc_nvlist_dst = 0; + zc.zc_nvlist_dst_size = 0; zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { zfs_cmd_t zc2 = { 0 }; (void) strcpy(zc2.zc_name, zc.zc_value); + zc2.zc_cookie = B_TRUE; /* received */ if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); zcmd_free_nvlists(&zc2); } } - if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) { + if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { /* * It may be that this snapshot already exists, * in which case we want to consume & ignore it @@ -1895,7 +2667,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; - char *cp = strchr(zc.zc_value, '@'); + cp = strchr(zc.zc_value, '@'); /* * XXX Do this faster by just iterating over snaps in @@ -1903,7 +2675,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * get a strange "does not exist" error message. */ *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); @@ -1915,14 +2687,13 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } - ioctl_err = recv_skip(hdl, infd, + err = ioctl_err = recv_skip(hdl, infd, flags.byteswap); } } *cp = '@'; } - if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: @@ -1961,18 +2732,25 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "invalid stream (checksum mismatch)")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; + case ENOTSUP: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgraded to receive this stream.")); + (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); + break; + case EDQUOT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "destination %s space quota exceeded"), zc.zc_name); + (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); + break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } } /* - * Mount or recreate the /dev links for the target filesystem - * (if created, or if we tore them down to do an incremental - * restore), and the /dev links for the new snapshot (if - * created). Also mount any children of the target filesystem - * if we did a replication receive (indicated by stream_avl - * being non-NULL). + * Mount the target filesystem (if created). Also mount any + * children of the target filesystem if we did a replication + * receive (indicated by stream_avl being non-NULL). */ cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { @@ -1984,10 +2762,6 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; - err = zvol_create_link(hdl, h->zfs_name); - if (err == 0 && ioctl_err == 0) - err = zvol_create_link(hdl, - zc.zc_value); } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, @@ -2006,6 +2780,19 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, changelist_free(clp); } + if (prop_errflags & ZPROP_ERR_NOCLEAR) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " + "failed to clear unreceived properties on %s"), + zc.zc_name); + (void) fprintf(stderr, "\n"); + } + if (prop_errflags & ZPROP_ERR_NORESTORE) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " + "failed to restore original properties on %s"), + zc.zc_name); + (void) fprintf(stderr, "\n"); + } + if (err || ioctl_err) return (-1); @@ -2028,13 +2815,16 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, - int infd, avl_tree_t *stream_avl, char **top_zfs) + int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, + char **top_zfs) { int err; dmu_replay_record_t drr, drr_noswap; struct drr_begin *drrb = &drr.drr_u.drr_begin; char errbuf[1024]; zio_cksum_t zcksum = { 0 }; + uint64_t featureflags; + int hdrtype; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -2072,7 +2862,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_version = BSWAP_64(drrb->drr_version); + drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); @@ -2086,23 +2876,45 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } + featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); + + if (!DMU_STREAM_SUPPORTED(featureflags) || + (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "stream has unsupported feature, feature flags = %lx"), + featureflags); + return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + } + if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } - if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) { + if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { + char nonpackage_sendfs[ZFS_MAXNAMELEN]; + if (sendfs == NULL) { + /* + * We were not called from zfs_receive_package(). Get + * the fs specified by 'zfs send'. + */ + char *cp; + (void) strlcpy(nonpackage_sendfs, + drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); + if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) + *cp = '\0'; + sendfs = nonpackage_sendfs; + } return (zfs_receive_one(hdl, infd, tosnap, flags, - &drr, &drr_noswap, stream_avl, top_zfs)); - } else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) { + &drr, &drr_noswap, sendfs, stream_nv, stream_avl, + top_zfs)); + } else { + assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == + DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, &zcksum, top_zfs)); - } else { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "stream is unsupported version %llu"), - drrb->drr_version); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } @@ -2119,7 +2931,8 @@ zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags, char *top_zfs = NULL; int err; - err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs); + err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, + stream_avl, &top_zfs); if (err == 0 && !flags.nomount && top_zfs) { zfs_handle_t *zhp; diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 44faf02af..24725ec04 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -138,7 +137,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) if (find_vdev_problem(child[c], func)) return (B_TRUE); } else { - verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); if (func(vs->vs_state, vs->vs_aux, @@ -173,7 +172,8 @@ check_status(nvlist_t *config, boolean_t isimport) { nvlist_t *nvroot; vdev_stat_t *vs; - uint_t vsc; + pool_scan_stat_t *ps = NULL; + uint_t vsc, psc; uint64_t nerr; uint64_t version; uint64_t stateval; @@ -184,15 +184,24 @@ check_status(nvlist_t *config, boolean_t isimport) &version) == 0); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &stateval) == 0); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + + /* + * Currently resilvering a vdev + */ + (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &psc); + if (ps && ps->pss_func == POOL_SCAN_RESILVER && + ps->pss_state == DSS_SCANNING) + return (ZPOOL_STATUS_RESILVERING); /* * Pool last accessed by another system. */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); if (hostid != 0 && (unsigned long)hostid != gethostid() && stateval == POOL_STATE_ACTIVE) return (ZPOOL_STATUS_HOSTID_MISMATCH); @@ -289,12 +298,6 @@ check_status(nvlist_t *config, boolean_t isimport) return (ZPOOL_STATUS_REMOVED_DEV); /* - * Currently resilvering - */ - if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) - return (ZPOOL_STATUS_RESILVERING); - - /* * Outdated, but usable, version */ if (version < SPA_VERSION) @@ -328,3 +331,68 @@ zpool_import_status(nvlist_t *config, char **msgid) return (ret); } + +static void +dump_ddt_stat(const ddt_stat_t *dds, int h) +{ + char refcnt[6]; + char blocks[6], lsize[6], psize[6], dsize[6]; + char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; + + if (dds == NULL || dds->dds_blocks == 0) + return; + + if (h == -1) + (void) strcpy(refcnt, "Total"); + else + zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); + + zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); + zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); + zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); + zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); + zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); + zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); + zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); + zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + refcnt, + blocks, lsize, psize, dsize, + ref_blocks, ref_lsize, ref_psize, ref_dsize); +} + +/* + * Print the DDT histogram and the column totals. + */ +void +zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) +{ + int h; + + (void) printf("\n"); + + (void) printf("bucket " + " allocated " + " referenced \n"); + (void) printf("______ " + "______________________________ " + "______________________________\n"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "refcnt", + "blocks", "LSIZE", "PSIZE", "DSIZE", + "blocks", "LSIZE", "PSIZE", "DSIZE"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "------", + "------", "-----", "-----", "-----", + "------", "-----", "-----", "-----"); + + for (h = 0; h < 64; h++) + dump_ddt_stat(&ddh->ddh_stat[h], h); + + dump_ddt_stat(dds_total, -1); + + (void) printf("\n"); +} diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 4da0fb44b..2e73f76ea 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -94,8 +93,6 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_VOLTOOBIG: return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " "this system")); - case EZFS_VOLHASDATA: - return (dgettext(TEXT_DOMAIN, "volume has data")); case EZFS_INVALIDNAME: return (dgettext(TEXT_DOMAIN, "invalid name")); case EZFS_BADRESTORE: @@ -138,16 +135,12 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "smb remove share failed")); case EZFS_SHARESMBFAILED: return (dgettext(TEXT_DOMAIN, "smb add share failed")); - case EZFS_ISCSISVCUNAVAIL: - return (dgettext(TEXT_DOMAIN, - "iscsitgt service need to be enabled by " - "a privileged user")); - case EZFS_DEVLINKS: - return (dgettext(TEXT_DOMAIN, "failed to create /dev links")); case EZFS_PERM: return (dgettext(TEXT_DOMAIN, "permission denied")); case EZFS_NOSPC: return (dgettext(TEXT_DOMAIN, "out of space")); + case EZFS_FAULT: + return (dgettext(TEXT_DOMAIN, "bad address")); case EZFS_IO: return (dgettext(TEXT_DOMAIN, "I/O error")); case EZFS_INTR: @@ -161,12 +154,6 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "recursive dataset dependency")); case EZFS_NOHISTORY: return (dgettext(TEXT_DOMAIN, "no history available")); - case EZFS_UNSHAREISCSIFAILED: - return (dgettext(TEXT_DOMAIN, - "iscsitgtd failed request to unshare")); - case EZFS_SHAREISCSIFAILED: - return (dgettext(TEXT_DOMAIN, - "iscsitgtd failed request to share")); case EZFS_POOLPROPS: return (dgettext(TEXT_DOMAIN, "failed to retrieve " "pool properties")); @@ -218,6 +205,20 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_REFTAG_HOLD: return (dgettext(TEXT_DOMAIN, "tag already exists on this " "dataset")); + case EZFS_TAGTOOLONG: + return (dgettext(TEXT_DOMAIN, "tag too long")); + case EZFS_PIPEFAILED: + return (dgettext(TEXT_DOMAIN, "pipe create failed")); + case EZFS_THREADCREATEFAILED: + return (dgettext(TEXT_DOMAIN, "thread create failed")); + case EZFS_POSTSPLIT_ONLINE: + return (dgettext(TEXT_DOMAIN, "disk was split from this pool " + "into a new one")); + case EZFS_SCRUBBING: + return (dgettext(TEXT_DOMAIN, "currently scrubbing; " + "use 'zpool scrub -s' to cancel current scrub")); + case EZFS_NO_SCRUB: + return (dgettext(TEXT_DOMAIN, "there is no active scrub")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -306,6 +307,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, zfs_verror(hdl, EZFS_IO, fmt, ap); return (-1); + case EFAULT: + zfs_verror(hdl, EZFS_FAULT, fmt, ap); + return (-1); + case EINTR: zfs_verror(hdl, EZFS_INTR, fmt, ap); return (-1); @@ -378,7 +383,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; default: - zfs_error_aux(hdl, strerror(errno)); + zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); break; } @@ -610,6 +615,7 @@ libzfs_fini(libzfs_handle_t *hdl) if (hdl->libzfs_log_str) (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); + libzfs_fru_clear(hdl, B_TRUE); namespace_clear(hdl); libzfs_mnttab_fini(hdl); free(hdl); @@ -686,7 +692,7 @@ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) - len = 2048; + len = 4*1024; zc->zc_nvlist_dst_size = len; if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t) zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL) @@ -812,6 +818,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) "PROPERTY")); cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN, "VALUE")); + cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN, + "RECEIVED")); cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN, "SOURCE")); @@ -825,7 +833,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) * inheriting from the longest name. This is acceptable because in the * majority of cases 'SOURCE' is the last column displayed, and we don't * use the width anyway. Note that the 'VALUE' column can be oversized, - * if the name of the property is much longer the any values we find. + * if the name of the property is much longer than any values we find. */ for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* @@ -856,6 +864,11 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE]) cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width; + /* 'RECEIVED' column. */ + if (pl != cbp->cb_proplist && + pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD]) + cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width; + /* * 'NAME' and 'SOURCE' columns */ @@ -871,7 +884,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) /* * Now go through and print the headers. */ - for (i = 0; i < 4; i++) { + for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: title = dgettext(TEXT_DOMAIN, "NAME"); @@ -882,6 +895,9 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) case GET_COL_VALUE: title = dgettext(TEXT_DOMAIN, "VALUE"); break; + case GET_COL_RECVD: + title = dgettext(TEXT_DOMAIN, "RECEIVED"); + break; case GET_COL_SOURCE: title = dgettext(TEXT_DOMAIN, "SOURCE"); break; @@ -890,7 +906,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) } if (title != NULL) { - if (i == 3 || cbp->cb_columns[i + 1] == 0) + if (i == (ZFS_GET_NCOLS - 1) || + cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", title); else (void) printf("%-*s ", @@ -908,7 +925,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) void zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, const char *propname, const char *value, zprop_source_t sourcetype, - const char *source) + const char *source, const char *recvd_value) { int i; const char *str; @@ -923,7 +940,7 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, if (cbp->cb_first) zprop_print_headers(cbp, cbp->cb_type); - for (i = 0; i < 4; i++) { + for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: str = name; @@ -960,14 +977,21 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, "inherited from %s", source); str = buf; break; + case ZPROP_SRC_RECEIVED: + str = "received"; + break; } break; + case GET_COL_RECVD: + str = (recvd_value == NULL ? "-" : recvd_value); + break; + default: continue; } - if (cbp->cb_columns[i + 1] == 0) + if (cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", str); else if (cbp->cb_scripted) (void) printf("%s\t", str); @@ -975,7 +999,6 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], str); - } (void) printf("\n"); @@ -1037,7 +1060,7 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) return (-1); } - /* Rely on stroull() to process the numeric portion. */ + /* Rely on strtoull() to process the numeric portion. */ errno = 0; *num = strtoull(value, &end, 10); diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index 230c233a2..9a6d712e5 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -75,6 +75,7 @@ extern "C" { #include <sys/u8_textprep.h> #include <sys/sysevent/eventdefs.h> #include <sys/sysevent/dev.h> +#include <sys/sunddi.h> /* * Debugging @@ -105,21 +106,27 @@ extern void vpanic(const char *, __va_list); #define fm_panic panic +extern int aok; + /* This definition is copied from assert.h. */ #if defined(__STDC__) #if __STDC_VERSION__ - 0 >= 199901L -#define verify(EX) (void)((EX) || \ +#define zverify(EX) (void)((EX) || (aok) || \ (__assert_c99(#EX, __FILE__, __LINE__, __func__), 0)) #else -#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0)) +#define zverify(EX) (void)((EX) || (aok) || \ + (__assert(#EX, __FILE__, __LINE__), 0)) #endif /* __STDC_VERSION__ - 0 >= 199901L */ #else -#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0)) +#define zverify(EX) (void)((EX) || (aok) || \ + (_assert("EX", __FILE__, __LINE__), 0)) #endif /* __STDC__ */ -#define VERIFY verify -#define ASSERT assert +#define VERIFY zverify +#define ASSERT zverify +#undef assert +#define assert zverify extern void __assert(const char *, const char *, int); @@ -130,7 +137,7 @@ extern void __assert(const char *, const char *, int); #define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \ const TYPE __left = (TYPE)(LEFT); \ const TYPE __right = (TYPE)(RIGHT); \ - if (!(__left OP __right)) { \ + if (!(__left OP __right) && (!aok)) { \ char *__buf = alloca(256); \ (void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \ #LEFT, #OP, #RIGHT, \ @@ -196,6 +203,18 @@ typedef struct kthread kthread_t; #define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ zk_thread_create(func, arg) #define thread_exit() thr_exit(NULL) +#define thread_join(t) panic("libzpool cannot join threads") + +#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS) + +/* in libzpool, p0 exists only to have its address taken */ +struct proc { + uintptr_t this_is_never_used_dont_dereference_it; +}; + +extern struct proc p0; + +#define PS_NONE -1 extern kthread_t *zk_thread_create(void (*func)(), void *arg); @@ -318,20 +337,27 @@ typedef void (task_func_t)(void *); #define TASKQ_PREPOPULATE 0x0001 #define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ #define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ -#define TASKQ_THREADS_CPU_PCT 0x0008 /* Use dynamic thread scheduling */ +#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */ +#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */ #define TQ_SLEEP KM_SLEEP /* Can block for memory */ #define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ -#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +#define TQ_FRONT 0x08 /* Queue in front */ extern taskq_t *system_taskq; extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); +#define taskq_create_proc(a, b, c, d, e, p, f) \ + (taskq_create(a, b, c, d, e, f)) +#define taskq_create_sysdc(a, b, d, e, p, dc, f) \ + (taskq_create(a, b, maxclsyspri, d, e, f)) extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); extern int taskq_member(taskq_t *, void *); extern void system_taskq_init(void); +extern void system_taskq_fini(void); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 @@ -345,6 +371,7 @@ typedef struct vnode { char *v_path; } vnode_t; +#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ typedef struct xoptattr { timestruc_t xoa_createtime; /* Create time of file */ @@ -360,6 +387,8 @@ typedef struct xoptattr { uint8_t xoa_opaque; uint8_t xoa_av_quarantined; uint8_t xoa_av_modified; + uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; + uint8_t xoa_reparse; } xoptattr_t; typedef struct vattr { @@ -406,9 +435,11 @@ typedef struct vsecattr { #define CRCREAT 0 +extern int fop_getattr(vnode_t *vp, vattr_t *vap); + #define VOP_CLOSE(vp, f, c, o, cr, ct) 0 #define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 -#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0) +#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap)); #define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) @@ -433,13 +464,18 @@ extern vnode_t *rootdir; /* * Random stuff */ -#define lbolt (gethrtime() >> 23) -#define lbolt64 (gethrtime() >> 23) +#define ddi_get_lbolt() (gethrtime() >> 23) +#define ddi_get_lbolt64() (gethrtime() >> 23) #define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ extern void delay(clock_t ticks); #define gethrestime_sec() time(NULL) +#define gethrestime(t) \ + do {\ + (t)->tv_sec = gethrestime_sec();\ + (t)->tv_nsec = 0;\ + } while (0); #define max_ncpus 64 @@ -490,6 +526,9 @@ typedef struct callb_cpr { #define zone_dataset_visible(x, y) (1) #define INGLOBALZONE(z) (1) +extern char *kmem_asprintf(const char *fmt, ...); +#define strfree(str) kmem_free((str), strlen(str)+1) + /* * Hostname information */ @@ -497,6 +536,9 @@ extern char hw_serial[]; /* for userland-emulated hostid access */ extern int ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result); +extern int ddi_strtoull(const char *str, char **nptr, int base, + u_longlong_t *result); + /* ZFS Boot Related stuff. */ struct _buf { diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 89108fe5b..5284c1253 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +42,7 @@ * Emulation of kernel services in userland. */ +int aok; uint64_t physmem; vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; @@ -50,6 +51,9 @@ struct utsname utsname = { "userland", "libzpool", "1", "1", "na" }; +/* this only exists to have its address taken */ +struct proc p0; + /* * ========================================================================= * threads @@ -269,7 +273,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) clock_t delta; top: - delta = abstime - lbolt; + delta = abstime - ddi_get_lbolt(); if (delta <= 0) return (-1); @@ -444,6 +448,24 @@ vn_close(vnode_t *vp) umem_free(vp, sizeof (vnode_t)); } +/* + * At a minimum we need to update the size since vdev_reopen() + * will no longer call vn_openat(). + */ +int +fop_getattr(vnode_t *vp, vattr_t *vap) +{ + struct stat64 st; + + if (fstat64(vp->v_fd, &st) == -1) { + close(vp->v_fd); + return (errno); + } + + vap->va_size = st.st_size; + return (0); +} + #ifdef ZFS_DEBUG /* @@ -754,6 +776,17 @@ ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) return (0); } +int +ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) +{ + char *end; + + *result = strtoull(str, &end, base); + if (*result == 0) + return (errno); + return (0); +} + /* * ========================================================================= * kernel emulation setup & teardown @@ -779,7 +812,8 @@ kernel_init(int mode) dprintf("physmem = %llu pages (%.2f GB)\n", physmem, (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); - (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid()); + (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", + (mode & FWRITE) ? gethostid() : 0); VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); @@ -794,6 +828,8 @@ kernel_fini(void) { spa_fini(); + system_taskq_fini(); + close(random_fd); close(urandom_fd); @@ -884,3 +920,27 @@ ksiddomain_rele(ksiddomain_t *ksid) spa_strfree(ksid->kd_name); umem_free(ksid, sizeof (ksiddomain_t)); } + +/* + * Do not change the length of the returned string; it must be freed + * with strfree(). + */ +char * +kmem_asprintf(const char *fmt, ...) +{ + int size; + va_list adx; + char *buf; + + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + + buf = kmem_alloc(size, KM_SLEEP); + + va_start(adx, fmt); + size = vsnprintf(buf, size, fmt, adx); + va_end(adx); + + return (buf); +} diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 1a73fe83c..8db5d11c1 100644 --- a/lib/libzpool/taskq.c +++ b/lib/libzpool/taskq.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -49,6 +49,8 @@ struct taskq { int tq_nalloc; int tq_minalloc; int tq_maxalloc; + kcondvar_t tq_maxalloc_cv; + int tq_maxalloc_wait; task_t *tq_freelist; task_t tq_task; }; @@ -57,26 +59,36 @@ static task_t * task_alloc(taskq_t *tq, int tqflags) { task_t *t; + int rv; - if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { +again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { tq->tq_freelist = t->task_next; } else { - mutex_exit(&tq->tq_lock); if (tq->tq_nalloc >= tq->tq_maxalloc) { - if (!(tqflags & KM_SLEEP)) { - mutex_enter(&tq->tq_lock); + if (!(tqflags & KM_SLEEP)) return (NULL); - } + /* * We don't want to exceed tq_maxalloc, but we can't * wait for other tasks to complete (and thus free up * task structures) without risking deadlock with * the caller. So, we just delay for one second - * to throttle the allocation rate. + * to throttle the allocation rate. If we have tasks + * complete before one second timeout expires then + * taskq_ent_free will signal us and we will + * immediately retry the allocation. */ - delay(hz); + tq->tq_maxalloc_wait++; + rv = cv_timedwait(&tq->tq_maxalloc_cv, + &tq->tq_lock, ddi_get_lbolt() + hz); + tq->tq_maxalloc_wait--; + if (rv > 0) + goto again; /* signaled */ } + mutex_exit(&tq->tq_lock); + t = kmem_alloc(sizeof (task_t), tqflags); + mutex_enter(&tq->tq_lock); if (t != NULL) tq->tq_nalloc++; @@ -96,6 +108,9 @@ task_free(taskq_t *tq, task_t *t) kmem_free(t, sizeof (task_t)); mutex_enter(&tq->tq_lock); } + + if (tq->tq_maxalloc_wait) + cv_signal(&tq->tq_maxalloc_cv); } taskqid_t @@ -114,8 +129,13 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) mutex_exit(&tq->tq_lock); return (0); } - t->task_next = &tq->tq_task; - t->task_prev = tq->tq_task.task_prev; + if (tqflags & TQ_FRONT) { + t->task_next = tq->tq_task.task_next; + t->task_prev = &tq->tq_task; + } else { + t->task_next = &tq->tq_task; + t->task_prev = tq->tq_task.task_prev; + } t->task_next->task_prev = t; t->task_prev->task_next = t; t->task_func = func; @@ -191,6 +211,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); + cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); tq->tq_flags = flags | TASKQ_ACTIVE; tq->tq_active = nthreads; tq->tq_nthreads = nthreads; @@ -247,6 +268,7 @@ taskq_destroy(taskq_t *tq) mutex_destroy(&tq->tq_lock); cv_destroy(&tq->tq_dispatch_cv); cv_destroy(&tq->tq_wait_cv); + cv_destroy(&tq->tq_maxalloc_cv); kmem_free(tq, sizeof (taskq_t)); } @@ -272,3 +294,10 @@ system_taskq_init(void) system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, TASKQ_DYNAMIC | TASKQ_PREPOPULATE); } + +void +system_taskq_fini(void) +{ + taskq_destroy(system_taskq); + system_taskq = NULL; /* defensive */ +} diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c index 781edb6e8..9b99531fd 100644 --- a/lib/libzpool/util.c +++ b/lib/libzpool/util.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <assert.h> @@ -90,7 +89,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) if (is_log) prefix = "log "; - if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) != 0) vs = &v0; |