diff options
Diffstat (limited to 'lib/libzfs/libzfs_pool.c')
-rw-r--r-- | lib/libzfs/libzfs_pool.c | 431 |
1 files changed, 324 insertions, 107 deletions
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 75ecc54e2..fd734d8b4 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -42,6 +42,7 @@ #include <sys/zfs_ioctl.h> #include <sys/zio.h> #include <strings.h> +#include <dlfcn.h> #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -55,6 +56,10 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb); #define BOOTCMD "installboot(1M)" #endif +#define DISK_ROOT "/dev/dsk" +#define RDISK_ROOT "/dev/rdsk" +#define BACKUP_SLICE "s2" + /* * ==================================================================== * zpool property functions @@ -628,6 +633,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) /* + * Don't start the slice at the default block of 34; many storage + * devices will use a stripe width of 128k, so start there instead. + */ +#define NEW_START_BLOCK 256 + +/* * Validate the given pool name, optionally putting an extended error message in * 'buf'. */ @@ -1369,46 +1380,90 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type) } /* + * Find a vdev that matches the search criteria specified. We use the + * the nvpair name to determine how we should look for the device. * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL * spare; but FALSE if its an INUSE spare. */ static nvlist_t * -vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, - boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) +vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, + boolean_t *l2cache, boolean_t *log) { uint_t c, children; nvlist_t **child; - uint64_t theguid, present; - char *path; - uint64_t wholedisk = 0; nvlist_t *ret; uint64_t is_log; + char *srchkey; + nvpair_t *pair = nvlist_next_nvpair(search, NULL); - verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0); + /* Nothing to look for */ + if (search == NULL || pair == NULL) + return (NULL); + + /* Obtain the key we will use to search */ + srchkey = nvpair_name(pair); + + switch (nvpair_type(pair)) { + case DATA_TYPE_UINT64: { + uint64_t srchval, theguid, present; + + verify(nvpair_value_uint64(pair, &srchval) == 0); + if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, + &present) == 0) { + /* + * If the device has never been present since + * import, the only reliable way to match the + * vdev is by GUID. + */ + verify(nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_GUID, &theguid) == 0); + if (theguid == srchval) + return (nv); + } + } + break; + } + + case DATA_TYPE_STRING: { + char *srchval, *val; + + verify(nvpair_value_string(pair, &srchval) == 0); + if (nvlist_lookup_string(nv, srchkey, &val) != 0) + break; - if (search == NULL && - nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) { /* - * If the device has never been present since import, the only - * reliable way to match the vdev is by GUID. + * Search for the requested value. We special case the search + * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise, + * all other searches are simple string compares. */ - if (theguid == guid) - return (nv); - } else if (search != NULL && - nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk); - if (wholedisk) { - /* - * For whole disks, the internal path has 's0', but the - * path passed in by the user doesn't. - */ - if (strlen(search) == strlen(path) - 2 && - strncmp(search, path, strlen(search)) == 0) - return (nv); - } else if (strcmp(search, path) == 0) { - return (nv); + if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) { + uint64_t wholedisk = 0; + + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk); + if (wholedisk) { + /* + * For whole disks, the internal path has 's0', + * but the path passed in by the user doesn't. + */ + if (strlen(srchval) == strlen(val) - 2 && + strncmp(srchval, val, strlen(srchval)) == 0) + return (nv); + break; + } } + + /* + * Common case + */ + if (strcmp(srchval, val) == 0) + return (nv); + break; + } + + default: + break; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, @@ -1416,7 +1471,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, return (NULL); for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, guid, + if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { /* * The 'is_log' value is only set for the toplevel @@ -1437,7 +1492,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, guid, + if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *avail_spare = B_TRUE; return (ret); @@ -1448,7 +1503,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) { - if ((ret = vdev_to_nvlist_iter(child[c], search, guid, + if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *l2cache = B_TRUE; return (ret); @@ -1459,24 +1514,48 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid, return (NULL); } +/* + * Given a physical path (minus the "/devices" prefix), find the + * associated vdev. + */ +nvlist_t * +zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, + boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) +{ + nvlist_t *search, *nvroot, *ret; + + verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); + verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); + + verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + *avail_spare = B_FALSE; + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + nvlist_free(search); + + return (ret); +} + nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { char buf[MAXPATHLEN]; - const char *search; char *end; - nvlist_t *nvroot; + nvlist_t *nvroot, *search, *ret; uint64_t guid; + verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); + guid = strtoull(path, &end, 10); if (guid != 0 && *end == '\0') { - search = NULL; + verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else if (path[0] != '/') { (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path); - search = buf; + verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); } else { - search = path; + verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); } verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, @@ -1486,8 +1565,10 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; - return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare, - l2cache, log)); + ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); + nvlist_free(search); + + return (ret); } static int @@ -1504,81 +1585,142 @@ vdev_online(nvlist_t *nv) } /* - * Get phys_path for a root pool - * Return 0 on success; non-zeron on failure. + * Helper function for zpool_get_physpaths(). */ -int -zpool_get_physpath(zpool_handle_t *zhp, char *physpath) +static int +vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size, + size_t *bytes_written) { + size_t bytes_left, pos, rsz; + char *tmppath; + const char *format; + + if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH, + &tmppath) != 0) + return (EZFS_NODEVICE); + + pos = *bytes_written; + bytes_left = physpath_size - pos; + format = (pos == 0) ? "%s" : " %s"; + + rsz = snprintf(physpath + pos, bytes_left, format, tmppath); + *bytes_written += rsz; + + if (rsz >= bytes_left) { + /* if physpath was not copied properly, clear it */ + if (bytes_left != 0) { + physpath[pos] = 0; + } + return (EZFS_NOSPC); + } + return (0); +} + +static int +vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, + size_t *rsz, boolean_t is_spare) +{ + char *type; + int ret; + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) + return (EZFS_INVALCONFIG); + + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + /* + * An active spare device has ZPOOL_CONFIG_IS_SPARE set. + * For a spare vdev, we only want to boot from the active + * spare device. + */ + if (is_spare) { + uint64_t spare = 0; + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, + &spare); + if (!spare) + return (EZFS_INVALCONFIG); + } + + if (vdev_online(nv)) { + if ((ret = vdev_get_one_physpath(nv, physpath, + phypath_size, rsz)) != 0) + return (ret); + } + } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || + strcmp(type, VDEV_TYPE_REPLACING) == 0 || + (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { + nvlist_t **child; + uint_t count; + int i, ret; + + if (nvlist_lookup_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) + return (EZFS_INVALCONFIG); + + for (i = 0; i < count; i++) { + ret = vdev_get_physpaths(child[i], physpath, + phypath_size, rsz, is_spare); + if (ret == EZFS_NOSPC) + return (ret); + } + } + + return (EZFS_POOL_INVALARG); +} + +/* + * Get phys_path for a root pool config. + * Return 0 on success; non-zero on failure. + */ +static int +zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size) +{ + size_t rsz; nvlist_t *vdev_root; nvlist_t **child; uint_t count; - int i; + char *type; - /* - * Make sure this is a root pool, as phys_path doesn't mean - * anything to a non-root pool. - */ - if (!pool_is_bootable(zhp)) - return (-1); + rsz = 0; - verify(nvlist_lookup_nvlist(zhp->zpool_config, - ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0); + if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &vdev_root) != 0) + return (EZFS_INVALCONFIG); - if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, + if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 || + nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) - return (-2); + return (EZFS_INVALCONFIG); - for (i = 0; i < count; i++) { - nvlist_t **child2; - uint_t count2; - char *type; - char *tmppath; - int j; + /* + * root pool can not have EFI labeled disks and can only have + * a single top-level vdev. + */ + if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 || + pool_uses_efi(vdev_root)) + return (EZFS_POOL_INVALARG); - if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type) - != 0) - return (-3); - - if (strcmp(type, VDEV_TYPE_DISK) == 0) { - if (!vdev_online(child[i])) - return (-8); - verify(nvlist_lookup_string(child[i], - ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0); - (void) strncpy(physpath, tmppath, strlen(tmppath)); - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) { - if (nvlist_lookup_nvlist_array(child[i], - ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0) - return (-4); - - for (j = 0; j < count2; j++) { - if (!vdev_online(child2[j])) - return (-8); - if (nvlist_lookup_string(child2[j], - ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0) - return (-5); - - if ((strlen(physpath) + strlen(tmppath)) > - MAXNAMELEN) - return (-6); - - if (strlen(physpath) == 0) { - (void) strncpy(physpath, tmppath, - strlen(tmppath)); - } else { - (void) strcat(physpath, " "); - (void) strcat(physpath, tmppath); - } - } - } else { - return (-7); - } - } + (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz, + B_FALSE); + + /* No online devices */ + if (rsz == 0) + return (EZFS_NODEVICE); return (0); } /* + * Get phys_path for a root pool + * Return 0 on success; non-zero on failure. + */ +int +zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) +{ + return (zpool_get_config_physpath(zhp->zpool_config, physpath, + phypath_size)); +} + +/* * Returns TRUE if the given guid corresponds to the given type. * This is used to check for hot spares (INUSE or not), and level 2 cache * devices. @@ -1607,6 +1749,45 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type) } /* + * If the device has being dynamically expanded then we need to relabel + * the disk to use the new unallocated space. + */ +static int +zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) +{ + char path[MAXPATHLEN]; + char errbuf[1024]; + int fd, error; + int (*_efi_use_whole_disk)(int); + + if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT, + "efi_use_whole_disk")) == NULL) + return (-1); + + (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name); + + if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to open device"), name); + return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); + } + + /* + * It's possible that we might encounter an error if the device + * does not have any unallocated space left. If so, we simply + * ignore that error and continue on. + */ + error = _efi_use_whole_disk(fd); + (void) close(fd); + if (error && error != VT_ENOSPC) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s': unable to read disk capacity"), name); + return (zfs_error(hdl, EZFS_NOCAP, errbuf)); + } + return (0); +} + +/* * Bring the specified vdev online. The 'flags' parameter is a set of the * ZFS_ONLINE_* flags. */ @@ -1617,15 +1798,20 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, zfs_cmd_t zc = { 0 }; char msg[1024]; nvlist_t *tgt; - boolean_t avail_spare, l2cache; + boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; - (void) snprintf(msg, sizeof (msg), - dgettext(TEXT_DOMAIN, "cannot online %s"), path); + if (flags & ZFS_ONLINE_EXPAND) { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot expand %s"), path); + } else { + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot online %s"), path); + } (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - NULL)) == NULL) + &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); @@ -1634,6 +1820,31 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE) return (zfs_error(hdl, EZFS_ISSPARE, msg)); + if (flags & ZFS_ONLINE_EXPAND || + zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { + char *pathname = NULL; + uint64_t wholedisk = 0; + + (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk); + verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, + &pathname) == 0); + + /* + * XXX - L2ARC 1.0 devices can't support expansion. + */ + if (l2cache) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot expand cache devices")); + return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg)); + } + + if (wholedisk) { + pathname += strlen(DISK_ROOT) + 1; + (void) zpool_relabel_disk(zhp->zpool_hdl, pathname); + } + } + zc.zc_cookie = VDEV_STATE_ONLINE; zc.zc_obj = flags; @@ -1684,6 +1895,12 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) */ return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); + case EEXIST: + /* + * The log device has unplayed logs + */ + return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg)); + default: return (zpool_standard_error(hdl, errno, msg)); } @@ -1888,6 +2105,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please " "be sure to invoke %s to make '%s' bootable.\n"), BOOTCMD, new_disk); + + /* + * XXX need a better way to prevent user from + * booting up a half-baked vdev. + */ + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make " + "sure to wait until resilver is done " + "before rebooting.\n")); } return (0); } @@ -2803,14 +3028,6 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, free(mntpnt); } -#define RDISK_ROOT "/dev/rdsk" -#define BACKUP_SLICE "s2" -/* - * Don't start the slice at the default block of 34; many storage - * devices will use a stripe width of 128k, so start there instead. - */ -#define NEW_START_BLOCK 256 - /* * Read the EFI label from the config, if a label does not exist then * pass back the error to the caller. If the caller has passed a non-NULL |