aboutsummaryrefslogtreecommitdiffstats
path: root/lib/libzfs/libzfs_pool.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libzfs/libzfs_pool.c')
-rw-r--r--lib/libzfs/libzfs_pool.c431
1 files changed, 324 insertions, 107 deletions
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 75ecc54e2..fd734d8b4 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -42,6 +42,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/zio.h>
#include <strings.h>
+#include <dlfcn.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@@ -55,6 +56,10 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
#define BOOTCMD "installboot(1M)"
#endif
+#define DISK_ROOT "/dev/dsk"
+#define RDISK_ROOT "/dev/rdsk"
+#define BACKUP_SLICE "s2"
+
/*
* ====================================================================
* zpool property functions
@@ -628,6 +633,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
/*
+ * Don't start the slice at the default block of 34; many storage
+ * devices will use a stripe width of 128k, so start there instead.
+ */
+#define NEW_START_BLOCK 256
+
+/*
* Validate the given pool name, optionally putting an extended error message in
* 'buf'.
*/
@@ -1369,46 +1380,90 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
}
/*
+ * Find a vdev that matches the search criteria specified. We use the
+ * nvpair name to determine how we should look for the device.
* 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
* spare; but FALSE if its an INUSE spare.
*/
static nvlist_t *
-vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
- boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
+ boolean_t *l2cache, boolean_t *log)
{
uint_t c, children;
nvlist_t **child;
- uint64_t theguid, present;
- char *path;
- uint64_t wholedisk = 0;
nvlist_t *ret;
uint64_t is_log;
+ char *srchkey;
+ nvpair_t *pair = nvlist_next_nvpair(search, NULL);
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
+ /* Nothing to look for */
+ if (search == NULL || pair == NULL)
+ return (NULL);
+
+ /* Obtain the key we will use to search */
+ srchkey = nvpair_name(pair);
+
+ switch (nvpair_type(pair)) {
+ case DATA_TYPE_UINT64: {
+ uint64_t srchval, theguid, present;
+
+ verify(nvpair_value_uint64(pair, &srchval) == 0);
+ if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+ &present) == 0) {
+ /*
+ * If the device has never been present since
+ * import, the only reliable way to match the
+ * vdev is by GUID.
+ */
+ verify(nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_GUID, &theguid) == 0);
+ if (theguid == srchval)
+ return (nv);
+ }
+ }
+ break;
+ }
+
+ case DATA_TYPE_STRING: {
+ char *srchval, *val;
+
+ verify(nvpair_value_string(pair, &srchval) == 0);
+ if (nvlist_lookup_string(nv, srchkey, &val) != 0)
+ break;
- if (search == NULL &&
- nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
/*
- * If the device has never been present since import, the only
- * reliable way to match the vdev is by GUID.
+ * Search for the requested value. We special case the search
+ * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise,
+ * all other searches are simple string compares.
*/
- if (theguid == guid)
- return (nv);
- } else if (search != NULL &&
- nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
- (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk);
- if (wholedisk) {
- /*
- * For whole disks, the internal path has 's0', but the
- * path passed in by the user doesn't.
- */
- if (strlen(search) == strlen(path) - 2 &&
- strncmp(search, path, strlen(search)) == 0)
- return (nv);
- } else if (strcmp(search, path) == 0) {
- return (nv);
+ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) {
+ uint64_t wholedisk = 0;
+
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk);
+ if (wholedisk) {
+ /*
+ * For whole disks, the internal path has 's0',
+ * but the path passed in by the user doesn't.
+ */
+ if (strlen(srchval) == strlen(val) - 2 &&
+ strncmp(srchval, val, strlen(srchval)) == 0)
+ return (nv);
+ break;
+ }
}
+
+ /*
+ * Common case
+ */
+ if (strcmp(srchval, val) == 0)
+ return (nv);
+ break;
+ }
+
+ default:
+ break;
}
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
@@ -1416,7 +1471,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
return (NULL);
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
/*
* The 'is_log' value is only set for the toplevel
@@ -1437,7 +1492,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*avail_spare = B_TRUE;
return (ret);
@@ -1448,7 +1503,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0) {
for (c = 0; c < children; c++) {
- if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+ if ((ret = vdev_to_nvlist_iter(child[c], search,
avail_spare, l2cache, NULL)) != NULL) {
*l2cache = B_TRUE;
return (ret);
@@ -1459,24 +1514,48 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
return (NULL);
}
+/*
+ * Given a physical path (minus the "/devices" prefix), find the
+ * associated vdev.
+ *
+ * The lookup is an exact string match of 'ppath' against
+ * ZPOOL_CONFIG_PHYS_PATH, performed by vdev_to_nvlist_iter() over the
+ * pool's vdev tree.  The return value is the matching node of that tree
+ * (not a copy), or NULL if no vdev matches.
+ *
+ * '*avail_spare' and '*l2cache' are set to B_TRUE when the match is reached
+ * through the ZPOOL_CONFIG_SPARES or ZPOOL_CONFIG_L2CACHE arrays and are
+ * B_FALSE otherwise.  'log' may be NULL; when it is not, '*log' is reset to
+ * B_FALSE before the search and then handed to vdev_to_nvlist_iter().
+ */
+nvlist_t *
+zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
+    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+{
+	nvlist_t *search, *nvroot, *ret;
+
+	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	/*
+	 * vdev_to_nvlist_iter() raises '*avail_spare' and '*l2cache' on a
+	 * match but never clears them, so every out-parameter has to start
+	 * out B_FALSE or the caller reads garbage when nothing (or a plain
+	 * vdev) is found.  This is the same initialisation zpool_find_vdev()
+	 * performs.
+	 */
+	*avail_spare = B_FALSE;
+	*l2cache = B_FALSE;
+	if (log != NULL)
+		*log = B_FALSE;
+
+	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
+	nvlist_free(search);
+
+	return (ret);
+}
+
nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
boolean_t *l2cache, boolean_t *log)
{
char buf[MAXPATHLEN];
- const char *search;
char *end;
- nvlist_t *nvroot;
+ nvlist_t *nvroot, *search, *ret;
uint64_t guid;
+ verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
guid = strtoull(path, &end, 10);
if (guid != 0 && *end == '\0') {
- search = NULL;
+ verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
} else if (path[0] != '/') {
(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
- search = buf;
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
} else {
- search = path;
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
}
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
@@ -1486,8 +1565,10 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
*l2cache = B_FALSE;
if (log != NULL)
*log = B_FALSE;
- return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
- l2cache, log));
+ ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
+ nvlist_free(search);
+
+ return (ret);
}
static int
@@ -1504,81 +1585,142 @@ vdev_online(nvlist_t *nv)
}
/*
- * Get phys_path for a root pool
- * Return 0 on success; non-zeron on failure.
+ * Helper function for zpool_get_physpaths().
*/
-int
-zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
+static int
+vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
+ size_t *bytes_written)
{
+ size_t bytes_left, pos, rsz;
+ char *tmppath;
+ const char *format;
+
+ if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
+ &tmppath) != 0)
+ return (EZFS_NODEVICE);
+
+ pos = *bytes_written;
+ bytes_left = physpath_size - pos;
+ format = (pos == 0) ? "%s" : " %s";
+
+ rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
+ *bytes_written += rsz;
+
+ if (rsz >= bytes_left) {
+ /* if physpath was not copied properly, clear it */
+ if (bytes_left != 0) {
+ physpath[pos] = 0;
+ }
+ return (EZFS_NOSPC);
+ }
+ return (0);
+}
+
+/*
+ * Helper for zpool_get_config_physpath(): walk the vdev subtree rooted at
+ * 'nv' and append the ZPOOL_CONFIG_PHYS_PATH of each disk for which
+ * vdev_online() is true to 'physpath' (see vdev_get_one_physpath()).
+ *
+ * Only disk, mirror, replacing and spare vdevs are handled.  'is_spare'
+ * tells a disk that it sits below a spare vdev; such a disk is only used
+ * when it carries a non-zero ZPOOL_CONFIG_IS_SPARE.
+ *
+ * 'phypath_size' is the size of 'physpath' and '*rsz' is the running byte
+ * count maintained by vdev_get_one_physpath().
+ *
+ * The return value is never 0:
+ *	EZFS_INVALCONFIG	'nv' has no type, an interior vdev has no
+ *				children, or a disk below a spare vdev is
+ *				not the active spare
+ *	(from vdev_get_one_physpath)	the path of an online disk could not
+ *				be appended
+ *	EZFS_NOSPC		propagated from any child
+ *	EZFS_POOL_INVALARG	every other case, including a fully
+ *				successful walk
+ * The caller therefore decides success by looking at '*rsz', not at the
+ * return value.
+ */
+static int
+vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
+    size_t *rsz, boolean_t is_spare)
+{
+	char *type;
+	int ret;
+
+	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
+		return (EZFS_INVALCONFIG);
+
+	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+		/*
+		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
+		 * For a spare vdev, we only want to boot from the active
+		 * spare device.
+		 */
+		if (is_spare) {
+			uint64_t spare = 0;
+
+			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
+			    &spare);
+			if (!spare)
+				return (EZFS_INVALCONFIG);
+		}
+
+		if (vdev_online(nv)) {
+			if ((ret = vdev_get_one_physpath(nv, physpath,
+			    phypath_size, rsz)) != 0)
+				return (ret);
+		}
+	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
+	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
+	    strcmp(type, VDEV_TYPE_SPARE) == 0) {
+		nvlist_t **child;
+		uint_t count, i;
+
+		/*
+		 * Children of a spare vdev are filtered by the IS_SPARE
+		 * check above.  Mirror and replacing vdevs simply pass on
+		 * whatever 'is_spare' they were called with.
+		 */
+		if (strcmp(type, VDEV_TYPE_SPARE) == 0)
+			is_spare = B_TRUE;
+
+		if (nvlist_lookup_nvlist_array(nv,
+		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
+			return (EZFS_INVALCONFIG);
+
+		for (i = 0; i < count; i++) {
+			ret = vdev_get_physpaths(child[i], physpath,
+			    phypath_size, rsz, is_spare);
+			/* Only a full buffer aborts the walk. */
+			if (ret == EZFS_NOSPC)
+				return (ret);
+		}
+	}
+
+	return (EZFS_POOL_INVALARG);
+}
+
+/*
+ * Get phys_path for a root pool config.
+ * Return 0 on success; non-zero on failure.
+ */
+static int
+zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
+{
+ size_t rsz;
nvlist_t *vdev_root;
nvlist_t **child;
uint_t count;
- int i;
+ char *type;
- /*
- * Make sure this is a root pool, as phys_path doesn't mean
- * anything to a non-root pool.
- */
- if (!pool_is_bootable(zhp))
- return (-1);
+ rsz = 0;
- verify(nvlist_lookup_nvlist(zhp->zpool_config,
- ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &vdev_root) != 0)
+ return (EZFS_INVALCONFIG);
- if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
+ if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
+ nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
&child, &count) != 0)
- return (-2);
+ return (EZFS_INVALCONFIG);
- for (i = 0; i < count; i++) {
- nvlist_t **child2;
- uint_t count2;
- char *type;
- char *tmppath;
- int j;
+ /*
+ * root pool can not have EFI labeled disks and can only have
+ * a single top-level vdev.
+ */
+ if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
+ pool_uses_efi(vdev_root))
+ return (EZFS_POOL_INVALARG);
- if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
- != 0)
- return (-3);
-
- if (strcmp(type, VDEV_TYPE_DISK) == 0) {
- if (!vdev_online(child[i]))
- return (-8);
- verify(nvlist_lookup_string(child[i],
- ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
- (void) strncpy(physpath, tmppath, strlen(tmppath));
- } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
- if (nvlist_lookup_nvlist_array(child[i],
- ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
- return (-4);
-
- for (j = 0; j < count2; j++) {
- if (!vdev_online(child2[j]))
- return (-8);
- if (nvlist_lookup_string(child2[j],
- ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
- return (-5);
-
- if ((strlen(physpath) + strlen(tmppath)) >
- MAXNAMELEN)
- return (-6);
-
- if (strlen(physpath) == 0) {
- (void) strncpy(physpath, tmppath,
- strlen(tmppath));
- } else {
- (void) strcat(physpath, " ");
- (void) strcat(physpath, tmppath);
- }
- }
- } else {
- return (-7);
- }
- }
+ (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
+ B_FALSE);
+
+ /* No online devices */
+ if (rsz == 0)
+ return (EZFS_NODEVICE);
return (0);
}
/*
+ * Get phys_path for the root pool behind 'zhp'.
+ *
+ * Hands the handle's zpool_config to zpool_get_config_physpath(), which
+ * fills 'physpath' (a buffer of 'phypath_size' bytes).
+ * Return 0 on success; non-zero on failure.
+ */
+int
+zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
+{
+	nvlist_t *pool_config = zhp->zpool_config;
+
+	return (zpool_get_config_physpath(pool_config, physpath,
+	    phypath_size));
+}
+
+/*
* Returns TRUE if the given guid corresponds to the given type.
* This is used to check for hot spares (INUSE or not), and level 2 cache
* devices.
@@ -1607,6 +1749,45 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
}
/*
+ * If the device has been dynamically expanded then we need to relabel
+ * the disk to use the new unallocated space.
+ *
+ * 'name' is the device name relative to RDISK_ROOT.  The relabel itself is
+ * done by efi_use_whole_disk(), which is looked up at run time with dlsym()
+ * rather than linked directly.
+ *
+ * Returns 0 on success, and also when efi_use_whole_disk() reports
+ * VT_ENOSPC (no unallocated space left).  Returns -1, without recording a
+ * libzfs error, if efi_use_whole_disk cannot be resolved.  Otherwise
+ * returns the result of zfs_error() (EZFS_OPENFAILED or EZFS_NOCAP).
+ */
+static int
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+{
+	char path[MAXPATHLEN];
+	char errbuf[1024];
+	int fd, error;
+	int (*_efi_use_whole_disk)(int);
+
+	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
+	    "efi_use_whole_disk")) == NULL)
+		return (-1);
+
+	/*
+	 * errbuf is the message handed to zfs_error() on both failure paths
+	 * below, so it must hold a valid string before either is reached.
+	 */
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot relabel '%s'"), name);
+
+	(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
+
+	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to open device"), name);
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+	}
+
+	/*
+	 * It's possible that we might encounter an error if the device
+	 * does not have any unallocated space left. If so, we simply
+	 * ignore that error and continue on.
+	 */
+	error = _efi_use_whole_disk(fd);
+	(void) close(fd);
+	if (error && error != VT_ENOSPC) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to read disk capacity"), name);
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+	return (0);
+}
+
+/*
* Bring the specified vdev online. The 'flags' parameter is a set of the
* ZFS_ONLINE_* flags.
*/
@@ -1617,15 +1798,20 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
zfs_cmd_t zc = { 0 };
char msg[1024];
nvlist_t *tgt;
- boolean_t avail_spare, l2cache;
+ boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
- (void) snprintf(msg, sizeof (msg),
- dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+ if (flags & ZFS_ONLINE_EXPAND) {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
+ } else {
+ (void) snprintf(msg, sizeof (msg),
+ dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+ }
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
- NULL)) == NULL)
+ &islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
@@ -1634,6 +1820,31 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
+ if (flags & ZFS_ONLINE_EXPAND ||
+ zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
+ char *pathname = NULL;
+ uint64_t wholedisk = 0;
+
+ (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk);
+ verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
+ &pathname) == 0);
+
+ /*
+ * XXX - L2ARC 1.0 devices can't support expansion.
+ */
+ if (l2cache) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot expand cache devices"));
+ return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
+ }
+
+ if (wholedisk) {
+ pathname += strlen(DISK_ROOT) + 1;
+ (void) zpool_relabel_disk(zhp->zpool_hdl, pathname);
+ }
+ }
+
zc.zc_cookie = VDEV_STATE_ONLINE;
zc.zc_obj = flags;
@@ -1684,6 +1895,12 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
*/
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
+ case EEXIST:
+ /*
+ * The log device has unplayed logs
+ */
+ return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
+
default:
return (zpool_standard_error(hdl, errno, msg));
}
@@ -1888,6 +2105,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
"be sure to invoke %s to make '%s' bootable.\n"),
BOOTCMD, new_disk);
+
+ /*
+ * XXX need a better way to prevent user from
+ * booting up a half-baked vdev.
+ */
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
+ "sure to wait until resilver is done "
+ "before rebooting.\n"));
}
return (0);
}
@@ -2803,14 +3028,6 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
free(mntpnt);
}
-#define RDISK_ROOT "/dev/rdsk"
-#define BACKUP_SLICE "s2"
-/*
- * Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
- */
-#define NEW_START_BLOCK 256
-
/*
* Read the EFI label from the config, if a label does not exist then
* pass back the error to the caller. If the caller has passed a non-NULL