diff options
-rw-r--r-- | cmd/zpool/os/freebsd/zpool_vdev_os.c | 14 | ||||
-rw-r--r-- | cmd/zpool/os/linux/zpool_vdev_os.c | 255 | ||||
-rw-r--r-- | cmd/zpool/zpool_iter.c | 4 | ||||
-rw-r--r-- | cmd/zpool/zpool_main.c | 239 | ||||
-rw-r--r-- | cmd/zpool/zpool_util.h | 3 | ||||
-rw-r--r-- | include/libzfs.h | 3 | ||||
-rw-r--r-- | include/libzutil.h | 57 | ||||
-rw-r--r-- | lib/libzfs/libzfs.abi | 97 | ||||
-rw-r--r-- | lib/libzfs/libzfs_pool.c | 49 | ||||
-rw-r--r-- | lib/libzutil/os/linux/zutil_import_os.c | 40 | ||||
-rw-r--r-- | lib/libzutil/zutil_import.c | 98 | ||||
-rw-r--r-- | lib/libzutil/zutil_pool.c | 31 | ||||
-rw-r--r-- | man/man8/zpool-clear.8 | 11 | ||||
-rw-r--r-- | man/man8/zpool-offline.8 | 18 | ||||
-rw-r--r-- | man/man8/zpool-status.8 | 2 | ||||
-rw-r--r-- | man/man8/zpool.8 | 19 |
16 files changed, 875 insertions, 65 deletions
diff --git a/cmd/zpool/os/freebsd/zpool_vdev_os.c b/cmd/zpool/os/freebsd/zpool_vdev_os.c index 231ca97f1..9dd733989 100644 --- a/cmd/zpool/os/freebsd/zpool_vdev_os.c +++ b/cmd/zpool/os/freebsd/zpool_vdev_os.c @@ -124,3 +124,17 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (-1); +} + +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + /* Enclosure slot power not supported on FreeBSD yet */ + return (ENOTSUP); +} diff --git a/cmd/zpool/os/linux/zpool_vdev_os.c b/cmd/zpool/os/linux/zpool_vdev_os.c index 7f4486e06..006a3a7d8 100644 --- a/cmd/zpool/os/linux/zpool_vdev_os.c +++ b/cmd/zpool/os/linux/zpool_vdev_os.c @@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare) { return (check_file_generic(file, force, isspare)); } + +/* + * Read from a sysfs file and return an allocated string. Removes + * the newline from the end of the string if there is one. + * + * Returns a string on success (which must be freed), or NULL on error. + */ +static char *zpool_sysfs_gets(char *path) +{ + int fd; + struct stat statbuf; + char *buf = NULL; + ssize_t count = 0; + fd = open(path, O_RDONLY); + if (fd < 0) + return (NULL); + + if (fstat(fd, &statbuf) != 0) { + close(fd); + return (NULL); + } + + buf = calloc(sizeof (*buf), statbuf.st_size + 1); + if (buf == NULL) { + close(fd); + return (NULL); + } + + /* + * Note, we can read less bytes than st_size, and that's ok. Sysfs + * files will report their size is 4k even if they only return a small + * string. + */ + count = read(fd, buf, statbuf.st_size); + if (count < 0) { + /* Error doing read() or we overran the buffer */ + close(fd); + free(buf); + return (NULL); + } + + /* Remove trailing newline */ + if (buf[count - 1] == '\n') + buf[count - 1] = 0; + + close(fd); + + return (buf); +} + +/* + * Write a string to a sysfs file. + * + * Returns 0 on success, non-zero otherwise. + */ +static int zpool_sysfs_puts(char *path, char *str) +{ + FILE *file; + + file = fopen(path, "w"); + if (!file) { + return (-1); + } + + if (fputs(str, file) < 0) { + fclose(file); + return (-2); + } + fclose(file); + return (0); +} + +/* Given a vdev nvlist_t, rescan its enclosure sysfs path */ +static void +rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv) +{ + update_vdev_config_dev_sysfs_path(vdev_nv, + fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH), + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); +} + +/* + * Given a power string: "on", "off", "1", or "0", return 0 if it's an + * off value, 1 if it's an on value, and -1 if the value is unrecognized. + */ +static int zpool_power_parse_value(char *str) +{ + if ((strcmp(str, "off") == 0) || (strcmp(str, "0") == 0)) + return (0); + + if ((strcmp(str, "on") == 0) || (strcmp(str, "1") == 0)) + return (1); + + return (-1); +} + +/* + * Given a vdev string return an allocated string containing the sysfs path to + * its power control file. Also do a check if the power control file really + * exists and has correct permissions. + * + * Example returned strings: + * + * /sys/class/enclosure/0:0:122:0/10/power_status + * /sys/bus/pci/slots/10/power + * + * Returns allocated string on success (which must be freed), NULL on failure. + */ +static char * +zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev) +{ + const char *enc_sysfs_dir = NULL; + char *path = NULL; + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + + if (vdev_nv == NULL) { + return (NULL); + } + + /* Make sure we're getting the updated enclosure sysfs path */ + rescan_vdev_config_dev_sysfs_path(vdev_nv); + + if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, + &enc_sysfs_dir) != 0) { + return (NULL); + } + + if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1) + return (NULL); + + if (access(path, W_OK) != 0) { + free(path); + path = NULL; + /* No HDD 'power_control' file, maybe it's NVMe? */ + if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) { + return (NULL); + } + + if (access(path, R_OK | W_OK) != 0) { + /* Not NVMe either */ + free(path); + return (NULL); + } + } + + return (path); +} + +/* + * Given a path to a sysfs power control file, return B_TRUE if you should use + * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control). + */ +static boolean_t +zpool_power_use_word(char *sysfs_path) +{ + if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")], + "power_status") == 0) { + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * Check the sysfs power control value for a vdev. + * + * Returns: + * 0 - Power is off + * 1 - Power is on + * -1 - Error or unsupported + */ +int +zpool_power_current_state(zpool_handle_t *zhp, char *vdev) +{ + char *val; + int rc; + + char *path = zpool_power_sysfs_path(zhp, vdev); + if (path == NULL) + return (-1); + + val = zpool_sysfs_gets(path); + if (val == NULL) { + free(path); + return (-1); + } + + rc = zpool_power_parse_value(val); + free(val); + free(path); + return (rc); +} + +/* + * Turn on or off the slot to a device + * + * Device path is the full path to the device (like /dev/sda or /dev/sda1). + * + * Return code: + * 0: Success + * ENOTSUP: Power control not supported for OS + * EBADSLT: Couldn't read current power state + * ENOENT: No sysfs path to power control + * EIO: Couldn't write sysfs power value + * EBADE: Sysfs power value didn't change + */ +int +zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on) +{ + char *sysfs_path; + const char *val; + int rc; + int timeout_ms; + + rc = zpool_power_current_state(zhp, vdev); + if (rc == -1) { + return (EBADSLT); + } + + /* Already correct value? */ + if (rc == (int)turn_on) + return (0); + + sysfs_path = zpool_power_sysfs_path(zhp, vdev); + if (sysfs_path == NULL) + return (ENOENT); + + if (zpool_power_use_word(sysfs_path)) { + val = turn_on ? "on" : "off"; + } else { + val = turn_on ? "1" : "0"; + } + + rc = zpool_sysfs_puts(sysfs_path, (char *)val); + + free(sysfs_path); + if (rc != 0) { + return (EIO); + } + + /* + * Wait up to 30 seconds for sysfs power value to change after + * writing it. + */ + timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000); + for (int i = 0; i < MAX(1, timeout_ms / 200); i++) { + rc = zpool_power_current_state(zhp, vdev); + if (rc == (int)turn_on) + return (0); /* success */ + + fsleep(0.200); /* 200ms */ + } + + /* sysfs value never changed */ + return (EBADE); +} diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index 506b529dc..ae2e9da91 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl) if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) return (1); + /* Make sure we're getting the updated enclosure sysfs path */ + update_vdev_config_dev_sysfs_path(nv, path, + ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); + nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &vdev_enc_sysfs_path); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index cf66953a8..b347cfba9 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -353,7 +353,7 @@ get_usage(zpool_help_t idx) return (gettext("\tattach [-fsw] [-o property=value] " "<pool> <device> <new-device>\n")); case HELP_CLEAR: - return (gettext("\tclear [-nF] <pool> [device]\n")); + return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n")); case HELP_CREATE: return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" @@ -389,9 +389,11 @@ get_usage(zpool_help_t idx) "[-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_OFFLINE: - return (gettext("\toffline [-f] [-t] <pool> <device> ...\n")); + return (gettext("\toffline [--power]|[[-f][-t]] <pool> " + "<device> ...\n")); case HELP_ONLINE: - return (gettext("\tonline [-e] <pool> <device> ...\n")); + return (gettext("\tonline [--power][-e] <pool> <device> " + "...\n")); case HELP_REPLACE: return (gettext("\treplace [-fsw] [-o property=value] " "<pool> <device> [new-device]\n")); @@ -410,7 +412,7 @@ get_usage(zpool_help_t idx) return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> " "[<device> ...]\n")); case HELP_STATUS: - return (gettext("\tstatus [-c [script1,script2,...]] " + return (gettext("\tstatus [--power] [-c [script1,script2,...]] " "[-igLpPstvxD] [-T d|u] [pool] ... \n" "\t [interval [count]]\n")); case HELP_UPGRADE: @@ -517,6 +519,77 @@ print_vdev_prop_cb(int prop, void *cb) } /* + * Given a leaf vdev name like 'L5' return its VDEV_CONFIG_PATH like + * '/dev/disk/by-vdev/L5'. + */ +static const char * +vdev_name_to_path(zpool_handle_t *zhp, char *vdev) +{ + nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL); + if (vdev_nv == NULL) { + return (NULL); + } + return (fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH)); +} + +static int +zpool_power_on(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_TRUE)); +} + +static int +zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev) +{ + int rc; + + rc = zpool_power_on(zhp, vdev); + if (rc != 0) + return (rc); + + zpool_disk_wait(vdev_name_to_path(zhp, vdev)); + + return (0); +} + +static int +zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp) +{ + nvlist_t *nv; + const char *path = NULL; + int rc; + + /* Power up all the devices first */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + if (path != NULL) { + rc = zpool_power_on(zhp, (char *)path); + if (rc != 0) { + return (rc); + } + } + } + + /* + * Wait for their devices to show up. Since we powered them on + * at roughly the same time, they should all come online around + * the same time. + */ + FOR_EACH_REAL_LEAF_VDEV(zhp, nv) { + path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH); + zpool_disk_wait(path); + } + + return (0); +} + +static int +zpool_power_off(zpool_handle_t *zhp, char *vdev) +{ + return (zpool_power(zhp, vdev, B_FALSE)); +} + +/* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. @@ -2093,6 +2166,7 @@ typedef struct status_cbdata { boolean_t cb_print_vdev_init; boolean_t cb_print_vdev_trim; vdev_cmd_data_list_t *vcdl; + boolean_t cb_print_power; } status_cbdata_t; /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */ @@ -2378,6 +2452,26 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name, else printf(" %5s", rbuf); } + if (cb->cb_print_power) { + if (children == 0) { + /* Only leaf vdevs have physical slots */ + switch (zpool_power_current_state(zhp, (char *) + fnvlist_lookup_string(nv, + ZPOOL_CONFIG_PATH))) { + case 0: + printf_color(ANSI_RED, " %5s", + gettext("off")); + break; + case 1: + printf(" %5s", gettext("on")); + break; + default: + printf(" %5s", "-"); + } + } else { + printf(" %5s", "-"); + } + } } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, @@ -5430,19 +5524,6 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval, } /* - * Floating point sleep(). Allows you to pass in a floating point value for - * seconds. - */ -static void -fsleep(float sec) -{ - struct timespec req; - req.tv_sec = floor(sec); - req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; - nanosleep(&req, NULL); -} - -/* * Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or * if we were unable to determine its size. */ @@ -6950,10 +7031,12 @@ zpool_do_split(int argc, char **argv) return (ret); } - +#define POWER_OPT 1024 /* - * zpool online <pool> <device> ... + * zpool online [--power] <pool> <device> ... + * + * --power: Power on the enclosure slot to the drive (if possible) */ int zpool_do_online(int argc, char **argv) @@ -6964,13 +7047,21 @@ zpool_do_online(int argc, char **argv) int ret = 0; vdev_state_t newstate; int flags = 0; + boolean_t is_power_on = B_FALSE; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "e")) != -1) { + while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -6978,6 +7069,9 @@ zpool_do_online(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -6999,6 +7093,18 @@ zpool_do_online(int argc, char **argv) for (i = 1; i < argc; i++) { vdev_state_t oldstate; boolean_t avail_spare, l2cache; + int rc; + + if (is_power_on) { + rc = zpool_power_on_and_disk_wait(zhp, argv[i]); + if (rc == ENOTSUP) { + (void) fprintf(stderr, + gettext("Power control not supported\n")); + } + if (rc != 0) + return (rc); + } + nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare, &l2cache, NULL); if (tgt == NULL) { @@ -7044,12 +7150,15 @@ zpool_do_online(int argc, char **argv) } /* - * zpool offline [-ft] <pool> <device> ... + * zpool offline [-ft]|[--power] <pool> <device> ... + * * * -f Force the device into a faulted state. * * -t Only take the device off-line temporarily. The offline/faulted * state will not be persistent across reboots. + * + * --power Power off the enclosure slot to the drive (if possible) */ int zpool_do_offline(int argc, char **argv) @@ -7060,9 +7169,15 @@ zpool_do_offline(int argc, char **argv) int ret = 0; boolean_t istmp = B_FALSE; boolean_t fault = B_FALSE; + boolean_t is_power_off = B_FALSE; + + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; /* check options */ - while ((c = getopt(argc, argv, "ft")) != -1) { + while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) { switch (c) { case 'f': fault = B_TRUE; @@ -7070,6 +7185,9 @@ zpool_do_offline(int argc, char **argv) case 't': istmp = B_TRUE; break; + case POWER_OPT: + is_power_off = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7077,6 +7195,20 @@ zpool_do_offline(int argc, char **argv) } } + if (is_power_off && fault) { + (void) fprintf(stderr, + gettext("-0 and -f cannot be used together\n")); + usage(B_FALSE); + return (1); + } + + if (is_power_off && istmp) { + (void) fprintf(stderr, + gettext("-0 and -t cannot be used together\n")); + usage(B_FALSE); + return (1); + } + argc -= optind; argv += optind; @@ -7096,8 +7228,22 @@ zpool_do_offline(int argc, char **argv) return (1); for (i = 1; i < argc; i++) { - if (fault) { - uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]); + if (is_power_off) { + /* + * Note: we have to power off first, then set REMOVED, + * or else zpool_vdev_set_removed_state() returns + * EAGAIN. + */ + ret = zpool_power_off(zhp, argv[i]); + if (ret != 0) { + (void) fprintf(stderr, "%s %s %d\n", + gettext("unable to power off slot for"), + argv[i], ret); + } + zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE); + + } else if (fault) { vdev_aux_t aux; if (istmp == B_FALSE) { /* Force the fault to persist across imports */ @@ -7120,7 +7266,7 @@ zpool_do_offline(int argc, char **argv) } /* - * zpool clear <pool> [device] + * zpool clear [-nF]|[--power] <pool> [device] * * Clear all errors associated with a pool or a particular device. */ @@ -7132,13 +7278,20 @@ zpool_do_clear(int argc, char **argv) boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; + boolean_t is_power_on = B_FALSE; uint32_t rewind_policy = ZPOOL_NO_REWIND; nvlist_t *policy = NULL; zpool_handle_t *zhp; char *pool, *device; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "FnX")) != -1) { + while ((c = getopt_long(argc, argv, "FnX", long_options, + NULL)) != -1) { switch (c) { case 'F': do_rewind = B_TRUE; @@ -7149,6 +7302,9 @@ zpool_do_clear(int argc, char **argv) case 'X': xtreme_rewind = B_TRUE; break; + case POWER_OPT: + is_power_on = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -7156,6 +7312,9 @@ zpool_do_clear(int argc, char **argv) } } + if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT")) + is_power_on = B_TRUE; + argc -= optind; argv += optind; @@ -7196,6 +7355,14 @@ zpool_do_clear(int argc, char **argv) return (1); } + if (is_power_on) { + if (device == NULL) { + zpool_power_on_pool_and_wait_for_devices(zhp); + } else { + zpool_power_on_and_disk_wait(zhp, device); + } + } + if (zpool_clear(zhp, device, policy) != 0) ret = 1; @@ -8908,6 +9075,10 @@ status_callback(zpool_handle_t *zhp, void *data) printf_color(ANSI_BOLD, " %5s", gettext("SLOW")); } + if (cbp->cb_print_power) { + printf_color(ANSI_BOLD, " %5s", gettext("POWER")); + } + if (cbp->vcdl != NULL) print_cmd_columns(cbp->vcdl, 0); @@ -8954,8 +9125,8 @@ status_callback(zpool_handle_t *zhp, void *data) } /* - * zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ... - * [interval [count]] + * zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ... + * [pool] [interval [count]] * * -c CMD For each vdev, run command CMD * -i Display vdev initialization status. @@ -8969,6 +9140,7 @@ status_callback(zpool_handle_t *zhp, void *data) * -D Display dedup status (undocumented) * -t Display vdev TRIM status. * -T Display a timestamp in date(1) or Unix format + * --power Display vdev enclosure slot power status * * Describes the health status of all pools or some subset. */ @@ -8982,8 +9154,14 @@ zpool_do_status(int argc, char **argv) status_cbdata_t cb = { 0 }; char *cmd = NULL; + struct option long_options[] = { + {"power", no_argument, NULL, POWER_OPT}, + {0, 0, 0, 0} + }; + /* check options */ - while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) { + while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options, + NULL)) != -1) { switch (c) { case 'c': if (cmd != NULL) { @@ -9042,6 +9220,9 @@ zpool_do_status(int argc, char **argv) case 'T': get_timestamp_arg(*optarg); break; + case POWER_OPT: + cb.cb_print_power = B_TRUE; + break; case '?': if (optopt == 'c') { print_zpool_script_list("status"); diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index db8e631dc..7f5406f06 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -138,6 +138,9 @@ int check_file(const char *file, boolean_t force, boolean_t isspare); void after_zpool_upgrade(zpool_handle_t *zhp); int check_file_generic(const char *file, boolean_t force, boolean_t isspare); +int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on); +int zpool_power_current_state(zpool_handle_t *zhp, char *vdev); + #ifdef __cplusplus } #endif diff --git a/include/libzfs.h b/include/libzfs.h index dbb6340b0..4f06b5d3c 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -319,6 +319,9 @@ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *); _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); +_LIBZFS_H int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t, + vdev_aux_t); + _LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t); _LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, diff --git a/include/libzutil.h b/include/libzutil.h index 9842c225b..839486fb6 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -97,6 +97,7 @@ _LIBZUTIL_H int zpool_find_config(libpc_handle_t *, const char *, nvlist_t **, _LIBZUTIL_H const char * const * zpool_default_search_paths(size_t *count); _LIBZUTIL_H int zpool_read_label(int, nvlist_t **, int *); _LIBZUTIL_H int zpool_label_disk_wait(const char *, int); +_LIBZUTIL_H int zpool_disk_wait(const char *); struct udev_device; @@ -163,6 +164,8 @@ _LIBZUTIL_H void zfs_niceraw(uint64_t, char *, size_t); _LIBZUTIL_H void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *); _LIBZUTIL_H int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); +_LIBZUTIL_H void fsleep(float sec); +_LIBZUTIL_H int zpool_getenv_int(const char *env, int default_val); struct zfs_cmd; @@ -205,6 +208,60 @@ _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *); int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func, void *data); +int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data); +int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, + void *data); +/* + * Often you'll want to iterate over all the vdevs in the pool, but don't want + * to use for_each_vdev() since it requires a callback function. + * + * Instead you can use FOR_EACH_VDEV(): + * + * zpool_handle_t *zhp // Assume this is initialized + * nvlist_t *nv + * ... + * FOR_EACH_VDEV(zhp, nv) { + * const char *path = NULL; + * nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path); + * printf("Looking at vdev %s\n", path); + * } + * + * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs. However, + * there's an upper limit of 256 vdevs per dRAID top-level vdevs (TLDs), 255 for + * raidz2 TLDs, a real world limit of ~500 vdevs for mirrors, so this shouldn't + * really be an issue. + * + * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0 + * pool: + * + * 100 vdevs = 0.7ms + * 500 vdevs = 17ms + * 750 vdevs = 40ms + * 1000 vdevs = 82ms + * + * The '__nv += 0' at the end of the for() loop gets around a "comma or + * semicolon followed by non-blank" checkstyle error. Note on most compliers + * the '__nv += 0' can just be replaced with 'NULL', but gcc on Centos 7 + * will give a 'warning: statement with no effect' error if you do that. + */ +#define __FOR_EACH_VDEV(__zhp, __nv, __func) { \ + __nv = zpool_get_config(__zhp, NULL); \ + VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \ + } \ + for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \ + for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \ + __nv += 0) + +#define FOR_EACH_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func) + +/* + * "real leaf" vdevs are leaf vdevs that are real devices (disks or files). + * This excludes leaf vdevs like like draid spares. + */ +#define FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \ + __FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func) + int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func, void *data); void update_vdevs_config_dev_sysfs_path(nvlist_t *config); diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 51b0368f8..7c39b134d 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -179,6 +179,7 @@ <elf-symbol name='fletcher_4_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_native_varsize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='fsleep' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -466,6 +467,7 @@ <elf-symbol name='zpool_disable_datasets_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_disable_volume_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_discard_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zpool_disk_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_dump_ddt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_enable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_events_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -497,6 +499,7 @@ <elf-symbol name='zpool_get_userprop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_get_vdev_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_get_vdev_prop_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zpool_getenv_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_history_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_import_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -567,6 +570,7 @@ <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='zpool_vdev_set_removed_state' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -1368,8 +1372,6 @@ <qualified-type-def type-id='0897719a' const='yes' id='c4a7b189'/> <pointer-type-def type-id='c4a7b189' size-in-bits='64' id='36fca399'/> <qualified-type-def type-id='36fca399' restrict='yes' id='37e4897b'/> - <qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/> - <pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/> <qualified-type-def type-id='e05e8614' restrict='yes' id='0be2e71c'/> <pointer-type-def type-id='8037c762' size-in-bits='64' id='d74a6869'/> <qualified-type-def type-id='7292109c' restrict='yes' id='6942f6a4'/> @@ -6426,6 +6428,12 @@ <parameter type-id='9d774e0b' name='aux'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zpool_vdev_set_removed_state' mangled-name='zpool_vdev_set_removed_state' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_set_removed_state'> + <parameter type-id='4c81de99' name='zhp'/> + <parameter type-id='9c313c2d' name='guid'/> + <parameter type-id='9d774e0b' name='aux'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zpool_vdev_attach' mangled-name='zpool_vdev_attach' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_attach'> <parameter type-id='4c81de99' name='zhp'/> <parameter type-id='80f4b756' name='old_disk'/> @@ -7792,6 +7800,12 @@ <qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/> <pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/> <pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/> + <function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'> + <parameter type-id='b0382bb3'/> + <parameter type-id='4c81de99'/> + <parameter type-id='80f4b756'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'> <return type-id='26a90f95'/> </function-decl> @@ -7801,6 +7815,10 @@ <function-decl name='libzfs_core_fini' visibility='default' binding='global' size-in-bits='64'> <return type-id='48b5725f'/> </function-decl> + <function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'> + <parameter type-id='80f4b756'/> + <return type-id='26a90f95'/> + </function-decl> <function-decl name='zpool_prop_unsupported' mangled-name='zpool_prop_unsupported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_unsupported'> <parameter type-id='80f4b756'/> <return type-id='c19b74c3'/> @@ -7918,6 +7936,11 @@ <parameter type-id='b59d7dce'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='access' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='80f4b756'/> + <parameter type-id='95e97e5e'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='dup2' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='95e97e5e'/> <parameter type-id='95e97e5e'/> @@ -8085,6 +8108,37 @@ <parameter is-variadic='yes'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zpool_vdev_script_alloc_env' mangled-name='zpool_vdev_script_alloc_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_alloc_env'> + <parameter type-id='80f4b756' name='pool_name'/> + <parameter type-id='80f4b756' name='vdev_path'/> + <parameter type-id='80f4b756' name='vdev_upath'/> + <parameter type-id='80f4b756' name='vdev_enc_sysfs_path'/> + <parameter type-id='80f4b756' name='opt_key'/> + <parameter type-id='80f4b756' name='opt_val'/> + <return type-id='9b23c9ad'/> + </function-decl> + <function-decl name='zpool_vdev_script_free_env' mangled-name='zpool_vdev_script_free_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_free_env'> + <parameter type-id='9b23c9ad' name='env'/> + <return type-id='48b5725f'/> + </function-decl> + <function-decl name='zpool_prepare_disk' mangled-name='zpool_prepare_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_disk'> + <parameter type-id='4c81de99' name='zhp'/> + <parameter type-id='5ce45b60' name='vdev_nv'/> + <parameter type-id='80f4b756' name='prepare_str'/> + <parameter type-id='c0563f85' name='lines'/> + <parameter type-id='7292109c' name='lines_cnt'/> + <return type-id='95e97e5e'/> + </function-decl> + <function-decl name='zpool_prepare_and_label_disk' mangled-name='zpool_prepare_and_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_and_label_disk'> + <parameter type-id='b0382bb3' name='hdl'/> + <parameter type-id='4c81de99' name='zhp'/> + <parameter type-id='80f4b756' name='name'/> + <parameter type-id='5ce45b60' name='vdev_nv'/> + <parameter type-id='80f4b756' name='prepare_str'/> + <parameter type-id='c0563f85' name='lines'/> + <parameter type-id='7292109c' name='lines_cnt'/> + <return type-id='95e97e5e'/> + </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_mount_os.c' language='LANG_C99'> <pointer-type-def type-id='7359adad' size-in-bits='64' id='1d2c2b85'/> @@ -8274,12 +8328,6 @@ <parameter type-id='95e97e5e'/> <return type-id='95e97e5e'/> </function-decl> - <function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'> - <parameter type-id='b0382bb3' name='hdl'/> - <parameter type-id='4c81de99' name='zhp'/> - <parameter type-id='80f4b756' name='name'/> - <return type-id='95e97e5e'/> - </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_util_os.c' language='LANG_C99'> <typedef-decl name='nfds_t' type-id='7359adad' id='555eef66'/> @@ -8295,11 +8343,6 @@ </data-member> </class-decl> <pointer-type-def type-id='b440e872' size-in-bits='64' id='3ac36db0'/> - <function-decl name='access' visibility='default' binding='global' size-in-bits='64'> - <parameter type-id='80f4b756'/> - <parameter type-id='95e97e5e'/> - <return type-id='95e97e5e'/> - </function-decl> <function-decl name='__poll_chk' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='3ac36db0'/> <parameter type-id='555eef66'/> @@ -8384,10 +8427,6 @@ <parameter type-id='80f4b756' name='dev_name'/> <return type-id='c19b74c3'/> </function-decl> - <function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'> - <parameter type-id='80f4b756' name='dev_name'/> - <return type-id='26a90f95'/> - </function-decl> <function-decl name='is_mpath_whole_disk' mangled-name='is_mpath_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_mpath_whole_disk'> <parameter type-id='80f4b756' name='path'/> <return type-id='c19b74c3'/> @@ -8523,6 +8562,10 @@ <parameter type-id='b59d7dce' name='buflen'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zpool_disk_wait' mangled-name='zpool_disk_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disk_wait'> + <parameter type-id='80f4b756' name='path'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='update_vdev_config_dev_sysfs_path' mangled-name='update_vdev_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_sysfs_path'> <parameter type-id='5ce45b60' name='nv'/> <parameter type-id='80f4b756' name='path'/> @@ -8548,6 +8591,9 @@ <parameter type-id='95e97e5e'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='clearenv' visibility='default' binding='global' size-in-bits='64'> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zfs_setproctitle_init' mangled-name='zfs_setproctitle_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle_init'> <parameter type-id='95e97e5e' name='argc'/> <parameter type-id='9b23c9ad' name='argv'/> @@ -8778,6 +8824,7 @@ <array-type-def dimensions='1' type-id='853fd5dc' size-in-bits='32768' id='b505fc2f'> <subrange length='64' type-id='7359adad' id='b10be967'/> </array-type-def> + <type-decl name='float' size-in-bits='32' id='a6c45d85'/> <class-decl name='ddt_stat' size-in-bits='512' is-struct='yes' visibility='default' id='65242dfe'> <data-member access='public' layout-offset-in-bits='0'> <var-decl name='dds_blocks' type-id='9c313c2d' visibility='default'/> @@ -8815,11 +8862,27 @@ <pointer-type-def type-id='ec92d602' size-in-bits='64' id='932720f8'/> <qualified-type-def type-id='853fd5dc' const='yes' id='764c298c'/> <pointer-type-def type-id='764c298c' size-in-bits='64' id='dfe59052'/> + <qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/> + <pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/> + <function-decl name='nanosleep' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='e05e8614'/> + <parameter type-id='3d83ba87'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zpool_dump_ddt' mangled-name='zpool_dump_ddt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_dump_ddt'> <parameter type-id='dfe59052' name='dds_total'/> <parameter type-id='932720f8' name='ddh'/> <return type-id='48b5725f'/> </function-decl> + <function-decl name='fsleep' mangled-name='fsleep' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fsleep'> + <parameter type-id='a6c45d85' name='sec'/> + <return type-id='48b5725f'/> + </function-decl> + <function-decl name='zpool_getenv_int' mangled-name='zpool_getenv_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_getenv_int'> + <parameter type-id='80f4b756' name='env'/> + <parameter type-id='95e97e5e' name='default_val'/> + <return type-id='95e97e5e'/> + </function-decl> </abi-instr> <abi-instr address-size='64' path='module/avl/avl.c' language='LANG_C99'> <function-decl name='avl_last' mangled-name='avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_last'> diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 71cf029de..c7b8617ef 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3036,6 +3036,9 @@ zpool_vdev_is_interior(const char *name) return (B_FALSE); } +/* + * Lookup the nvlist for a given vdev. + */ nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) @@ -3043,6 +3046,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, char *end; nvlist_t *nvroot, *search, *ret; uint64_t guid; + boolean_t __avail_spare, __l2cache, __log; search = fnvlist_alloc(); @@ -3058,6 +3062,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, nvroot = fnvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE); + /* + * User can pass NULL for avail_spare, l2cache, and log, but + * we still need to provide variables to vdev_to_nvlist_iter(), so + * just point them to junk variables here. + */ + if (!avail_spare) + avail_spare = &__avail_spare; + if (!l2cache) + l2cache = &__l2cache; + if (!log) + log = &__log; + *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) @@ -3313,21 +3329,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) } /* - * Mark the given vdev degraded. + * Generic set vdev state function */ -int -zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +static int +zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux, + vdev_state_t state) { zfs_cmd_t zc = {"\0"}; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(errbuf, sizeof (errbuf), - dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid); + dgettext(TEXT_DOMAIN, "cannot set %s %llu"), + zpool_state_to_name(state, aux), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; - zc.zc_cookie = VDEV_STATE_DEGRADED; + zc.zc_cookie = state; zc.zc_obj = aux; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) @@ -3337,6 +3355,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) } /* + * Mark the given vdev degraded. + */ +int +zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED)); +} + +/* + * Mark the given vdev as in a removed state (as if the device does not exist). + * + * This is different than zpool_vdev_remove() which does a removal of a device + * from the pool (but the device does exist). + */ +int +zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) +{ + return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED)); +} + +/* * Returns TRUE if the given nvlist is a vdev that was originally swapped in as * a hot spare. */ diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index fbfae4f7e..bb91dec5a 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -170,25 +170,17 @@ zpool_open_func(void *arg) if (rn->rn_labelpaths) { const char *path = NULL; const char *devid = NULL; - const char *env = NULL; rdsk_node_t *slice; avl_index_t where; - int timeout; int error; if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid)) return; - env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS"); - if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 || - timeout < 0) { - timeout = DISK_LABEL_WAIT; - } - /* * Allow devlinks to stabilize so all paths are available. */ - zpool_label_disk_wait(rn->rn_name, timeout); + zpool_disk_wait(rn->rn_name); if (path != NULL) { slice = zutil_alloc(hdl, sizeof (rdsk_node_t)); @@ -683,6 +675,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms) } /* + * Simplified version of zpool_label_disk_wait() where we wait for a device + * to appear using the default timeouts. + */ +int +zpool_disk_wait(const char *path) +{ + int timeout; + timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS", + DISK_LABEL_WAIT); + + return (zpool_label_disk_wait(path, timeout)); +} + +/* * Encode the persistent devices strings * used for the vdev disk label */ @@ -767,6 +773,10 @@ no_dev: * in the nvlist * (if applicable). Like: * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4' * + * If an old path was in the nvlist, and the rescan can not find a new path, + * then keep the old path, since the disk may have been removed. + * + * path: The vdev path (value from ZPOOL_CONFIG_PATH) * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH) */ void @@ -774,6 +784,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, const char *key) { char *upath, *spath; + const char *oldpath = NULL; + + (void) nvlist_lookup_string(nv, key, &oldpath); /* Add enclosure sysfs path (if disk is in an enclosure). */ upath = zfs_get_underlying_path(path); @@ -782,7 +795,14 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path, if (spath) { (void) nvlist_add_string(nv, key, spath); } else { - (void) nvlist_remove_all(nv, key); + /* + * We couldn't dynamically scan the disk's enclosure sysfs path. + * This could be because the disk went away. If there's an old + * enclosure sysfs path in the nvlist, then keep using it. + */ + if (!oldpath) { + (void) nvlist_remove_all(nv, key); + } } free(upath); diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 19d8a4742..d023dd105 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1898,6 +1898,104 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp, return (0); } +/* Return if a vdev is a leaf vdev. Note: draid spares are leaf vdevs. */ +static boolean_t +vdev_is_leaf(nvlist_t *nv) +{ + uint_t children = 0; + nvlist_t **child; + + (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children); + + return (children == 0); +} + +/* Return if a vdev is a leaf vdev and a real device (disk or file) */ +static boolean_t +vdev_is_real_leaf(nvlist_t *nv) +{ + const char *type = NULL; + if (!vdev_is_leaf(nv)) + return (B_FALSE); + + (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type); + if ((strcmp(type, VDEV_TYPE_DISK) == 0) || + (strcmp(type, VDEV_TYPE_FILE) == 0)) { + return (B_TRUE); + } + + return (B_FALSE); +} + +/* + * This function is called by our FOR_EACH_VDEV() macros. + * + * state: State machine status (stored inside of a (nvlist_t *)) + * nv: The current vdev nvlist_t we are iterating over. + * last_nv: The previous vdev nvlist_t we returned to the user in + * the last iteration of FOR_EACH_VDEV(). We use it + * to find the next vdev nvlist_t we should return. + * real_leaves_only: Only return leaf vdevs. + * + * Returns 1 if we found the next vdev nvlist_t for this iteration. 0 if + * we're still searching for it. + */ +static int +__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv, + boolean_t real_leaves_only) +{ + enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2}; + + /* The very first entry in the NV list is a special case */ + if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + /* + * We came across our last_nv, meaning the next one is the one we + * want + */ + if (nv == *((nvlist_t **)last_nv)) { + /* Next iteration of this function will return the nvlist_t */ + *((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH; + return (0); + } + + /* + * We marked NEXT_IS_MATCH on the previous iteration, so this is the one + * we want. + */ + if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) { + if (real_leaves_only && !vdev_is_real_leaf(nv)) + return (0); + + *((nvlist_t **)last_nv) = nv; + *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING; + return (1); + } + + return (0); +} + +int +for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE)); +} + +int +for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv, + void *last_nv) +{ + return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE)); +} + /* * Internal function for iterating over the vdevs. * diff --git a/lib/libzutil/zutil_pool.c b/lib/libzutil/zutil_pool.c index 288a0033c..86460de3f 100644 --- a/lib/libzutil/zutil_pool.c +++ b/lib/libzutil/zutil_pool.c @@ -28,6 +28,7 @@ #include <string.h> #include <sys/nvpair.h> #include <sys/fs/zfs.h> +#include <math.h> #include <libzutil.h> @@ -144,3 +145,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, *leftover = bytes_read; return (0); } + +/* + * Floating point sleep(). Allows you to pass in a floating point value for + * seconds. + */ +void +fsleep(float sec) +{ + struct timespec req; + req.tv_sec = floor(sec); + req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; + nanosleep(&req, NULL); +} + +/* + * Get environment variable 'env' and return it as an integer. + * If 'env' is not set, then return 'default_val' instead. + */ +int +zpool_getenv_int(const char *env, int default_val) +{ + char *str; + int val; + str = getenv(env); + if ((str == NULL) || sscanf(str, "%d", &val) != 1 || + val < 0) { + val = default_val; + } + return (val); +} diff --git a/man/man8/zpool-clear.8 b/man/man8/zpool-clear.8 index 7b9d40c74..c61ecae48 100644 --- a/man/man8/zpool-clear.8 +++ b/man/man8/zpool-clear.8 @@ -36,6 +36,7 @@ .Sh SYNOPSIS .Nm zpool .Cm clear +.Op Fl -power .Ar pool .Oo Ar device Oc Ns … . @@ -52,6 +53,16 @@ Pools with enabled which have been suspended cannot be resumed. While the pool was suspended, it may have been imported on another host, and resuming I/O could result in pool damage. +.Bl -tag -width Ds +.It Fl -power +Power on the devices's slot in the storage enclosure and wait for the device +to show up before attempting to clear errors. +This is done on all the devices specified. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +Note: This flag currently works on Linux only. +.El . .Sh SEE ALSO .Xr zdb 8 , diff --git a/man/man8/zpool-offline.8 b/man/man8/zpool-offline.8 index edcf1d06a..1b6095d63 100644 --- a/man/man8/zpool-offline.8 +++ b/man/man8/zpool-offline.8 @@ -36,12 +36,13 @@ .Sh SYNOPSIS .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Nm zpool .Cm online -.Op Fl e +.Op Fl Sy -power +.Op Fl Sy e .Ar pool .Ar device Ns … . @@ -50,7 +51,7 @@ .It Xo .Nm zpool .Cm offline -.Op Fl ft +.Op Fl Sy -power Ns | Ns Op Fl Sy ft .Ar pool .Ar device Ns … .Xc @@ -60,6 +61,9 @@ While the is offline, no attempt is made to read or write to the device. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power off the device's slot in the storage enclosure. +This flag currently works on Linux only .It Fl f Force fault. Instead of offlining the disk, put it into a faulted state. @@ -73,6 +77,7 @@ Upon reboot, the specified physical device reverts to its previous state. .It Xo .Nm zpool .Cm online +.Op Fl -power .Op Fl e .Ar pool .Ar device Ns … @@ -80,6 +85,13 @@ Upon reboot, the specified physical device reverts to its previous state. Brings the specified physical device online. This command is not applicable to spares. .Bl -tag -width Ds +.It Fl -power +Power on the device's slot in the storage enclosure and wait for the device +to show up before attempting to online it. +Alternatively, you can set the +.Sy ZPOOL_AUTO_POWER_ON_SLOT +environment variable to always enable this behavior. +This flag currently works on Linux only .It Fl e Expand the device to use all available space. If the device is part of a mirror or raidz then all devices must be expanded diff --git a/man/man8/zpool-status.8 b/man/man8/zpool-status.8 index 8f9580cf0..965bf9d2b 100644 --- a/man/man8/zpool-status.8 +++ b/man/man8/zpool-status.8 @@ -57,6 +57,8 @@ and the estimated time to completion. Both of these are only approximate, because the amount of data in the pool and the other workloads on the system can change. .Bl -tag -width Ds +.It Fl -power +Display vdev enclosure slot power status (on or off). .It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns … Run a script (or scripts) on each vdev and include the output as a new column in the diff --git a/man/man8/zpool.8 b/man/man8/zpool.8 index 4c4020bdd..fe44e15ca 100644 --- a/man/man8/zpool.8 +++ b/man/man8/zpool.8 @@ -444,7 +444,7 @@ rpool 14.6G 54.9G 4 55 250K 2.69M .Ed . .Sh ENVIRONMENT VARIABLES -.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS" +.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE" .It Sy ZFS_ABORT Cause .Nm @@ -456,6 +456,23 @@ Use ANSI color in and .Nm zpool Cm iostat output. +.It Sy ZPOOL_AUTO_POWER_ON_SLOT +Automatically attempt to turn on the drives enclosure slot power to a drive when +running the +.Nm zpool Cm online +or +.Nm zpool Cm clear +commands. +This has the same effect as passing the +.Fl -power +option to those commands. +.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS +The maximum time in milliseconds to wait for a slot power sysfs value +to return the correct value after writing it. +For example, after writing "on" to the sysfs enclosure slot power_control file, +it can take some time for the enclosure to power down the slot and return +"on" if you read back the 'power_control' value. +Defaults to 30 seconds (30000ms) if not set. .It Sy ZPOOL_IMPORT_PATH The search path for devices or files to use with the pool. This is a colon-separated list of directories in which |