diff options
-rw-r--r-- | cmd/zdb/zdb.c | 1 | ||||
-rw-r--r-- | cmd/zfs/zfs_main.c | 37 | ||||
-rw-r--r-- | cmd/zinject/zinject.c | 5 | ||||
-rw-r--r-- | cmd/zpool/zpool_main.c | 28 | ||||
-rw-r--r-- | cmd/zpool/zpool_vdev.c | 567 | ||||
-rw-r--r-- | lib/libefi/include/sys/uuid.h | 4 | ||||
-rw-r--r-- | lib/libefi/rdwr_efi.c | 384 | ||||
-rw-r--r-- | lib/libzfs/include/libzfs.h | 24 | ||||
-rw-r--r-- | lib/libzfs/include/libzfs_impl.h | 2 | ||||
-rw-r--r-- | lib/libzfs/libzfs_changelist.c | 12 | ||||
-rw-r--r-- | lib/libzfs/libzfs_dataset.c | 325 | ||||
-rw-r--r-- | lib/libzfs/libzfs_import.c | 352 | ||||
-rw-r--r-- | lib/libzfs/libzfs_mount.c | 51 | ||||
-rw-r--r-- | lib/libzfs/libzfs_pool.c | 246 | ||||
-rw-r--r-- | lib/libzfs/libzfs_sendrecv.c | 10 | ||||
-rw-r--r-- | lib/libzfs/libzfs_util.c | 3 | ||||
-rw-r--r-- | lib/libzpool/kernel.c | 34 | ||||
-rw-r--r-- | module/zfs/sa.c | 5 | ||||
-rw-r--r-- | module/zfs/zfs_sa.c | 3 |
19 files changed, 1387 insertions, 706 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 459445bb7..d0354809d 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -55,7 +55,6 @@ #include <sys/arc.h> #include <sys/ddt.h> #undef ZFS_MAXNAMELEN -#undef verify #include <libzfs.h> #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 9cb69c1bf..ebcec2d6e 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -318,6 +318,7 @@ safe_malloc(size_t size) return (data); } +#ifdef HAVE_ZPL static char * safe_strdup(char *str) { @@ -328,6 +329,7 @@ safe_strdup(char *str) return (dupstr); } +#endif /* HAVE_ZPL */ /* * Callback routine that will print out information for each of @@ -495,6 +497,7 @@ parse_depth(char *opt, int *flags) #define PROGRESS_DELAY 2 /* seconds */ +#ifdef HAVE_ZPL static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; @@ -546,6 +549,8 @@ finish_progress(char *done) free(pt_header); pt_header = NULL; } +#endif /* HAVE_ZPL */ + /* * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol> * @@ -626,6 +631,7 @@ zfs_do_clone(int argc, char **argv) ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ +#ifdef HAVE_ZPL if (ret == 0) { zfs_handle_t *clone; @@ -637,6 +643,7 @@ zfs_do_clone(int argc, char **argv) zfs_close(clone); } } +#endif /* HAVE_ZPL */ zfs_close(zhp); nvlist_free(props); @@ -824,6 +831,7 @@ zfs_do_create(int argc, char **argv) * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. */ +#ifdef HAVE_ZPL if (canmount == ZFS_CANMOUNT_ON) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " @@ -835,6 +843,7 @@ zfs_do_create(int argc, char **argv) ret = 1; } } +#endif /* HAVE_ZPL */ error: if (zhp) @@ -2940,6 +2949,7 @@ zfs_do_release(int argc, char **argv) #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ +#ifdef HAVE_ZPL static int get_one_dataset(zfs_handle_t *zhp, void *data) { @@ -3387,6 +3397,7 @@ share_mount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs mount -a [nfs] @@ -3397,7 +3408,11 @@ share_mount(int op, int argc, char **argv) static int zfs_do_mount(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3409,9 +3424,14 @@ zfs_do_mount(int argc, char **argv) static int zfs_do_share(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } +#ifdef HAVE_ZPL typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; @@ -3795,6 +3815,7 @@ unshare_unmount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs unmount -a @@ -3805,7 +3826,11 @@ unshare_unmount(int op, int argc, char **argv) static int zfs_do_unmount(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3817,7 +3842,11 @@ zfs_do_unmount(int argc, char **argv) static int zfs_do_unshare(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* ARGSUSED */ @@ -3833,6 +3862,7 @@ zfs_do_python(int argc, char **argv) * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. */ +#ifdef HAVE_ZPL static int manual_mount(int argc, char **argv) { @@ -3963,6 +3993,7 @@ manual_unmount(int argc, char **argv) return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } +#endif /* HAVE_ZPL */ static int find_command_idx(char *command, int *idx) @@ -4061,7 +4092,9 @@ main(int argc, char **argv) { int ret; int i = 0; +#ifdef HAVE_ZPL char *progname; +#endif char *cmdname; (void) setlocale(LC_ALL, ""); @@ -4086,6 +4119,7 @@ main(int argc, char **argv) return (1); } +#ifdef HAVE_ZPL /* * This command also doubles as the /etc/fs mount and unmount program. * Determine if we should take this behavior based on argv[0]. @@ -4096,6 +4130,9 @@ main(int argc, char **argv) } else if (strcmp(progname, "umount") == 0) { ret = manual_unmount(argc, argv); } else { +#else + { +#endif /* HAVE_ZPL */ /* * Make sure the user has specified some command. */ diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index 60c53ceb3..643d73e7f 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -954,17 +954,20 @@ main(int argc, char **argv) if (dataset[0] != '\0' && domount) { if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) return (1); - +#ifdef HAVE_ZPL if (zfs_unmount(zhp, NULL, 0) != 0) return (1); +#endif /* HAVE_ZPL */ } record.zi_error = error; ret = register_handler(pool, flags, &record, quiet); +#ifdef HAVE_ZPL if (dataset[0] != '\0' && domount) ret = (zfs_mount(zhp, NULL, 0) != 0); +#endif /* HAVE_ZPL */ libzfs_fini(g_zfs); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 074f76e81..b1b71acf8 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -716,7 +716,9 @@ zpool_do_create(int argc, char **argv) (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { char buf[MAXPATHLEN]; +#ifdef HAVE_ZPL DIR *dirp; +#endif if (mountpoint && mountpoint[0] != '/') { (void) fprintf(stderr, gettext("invalid mountpoint " @@ -741,6 +743,7 @@ zpool_do_create(int argc, char **argv) mountpoint); } +#ifdef HAVE_ZPL if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { (void) fprintf(stderr, gettext("mountpoint '%s' : " "%s\n"), buf, strerror(errno)); @@ -763,6 +766,7 @@ zpool_do_create(int argc, char **argv) goto errout; } } +#endif /* HAVE_ZPL */ } if (dryrun) { @@ -793,8 +797,12 @@ zpool_do_create(int argc, char **argv) zfs_prop_to_name( ZFS_PROP_MOUNTPOINT), mountpoint) == 0); +#ifdef HAVE_ZPL if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); +#else + ret = 0; +#endif /* HAVE_ZPL */ zfs_close(pool); } } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { @@ -1571,12 +1579,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); +#if HAVE_ZPL if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); } +#endif /* HAVE_ZPL */ zpool_close(zhp); return (0); @@ -1592,7 +1602,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, * -c Read pool information from a cachefile instead of searching * devices. * - * -d Scan in a specific directory, other than /dev/dsk. More than + * -d Scan in a specific directory, other than /dev/. More than * one directory can be specified using multiple '-d' options. * * -D Scan for previously destroyed pools or import all or only @@ -1773,12 +1783,6 @@ zpool_do_import(int argc, char **argv) nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0) goto error; - if (searchdirs == NULL) { - searchdirs = safe_malloc(sizeof (char *)); - searchdirs[0] = "/dev/dsk"; - nsearch = 1; - } - /* check argument count */ if (do_all) { if (argc != 0) { @@ -1799,7 +1803,9 @@ zpool_do_import(int argc, char **argv) if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { (void) fprintf(stderr, gettext("cannot " "discover pools: permission denied\n")); - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); + nvlist_free(policy); return (1); } @@ -1867,7 +1873,8 @@ zpool_do_import(int argc, char **argv) } if (err == 1) { - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); nvlist_free(policy); return (1); } @@ -1968,7 +1975,8 @@ error: nvlist_free(props); nvlist_free(pools); nvlist_free(policy); - free(searchdirs); + if (searchdirs != NULL) + free(searchdirs); return (err ? 1 : 0); } diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 0c224903d..febdda95f 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -50,7 +50,7 @@ * * 1. Construct the vdev specification. Performs syntax validation and * makes sure each device is valid. - * 2. Check for devices in use. Using libdiskmgt, makes sure that no + * 2. Check for devices in use. Using libblkid to make sure that no * devices are also in use. Some can be overridden using the 'force' * flag, others cannot. * 3. Check for replication errors if the 'force' flag is not specified. @@ -60,10 +60,10 @@ */ #include <assert.h> +#include <ctype.h> #include <devid.h> #include <errno.h> #include <fcntl.h> -#include <libdiskmgt.h> #include <libintl.h> #include <libnvpair.h> #include <limits.h> @@ -74,13 +74,15 @@ #include <sys/stat.h> #include <sys/vtoc.h> #include <sys/mntent.h> +#include <uuid/uuid.h> +#ifdef HAVE_LIBBLKID +#include <blkid/blkid.h> +#else +#define blkid_cache void * +#endif /* HAVE_LIBBLKID */ #include "zpool_util.h" -#define DISK_ROOT "/dev/dsk" -#define RDISK_ROOT "/dev/rdsk" -#define BACKUP_SLICE "s2" - /* * For any given vdev specification, we can have multiple errors. The * vdev_error() function keeps track of whether we have seen an error yet, and @@ -111,168 +113,6 @@ vdev_error(const char *fmt, ...) va_end(ap); } -static void -libdiskmgt_error(int error) -{ - /* - * ENXIO/ENODEV is a valid error message if the device doesn't live in - * /dev/dsk. Don't bother printing an error message in this case. - */ - if (error == ENXIO || error == ENODEV) - return; - - (void) fprintf(stderr, gettext("warning: device in use checking " - "failed: %s\n"), strerror(error)); -} - -/* - * Validate a device, passing the bulk of the work off to libdiskmgt. - */ -static int -check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare) -{ - char *msg; - int error = 0; - dm_who_type_t who; - - if (force) - who = DM_WHO_ZPOOL_FORCE; - else if (isspare) - who = DM_WHO_ZPOOL_SPARE; - else - who = DM_WHO_ZPOOL; - - if (dm_inuse((char *)path, &msg, who, &error) || error) { - if (error != 0) { - libdiskmgt_error(error); - return (0); - } else { - vdev_error("%s", msg); - free(msg); - return (-1); - } - } - - /* - * If we're given a whole disk, ignore overlapping slices since we're - * about to label it anyway. - */ - error = 0; - if (!wholedisk && !force && - (dm_isoverlapping((char *)path, &msg, &error) || error)) { - if (error == 0) { - /* dm_isoverlapping returned -1 */ - vdev_error(gettext("%s overlaps with %s\n"), path, msg); - free(msg); - return (-1); - } else if (error != ENODEV) { - /* libdiskmgt's devcache only handles physical drives */ - libdiskmgt_error(error); - return (0); - } - } - - return (0); -} - - -/* - * Validate a whole disk. Iterate over all slices on the disk and make sure - * that none is in use by calling check_slice(). - */ -static int -check_disk(const char *name, dm_descriptor_t disk, int force, int isspare) -{ - dm_descriptor_t *drive, *media, *slice; - int err = 0; - int i; - int ret; - - /* - * Get the drive associated with this disk. This should never fail, - * because we already have an alias handle open for the device. - */ - if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE, - &err)) == NULL || *drive == NULL) { - if (err) - libdiskmgt_error(err); - return (0); - } - - if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA, - &err)) == NULL) { - dm_free_descriptors(drive); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(drive); - - /* - * It is possible that the user has specified a removable media drive, - * and the media is not present. - */ - if (*media == NULL) { - dm_free_descriptors(media); - vdev_error(gettext("'%s' has no media in drive\n"), name); - return (-1); - } - - if ((slice = dm_get_associated_descriptors(*media, DM_SLICE, - &err)) == NULL) { - dm_free_descriptors(media); - if (err) - libdiskmgt_error(err); - return (0); - } - - dm_free_descriptors(media); - - ret = 0; - - /* - * Iterate over all slices and report any errors. We don't care about - * overlapping slices because we are using the whole disk. - */ - for (i = 0; slice[i] != NULL; i++) { - char *name = dm_get_name(slice[i], &err); - - if (check_slice(name, force, B_TRUE, isspare) != 0) - ret = -1; - - dm_free_name(name); - } - - dm_free_descriptors(slice); - return (ret); -} - -/* - * Validate a device. - */ -static int -check_device(const char *path, boolean_t force, boolean_t isspare) -{ - dm_descriptor_t desc; - int err; - char *dev; - - /* - * For whole disks, libdiskmgt does not include the leading dev path. - */ - dev = strrchr(path, '/'); - assert(dev != NULL); - dev++; - if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) { - err = check_disk(path, desc, force, isspare); - dm_free_descriptor(desc); - return (err); - } - - return (check_slice(path, force, B_FALSE, isspare)); -} - /* * Check that a file is valid. All we can do in this case is check that it's * not in use by another pool, and not in use by swap. @@ -283,19 +123,9 @@ check_file(const char *file, boolean_t force, boolean_t isspare) char *name; int fd; int ret = 0; - int err; pool_state_t state; boolean_t inuse; - if (dm_inuse_swap(file, &err)) { - if (err) - libdiskmgt_error(err); - else - vdev_error(gettext("%s is currently used by swap. " - "Please see swap(1M).\n"), file); - return (-1); - } - if ((fd = open(file, O_RDONLY)) < 0) return (0); @@ -348,6 +178,175 @@ check_file(const char *file, boolean_t force, boolean_t isspare) return (ret); } +static void +check_error(int err) +{ + (void) fprintf(stderr, gettext("warning: device in use checking " + "failed: %s\n"), strerror(err)); +} + +static int +check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare) +{ + struct stat64 statbuf; + int err; +#ifdef HAVE_LIBBLKID + char *value; +#endif /* HAVE_LIBBLKID */ + + if (stat64(path, &statbuf) != 0) { + vdev_error(gettext("cannot stat %s: %s\n"), + path, strerror(errno)); + return (-1); + } + +#ifdef HAVE_LIBBLKID + /* No valid type detected device is safe to use */ + value = blkid_get_tag_value(cache, "TYPE", path); + if (value == NULL) + return (0); + + /* + * If libblkid detects a ZFS device, we check the device + * using check_file() to see if it's safe. The one safe + * case is a spare device shared between multiple pools. + */ + if (strcmp(value, "zfs") == 0) { + err = check_file(path, force, isspare); + } else { + if (force) { + err = 0; + } else { + err = -1; + vdev_error(gettext("%s contains a filesystem of " + "type '%s'\n"), path, value); + } + } + + free(value); +#else + err = check_file(path, force, isspare); +#endif /* HAVE_LIBBLKID */ + + return (err); +} + +/* + * Validate a whole disk. Iterate over all slices on the disk and make sure + * that none is in use by calling check_slice(). + */ +static int +check_disk(const char *path, blkid_cache cache, int force, + boolean_t isspare, boolean_t iswholedisk) +{ + struct dk_gpt *vtoc; + char slice_path[MAXPATHLEN]; + int err = 0; + int fd, i; + + /* This is not a wholedisk we only check the given partition */ + if (!iswholedisk) + return check_slice(path, cache, force, isspare); + + /* + * When the device is a whole disk try to read the efi partition + * label. If this is successful we safely check the all of the + * partitions. However, when it fails it may simply be because + * the disk is partitioned via the MBR. Since we currently can + * not easily decode the MBR return a failure and prompt to the + * user to use force option since we cannot check the partitions. + */ + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { + check_error(errno); + return -1; + } + + if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { + (void) close(fd); + + if (force) { + return 0; + } else { + vdev_error(gettext("%s does not contain an EFI " + "label but it may contain partition\n" + "information in the MBR.\n"), path); + return -1; + } + } + + /* + * The primary efi partition label is damaged however the secondary + * label at the end of the device is intact. Rather than use this + * label we should play it safe and treat this as a non efi device. + */ + if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { + efi_free(vtoc); + (void) close(fd); + + if (force) { + /* Partitions will no be created using the backup */ + return 0; + } else { + vdev_error(gettext("%s contains a corrupt primary " + "EFI label.\n"), path); + return -1; + } + } + + for (i = 0; i < vtoc->efi_nparts; i++) { + + if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED || + uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) + continue; + + if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) + (void) snprintf(slice_path, sizeof (slice_path), + "%s%s%d", path, "-part", i+1); + else + (void) snprintf(slice_path, sizeof (slice_path), + "%s%s%d", path, isdigit(path[strlen(path)-1]) ? + "p" : "", i+1); + + err = check_slice(slice_path, cache, force, isspare); + if (err) + break; + } + + efi_free(vtoc); + (void) close(fd); + + return (err); +} + +static int +check_device(const char *path, boolean_t force, + boolean_t isspare, boolean_t iswholedisk) +{ + static blkid_cache cache = NULL; + +#ifdef HAVE_LIBBLKID + /* + * There is no easy way to add a correct blkid_put_cache() call, + * memory will be reclaimed when the command exits. + */ + if (cache == NULL) { + int err; + + if ((err = blkid_get_cache(&cache, NULL)) != 0) { + check_error(err); + return -1; + } + + if ((err = blkid_probe_all(cache)) != 0) { + blkid_put_cache(cache); + check_error(err); + return -1; + } + } +#endif /* HAVE_LIBBLKID */ + + return check_disk(path, cache, force, isspare, iswholedisk); +} /* * By "whole disk" we mean an entire physical disk (something we can @@ -358,15 +357,12 @@ check_file(const char *file, boolean_t force, boolean_t isspare) * it isn't. */ static boolean_t -is_whole_disk(const char *arg) +is_whole_disk(const char *path) { struct dk_gpt *label; int fd; - char path[MAXPATHLEN]; - (void) snprintf(path, sizeof (path), "%s%s%s", - RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) return (B_FALSE); if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { (void) close(fd); @@ -378,13 +374,51 @@ is_whole_disk(const char *arg) } /* + * This may be a shorthand device path or it could be total gibberish. + * Check to see if it's a known device in /dev/, /dev/disk/by-id, + * /dev/disk/by-label, /dev/disk/by-path, /dev/disk/by-uuid, or + * /dev/disk/zpool/. As part of this check, see if we've been given + * an entire disk (minus the slice number). + */ +static int +is_shorthand_path(const char *arg, char *path, + struct stat64 *statbuf, boolean_t *wholedisk) +{ + char dirs[5][8] = {"by-id", "by-label", "by-path", "by-uuid", "zpool"}; + int i, err; + + /* /dev/<name> */ + (void) snprintf(path, MAXPATHLEN, "%s/%s", DISK_ROOT, arg); + *wholedisk = is_whole_disk(path); + err = stat64(path, statbuf); + if (*wholedisk || err == 0) + return (0); + + /* /dev/disk/<dirs>/<name> */ + for (i = 0; i < 5; i++) { + (void) snprintf(path, MAXPATHLEN, "%s/%s/%s", + UDISK_ROOT, dirs[i], arg); + *wholedisk = is_whole_disk(path); + err = stat64(path, statbuf); + if (*wholedisk || err == 0) + return (0); + } + + strlcpy(path, arg, sizeof(path)); + memset(statbuf, 0, sizeof(*statbuf)); + *wholedisk = B_FALSE; + + return (ENOENT); +} + +/* * Create a leaf vdev. Determine if this is a file or a device. If it's a * device, fill in the device id to make a complete nvlist. Valid forms for a * leaf vdev are: * - * /dev/dsk/xxx Complete disk path + * /dev/xxx Complete disk path * /xxx Full path to file - * xxx Shorthand for /dev/dsk/xxx + * xxx Shorthand for /dev/disk/yyy/xxx */ static nvlist_t * make_leaf_vdev(const char *arg, uint64_t is_log) @@ -394,6 +428,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) nvlist_t *vdev = NULL; char *type = NULL; boolean_t wholedisk = B_FALSE; + int err; /* * Determine what type of vdev this is, and put the full path into @@ -403,28 +438,31 @@ make_leaf_vdev(const char *arg, uint64_t is_log) if (arg[0] == '/') { /* * Complete device or file path. Exact type is determined by - * examining the file descriptor afterwards. + * examining the file descriptor afterwards. Symbolic links + * are resolved to their real paths for the is_whole_disk() + * and S_ISBLK/S_ISREG type checks. However, we are careful + * to store the given path as ZPOOL_CONFIG_PATH to ensure we + * can leverage udev's persistent device labels. */ - wholedisk = is_whole_disk(arg); - if (!wholedisk && (stat64(arg, &statbuf) != 0)) { + if (realpath(arg, path) == NULL) { (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - arg, strerror(errno)); + gettext("cannot resolve path '%s'\n"), arg); return (NULL); } - (void) strlcpy(path, arg, sizeof (path)); - } else { - /* - * This may be a short path for a device, or it could be total - * gibberish. Check to see if it's a known device in - * /dev/dsk/. As part of this check, see if we've been given a - * an entire disk (minus the slice number). - */ - (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, - arg); wholedisk = is_whole_disk(path); if (!wholedisk && (stat64(path, &statbuf) != 0)) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (NULL); + } + + /* After is_whole_disk() check restore original passed path */ + strlcpy(path, arg, MAXPATHLEN); + } else { + err = is_shorthand_path(arg, path, &statbuf, &wholedisk); + if (err != 0) { /* * If we got ENOENT, then the user gave us * gibberish, so try to direct them with a @@ -432,7 +470,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) * regurgitate strerror() since it's the best we * can do. */ - if (errno == ENOENT) { + if (err == ENOENT) { (void) fprintf(stderr, gettext("cannot open '%s': no such " "device in %s\n"), arg, DISK_ROOT); @@ -475,6 +513,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, (uint64_t)wholedisk) == 0); +#if defined(__sun__) || defined(__sun) /* * For a whole disk, defer getting its devid until after labeling it. */ @@ -486,7 +525,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; - if ((fd = open(path, O_RDONLY)) < 0) { + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': " "%s\n"), path, strerror(errno)); nvlist_free(vdev); @@ -509,6 +548,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) (void) close(fd); } +#endif return (vdev); } @@ -871,6 +911,39 @@ check_replication(nvlist_t *config, nvlist_t *newroot) return (ret); } +static int +zero_label(char *path) +{ + const int size = 4096; + char buf[size]; + int err, fd; + + if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) { + (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (-1); + } + + memset(buf, 0, size); + err = write(fd, buf, size); + (void) fdatasync(fd); + (void) close(fd); + + if (err == -1) { + (void) fprintf(stderr, gettext("cannot zero first %d bytes " + "of '%s': %s\n"), size, path, strerror(errno)); + return (-1); + } + + if (err != size) { + (void) fprintf(stderr, gettext("could only zero %d/%d bytes " + "of '%s'\n"), err, size, path); + return (-1); + } + + return 0; +} + /* * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this @@ -889,10 +962,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) char *type, *path, *diskname; char buf[MAXPATHLEN]; uint64_t wholedisk; - int fd; int ret; - ddi_devid_t devid; - char *minor = NULL, *devid_str = NULL; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); @@ -903,55 +973,66 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (0); /* - * We have a disk device. Get the path to the device - * and see if it's a whole disk by appending the backup - * slice and stat()ing the device. + * We have a disk device. If this is a whole disk write + * out the efi partition table, otherwise write zero's to + * the first 4k of the partition. This is to ensure that + * libblkid will not misidentify the partition due to a + * magic value left by the previous filesystem. */ - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) - return (0); + verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); + verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk)); + + if (!wholedisk) { + ret = zero_label(path); + return (ret); + } + + if (realpath(path, buf) == NULL) { + ret = errno; + (void) fprintf(stderr, + gettext("cannot resolve path '%s'\n"), path); + return (ret); + } - diskname = strrchr(path, '/'); + diskname = strrchr(buf, '/'); assert(diskname != NULL); diskname++; if (zpool_label_disk(g_zfs, zhp, diskname) == -1) return (-1); /* - * Fill in the devid, now that we've labeled the disk. + * Now the we've labeled the disk and the partitions have + * been created. We still need to wait for udev to create + * the symlinks to those partitions. If we are accessing + * the devices via a udev disk path, /dev/disk, then wait + * for *-part# to be created. Otherwise just use the normal + * syntax for devices in /dev. */ - (void) snprintf(buf, sizeof (buf), "%ss0", path); - if ((fd = open(buf, O_RDONLY)) < 0) { + if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) + (void) snprintf(buf, sizeof (buf), + "%s%s%s", path, "-part", FIRST_SLICE); + else + (void) snprintf(buf, sizeof (buf), + "%s%s%s", path, isdigit(path[strlen(path)-1]) ? + "p" : "", FIRST_SLICE); + + if ((ret = zpool_label_disk_wait(buf, 1000)) != 0) { (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - buf, strerror(errno)); + gettext( "cannot resolve path '%s'\n"), buf); return (-1); } - if (devid_get(fd, &devid) == 0) { - if (devid_get_minor_name(fd, &minor) == 0 && - (devid_str = devid_str_encode(devid, minor)) != - NULL) { - verify(nvlist_add_string(nv, - ZPOOL_CONFIG_DEVID, devid_str) == 0); - } - if (devid_str != NULL) - devid_str_free(devid_str); - if (minor != NULL) - devid_str_free(minor); - devid_free(devid); - } - /* - * Update the path to refer to the 's0' slice. The presence of + * Update the path to refer to FIRST_SLICE. The presence of * the 'whole_disk' field indicates to the CLI that we should * chop off the slice number when displaying the device in * future output. */ verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); - (void) close(fd); + /* Just in case this partition already existed. */ + (void) zero_label(buf); return (0); } @@ -991,7 +1072,7 @@ is_spare(nvlist_t *config, const char *path) uint_t i, nspares; boolean_t inuse; - if ((fd = open(path, O_RDONLY)) < 0) + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) return (B_FALSE); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || @@ -1034,25 +1115,27 @@ check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, nvlist_t **child; uint_t c, children; char *type, *path; - int ret; + int ret = 0; char buf[MAXPATHLEN]; - uint64_t wholedisk; + uint64_t wholedisk = B_FALSE; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); + verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); + if (strcmp(type, VDEV_TYPE_DISK) == 0) + verify(!nvlist_lookup_uint64(nv, + ZPOOL_CONFIG_WHOLE_DISK, &wholedisk)); /* * As a generic check, we look to see if this is a replace of a * hot spare within the same pool. If so, we allow it - * regardless of what libdiskmgt or zpool_in_use() says. + * regardless of what libblkid or zpool_in_use() says. */ if (replacing) { - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) == 0 && wholedisk) + if (wholedisk) (void) snprintf(buf, sizeof (buf), "%ss0", path); else @@ -1063,7 +1146,7 @@ check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, } if (strcmp(type, VDEV_TYPE_DISK) == 0) - ret = check_device(path, force, isspare); + ret = check_device(path, force, isspare, wholedisk); if (strcmp(type, VDEV_TYPE_FILE) == 0) ret = check_file(path, force, isspare); diff --git a/lib/libefi/include/sys/uuid.h b/lib/libefi/include/sys/uuid.h index 9ce872e34..eab4622a6 100644 --- a/lib/libefi/include/sys/uuid.h +++ b/lib/libefi/include/sys/uuid.h @@ -74,12 +74,8 @@ struct uuid { uint8_t node_addr[6]; }; -#define UUID_LEN 16 - #define UUID_PRINTABLE_STRING_LENGTH 37 -typedef uchar_t uuid_t[UUID_LEN]; - /* * Convert a uuid to/from little-endian format */ diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index e682b840a..da71e3486 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -29,6 +29,7 @@ #include <strings.h> #include <unistd.h> #include <uuid/uuid.h> +#include <zlib.h> #include <libintl.h> #include <sys/types.h> #include <sys/dkio.h> @@ -38,7 +39,9 @@ #include <sys/dktp/fdisk.h> #include <sys/efi_partition.h> #include <sys/byteorder.h> -#include <sys/ddi.h> +#if defined(__linux__) +#include <linux/fs.h> +#endif static struct uuid_to_ptag { struct uuid uuid; @@ -49,11 +52,11 @@ static struct uuid_to_ptag { { EFI_SWAP }, { EFI_USR }, { EFI_BACKUP }, - { 0 }, /* STAND is never used */ + { EFI_UNUSED }, /* STAND is never used */ { EFI_VAR }, { EFI_HOME }, { EFI_ALTSCTR }, - { 0 }, /* CACHE (cachefs) is never used */ + { EFI_UNUSED }, /* CACHE (cachefs) is never used */ { EFI_RESERVED }, { EFI_SYSTEM }, { EFI_LEGACY_MBR }, @@ -107,19 +110,142 @@ int efi_debug = 1; int efi_debug = 0; #endif -extern unsigned int efi_crc32(const unsigned char *, unsigned int); -static int efi_read(int, struct dk_gpt *); +static int efi_read(int, struct dk_gpt *); + +/* + * Return a 32-bit CRC of the contents of the buffer. Pre-and-post + * one's conditioning will be handled by crc32() internally. + */ +static uint32_t +efi_crc32(const unsigned char *buf, unsigned int size) +{ + uint32_t crc = crc32(0, Z_NULL, 0); + + crc = crc32(crc, buf, size); + + return (crc); +} static int read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) { - struct dk_minfo disk_info; + int sector_size; + unsigned long long capacity_size; + + if (ioctl(fd, BLKSSZGET, §or_size) < 0) + return (-1); + + if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) + return (-1); + + *lbsize = (uint_t)sector_size; + *capacity = (diskaddr_t)(capacity_size / sector_size); + + return (0); +} - if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1) - return (errno); - *capacity = disk_info.dki_capacity; - *lbsize = disk_info.dki_lbsize; +static int +efi_get_info(int fd, struct dk_cinfo *dki_info) +{ +#if defined(__linux__) + char *path; + char *dev_path; + int rval = 0; + + memset(dki_info, 0, sizeof(*dki_info)); + + path = calloc(PATH_MAX, 1); + if (path == NULL) + goto error; + + /* + * The simplest way to get the partition number under linux is + * to parse it out of the /dev/<disk><parition> block device name. + * The kernel creates this using the partition number when it + * populates /dev/ so it may be trusted. The tricky bit here is + * that the naming convention is based on the block device type. + * So we need to take this in to account when parsing out the + * partition information. Another issue is that the libefi API + * API only provides the open fd and not the file path. To handle + * this realpath(3) is used to resolve the block device name from + * /proc/self/fd/<fd>. Aside from the partition number we collect + * some additional device info. + */ + (void) sprintf(path, "/proc/self/fd/%d", fd); + dev_path = realpath(path, NULL); + free(path); + + if (dev_path == NULL) + goto error; + + if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { + strcpy(dki_info->dki_cname, "sd"); + dki_info->dki_ctype = DKC_SCSI_CCS; + rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { + strcpy(dki_info->dki_cname, "hd"); + dki_info->dki_ctype = DKC_DIRECT; + rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_MD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_VBD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_PCMCIA_MEM; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_VBD; + rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + } else { + strcpy(dki_info->dki_dname, "unknown"); + strcpy(dki_info->dki_cname, "unknown"); + dki_info->dki_ctype = DKC_UNKNOWN; + } + + switch (rval) { + case 0: + errno = EINVAL; + goto error; + case 1: + dki_info->dki_partition = 0; + } + + free(dev_path); +#else + if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1) + goto error; +#endif return (0); +error: + if (efi_debug) + (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); + + switch (errno) { + case EIO: + return (VT_EIO); + case EINVAL: + return (VT_EINVAL); + default: + return (VT_ERROR); + } } /* @@ -135,12 +261,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) int efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) { - diskaddr_t capacity; - uint_t lbsize; + diskaddr_t capacity = 0; + uint_t lbsize = 0; uint_t nblocks; size_t length; struct dk_gpt *vptr; struct uuid uuid; + struct dk_cinfo dki_info; if (read_disk_info(fd, &capacity, &lbsize) != 0) { if (efi_debug) @@ -148,6 +275,22 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) "couldn't read disk information\n"); return (-1); } +#if defined(__linux__) + if (efi_get_info(fd, &dki_info) != 0) { + if (efi_debug) + (void) fprintf(stderr, + "couldn't read disk information\n"); + return (-1); + } + + if (dki_info.dki_partition != 0) + return (-1); + + if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || + (dki_info.dki_ctype == DKC_VBD) || + (dki_info.dki_ctype == DKC_UNKNOWN)) + return (-1); +#endif nblocks = NBLOCKS(nparts, lbsize); if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { @@ -243,14 +386,138 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) { void *data = dk_ioc->dki_data; int error; +#if defined(__linux__) + diskaddr_t capacity; + uint_t lbsize; + + /* + * When the IO is not being performed in kernel as an ioctl we need + * to know the sector size so we can seek to the proper byte offset. + */ + if (read_disk_info(fd, &capacity, &lbsize) == -1) { + if (efi_debug) + fprintf(stderr,"unable to read disk info: %d",errno); + + errno = EIO; + return -1; + } + + switch (cmd) { + case DKIOCGETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI assuming " + "LBA %d bytes\n", DEV_BSIZE); + + lbsize = DEV_BSIZE; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI lseek " + "error: %d\n", errno); + return error; + } + + error = read(fd, data, dk_ioc->dki_length); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI read " + "error: %d\n", errno); + return error; + } + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI short " + "read of %d bytes\n", error); + errno = EIO; + return -1; + } + error = 0; + break; + + case DKIOCSETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI unknown " + "LBA size\n"); + errno = EIO; + return -1; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI lseek " + "error: %d\n", errno); + return error; + } + + error = write(fd, data, dk_ioc->dki_length); + if (error == -1) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI write " + "error: %d\n", errno); + return error; + } + + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI short " + "write of %d bytes\n", error); + errno = EIO; + return -1; + } + + /* Sync the new EFI table to disk */ + error = fsync(fd); + if (error == -1) + return error; + + /* Ensure any local disk cache is also flushed */ + if (ioctl(fd, BLKFLSBUF, 0) == -1) + return error; + + error = 0; + break; + + default: + if (efi_debug) + (void) fprintf(stderr, "unsupported ioctl()\n"); + + errno = EIO; + return -1; + } +#else dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data; error = ioctl(fd, cmd, (void *)dk_ioc); dk_ioc->dki_data = data; - +#endif return (error); } +#if defined(__linux__) +static int +efi_rescan(int fd) +{ + int retry = 5; + int error; + + /* Notify the kernel a devices partition table has been updated */ + while ((error = ioctl(fd, BLKRRPART)) != 0) { + if (--retry == 0) { + (void) fprintf(stderr, "the kernel failed to rescan " + "the partition table: %d\n", errno); + return (-1); + } + } + + return (0); +} +#endif + static int check_label(int fd, dk_efi_t *dk_ioc) { @@ -305,6 +572,8 @@ efi_read(int fd, struct dk_gpt *vtoc) int rval = 0; int md_flag = 0; int vdc_flag = 0; + diskaddr_t capacity = 0; + uint_t lbsize = 0; struct dk_minfo disk_info; dk_efi_t dk_ioc; efi_gpt_t *efi; @@ -316,19 +585,9 @@ efi_read(int fd, struct dk_gpt *vtoc) /* * get the partition number for this file descriptor. */ - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) { - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - } - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; + if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && (strncmp(dki_info.dki_dname, "md", 3) == 0)) { md_flag++; @@ -342,14 +601,18 @@ efi_read(int fd, struct dk_gpt *vtoc) } /* get the LBA size */ - if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) { + if (read_disk_info(fd, &capacity, &lbsize) == -1) { if (efi_debug) { (void) fprintf(stderr, - "assuming LBA 512 bytes %d\n", - errno); + "unable to read disk info: %d", + errno); } - disk_info.dki_lbsize = DEV_BSIZE; + return (VT_EINVAL); } + + disk_info.dki_lbsize = lbsize; + disk_info.dki_capacity = capacity; + if (disk_info.dki_lbsize == 0) { if (efi_debug) { (void) fprintf(stderr, @@ -374,9 +637,11 @@ efi_read(int fd, struct dk_gpt *vtoc) } } - if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL) + if (posix_memalign((void **)&dk_ioc.dki_data, + disk_info.dki_lbsize, label_len)) return (VT_ERROR); + memset(dk_ioc.dki_data, 0, label_len); dk_ioc.dki_length = disk_info.dki_lbsize; user_length = vtoc->efi_nparts; efi = dk_ioc.dki_data; @@ -572,12 +837,14 @@ write_pmbr(int fd, struct dk_gpt *vtoc) int len; len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; - buf = calloc(len, 1); + if (posix_memalign((void **)&buf, len, len)) + return (VT_ERROR); /* * Preserve any boot code and disk signature if the first block is * already an MBR. */ + memset(buf, 0, len); dk_ioc.dki_lba = 0; dk_ioc.dki_length = len; /* LINTED -- always longlong aligned */ @@ -663,10 +930,9 @@ check_input(struct dk_gpt *vtoc) if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && (vtoc->efi_parts[i].p_size != 0)) { if (efi_debug) { - (void) fprintf(stderr, -"partition %d is \"unassigned\" but has a size of %llu", - i, - vtoc->efi_parts[i].p_size); + (void) fprintf(stderr, "partition %d is " + "\"unassigned\" but has a size of %llu", + i, vtoc->efi_parts[i].p_size); } return (VT_EINVAL); } @@ -679,9 +945,9 @@ check_input(struct dk_gpt *vtoc) if (vtoc->efi_parts[i].p_tag == V_RESERVED) { if (resv_part != -1) { if (efi_debug) { - (void) fprintf(stderr, -"found duplicate reserved partition at %d\n", - i); + (void) fprintf(stderr, "found " + "duplicate reserved partition " + "at %d\n", i); } return (VT_EINVAL); } @@ -732,8 +998,8 @@ check_input(struct dk_gpt *vtoc) (istart <= endsect)) { if (efi_debug) { (void) fprintf(stderr, -"Partition %d overlaps partition %d.", - i, j); + "Partition %d overlaps " + "partition %d.", i, j); } return (VT_EINVAL); } @@ -839,22 +1105,13 @@ efi_write(int fd, struct dk_gpt *vtoc) efi_gpe_t *efi_parts; int i, j; struct dk_cinfo dki_info; + int rval; int md_flag = 0; int nblocks; diskaddr_t lba_backup_gpt_hdr; - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; /* check if we are dealing wih a metadevice */ if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && @@ -892,9 +1149,11 @@ efi_write(int fd, struct dk_gpt *vtoc) * for backup GPT header. */ lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; - if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL) + if (posix_memalign((void **)&dk_ioc.dki_data, + vtoc->efi_lbasize, dk_ioc.dki_length)) return (VT_ERROR); + memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); efi = dk_ioc.dki_data; /* stuff user's input into EFI struct */ @@ -941,6 +1200,10 @@ efi_write(int fd, struct dk_gpt *vtoc) return (VT_EINVAL); } + /* Zero's should be written for empty partitions */ + if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) + continue; + efi_parts[i].efi_gpe_StartingLBA = LE_64(vtoc->efi_parts[i].p_start); efi_parts[i].efi_gpe_EndingLBA = @@ -1032,6 +1295,13 @@ efi_write(int fd, struct dk_gpt *vtoc) /* write the PMBR */ (void) write_pmbr(fd, vtoc); free(dk_ioc.dki_data); + +#if defined(__linux__) + rval = efi_rescan(fd); + if (rval) + return (VT_ERROR); +#endif + return (0); } @@ -1049,6 +1319,7 @@ efi_free(struct dk_gpt *ptr) int efi_type(int fd) { +#if 0 struct vtoc vtoc; struct extvtoc extvtoc; @@ -1062,6 +1333,9 @@ efi_type(int fd) } } return (0); +#else + return (ENOSYS); +#endif } void @@ -1175,7 +1449,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc) return (-1); } - for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) { + for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) { (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag; (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag; (*vtoc)->efi_parts[i].p_start = 0; diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index 197e2eefc..dcbd283ac 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -49,6 +49,26 @@ extern "C" { #define ZPOOL_MAXPROPLEN MAXPATHLEN /* + * Default device paths + */ + +#if defined(__sun__) || defined(__sun) +#define DISK_ROOT "/dev/dsk" +#define RDISK_ROOT "/dev/rdsk" +#define UDISK_ROOT RDISK_ROOT +#define FIRST_SLICE "s0" +#define BACKUP_SLICE "s2" +#endif + +#ifdef __linux__ +#define DISK_ROOT "/dev" +#define RDISK_ROOT DISK_ROOT +#define UDISK_ROOT "/dev/disk" +#define FIRST_SLICE "1" +#define BACKUP_SLICE "" +#endif + +/* * libzfs errors */ enum { @@ -248,6 +268,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); +extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); /* @@ -661,9 +682,6 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, extern int zpool_read_label(int, nvlist_t **); extern int zpool_clear_label(int); -/* is this zvol valid for use as a dump device? */ -extern int zvol_check_dump_config(char *); - /* * Management interfaces for SMB ACL files */ diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h index 3d001df07..2389b7823 100644 --- a/lib/libzfs/include/libzfs_impl.h +++ b/lib/libzfs/include/libzfs_impl.h @@ -191,6 +191,8 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); +int zvol_create_link(libzfs_handle_t *, const char *); +int zvol_remove_link(libzfs_handle_t *, const char *); boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 0bcfc0423..6f067d563 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -93,6 +93,7 @@ struct prop_changelist { int changelist_prefix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -141,6 +142,9 @@ changelist_prefix(prop_changelist_t *clp) (void) changelist_postfix(clp); return (ret); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -155,6 +159,7 @@ changelist_prefix(prop_changelist_t *clp) int changelist_postfix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; char shareopts[ZFS_MAXPROPLEN]; int errors = 0; @@ -255,6 +260,9 @@ changelist_postfix(prop_changelist_t *clp) } return (errors ? -1 : 0); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -317,6 +325,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) int changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -331,6 +340,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) } return (ret); +#else + return 0; +#endif } /* diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index baf289b64..d876e5d1f 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -57,6 +57,7 @@ #include "libzfs_impl.h" #include "zfs_deleg.h" +static int zvol_create_link_common(libzfs_handle_t *, const char *, int); static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); @@ -994,6 +995,7 @@ badlabel: /*FALLTHRU*/ +#ifdef HAVE_ZPL case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* @@ -1104,6 +1106,7 @@ badlabel: } break; +#endif /* HAVE_ZPL */ case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -2742,6 +2745,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) goto ancestorerr; } +#ifdef HAVE_ZPL if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; @@ -2751,6 +2755,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } +#endif /* HAVE_ZPL */ zfs_close(h); } @@ -2887,6 +2892,18 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, /* create the dataset */ ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); + if (ret == 0 && type == ZFS_TYPE_VOLUME) { + ret = zvol_create_link(hdl, path); + if (ret) { + (void) zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, + "Volume successfully created, but device links " + "were not created")); + zcmd_free_nvlists(&zc); + return (-1); + } + } + zcmd_free_nvlists(&zc); /* check for failure */ @@ -2949,6 +2966,9 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer) (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) { + if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) + return (-1); + zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; @@ -2991,9 +3011,17 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) zfs_close(szhp); } + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + (void) zvol_remove_link(zhp->zfs_hdl, name); + /* + * NB: this is simply a best-effort. We don't want to + * return an error, because then we wouldn't visit all + * the volumes. + */ + } + dd->closezhp = B_TRUE; - if (!dd->gotone) - rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg); + rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg); if (closezhp) zfs_close(zhp); return (rv); @@ -3128,11 +3156,70 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } + } else if (ZFS_IS_VOLUME(zhp)) { + ret = zvol_create_link(zhp->zfs_hdl, target); } return (ret); } +typedef struct promote_data { + char cb_mountpoint[MAXPATHLEN]; + const char *cb_target; + const char *cb_errbuf; + uint64_t cb_pivot_txg; +} promote_data_t; + +static int +promote_snap_cb(zfs_handle_t *zhp, void *data) +{ + promote_data_t *pd = data; + zfs_handle_t *szhp; + char snapname[MAXPATHLEN]; + int rv = 0; + + /* We don't care about snapshots after the pivot point */ + if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) { + zfs_close(zhp); + return (0); + } + + /* Remove the device link if it's a zvol. */ + if (ZFS_IS_VOLUME(zhp)) + (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); + + /* Check for conflicting names */ + (void) strlcpy(snapname, pd->cb_target, sizeof (snapname)); + (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname)); + szhp = make_dataset_handle(zhp->zfs_hdl, snapname); + if (szhp != NULL) { + zfs_close(szhp); + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "snapshot name '%s' from origin \n" + "conflicts with '%s' from target"), + zhp->zfs_name, snapname); + rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf); + } + zfs_close(zhp); + return (rv); +} + +static int +promote_snap_done_cb(zfs_handle_t *zhp, void *data) +{ + promote_data_t *pd = data; + + /* We don't care about snapshots after the pivot point */ + if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) { + /* Create the device link if it's a zvol. */ + if (ZFS_IS_VOLUME(zhp)) + (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); + } + + zfs_close(zhp); + return (0); +} + /* * Promotes the given clone fs to be the clone parent. */ @@ -3142,7 +3229,10 @@ zfs_promote(zfs_handle_t *zhp) libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[MAXPATHLEN]; + char *cp; int ret; + zfs_handle_t *pzhp; + promote_data_t pd; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, @@ -3160,7 +3250,29 @@ zfs_promote(zfs_handle_t *zhp) "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } + cp = strchr(parent, '@'); + *cp = '\0'; + + /* Walk the snapshots we will be moving */ + pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); + if (pzhp == NULL) + return (-1); + pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); + zfs_close(pzhp); + pd.cb_target = zhp->zfs_name; + pd.cb_errbuf = errbuf; + pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET); + if (pzhp == NULL) + return (-1); + (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, + sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); + ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd); + if (ret != 0) { + zfs_close(pzhp); + return (-1); + } + /* issue the ioctl */ (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); @@ -3169,9 +3281,16 @@ zfs_promote(zfs_handle_t *zhp) if (ret != 0) { int save_errno = errno; + (void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd); + zfs_close(pzhp); + switch (save_errno) { case EEXIST: - /* There is a conflicting snapshot name. */ + /* + * There is a conflicting snapshot name. We + * should have caught this above, but they could + * have renamed something in the mean time. + */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), zc.zc_string, parent); @@ -3180,7 +3299,44 @@ zfs_promote(zfs_handle_t *zhp) default: return (zfs_standard_error(hdl, save_errno, errbuf)); } + } else { + (void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd); } + + zfs_close(pzhp); + return (ret); +} + +struct createdata { + const char *cd_snapname; + int cd_ifexists; +}; + +static int +zfs_create_link_cb(zfs_handle_t *zhp, void *arg) +{ + struct createdata *cd = arg; + int ret; + + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + char name[MAXPATHLEN]; + + (void) strlcpy(name, zhp->zfs_name, sizeof (name)); + (void) strlcat(name, "@", sizeof (name)); + (void) strlcat(name, cd->cd_snapname, sizeof (name)); + (void) zvol_create_link_common(zhp->zfs_hdl, name, + cd->cd_ifexists); + /* + * NB: this is simply a best-effort. We don't want to + * return an error, because then we wouldn't visit all + * the volumes. + */ + } + + ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd); + + zfs_close(zhp); + return (ret); } @@ -3244,12 +3400,32 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, * if it was recursive, the one that actually failed will be in * zc.zc_name. */ - if (ret != 0) { + if (ret != 0) (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); - (void) zfs_standard_error(hdl, errno, errbuf); + + if (ret == 0 && recursive) { + struct createdata cd; + + cd.cd_snapname = delim + 1; + cd.cd_ifexists = B_FALSE; + (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd); + } + if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { + ret = zvol_create_link(zhp->zfs_hdl, path); + if (ret != 0) { + (void) zfs_standard_error(hdl, errno, + dgettext(TEXT_DOMAIN, + "Volume successfully snapshotted, but device links " + "were not created")); + zfs_close(zhp); + return (-1); + } } + if (ret != 0) + (void) zfs_standard_error(hdl, errno, errbuf); + zfs_close(zhp); return (ret); @@ -3351,6 +3527,8 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) + return (-1); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); @@ -3388,6 +3566,10 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { + if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) { + zfs_close(zhp); + return (err); + } if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) @@ -3536,6 +3718,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) } if (recursive) { + struct destroydata dd; parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { @@ -3550,6 +3733,15 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) goto error; } + dd.snapname = delim + 1; + dd.gotone = B_FALSE; + dd.closezhp = B_TRUE; + + /* We remove any zvol links prior to renaming them */ + ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd); + if (ret) { + goto error; + } } else { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL) return (-1); @@ -3598,10 +3790,27 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive) * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ - if (!recursive) + if (recursive) { + struct createdata cd; + + /* only create links for datasets that had existed */ + cd.cd_snapname = delim + 1; + cd.cd_ifexists = B_TRUE; + (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb, + &cd); + } else { (void) changelist_postfix(cl); + } } else { - if (!recursive) { + if (recursive) { + struct createdata cd; + + /* only create links for datasets that had existed */ + cd.cd_snapname = strchr(target, '@') + 1; + cd.cd_ifexists = B_TRUE; + ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb, + &cd); + } else { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } @@ -3620,19 +3829,103 @@ error: return (ret); } -nvlist_t * -zfs_get_user_props(zfs_handle_t *zhp) +/* + * Given a zvol dataset, issue the ioctl to create the appropriate minor node, + * and wait briefly for udev to create the /dev link. + */ +int +zvol_create_link(libzfs_handle_t *hdl, const char *dataset) { - return (zhp->zfs_user_props); + return (zvol_create_link_common(hdl, dataset, B_FALSE)); +} + +static int +zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + char path[MAXPATHLEN]; + int error; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + + /* + * Issue the appropriate ioctl. + */ + if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { + switch (errno) { + case EEXIST: + /* + * Silently ignore the case where the link already + * exists. This allows 'zfs volinit' to be run multiple + * times without errors. + */ + return (0); + + case ENOENT: + /* + * Dataset does not exist in the kernel. If we + * don't care (see zfs_rename), then ignore the + * error quietly. + */ + if (ifexists) { + return (0); + } + + /* FALLTHROUGH */ + + default: + return (zfs_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot create device links " + "for '%s'"), dataset)); + } + } + + /* + * Wait up to 10 seconds for udev to create the device. + */ + (void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset); + error = zpool_label_disk_wait(path, 10000); + if (error) + (void) printf(gettext("%s may not be immediately " + "available\n"), path); + + return (0); +} + +/* + * Remove a minor node for the given zvol and the associated /dev links. + */ +int +zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + + if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) { + switch (errno) { + case ENXIO: + /* + * Silently ignore the case where the link no longer + * exists, so that 'zfs volfini' can be run multiple + * times without errors. + */ + return (0); + + default: + return (zfs_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot remove device " + "links for '%s'"), dataset)); + } + } + + return (0); } nvlist_t * -zfs_get_recvd_props(zfs_handle_t *zhp) +zfs_get_user_props(zfs_handle_t *zhp) { - if (zhp->zfs_recvd_props == NULL) - if (get_recvd_props_ioctl(zhp) != 0) - return (NULL); - return (zhp->zfs_recvd_props); + return (zhp->zfs_user_props); } /* @@ -3744,6 +4037,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) return (0); } +#ifdef HAVE_ZPL int zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, char *resource, void *export, void *sharetab, @@ -3763,6 +4057,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc); return (error); } +#endif /* HAVE_ZPL */ void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 386ab002f..ee0064892 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -52,9 +52,11 @@ #include <sys/vtoc.h> #include <sys/dktp/fdisk.h> #include <sys/efi_partition.h> -#include <thread_pool.h> #include <sys/vdev_impl.h> +#ifdef HAVE_LIBBLKID +#include <blkid/blkid.h> +#endif #include "libzfs.h" #include "libzfs_impl.h" @@ -904,211 +906,76 @@ zpool_read_label(int fd, nvlist_t **config) return (0); } -typedef struct rdsk_node { - char *rn_name; - int rn_dfd; - libzfs_handle_t *rn_hdl; - nvlist_t *rn_config; - avl_tree_t *rn_avl; - avl_node_t rn_node; - boolean_t rn_nozpool; -} rdsk_node_t; - +#ifdef HAVE_LIBBLKID +/* + * Use libblkid to quickly search for zfs devices + */ static int -slice_cache_compare(const void *arg1, const void *arg2) -{ - const char *nm1 = ((rdsk_node_t *)arg1)->rn_name; - const char *nm2 = ((rdsk_node_t *)arg2)->rn_name; - char *nm1slice, *nm2slice; - int rv; - - /* - * slices zero and two are the most likely to provide results, - * so put those first - */ - nm1slice = strstr(nm1, "s0"); - nm2slice = strstr(nm2, "s0"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - nm1slice = strstr(nm1, "s2"); - nm2slice = strstr(nm2, "s2"); - if (nm1slice && !nm2slice) { - return (-1); - } - if (!nm1slice && nm2slice) { - return (1); - } - - rv = strcmp(nm1, nm2); - if (rv == 0) - return (0); - return (rv > 0 ? 1 : -1); -} - -static void -check_one_slice(avl_tree_t *r, char *diskname, uint_t partno, - diskaddr_t size, uint_t blksz) -{ - rdsk_node_t tmpnode; - rdsk_node_t *node; - char sname[MAXNAMELEN]; - - tmpnode.rn_name = &sname[0]; - (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u", - diskname, partno); - /* - * protect against division by zero for disk labels that - * contain a bogus sector size - */ - if (blksz == 0) - blksz = DEV_BSIZE; - /* too small to contain a zpool? */ - if ((size < (SPA_MINDEVSIZE / blksz)) && - (node = avl_find(r, &tmpnode, NULL))) - node->rn_nozpool = B_TRUE; -} - -static void -nozpool_all_slices(avl_tree_t *r, const char *sname) -{ - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if (((ptr = strrchr(diskname, 's')) == NULL) && - ((ptr = strrchr(diskname, 'p')) == NULL)) - return; - ptr[0] = 's'; - ptr[1] = '\0'; - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, 0, 1); - ptr[0] = 'p'; - for (i = 0; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); -} - -static void -check_slices(avl_tree_t *r, int fd, const char *sname) +zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) { - struct extvtoc vtoc; - struct dk_gpt *gpt; - char diskname[MAXNAMELEN]; - char *ptr; - int i; - - (void) strncpy(diskname, sname, MAXNAMELEN); - if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1])) - return; - ptr[1] = '\0'; - - if (read_extvtoc(fd, &vtoc) >= 0) { - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - vtoc.v_part[i].p_size, vtoc.v_sectorsz); - } else if (efi_alloc_and_read(fd, &gpt) >= 0) { - /* - * on x86 we'll still have leftover links that point - * to slices s[9-15], so use NDKMAP instead - */ - for (i = 0; i < NDKMAP; i++) - check_one_slice(r, diskname, i, - gpt->efi_parts[i].p_size, gpt->efi_lbasize); - /* nodes p[1-4] are never used with EFI labels */ - ptr[0] = 'p'; - for (i = 1; i <= FD_NUMPART; i++) - check_one_slice(r, diskname, i, 0, 1); - efi_free(gpt); - } -} - -static void -zpool_open_func(void *arg) -{ - rdsk_node_t *rn = arg; - struct stat64 statbuf; + blkid_cache cache; + blkid_dev_iterate iter; + blkid_dev dev; + const char *devname; nvlist_t *config; - int fd; + int fd, err; - if (rn->rn_nozpool) - return; - if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) { - /* symlink to a device that's no longer there */ - if (errno == ENOENT) - nozpool_all_slices(rn->rn_avl, rn->rn_name); - return; - } - /* - * Ignore failed stats. We only want regular - * files, character devs and block devs. - */ - if (fstat64(fd, &statbuf) != 0 || - (!S_ISREG(statbuf.st_mode) && - !S_ISCHR(statbuf.st_mode) && - !S_ISBLK(statbuf.st_mode))) { - (void) close(fd); - return; - } - /* this file is too small to hold a zpool */ - if (S_ISREG(statbuf.st_mode) && - statbuf.st_size < SPA_MINDEVSIZE) { - (void) close(fd); - return; - } else if (!S_ISREG(statbuf.st_mode)) { - /* - * Try to read the disk label first so we don't have to - * open a bunch of minor nodes that can't have a zpool. - */ - check_slices(rn->rn_avl, fd, rn->rn_name); + err = blkid_get_cache(&cache, NULL); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err); + goto err_blkid1; } - if ((zpool_read_label(fd, &config)) != 0) { - (void) close(fd); - (void) no_memory(rn->rn_hdl); - return; + err = blkid_probe_all(cache); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err); + goto err_blkid2; } - (void) close(fd); + iter = blkid_dev_iterate_begin(cache); + if (iter == NULL) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()")); + goto err_blkid2; + } - rn->rn_config = config; - if (config != NULL) { - assert(rn->rn_nozpool == B_FALSE); + err = blkid_dev_set_search(iter, "TYPE", "zfs"); + if (err != 0) { + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err); + goto err_blkid3; } -} -/* - * Given a file descriptor, clear (zero) the label information. This function - * is currently only used in the appliance stack as part of the ZFS sysevent - * module. - */ -int -zpool_clear_label(int fd) -{ - struct stat64 statbuf; - int l; - vdev_label_t *label; - uint64_t size; + while (blkid_dev_next(iter, &dev) == 0) { + devname = blkid_dev_devname(dev); + if ((fd = open64(devname, O_RDONLY)) < 0) + continue; - if (fstat64(fd, &statbuf) == -1) - return (0); - size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); + err = zpool_read_label(fd, &config); + (void) close(fd); - if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) - return (-1); + if (err != 0) { + (void) no_memory(hdl); + goto err_blkid3; + } - for (l = 0; l < VDEV_LABELS; l++) { - if (pwrite64(fd, label, sizeof (vdev_label_t), - label_offset(size, l)) != sizeof (vdev_label_t)) - return (-1); + if (config != NULL) { + err = add_config(hdl, pools, devname, config); + if (err != 0) + goto err_blkid3; + } } - free(label); - return (0); +err_blkid3: + blkid_dev_iterate_end(iter); +err_blkid2: + blkid_put_cache(cache); +err_blkid1: + return err; } +#endif /* HAVE_LIBBLKID */ /* * Given a list of directories to search, find all pools stored on disk. This @@ -1126,18 +993,28 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) char path[MAXPATHLEN]; char *end, **dir = iarg->path; size_t pathleft; - nvlist_t *ret = NULL; - static char *default_dir = "/dev/dsk"; + struct stat64 statbuf; + nvlist_t *ret = NULL, *config; + static char *default_dir = DISK_ROOT; + int fd; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; - avl_tree_t slice_cache; - rdsk_node_t *slice; - void *cookie; + + verify(iarg->poolname == NULL || iarg->guid == 0); if (dirs == 0) { +#ifdef HAVE_LIBBLKID + /* Use libblkid to scan all device for their type */ + if (zpool_find_import_blkid(hdl, &pools) == 0) + goto skip_scanning; + + (void) zfs_error_fmt(hdl, EZFS_BADCACHE, + dgettext(TEXT_DOMAIN, "blkid failure falling back " + "to manual probing")); +#endif /* HAVE_LIBBLKID */ dirs = 1; dir = &default_dir; } @@ -1148,7 +1025,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) * and toplevel GUID. */ for (i = 0; i < dirs; i++) { - tpool_t *t; char *rdsk; int dfd; @@ -1182,8 +1058,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) goto error; } - avl_create(&slice_cache, slice_cache_compare, - sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); /* * This is not MT-safe, but we have no MT consumers of libzfs */ @@ -1193,37 +1067,51 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; - slice = zfs_alloc(hdl, sizeof (rdsk_node_t)); - slice->rn_name = zfs_strdup(hdl, name); - slice->rn_avl = &slice_cache; - slice->rn_dfd = dfd; - slice->rn_hdl = hdl; - slice->rn_nozpool = B_FALSE; - avl_add(&slice_cache, slice); - } - /* - * create a thread pool to do all of this in parallel; - * rn_nozpool is not protected, so this is racy in that - * multiple tasks could decide that the same slice can - * not hold a zpool, which is benign. Also choose - * double the number of processors; we hold a lot of - * locks in the kernel, so going beyond this doesn't - * buy us much. - */ - t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), - 0, NULL); - for (slice = avl_first(&slice_cache); slice; - (slice = avl_walk(&slice_cache, slice, - AVL_AFTER))) - (void) tpool_dispatch(t, zpool_open_func, slice); - tpool_wait(t); - tpool_destroy(t); - - cookie = NULL; - while ((slice = avl_destroy_nodes(&slice_cache, - &cookie)) != NULL) { - if (slice->rn_config != NULL) { - nvlist_t *config = slice->rn_config; + /* + * Skip checking devices with well known prefixes: + * watchdog - A special close is required to avoid + * triggering it and resetting the system. + * fuse - Fuse control device. + * ppp - Generic PPP driver. + * tty* - Generic serial interface. + * vcs* - Virtual console memory. + * parport* - Parallel port interface. + * lp* - Printer interface. + * fd* - Floppy interface. + */ + if ((strncmp(name, "watchdog", 8) == 0) || + (strncmp(name, "fuse", 4) == 0) || + (strncmp(name, "ppp", 3) == 0) || + (strncmp(name, "tty", 3) == 0) || + (strncmp(name, "vcs", 3) == 0) || + (strncmp(name, "parport", 7) == 0) || + (strncmp(name, "lp", 2) == 0) || + (strncmp(name, "fd", 2) == 0)) + continue; + + if ((fd = openat64(dfd, name, O_RDONLY)) < 0) + continue; + + /* + * Ignore failed stats. We only want regular + * files and block devs. + */ + if (fstat64(fd, &statbuf) != 0 || + (!S_ISREG(statbuf.st_mode) && + !S_ISBLK(statbuf.st_mode))) { + (void) close(fd); + continue; + } + + if ((zpool_read_label(fd, &config)) != 0) { + (void) close(fd); + (void) no_memory(hdl); + goto error; + } + + (void) close(fd); + + if (config != NULL) { boolean_t matched = B_TRUE; if (iarg->poolname != NULL) { @@ -1247,19 +1135,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) continue; } /* use the non-raw path for the config */ - (void) strlcpy(end, slice->rn_name, pathleft); + (void) strlcpy(end, name, pathleft); if (add_config(hdl, &pools, path, config) != 0) goto error; } - free(slice->rn_name); - free(slice); } - avl_destroy(&slice_cache); (void) closedir(dirp); dirp = NULL; } +#ifdef HAVE_LIBBLKID +skip_scanning: +#endif ret = get_configs(hdl, &pools, iarg->can_be_active); error: diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index c31a12371..4b9038de8 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -81,6 +81,7 @@ #include <sys/systeminfo.h> #define MAXISALEN 257 /* based on sysinfo(2) man page */ +#ifdef HAVE_ZPL static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); @@ -1268,3 +1269,53 @@ out: return (ret); } + +#else /* HAVE_ZPL */ + +int +zfs_unshare_iscsi(zfs_handle_t *zhp) +{ + return 0; +} + +int +zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) +{ + return 0; +} + +void +remove_mountpoint(zfs_handle_t *zhp) { + return; +} + +boolean_t +is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) +{ + return B_FALSE; +} + +boolean_t +zfs_is_mounted(zfs_handle_t *zhp, char **where) +{ + return is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where); +} + +boolean_t +zfs_is_shared(zfs_handle_t *zhp) +{ + return B_FALSE; +} + +int +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +{ + return B_FALSE; +} + +int +zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) +{ + return B_FALSE; +} +#endif /* HAVE_ZPL */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 42f303894..ec27b5756 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -32,6 +32,8 @@ #include <stdlib.h> #include <strings.h> #include <unistd.h> +#include <zone.h> +#include <sys/stat.h> #include <sys/efi_partition.h> #include <sys/vtoc.h> #include <sys/zfs_ioctl.h> @@ -44,10 +46,6 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb); -#define DISK_ROOT "/dev/dsk" -#define RDISK_ROOT "/dev/rdsk" -#define BACKUP_SLICE "s2" - typedef struct prop_flags { int create:1; /* Validate property on creation */ int import:1; /* Validate property on import */ @@ -651,9 +649,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) /* * Don't start the slice at the default block of 34; many storage - * devices will use a stripe width of 128k, so start there instead. + * devices will use a stripe width of 128k, other vendors prefer a 1m + * alignment. It is best to play it safe and ensure a 1m alignment + * give 512b blocks. When the block size is larger by a power of 2 + * we will still be 1m aligned. */ -#define NEW_START_BLOCK 256 +#define NEW_START_BLOCK 2048 /* * Validate the given pool name, optionally putting an extended error message in @@ -948,10 +949,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, * This can happen if the user has specified the same * device multiple times. We can't reliably detect this * until we try to add it and see we already have a - * label. + * label. This can also happen under if the device is + * part of an active md or lvm device. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "one or more vdevs refer to the same device")); + "one or more vdevs refer to the same device, or one of\n" + "the devices is part of an active md or lvm device")); return (zfs_error(hdl, EZFS_BADDEV, msg)); case EOVERFLOW: @@ -1928,7 +1931,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, } else if (zpool_vdev_is_interior(path)) { verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); } else if (path[0] != '/') { - (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path); + (void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path); verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); } else { verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); @@ -2101,22 +2104,14 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) * the disk to use the new unallocated space. */ static int -zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) +zpool_relabel_disk(libzfs_handle_t *hdl, const char *path) { - char path[MAXPATHLEN]; char errbuf[1024]; int fd, error; - int (*_efi_use_whole_disk)(int); - - if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT, - "efi_use_whole_disk")) == NULL) - return (-1); - (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name); - - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to open device"), name); + "relabel '%s': unable to open device"), path); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); } @@ -2125,11 +2120,11 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) * does not have any unallocated space left. If so, we simply * ignore that error and continue on. */ - error = _efi_use_whole_disk(fd); + error = efi_use_whole_disk(fd); (void) close(fd); if (error && error != VT_ENOSPC) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " - "relabel '%s': unable to read disk capacity"), name); + "relabel '%s': unable to read disk capacity"), path); return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } return (0); @@ -3071,7 +3066,7 @@ char * zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, boolean_t verbose) { - char *path, *devid; + char *path, *devid, *type; uint64_t value; char buf[64]; vdev_stat_t *vs; @@ -3085,7 +3080,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, (u_longlong_t)value); path = buf; } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { - /* * If the device is dead (faulted, offline, etc) then don't * bother opening it. Otherwise we may be forcing the user to @@ -3124,9 +3118,19 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, devid_str_free(newdevid); } - if (strncmp(path, "/dev/dsk/", 9) == 0) - path += 9; + /* + * For a block device only use the name. + */ + verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); + if (strcmp(type, VDEV_TYPE_DISK) == 0) { + path = strrchr(path, '/'); + path++; + } +#if defined(__sun__) || defined(__sun) + /* + * The following code strips the slice from the device path. + */ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value) { int pathlen = strlen(path); @@ -3148,6 +3152,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, } return (tmp); } +#endif } else { verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); @@ -3629,7 +3634,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT, strrchr(path, '/')); - if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) { + if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) { struct dk_gpt *vtoc; if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { @@ -3675,6 +3680,54 @@ find_start_block(nvlist_t *config) return (MAXOFFSET_T); } +int +zpool_label_disk_wait(char *path, int timeout) +{ + struct stat64 statbuf; + int i; + + /* + * Wait timeout miliseconds for a newly created device to be available + * from the given path. There is a small window when a /dev/ device + * will exist and the udev link will not, so we must wait for the + * symlink. Depending on the udev rules this may take a few seconds. + */ + for (i = 0; i < timeout; i++) { + usleep(1000); + + errno = 0; + if ((stat64(path, &statbuf) == 0) && (errno == 0)) + return (0); + } + + return (ENOENT); +} + +int +zpool_label_disk_check(char *path) +{ + struct dk_gpt *vtoc; + int fd, err; + + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) + return errno; + + if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { + (void) close(fd); + return err; + } + + if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { + efi_free(vtoc); + (void) close(fd); + return EIDRM; + } + + efi_free(vtoc); + (void) close(fd); + return 0; +} + /* * Label an individual disk. The name provided is the short name, * stripped of any leading /dev path. @@ -3684,7 +3737,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) { char path[MAXPATHLEN]; struct dk_gpt *vtoc; - int fd; + int rval, fd; size_t resv = EFI_MIN_RESV_SIZE; uint64_t slice_size; diskaddr_t start_block; @@ -3720,13 +3773,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, BACKUP_SLICE); - if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) { + if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { /* * This shouldn't happen. We've long since verified that this * is a valid device. */ - zfs_error_aux(hdl, - dgettext(TEXT_DOMAIN, "unable to open device")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to open device '%s': %d"), path, errno); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); } @@ -3769,7 +3822,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) vtoc->efi_parts[8].p_size = resv; vtoc->efi_parts[8].p_tag = V_RESERVED; - if (efi_write(fd, vtoc) != 0) { + if ((rval = efi_write(fd, vtoc)) != 0) { /* * Some block drivers (like pcata) may not support EFI * GPT labels. Print out a helpful error message dir- @@ -3779,123 +3832,34 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) (void) close(fd); efi_free(vtoc); - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "try using fdisk(1M) and then provide a specific slice")); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " + "parted(8) and then provide a specific slice: %d"), rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } (void) close(fd); efi_free(vtoc); - return (0); -} - -static boolean_t -supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) -{ - char *type; - nvlist_t **child; - uint_t children, c; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || - strcmp(type, VDEV_TYPE_FILE) == 0 || - strcmp(type, VDEV_TYPE_LOG) == 0 || - strcmp(type, VDEV_TYPE_HOLE) == 0 || - strcmp(type, VDEV_TYPE_MISSING) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "vdev type '%s' is not supported"), type); - (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); - return (B_FALSE); - } - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if (!supported_dump_vdev_type(hdl, child[c], errbuf)) - return (B_FALSE); - } - } - return (B_TRUE); -} - -/* - * check if this zvol is allowable for use as a dump device; zero if - * it is, > 0 if it isn't, < 0 if it isn't a zvol - */ -int -zvol_check_dump_config(char *arg) -{ - zpool_handle_t *zhp = NULL; - nvlist_t *config, *nvroot; - char *p, *volname; - nvlist_t **top; - uint_t toplevels; - libzfs_handle_t *hdl; - char errbuf[1024]; - char poolname[ZPOOL_MAXNAMELEN]; - int pathlen = strlen(ZVOL_FULL_DEV_DIR); - int ret = 1; - - if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { - return (-1); - } - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "dump is not supported on device '%s'"), arg); - if ((hdl = libzfs_init()) == NULL) - return (1); - libzfs_print_on_error(hdl, B_TRUE); - - volname = arg + pathlen; - - /* check the configuration of the pool */ - if ((p = strchr(volname, '/')) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "malformed dataset name")); - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - return (1); - } else if (p - volname >= ZFS_MAXNAMELEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset name is too long")); - (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); - return (1); - } else { - (void) strncpy(poolname, volname, p - volname); - poolname[p - volname] = '\0'; - } - - if ((zhp = zpool_open(hdl, poolname)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not open pool '%s'"), poolname); - (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); - goto out; - } - config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not obtain vdev configuration for '%s'"), poolname); - (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); - goto out; - } - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - if (toplevels != 1) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' has multiple top level vdevs"), poolname); - (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf); - goto out; + /* Wait for the first expected slice to appear. */ + (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name, + isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE); + rval = zpool_label_disk_wait(path, 3000); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " + "detect device partitions on '%s': %d"), path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } - if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { - goto out; + /* We can't be to paranoid. Read the label back and verify it. */ + (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); + rval = zpool_label_disk_check(path); + if (rval) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " + "EFI label on '%s' is damaged. Ensure\nthis device " + "is not in in use, and is functioning properly: %d"), + path, rval); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } - ret = 0; -out: - if (zhp) - zpool_close(zhp); - libzfs_fini(hdl); - return (ret); + return 0; } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 87ffd124f..40d1d2e53 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -2608,6 +2608,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (-1); } } + if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && + zvol_remove_link(hdl, zhp->zfs_name) != 0) { + zfs_close(zhp); + zcmd_free_nvlists(&zc); + return (-1); + } zfs_close(zhp); } else { /* @@ -2813,6 +2819,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; + err = zvol_create_link(hdl, h->zfs_name); + if (err == 0 && ioctl_err == 0) + err = zvol_create_link(hdl, + zc.zc_value); } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index cb7d87cb2..71f81831b 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -36,6 +36,7 @@ #include <unistd.h> #include <ctype.h> #include <math.h> +#include <sys/stat.h> #include <sys/mnttab.h> #include <sys/mntent.h> #include <sys/types.h> @@ -648,7 +649,9 @@ libzfs_fini(libzfs_handle_t *hdl) #endif if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); +#ifdef HAVE_ZPL zfs_uninit_libshare(hdl); +#endif if (hdl->libzfs_log_str) (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 494e544ea..6f06f4001 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -35,6 +35,8 @@ #include <sys/processor.h> #include <sys/zfs_context.h> #include <sys/utsname.h> +#include <sys/time.h> +#include <sys/mount.h> /* for BLKGETSIZE64 */ #include <sys/systeminfo.h> /* @@ -533,7 +535,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ +#if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif char *dsk; fd = open64(path, O_RDONLY); if (fd == -1) { @@ -562,6 +568,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) } } + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { +#ifdef __linux__ + flags |= O_DIRECT; +#endif + /* We shouldn't be writing to block devices in userspace */ + VERIFY(!(flags & FWRITE)); + } + if (flags & FCREAT) old_umask = umask(0); @@ -584,6 +598,16 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) return (err); } +#ifdef __linux__ + /* In Linux, use an ioctl to get the size of a block device. */ + if (S_ISBLK(st.st_mode)) { + if (ioctl(fd, BLKGETSIZE64, &st.st_size) != 0) { + err = errno; + close(fd); + return (err); + } + } +#endif (void) fcntl(fd, F_SETFD, FD_CLOEXEC); *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); @@ -637,6 +661,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, } } +#ifdef __linux__ + if (rc == -1 && errno == EINVAL) { + /* + * Under Linux, this most likely means an alignment issue + * (memory or disk) due to O_DIRECT, so we abort() in order to + * catch the offender. + */ + abort(); + } +#endif if (rc == -1) return (errno); diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 37b815f94..ee1140ffb 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1436,6 +1436,7 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) int sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) { +#ifdef HAVE_ZPL int error; sa_bulk_attr_t bulk; @@ -1452,7 +1453,9 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) } mutex_exit(&hdl->sa_lock); return (error); - +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } #endif diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index d141e43d7..88fd78966 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -67,7 +67,7 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { }; #ifdef _KERNEL - +#ifdef HAVE_ZPL int zfs_sa_readlink(znode_t *zp, uio_t *uio) { @@ -331,4 +331,5 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) } } +#endif /* HAVE_ZPL */ #endif |