aboutsummaryrefslogtreecommitdiffstats
path: root/cmd/zpool/os
diff options
context:
space:
mode:
authorMatthew Macy <[email protected]>2019-09-30 12:16:06 -0700
committerBrian Behlendorf <[email protected]>2019-09-30 12:16:06 -0700
commit3283f137d713dce293983e573a62c344ddcf8a19 (patch)
tree0ee2ca31612d7422d6ca750f59a9ab4ce632d5a3 /cmd/zpool/os
parent7bb0c294688ed121477536d7b4a7031c78a5706a (diff)
OpenZFS restructuring - zpool
Factor Linux specific functions out of the zpool command. Reviewed-by: Allan Jude <[email protected]> Reviewed-by: Ryan Moeller <[email protected]> Reviewed-by: Sean Eric Fagan <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: loli10K <[email protected]> Signed-off-by: Matt Macy <[email protected]> Closes #9333
Diffstat (limited to 'cmd/zpool/os')
-rw-r--r--cmd/zpool/os/linux/zpool_vdev_os.c411
1 files changed, 411 insertions, 0 deletions
diff --git a/cmd/zpool/os/linux/zpool_vdev_os.c b/cmd/zpool/os/linux/zpool_vdev_os.c
new file mode 100644
index 000000000..1aaad974e
--- /dev/null
+++ b/cmd/zpool/os/linux/zpool_vdev_os.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2016, 2017 Intel Corporation.
+ * Copyright 2016 Igor Kozhukhov <[email protected]>.
+ */
+
+/*
+ * Functions to convert between a list of vdevs and an nvlist representing the
+ * configuration. Each entry in the list can be one of:
+ *
+ * Device vdevs
+ * disk=(path=..., devid=...)
+ * file=(path=...)
+ *
+ * Group vdevs
+ * raidz[1|2]=(...)
+ * mirror=(...)
+ *
+ * Hot spares
+ *
+ * While the underlying implementation supports it, group vdevs cannot contain
+ * other group vdevs. All userland verification of devices is contained within
+ * this file. If successful, the nvlist returned can be passed directly to the
+ * kernel; we've done as much verification as possible in userland.
+ *
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
+ * The only function exported by this file is 'make_root_vdev'. The
+ * function performs several passes:
+ *
+ * 1. Construct the vdev specification. Performs syntax validation and
+ * makes sure each device is valid.
+ * 2. Check for devices in use. Uses libblkid to make sure that none of
+ * the devices are already in use. Some conflicts can be overridden
+ * using the 'force' flag, others cannot.
+ * 3. Check for replication errors if the 'force' flag is not specified.
+ * Validates that the replication level is consistent across the
+ * entire pool.
+ * 4. Call libzfs to label any whole disks with an EFI label.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <devid.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libzutil.h>
+#include <limits.h>
+#include <sys/spa.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "zpool_util.h"
+#include <sys/zfs_context.h>
+
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/efi_partition.h>
+#include <sys/stat.h>
+#include <sys/vtoc.h>
+#include <sys/mntent.h>
+#include <uuid/uuid.h>
+#include <blkid/blkid.h>
+
+/*
+ * One row of the sector size override table: an identification string
+ * and the sector size to report for devices that match it.
+ */
+typedef struct vdev_disk_db_entry
+{
+ /* Identification bytes; not necessarily NUL terminated. */
+ char id[24];
+ /* Value returned to the caller when id matches. */
+ int sector_size;
+} vdev_disk_db_entry_t;
+
+/*
+ * Database of block devices that lie about physical sector sizes. The
+ * identification string must be precisely 24 characters to avoid false
+ * negatives: check_sector_size_database() compares exactly 24 bytes of
+ * the device's INQUIRY reply against each id with memcmp(), so the
+ * padding inside every string is significant.
+ */
+static vdev_disk_db_entry_t vdev_disk_database[] = {
+ {"ATA ADATA SSD S396 3", 8192},
+ {"ATA APPLE SSD SM128E", 8192},
+ {"ATA APPLE SSD SM256E", 8192},
+ {"ATA APPLE SSD SM512E", 8192},
+ {"ATA APPLE SSD SM768E", 8192},
+ {"ATA C400-MTFDDAC064M", 8192},
+ {"ATA C400-MTFDDAC128M", 8192},
+ {"ATA C400-MTFDDAC256M", 8192},
+ {"ATA C400-MTFDDAC512M", 8192},
+ {"ATA Corsair Force 3 ", 8192},
+ {"ATA Corsair Force GS", 8192},
+ {"ATA INTEL SSDSA2CT04", 8192},
+ {"ATA INTEL SSDSA2BZ10", 8192},
+ {"ATA INTEL SSDSA2BZ20", 8192},
+ {"ATA INTEL SSDSA2BZ30", 8192},
+ {"ATA INTEL SSDSA2CW04", 8192},
+ {"ATA INTEL SSDSA2CW08", 8192},
+ {"ATA INTEL SSDSA2CW12", 8192},
+ {"ATA INTEL SSDSA2CW16", 8192},
+ {"ATA INTEL SSDSA2CW30", 8192},
+ {"ATA INTEL SSDSA2CW60", 8192},
+ {"ATA INTEL SSDSC2CT06", 8192},
+ {"ATA INTEL SSDSC2CT12", 8192},
+ {"ATA INTEL SSDSC2CT18", 8192},
+ {"ATA INTEL SSDSC2CT24", 8192},
+ {"ATA INTEL SSDSC2CW06", 8192},
+ {"ATA INTEL SSDSC2CW12", 8192},
+ {"ATA INTEL SSDSC2CW18", 8192},
+ {"ATA INTEL SSDSC2CW24", 8192},
+ {"ATA INTEL SSDSC2CW48", 8192},
+ {"ATA KINGSTON SH100S3", 8192},
+ {"ATA KINGSTON SH103S3", 8192},
+ {"ATA M4-CT064M4SSD2 ", 8192},
+ {"ATA M4-CT128M4SSD2 ", 8192},
+ {"ATA M4-CT256M4SSD2 ", 8192},
+ {"ATA M4-CT512M4SSD2 ", 8192},
+ {"ATA OCZ-AGILITY2 ", 8192},
+ {"ATA OCZ-AGILITY3 ", 8192},
+ {"ATA OCZ-VERTEX2 3.5 ", 8192},
+ {"ATA OCZ-VERTEX3 ", 8192},
+ {"ATA OCZ-VERTEX3 LT ", 8192},
+ {"ATA OCZ-VERTEX3 MI ", 8192},
+ {"ATA OCZ-VERTEX4 ", 8192},
+ {"ATA SAMSUNG MZ7WD120", 8192},
+ {"ATA SAMSUNG MZ7WD240", 8192},
+ {"ATA SAMSUNG MZ7WD480", 8192},
+ {"ATA SAMSUNG MZ7WD960", 8192},
+ {"ATA SAMSUNG SSD 830 ", 8192},
+ {"ATA Samsung SSD 840 ", 8192},
+ {"ATA SanDisk SSD U100", 8192},
+ {"ATA TOSHIBA THNSNH06", 8192},
+ {"ATA TOSHIBA THNSNH12", 8192},
+ {"ATA TOSHIBA THNSNH25", 8192},
+ {"ATA TOSHIBA THNSNH51", 8192},
+ {"ATA APPLE SSD TS064C", 4096},
+ {"ATA APPLE SSD TS128C", 4096},
+ {"ATA APPLE SSD TS256C", 4096},
+ {"ATA APPLE SSD TS512C", 4096},
+ {"ATA INTEL SSDSA2M040", 4096},
+ {"ATA INTEL SSDSA2M080", 4096},
+ {"ATA INTEL SSDSA2M160", 4096},
+ {"ATA INTEL SSDSC2MH12", 4096},
+ {"ATA INTEL SSDSC2MH25", 4096},
+ {"ATA OCZ CORE_SSD ", 4096},
+ {"ATA OCZ-VERTEX ", 4096},
+ {"ATA SAMSUNG MCCOE32G", 4096},
+ {"ATA SAMSUNG MCCOE64G", 4096},
+ {"ATA SAMSUNG SSD PM80", 4096},
+ /* Flash drives optimized for 4KB IOs on larger pages */
+ {"ATA INTEL SSDSC2BA10", 4096},
+ {"ATA INTEL SSDSC2BA20", 4096},
+ {"ATA INTEL SSDSC2BA40", 4096},
+ {"ATA INTEL SSDSC2BA80", 4096},
+ {"ATA INTEL SSDSC2BB08", 4096},
+ {"ATA INTEL SSDSC2BB12", 4096},
+ {"ATA INTEL SSDSC2BB16", 4096},
+ {"ATA INTEL SSDSC2BB24", 4096},
+ {"ATA INTEL SSDSC2BB30", 4096},
+ {"ATA INTEL SSDSC2BB40", 4096},
+ {"ATA INTEL SSDSC2BB48", 4096},
+ {"ATA INTEL SSDSC2BB60", 4096},
+ {"ATA INTEL SSDSC2BB80", 4096},
+ {"ATA INTEL SSDSC2BW24", 4096},
+ {"ATA INTEL SSDSC2BW48", 4096},
+ {"ATA INTEL SSDSC2BP24", 4096},
+ {"ATA INTEL SSDSC2BP48", 4096},
+ {"NA SmrtStorSDLKAE9W", 4096},
+ {"NVMe Amazon EC2 NVMe ", 4096},
+ /* Imported from Open Solaris */
+ {"ATA MARVELL SD88SA02", 4096},
+ /* Advanced format Hard drives */
+ {"ATA Hitachi HDS5C303", 4096},
+ {"ATA SAMSUNG HD204UI ", 4096},
+ {"ATA ST2000DL004 HD20", 4096},
+ {"ATA WDC WD10EARS-00M", 4096},
+ {"ATA WDC WD10EARS-00S", 4096},
+ {"ATA WDC WD10EARS-00Z", 4096},
+ {"ATA WDC WD15EARS-00M", 4096},
+ {"ATA WDC WD15EARS-00S", 4096},
+ {"ATA WDC WD15EARS-00Z", 4096},
+ {"ATA WDC WD20EARS-00M", 4096},
+ {"ATA WDC WD20EARS-00S", 4096},
+ {"ATA WDC WD20EARS-00Z", 4096},
+ {"ATA WDC WD1600BEVT-0", 4096},
+ {"ATA WDC WD2500BEVT-0", 4096},
+ {"ATA WDC WD3200BEVT-0", 4096},
+ {"ATA WDC WD5000BEVT-0", 4096},
+};
+
+
+/* Size of the buffer that receives the INQUIRY reply. */
+#define INQ_REPLY_LEN 96
+/* Size of the INQUIRY command block sent to the device. */
+#define INQ_CMD_LEN 6
+
+/* Number of entries in vdev_disk_database. */
+static const int vdev_disk_database_size =
+ sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
+
+/*
+ * Look up a device in the table of drives known to misreport their
+ * physical sector size.
+ *
+ * Sends a SCSI INQUIRY to 'path' through the SG_IO ioctl and compares the
+ * identification bytes of the reply against every vdev_disk_database
+ * entry.
+ *
+ *	path		device node to query
+ *	sector_size	receives the table's sector size on a match and is
+ *			left untouched otherwise
+ *
+ * Returns B_TRUE when an entry matches. Returns B_FALSE when the device
+ * cannot be opened, the ioctl fails, the command does not complete
+ * cleanly, or no entry matches.
+ */
+boolean_t
+check_sector_size_database(char *path, int *sector_size)
+{
+	/* Identification bytes start 8 bytes into the INQUIRY reply. */
+	enum { INQ_ID_OFFSET = 8 };
+	/*
+	 * Zero the reply buffer so that a device which transfers fewer
+	 * bytes than requested is compared against zeros rather than
+	 * indeterminate stack contents.
+	 */
+	unsigned char inq_buff[INQ_REPLY_LEN] = {0};
+	unsigned char sense_buffer[32];
+	unsigned char inq_cmd_blk[INQ_CMD_LEN] =
+	    {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
+	sg_io_hdr_t io_hdr;
+	int error;
+	int fd;
+	int i;
+
+	/* Prepare INQUIRY command */
+	memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = sizeof (inq_cmd_blk);
+	io_hdr.mx_sb_len = sizeof (sense_buffer);
+	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	io_hdr.dxfer_len = INQ_REPLY_LEN;
+	io_hdr.dxferp = inq_buff;
+	io_hdr.cmdp = inq_cmd_blk;
+	io_hdr.sbp = sense_buffer;
+	io_hdr.timeout = 10;		/* 10 milliseconds is ample time */
+
+	if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
+		return (B_FALSE);
+
+	error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
+
+	/* The reply is already in inq_buff; the descriptor is not needed. */
+	(void) close(fd);
+
+	if (error < 0)
+		return (B_FALSE);
+
+	if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+		return (B_FALSE);
+
+	for (i = 0; i < vdev_disk_database_size; i++) {
+		/* Compare the full id field; entries carry no terminator. */
+		if (memcmp(inq_buff + INQ_ID_OFFSET, vdev_disk_database[i].id,
+		    sizeof (vdev_disk_database[i].id)) != 0)
+			continue;
+
+		*sector_size = vdev_disk_database[i].sector_size;
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Decide whether a single device node (partition or bare device) may be
+ * used, based on the "TYPE" tag libblkid reports for it.
+ *
+ * Returns 0 when the device may be used. A device tagged "zfs_member" is
+ * handed to check_file() and its result is returned. Any other detected
+ * type yields -1 and an error message unless 'force' is set.
+ */
+static int
+check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
+{
+	int rc = 0;
+	char *fstype = blkid_get_tag_value(cache, "TYPE", path);
+
+	/* Nothing recognised on the device, so it is safe to use. */
+	if (fstype == NULL)
+		return (rc);
+
+	if (strcmp(fstype, "zfs_member") == 0) {
+		/*
+		 * libblkid sees a ZFS device; let check_file() decide if
+		 * it is safe. The one safe case is a spare device shared
+		 * between multiple pools.
+		 */
+		rc = check_file(path, force, isspare);
+	} else if (!force) {
+		/* Some other filesystem, and no override was given. */
+		vdev_error(gettext("%s contains a filesystem of "
+		    "type '%s'\n"), path, fstype);
+		rc = -1;
+	}
+
+	/* The tag value was handed to us by libblkid and is ours to free. */
+	free(fstype);
+
+	return (rc);
+}
+
+/*
+ * Validate that a disk, including all of its partitions, is safe to use.
+ *
+ * For EFI labeled disks this can be done relatively easily with the libefi
+ * library. The partition numbers are extracted from the label and used
+ * to generate the expected /dev/ paths. Each partition can then be
+ * checked for conflicts.
+ *
+ * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
+ * but due to the lack of readily available libraries this scanning is
+ * not implemented. Instead only the device path as given is checked.
+ *
+ * When 'iswholedisk' is false the path is checked as a single slice.
+ *
+ * Returns 0 when no conflict was found (or the conflict was overridden
+ * with 'force'), non-zero otherwise.
+ */
+static int
+check_disk(const char *path, blkid_cache cache, int force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	struct dk_gpt *vtoc;
+	char slice_path[MAXPATHLEN];
+	const char *part_sep;
+	size_t pathlen;
+	int err = 0;
+	int fd, i;
+	int flags = O_RDONLY|O_DIRECT;
+
+	if (!iswholedisk)
+		return (check_slice(path, cache, force, isspare));
+
+	/* only spares can be shared, other devices require exclusive access */
+	if (!isspare)
+		flags |= O_EXCL;
+
+	if ((fd = open(path, flags)) < 0) {
+		char *value = blkid_get_tag_value(cache, "TYPE", path);
+		(void) fprintf(stderr, gettext("%s is in use and contains "
+		    "a %s filesystem.\n"), path, value ? value : "unknown");
+		free(value);
+		return (-1);
+	}
+
+	/*
+	 * Expected to fail for non-EFI labeled disks. Just check the device
+	 * as given and do not attempt to detect and scan partitions.
+	 */
+	err = efi_alloc_and_read(fd, &vtoc);
+	if (err) {
+		(void) close(fd);
+		return (check_slice(path, cache, force, isspare));
+	}
+
+	/*
+	 * The primary efi partition label is damaged however the secondary
+	 * label at the end of the device is intact. Rather than use this
+	 * label we should play it safe and treat this as a non efi device.
+	 */
+	if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
+		efi_free(vtoc);
+		(void) close(fd);
+
+		if (force) {
+			/* Partitions will now be created using the backup */
+			return (0);
+		} else {
+			vdev_error(gettext("%s contains a corrupt primary "
+			    "EFI label.\n"), path);
+			return (-1);
+		}
+	}
+
+	/*
+	 * The text placed between the disk path and the partition number
+	 * depends only on 'path', so choose it once: "-part" for paths
+	 * under UDISK_ROOT, "p" when the name ends in a digit, otherwise
+	 * nothing. The character is converted to unsigned char before it
+	 * is handed to isdigit(), which is undefined for negative values.
+	 */
+	pathlen = strlen(path);
+	if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+		part_sep = "-part";
+	else if (pathlen > 0 && isdigit((unsigned char)path[pathlen - 1]))
+		part_sep = "p";
+	else
+		part_sep = "";
+
+	for (i = 0; i < vtoc->efi_nparts; i++) {
+
+		/* Skip table slots that do not describe a partition. */
+		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
+		    uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
+			continue;
+
+		/* Partition numbers in device names are 1-based. */
+		(void) snprintf(slice_path, sizeof (slice_path),
+		    "%s%s%d", path, part_sep, i + 1);
+
+		err = check_slice(slice_path, cache, force, isspare);
+		if (err)
+			break;
+	}
+
+	efi_free(vtoc);
+	(void) close(fd);
+
+	return (err);
+}
+
+/*
+ * Check whether 'path' may be used as a vdev.
+ *
+ * Obtains a blkid cache, runs check_disk() against it and releases the
+ * cache again. Returns -1 if the cache cannot be obtained, otherwise
+ * whatever check_disk() returned.
+ */
+int
+check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+	blkid_cache cache;
+	int rc;
+
+	if (blkid_get_cache(&cache, NULL) != 0) {
+		(void) fprintf(stderr, gettext("unable to access the blkid "
+		    "cache.\n"));
+		return (-1);
+	}
+
+	rc = check_disk(path, cache, force, isspare, iswholedisk);
+
+	/* Done with the cache whether or not the check succeeded. */
+	blkid_put_cache(cache);
+
+	return (rc);
+}