diff options
author | Ned A. Bass <bass6@zeno1.(none)> | 2012-04-20 17:32:30 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2012-06-01 08:55:14 -0700 |
commit | 821b683436423593a1f3ee597f40a61bd4569bdd (patch) | |
tree | a327afe3250cad9feae0ca1bdab8a16c9171b33d /cmd | |
parent | e5b85622771090b7529cffdd38c0402a544609ef (diff) |
Add vdev_id for JBOD-friendly udev aliases
vdev_id parses the file /etc/zfs/vdev_id.conf to map a physical path
in a storage topology to a channel name. The channel name is combined
with a disk enclosure slot number to create an alias that reflects the
physical location of the drive. This is particularly helpful when it
comes to tasks like replacing failed drives. Slot numbers may also be
re-mapped in case the default numbering is unsatisfactory. The drive
aliases will be created as symbolic links in /dev/disk/by-vdev.
The only currently supported topologies are sas_direct and sas_switch:
o sas_direct - a channel is uniquely identified by a PCI slot and an
HBA port
o sas_switch - a channel is uniquely identified by a SAS switch port
A multipath mode is supported in which dm-mpath devices are handled by
examining the first running component disk, as reported by 'multipath
-l'. In multipath mode the configuration file should contain a
channel definition with the same name for each path to a given
enclosure.
vdev_id can replace the existing zpool_id script on systems where the
storage topology conforms to sas_direct or sas_switch. The script
could be extended to support other topologies as well. The advantage
of vdev_id is that it is driven by a single static input file that can
be shared across multiple nodes having a common storage topology.
zpool_id, on the other hand, requires a unique /etc/zfs/zdev.conf per
node and a separate slot-mapping file. However, zpool_id provides the
flexibility of using any device names that show up in
/dev/disk/by-path, so it may still be needed on some systems.
vdev_id's functionality subsumes that of the sas_switch_id script, and
it is unlikely that anyone is still using sas_switch_id, so that script is removed.
Finally, /dev/disk/by-vdev is added to the list of directories that
'zpool import' will scan.
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #713
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/Makefile.am | 2 | ||||
-rw-r--r-- | cmd/Makefile.in | 2 | ||||
-rw-r--r-- | cmd/sas_switch_id/Makefile.am | 1 | ||||
-rwxr-xr-x | cmd/sas_switch_id/sas_switch_id | 96 | ||||
-rw-r--r-- | cmd/vdev_id/Makefile.am | 1 | ||||
-rw-r--r-- | cmd/vdev_id/Makefile.in (renamed from cmd/sas_switch_id/Makefile.in) | 8 | ||||
-rwxr-xr-x | cmd/vdev_id/vdev_id | 291 | ||||
-rw-r--r-- | cmd/zpool/zpool_vdev.c | 6 |
8 files changed, 301 insertions, 106 deletions
diff --git a/cmd/Makefile.am b/cmd/Makefile.am index de2db4030..5c8afb4e7 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,2 +1,2 @@ SUBDIRS = zfs zpool zdb zinject ztest zpios mount_zfs -SUBDIRS += zpool_layout zvol_id zpool_id sas_switch_id +SUBDIRS += zpool_layout zvol_id zpool_id vdev_id diff --git a/cmd/Makefile.in b/cmd/Makefile.in index 0efcdb4a2..ffc2834f9 100644 --- a/cmd/Makefile.in +++ b/cmd/Makefile.in @@ -329,7 +329,7 @@ top_srcdir = @top_srcdir@ udevdir = @udevdir@ udevruledir = @udevruledir@ SUBDIRS = zfs zpool zdb zinject ztest zpios mount_zfs zpool_layout \ - zvol_id zpool_id sas_switch_id + zvol_id zpool_id vdev_id all: all-recursive .SUFFIXES: diff --git a/cmd/sas_switch_id/Makefile.am b/cmd/sas_switch_id/Makefile.am deleted file mode 100644 index b666bea0a..000000000 --- a/cmd/sas_switch_id/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -dist_udev_SCRIPTS = sas_switch_id diff --git a/cmd/sas_switch_id/sas_switch_id b/cmd/sas_switch_id/sas_switch_id deleted file mode 100755 index ecaabc028..000000000 --- a/cmd/sas_switch_id/sas_switch_id +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/sh -# -# sas_switch_id -# -# Callout script for multipathd to obtain disk UUIDs. Combine the UUID -# from the scsi_id program with the SAS switch port number and enclosure -# bay number, if available. This naming convention enables easier -# identification of the physical drive location when multiple disk -# enclosures are accessed via a SAS switch. For other storage -# topologies just return the undecorated UUID of the drive. 
- -PHYS_PER_PORT=4 -DEV= - -usage() { - cat << EOF -Usage: sas_switch_id [-d disk] [-p phys_per_port] - -d Basename of the disk device [default=none] - -p Number of PHYs per switch port [default=${PHYS_PER_PORT}] - -h Show this message -EOF - exit 0 -} - -while getopts 'd:p:h' OPTION; do - case ${OPTION} in - d) - DEV=${OPTARG} - ;; - p) - PHYS_PER_PORT=${OPTARG} - ;; - h) - usage - ;; - esac -done - -if [ -z "$DEV" ] ; then - echo "Error: missing required option -d" - exit 1 -fi - -UUID=`/lib/udev/scsi_id --whitelisted --device=/dev/$DEV` -if [ $? != 0 -o -z "$UUID" ] ; then - exit 1 -fi -sys_path=`udevadm info -q path -p /sys/block/$DEV` -dirs=(`echo "$sys_path" | tr / ' '`) -switch_port_dir="/sys" - -# Get path up to /sys/.../hostX -for (( i=0; i<${#dirs[*]}; i++ )); do - d=${dirs[$i]} - switch_port_dir=$switch_port_dir/$d - echo $d | egrep -q -e '^host[0-9]+$' && break -done - -if [ $i = ${#dirs[*]} ] ; then - echo $UUID - exit 0 -fi - -# The directory three levels beneath /sys/.../hostX contains -# symlinks to phy devices that reveal the switch port number. -# Lowest phy number is $PHYS_PER_PORT*switch_port_number. -for (( j=(($i+1)) ; j<(($i+4)); j++ )); do - switch_port_dir=$switch_port_dir/${dirs[$j]} -done -pushd $switch_port_dir > /dev/null -PHY=`ls -d phy* 2>/dev/null | head -1 | awk -F: '{print $NF}'` -PORT=$(( $PHY / $PHYS_PER_PORT )) -popd > /dev/null -if [ -z "$PHY" ] ; then - echo $UUID - exit 0 -fi - -# Look in /sys/.../sas_device/end_device-X for the bay_identifier -# attribute. 
-end_device_dir=$switch_port_dir -for (( k=$j ; k<${#dirs[*]} ; k++ )); do - d=${dirs[$k]} - end_device_dir=$end_device_dir/$d - if echo $d | egrep -q -e '^end_device' ; then - end_device_dir=$end_device_dir/sas_device/$d - break - fi -done -SLOT=`cat $end_device_dir/bay_identifier 2>/dev/null` -if [ -z "$SLOT" ] ; then - echo $UUID - exit 0 -fi - -echo "$UUID-switch-port:$PORT-slot:$SLOT" diff --git a/cmd/vdev_id/Makefile.am b/cmd/vdev_id/Makefile.am new file mode 100644 index 000000000..fb815faad --- /dev/null +++ b/cmd/vdev_id/Makefile.am @@ -0,0 +1 @@ +dist_udev_SCRIPTS = vdev_id diff --git a/cmd/sas_switch_id/Makefile.in b/cmd/vdev_id/Makefile.in index 12be6784e..8f3b4ae6a 100644 --- a/cmd/sas_switch_id/Makefile.in +++ b/cmd/vdev_id/Makefile.in @@ -35,7 +35,7 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -subdir = cmd/sas_switch_id +subdir = cmd/vdev_id DIST_COMMON = $(dist_udev_SCRIPTS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -313,7 +313,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ udevdir = @udevdir@ udevruledir = @udevruledir@ -dist_udev_SCRIPTS = sas_switch_id +dist_udev_SCRIPTS = vdev_id all: all-am .SUFFIXES: @@ -326,9 +326,9 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cmd/sas_switch_id/Makefile'; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cmd/vdev_id/Makefile'; \ $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu cmd/sas_switch_id/Makefile + $(AUTOMAKE) --gnu cmd/vdev_id/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ diff --git a/cmd/vdev_id/vdev_id b/cmd/vdev_id/vdev_id new file mode 100755 index 000000000..d2781972f --- /dev/null +++ b/cmd/vdev_id/vdev_id @@ -0,0 +1,291 @@ +#!/bin/bash +# +# vdev_id: udev helper to generate user-friendly names for JBOD disks +# +# This script parses the file /etc/zfs/vdev_id.conf to map a +# physical path in a storage topology to a channel name. The +# channel name is combined with a disk enclosure slot number to +# create an alias that reflects the physical location of the drive. +# This is particularly helpful when it comes to tasks like replacing +# failed drives. Slot numbers may also be re-mapped in case the +# default numbering is unsatisfactory. The drive aliases will be +# created as symbolic links in /dev/disk/by-vdev. +# +# The only currently supported topologies are sas_direct and +# sas_switch. A multipath mode is supported in which dm-mpath +# devices are handled by examining the first-listed running +# component disk. In multipath mode the configuration file +# should contain a channel definition with the same name for +# each path to a given enclosure. + +# +# Some example configuration files are given below. + +# # +# # Example vdev_id.conf - sas_direct. 
+# # +# +# multipath no +# topology sas_direct +# phys_per_port 4 +# +# # PCI_ID HBA PORT CHANNEL NAME +# channel 85:00.0 1 A +# channel 85:00.0 0 B +# channel 86:00.0 1 C +# channel 86:00.0 0 D +# +# # Linux Mapped +# # Slot Slot +# slot 1 7 +# slot 2 10 +# slot 3 3 +# slot 4 6 +# slot 5 2 +# slot 6 8 +# slot 7 1 +# slot 8 4 +# slot 9 9 +# slot 10 5 + +# # +# # Example vdev_id.conf - sas_switch +# # +# +# topology sas_switch +# +# # SWITCH PORT CHANNEL NAME +# channel 1 A +# channel 2 B +# channel 3 C +# channel 4 D + +# # +# # Example vdev_id.conf - multipath +# # +# +# multipath yes +# +# # PCI_ID HBA PORT CHANNEL NAME +# channel 85:00.0 1 A +# channel 85:00.0 0 B +# channel 86:00.0 1 A +# channel 86:00.0 0 B + +PATH=/bin:/sbin:/usr/bin:/usr/sbin +CONFIG=/etc/zfs/vdev_id.conf +PHYS_PER_PORT= +DEV= +SLOT_MAP= +CHANNEL_MAP= +MULTIPATH= +TOPOLOGY= +declare -i i j + +usage() { + cat << EOF +Usage: vdev_id [-h] + vdev_id <-d device> [-c config_file] [-p phys_per_port] + [-g sas_direct|sas_switch] [-m] + + -c specify name of alernate config file [default=$CONFIG] + -d specify basename of device (i.e. 
sda) + -g Storage network topology [default="$TOPOLOGY"] + -m Run in multipath mode + -p number of phy's per switch port [default=$PHYS_PER_PORT] + -h show this summary +EOF + exit 0 +} + +map_slot() { + local LINUX_SLOT=$1 + local MAPPED_SLOT= + + MAPPED_SLOT=`awk "/^slot / && \\$2 == ${LINUX_SLOT} \ + { print \\$3; exit }" $CONFIG` + if [ -z "$MAPPED_SLOT" ] ; then + MAPPED_SLOT=$LINUX_SLOT + fi + printf "%d" ${MAPPED_SLOT} +} + +map_channel() { + local MAPPED_CHAN= + local PCI_ID=$1 + local PORT=$2 + + case $TOPOLOGY in + "sas_switch") + MAPPED_CHAN=`awk "/^channel / && \\$2 == ${PORT} \ + { print \\$3; exit }" $CONFIG` + ;; + "sas_direct") + MAPPED_CHAN=`awk "/^channel / && \\$2 == \"${PCI_ID}\" && \ + \\$3 == ${PORT} { print \\$4; exit }" \ + $CONFIG` + ;; + esac + printf "%s" ${MAPPED_CHAN} +} + +while getopts 'c:s:d:g:mp:h' OPTION; do + case ${OPTION} in + c) + CONFIG=`readlink -e ${OPTARG}` + ;; + d) + DEV=${OPTARG} + ;; + g) + TOPOLOGY=$OPTARG + ;; + p) + PHYS_PER_PORT=${OPTARG} + ;; + m) + MULTIPATH_MODE=yes + ;; + s) + SLOT_MAP=`readlink -e ${OPTARG}` + if [ ! -r $SLOT_MAP ] ; then + echo "Error: $SLOT_MAP is nonexistant or unreadable" + exit 1 + fi + ;; + h) + usage + ;; + esac +done + +if [ ! -r $CONFIG ] ; then + exit 0 +fi + +if [ -z "$DEV" ] ; then + echo "Error: missing required option -d" + exit 1 +fi + +if [ -z "$TOPOLOGY" ] ; then + TOPOLOGY=`awk "/^topology /{print \\$2; exit}" $CONFIG` +fi +TOPOLOGY=${TOPOLOGY:-sas_direct} +case $TOPOLOGY in + sas_direct|sas_switch) + ;; + *) + echo "Error: unknown topology $TOPOLOGY" + exit 1 + ;; +esac + +if [ -z "$PHYS_PER_PORT" ] ; then + PHYS_PER_PORT=`awk "/^phys_per_port /{print \\$2; exit}" $CONFIG` +fi +PHYS_PER_PORT=${PHYS_PER_PORT:-4} +if ! 
echo $PHYS_PER_PORT | egrep -q '^[0-9]+$' ; then + echo "Error: phys_per_port value $PHYS_PER_PORT is non-numeric" + exit 1 +fi + +if [ -z "$MULTIPATH_MODE" ] ; then + MULTIPATH_MODE=`awk "/^multipath /{print \\$2; exit}" $CONFIG` +fi + +# Use first running component device if we're handling a dm-mpath device. +if [ "$MULTIPATH_MODE" = "yes" ] ; then + # If udev didn't tell us the UUID via DM_NAME, find it in /dev/mapper + if [ -z "$DM_NAME" ] ; then + DM_NAME=`ls -l --full-time /dev/mapper | + awk "/\/$DEV$/{print \\$9}"` + fi + + # For raw disks udev exports DEVTYPE=partition when handling partitions, + # and the rules can be written to take advantage of this to append a + # -part suffix. For dm devices we get DEVTYPE=disk even for partitions + # so we have to append the -part suffix directly in the helper. + if [ "$DEVTYPE" != "partition" ] ; then + PART=`echo $DM_NAME | awk -Fp '/p/{print "-part"$2}'` + fi + + # Strip off partition information. + DM_NAME=`echo $DM_NAME | sed 's/p[0-9][0-9]*$//'` + if [ -z "$DM_NAME" ] ; then + exit 0 + fi + + # Get the raw scsi device name from multipath -l. + DEV=`multipath -l $DM_NAME |awk '/running/{print $3 ; exit}'` + if [ -z "$DEV" ] ; then + exit 0 + fi +fi + +if echo $DEV | grep -q ^/devices/ ; then + sys_path=$DEV +else + sys_path=`udevadm info -q path -p /sys/block/$DEV 2>/dev/null` +fi + +dirs=(`echo "$sys_path" | tr / ' '`) +scsi_host_dir="/sys" + +# Get path up to /sys/.../hostX +for (( i=0; i<${#dirs[*]}; i++ )); do + d=${dirs[$i]} + scsi_host_dir="$scsi_host_dir/$d" + echo $d | egrep -q -e '^host[0-9]+$' && break +done + +if [ $i = ${#dirs[*]} ] ; then + exit 0 +fi + +PCI_ID=`echo ${dirs[$(( $i - 1 ))]} | awk -F: '{print $2":"$3}'` + +# In sas_switch mode, the directory three levels beneath /sys/.../hostX +# contains symlinks to phy devices that reveal the switch port number. In +# sas_direct mode, the phy links one directory down reveal the HBA port. 
+port_dir=$scsi_host_dir +case $TOPOLOGY in + "sas_switch") j=$(($i+4)) ;; + "sas_direct") j=$(($i + 1)) ;; +esac +for (( i++; i<=$j; i++ )); do + port_dir="$port_dir/${dirs[$i]}" +done + +PHY=`ls -d $port_dir/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}'` +if [ -z "$PHY" ] ; then + exit 0 +fi +PORT=$(( $PHY / $PHYS_PER_PORT )) + +# Look in /sys/.../sas_device/end_device-X for the bay_identifier +# attribute. +end_device_dir=$port_dir +for (( ; i<${#dirs[*]} ; i++ )); do + d=${dirs[$i]} + end_device_dir="$end_device_dir/$d" + if echo $d | egrep -q -e '^end_device' ; then + end_device_dir="$end_device_dir/sas_device/$d" + break + fi +done + +SLOT=`cat $end_device_dir/bay_identifier 2>/dev/null` +if [ -z "$SLOT" ] ; then + exit 0 +fi + +SLOT=`map_slot $SLOT` +CHAN=`map_channel $PCI_ID $PORT` +if [ -z "$CHAN" ] ; then + exit 0 +fi +ID_VDEV=${CHAN}${SLOT}${PART} + +echo "ID_VDEV=${ID_VDEV}" +echo "ID_VDEV_PATH=disk/by-vdev/${ID_VDEV}" diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index a65847038..8c4fadebd 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -367,9 +367,9 @@ is_whole_disk(const char *path) /* * This may be a shorthand device path or it could be total gibberish. * Check to see if it's a known device in /dev/, /dev/disk/by-id, - * /dev/disk/by-label, /dev/disk/by-path, /dev/disk/by-uuid, or - * /dev/disk/zpool/. As part of this check, see if we've been given - * an entire disk (minus the slice number). + * /dev/disk/by-label, /dev/disk/by-path, /dev/disk/by-uuid, + * /dev/disk/by-vdev, or /dev/disk/zpool/. As part of this check, see + * if we've been given an entire disk (minus the slice number). */ static int is_shorthand_path(const char *arg, char *path, |