diff options
author | Ned A. Bass <[email protected]> | 2011-06-21 16:18:27 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-06-23 10:46:06 -0700 |
commit | 560bcf9d14a63e2cef4dd49d61399c8a865c1348 (patch) | |
tree | c077c852fe1a91c51d72e42951b888feab898906 /cmd/zpool_layout | |
parent | 7e7baecaa321ce4e96938a02b87ab22a7939e422 (diff) |
Multipath device manageability improvements
Update udev helper scripts to deal with device-mapper devices created
by multipathd. These enhancements are targeted at a particular
storage network topology under evaluation at LLNL consisting of two
SAS switches providing redundant connectivity between multiple server
nodes and disk enclosures.
The key to making these systems manageable is to create a shortname for
each disk that conveys its physical location in a drawer. In a
direct-attached topology we infer a disk's enclosure from the PCI bus
number and HBA port number in the by-path name provided by udev. In a
switched topology, however, multiple drawers are accessed via a single
HBA port. We therefore resort to assigning drawer identifiers based
on which switch port a drive's enclosure is connected to. This
information is available from sysfs.
Add options to zpool_layout to generate an /etc/zfs/zdev.conf using
symbolic links in /dev/disk/by-id of the form
<label>-<UUID>-switch-port:<X>-slot:<Y>. <label> is a string that
depends on the subsystem that created the link and defaults to
"dm-uuid-mpath" (this prefix is used by multipathd). <UUID> is a
unique identifier for the disk typically obtained from the scsi_id
program, and <X> and <Y> denote the switch port and disk slot numbers,
respectively.
Add a callout script sas_switch_id for use by multipathd to help
create symlinks of the form described above. Update zpool_id and the
udev zpool rules file to handle both multipath devices and
conventional drives.
Diffstat (limited to 'cmd/zpool_layout')
-rwxr-xr-x | cmd/zpool_layout/zpool_layout | 231 |
1 files changed, 176 insertions, 55 deletions
diff --git a/cmd/zpool_layout/zpool_layout b/cmd/zpool_layout/zpool_layout index 26d3ec265..8fc6bad0e 100755 --- a/cmd/zpool_layout/zpool_layout +++ b/cmd/zpool_layout/zpool_layout @@ -1,41 +1,74 @@ #!/bin/bash # -# Set BUSES and PORTS to match the topology of your system. As each -# port is enumerated it will be assigned the next channel name. The -# current script enumerates each port on a bus before moving on to -# enumerate the next bus. +# Direct-Attached Mode +# -------------------- +# Set BUSES and HOST_PORTS to match the topology of your system. As +# each port is enumerated it will be assigned the next channel name. +# The current script enumerates each port on a bus before moving on +# to enumerate the next bus. # # Every distribution, version of udev, and type of attached storage # seems to result in slightly different formatting of the by-path # name. For this reason you may need to adjust the parsing below # to suit your needs. This is one of the reasons to use a custom -# /etc/zfs/zdev.conf file, it allows the by-path naming convertion +# /etc/zfs/zdev.conf file, it allows the by-path naming convention # to change and still keep the simple <channel><rank> naming. # +# SAS-Switch Mode +# ------------------------- +# When the host accesses disk via SAS switches the combination of +# bus and port number does not necessarily uniquely identify a +# channel or disk drawer. In this case we must resort to other +# means to infer the physical topology. For a single-level network +# (i.e. no switch cascading) we can assign alphabetic channel labels +# based on the switch port number that the drawer is connected to. +# If support for more complex topologies is needed this script will +# need to be customized or replaced. +# +# In SAS-Switch mode (enabled with "-g switch" ) we require that +# udev has been configured to create per-disk symbolic links in +# /dev/disk/by-id of the form +# <label>-<UUID>-switch-port:<X>-slot:<Y>. 
<label> is a string that +# depends on the subsystem that created the link and defaults to +# "dm-uuid-mpath" (this prefix is used by multipathd). <UUID> is a +# unique identifier for the disk typically obtained from the scsi_id +# program. <X> and <Y> denote the switch port and disk slot +# numbers, respectively, and are typically obtained from sysfs. + AWK=${AWK:-/usr/bin/awk} CONFIG=${CONFIG:-/etc/zfs/zdev.conf} BUSES=( 01 02 03 ) -PORTS=( 4 0 ) +HOST_PORTS=( 4 0 ) +SWITCH_PORTS=( 0 1 2 3 4 5 6 7 8 9 ) CHANNELS=( A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ) +TOPOLOGY="direct" TRIGGER="no" MAPPING=linux +LABEL=${LABEL:-"dm-uuid-mpath"} +DEV_DISK_DIR="/dev/disk/by-path" +shopt -s extglob usage() { cat << EOF -Usage: zpool_layout [-th] [-c file] [-b buses] [-p ports] [-n channels] [-m map] +Usage: zpool_layout [-th] [-c file] [-b buses] [-o switch_ports] + [-p host_ports] [-n channels] [-m map] [-l label] + [-g direct|switch] -c Alternate config file [default=${CONFIG}] -b Enumerate buses [default="${BUSES[*]}"] - -p Enumerate ports [default="${PORTS[*]}"] + -o Enumerate switch ports [default="${SWITCH_PORTS[*]}"] + -p Enumerate host ports [default="${HOST_PORTS[*]}"] -n Channel names [default="A..Z"] + -g Storage network topology [default="${TOPOLOGY}"] -t Trigger and wait for udev to settle [default=${TRIGGER}] + -l Prefix of SAS-switch-mode device links [default=${LABEL}] -m Slot mapping [default=${MAPPING}] -h Show this message EOF exit 0 } -while getopts 'c:b:p:n:m:th' OPTION; do +while getopts 'c:b:o:p:n:l:m:g:th' OPTION; do case ${OPTION} in c) CONFIG=${OPTARG} @@ -43,15 +76,24 @@ while getopts 'c:b:p:n:m:th' OPTION; do b) BUSES=(${OPTARG}) ;; + o) + SWITCH_PORTS=(${OPTARG}) + ;; p) - PORTS=(${OPTARG}) + HOST_PORTS=(${OPTARG}) ;; n) CHANNELS=(${OPTARG}) ;; + l) + LABEL=${OPTARG} + ;; m) MAPPING=`readlink -e ${OPTARG}` ;; + g) + TOPOLOGY=${OPTARG} + ;; t) TRIGGER=yes ;; @@ -71,7 +113,6 @@ fi # Save stdout as fd #8, then redirect stdout to the config 
file. exec 8>&1 exec >${CONFIG} -pushd /dev/disk/by-path >/dev/null map_slot() { local LINUX_SLOT=$1 @@ -86,71 +127,151 @@ map_slot() { printf "%d" ${MAPPED_SLOT} } -# Generate comment header. -echo "#" -echo "# Custom /dev/disk/by-path to /dev/disk/zpool mapping, " -echo "# based of the following physical cable layout." -echo "#" - # Generate host port layout table for comment header. -echo "# ------------------ Host Port Layout ---------------------" -echo -n "# " -for (( i=0; i<${#BUSES[*]}; i++ )); do - printf "%-8d" ${BUSES[$i]} -done -echo +print_host_port_layout() { + echo "# ------------------ Host Port Layout ---------------------" + echo -n "# " + for (( i=0; i<${#BUSES[*]}; i++ )); do + printf "%-8d" ${BUSES[$i]} + done + echo + + for (( i=0, k=0; i<${#HOST_PORTS[*]}; i++ )); do + printf "# Port %-2d " ${HOST_PORTS[$i]} -for (( i=0, k=0; i<${#PORTS[*]}; i++ )); do - printf "# Port %-2d " ${PORTS[$i]} + for (( j=0; j<${#BUSES[*]}; j++, k++ )); do + let k=$j*${#HOST_PORTS[*]}+$i + printf "%-8s" ${CHANNELS[$k]} + done + echo + done + echo "#" +} - for (( j=0; j<${#BUSES[*]}; j++, k++ )); do - let k=$j*${#PORTS[*]}+$i - printf "%-8s" ${CHANNELS[$k]} +# Generate SAS switch port layout table for comment header. +print_switch_port_layout() { + echo "# --------------- SAS Switch Port Layout ------------------" + echo -n "# Switch Port " + for (( i=0; i<${#SWITCH_PORTS[*]}; i++ )); do + printf "%3d" ${SWITCH_PORTS[$i]} done echo -done -echo "#" + echo -n "# Channel " + for (( i=0; i<${#SWITCH_PORTS[*]}; i++ )); do + printf "%3s" ${CHANNELS[$i]} + done + echo + echo "#" +} # Generate channel/disk layout table for comment header. 
-echo "# ----------------- Channel/Disk Layout -------------------" -echo "# Channel Disks" -for (( i=0, k=0; i<${#BUSES[*]}; i++ )); do - for (( j=0; j<${#PORTS[*]}; j++, k++ )); do - printf "# %-9s" ${CHANNELS[$k]} - ls *:${BUSES[$i]}:*:${PORTS[$j]}* 2>/dev/null | \ - cut -f7 -d'-' | sort -u -n | tr '\n' ',' - echo - done -done -echo "#" +print_channel_layout() { + pushd ${DEV_DISK_DIR} >/dev/null + echo "# ----------------- Channel/Disk Layout -------------------" + echo "# Channel Disks" + if [ ${TOPOLOGY} = "switch" ] ; then + for (( i=0; i<${#SWITCH_PORTS[*]}; i++ )); do + printf "# %-9s" ${CHANNELS[$i]} + p=${SWITCH_PORTS[$i]} + ls ${LABEL}-+([0-9a-f])-switch-port:${p}-slot:+([0-9]) \ + 2>/dev/null | cut -f3 -d':' | sort -u -n | \ + xargs | tr ' ' ',' + done + else + for (( i=0, k=0; i<${#BUSES[*]}; i++ )); do + for (( j=0; j<${#HOST_PORTS[*]}; j++, k++ )); do + printf "# %-9s" ${CHANNELS[$k]} + ls *:${BUSES[$i]}:*:${HOST_PORTS[$j]}* \ + 2>/dev/null | cut -f7 -d'-' | \ + sort -u -n | xargs | tr ' ' ',' + done + done + fi + echo "#" + popd > /dev/null +} # Generate mapping from <channel><rank> to by-path name. 
-for (( i=0, k=0; i<${#BUSES[*]}; i++ )); do - for (( j=0; j<${#PORTS[*]}; j++, k++ )); do - BYPATH=(`ls *:${BUSES[$i]}:*:${PORTS[$j]}* 2>/dev/null | \ - grep -v part | sort -n -k7 -t'-' | cut -f1-6 -d'-'`) - SLOTS=(`ls *:${BUSES[$i]}:*:${PORTS[$j]}* 2>/dev/null | \ - grep -v part | sort -n -k7 -t'-' | cut -f7 -d'-'`) +map_shortname_to_by_path() { + pushd ${DEV_DISK_DIR} >/dev/null + for (( i=0, k=0; i<${#BUSES[*]}; i++ )); do + for (( j=0; j<${#HOST_PORTS[*]}; j++, k++ )); do + BYPATH=(`ls *:${BUSES[$i]}:*:${HOST_PORTS[$j]}* \ + 2>/dev/null | grep -v part | \ + sort -n -k7 -t'-' | cut -f1-6 -d'-'`) + SLOTS=(`ls *:${BUSES[$i]}:*:${HOST_PORTS[$j]}* \ + 2>/dev/null | grep -v part | \ + sort -n -k7 -t'-' | cut -f7 -d'-'`) + TMP_FILE=`mktemp` + + for (( l=0; l<${#SLOTS[*]}; l++ )); do + MAPPED_SLOT=`map_slot ${SLOTS[$l]}` + printf "%s%d\t%s-%d\n" \ + ${CHANNELS[$k]} ${MAPPED_SLOT} \ + ${BYPATH[$l]} ${SLOTS[$l]} >>${TMP_FILE} + done + + echo + echo -n "# Channel ${CHANNELS[$k]}, " + echo "Bus ${BUSES[$i]}, Port ${HOST_PORTS[$j]}" + cat ${TMP_FILE} | sort -n -k2 -t${CHANNELS[$k]} + rm -f ${TMP_FILE} + done + done + popd >/dev/null +} + +# Generate mapping from <channel><rank> to by-id name. 
+map_shortname_to_by_id() { + pushd ${DEV_DISK_DIR} >/dev/null + for (( i=0; i<${#SWITCH_PORTS[*]}; i++ )); do + p=${SWITCH_PORTS[$i]} + BYID=(`ls ${LABEL}-+([0-9a-f])-switch-port:${p}-slot:+([0-9]) \ + 2>/dev/null | grep -v part | sort -k3n -t':' | \ + cut -f1-2 -d':'`) + SLOTS=(`ls ${LABEL}-+([0-9a-f])-switch-port:${p}-slot:+([0-9]) \ + 2>/dev/null | grep -v part | sort -k3n -t':' | \ + cut -f3 -d':'`) TMP_FILE=`mktemp` for (( l=0; l<${#SLOTS[*]}; l++ )); do MAPPED_SLOT=`map_slot ${SLOTS[$l]}` - printf "%s%d\t%s-%d\n" \ - ${CHANNELS[$k]} ${MAPPED_SLOT} \ - ${BYPATH[$l]} ${SLOTS[$l]} >>${TMP_FILE} + printf "%s%d\t%s:%d\n" \ + ${CHANNELS[$i]} ${MAPPED_SLOT} ${BYID[$l]} \ + ${SLOTS[$l]} >>${TMP_FILE} done echo - echo -n "# Channel ${CHANNELS[$k]}, " - echo "Bus ${BUSES[$i]}, Port ${PORTS[$j]}" - cat ${TMP_FILE} | sort -n -k2 -t${CHANNELS[$k]} + echo -n "# Channel ${CHANNELS[$i]}, " + echo "SAS Switch Port ${SWITCH_PORTS[$i]}" + cat ${TMP_FILE} | sort -n -k2 -t${CHANNELS[$i]} rm -f ${TMP_FILE} done -done + popd > /dev/null +} + +# Generate comment header. +echo "#" +echo "# Custom ${DEV_DISK_DIR} to /dev/disk/zpool mapping, " +echo "# based of the following physical cable layout." +echo "#" + +case ${TOPOLOGY} in + direct) + print_host_port_layout + print_channel_layout + map_shortname_to_by_path + ;; + switch) + DEV_DISK_DIR="/dev/disk/by-id" + print_switch_port_layout + print_channel_layout + map_shortname_to_by_id + ;; +esac # Restore stdout from fd #8 and close fd #8. exec 1>&8 8>&- -popd >/dev/null if [ ${TRIGGER} = "yes" ]; then udevadm trigger --action=change --subsystem-match=block |