summaryrefslogtreecommitdiffstats
path: root/include/sys/zfs_ioctl.h
diff options
context:
space:
mode:
author: Tim Chase <[email protected]> 2014-05-08 09:51:01 -0500
committer: Brian Behlendorf <[email protected]> 2014-05-19 11:45:11 -0700
commit: 3937ab20f32fc7b79cacfd91c0891f4e1b4ab2de (patch)
tree: 485d588f306837e2d2d271d1b3cc01cd1d67d2e8 /include/sys/zfs_ioctl.h
parent: 1cbae971c5ef215d1036b65511a839879e446c4c (diff)
Allow for lock-free reading zfsdev_state_list.
Restructure the zfsdev_state_list to allow for lock-free reading by converting it to a simple singly-linked list from which items are never deleted and over which only forward iterations are performed. It depends on, among other things, the atomicity of accessing the zs_minor integer and the zs_next pointer. This fixes a lock inversion in which the zfsdev_state_lock is used by both the sync task (txg_sync) and indirectly by any user program which uses /dev/zfs; the zfsdev_release method uses the same lock and then blocks on the sync task. The most typical failure scenario occurs when the sync task is cleaning up a user hold while various concurrent "zfs" commands are in progress. Neither Illumos nor Solaris is affected by this issue because they use the DDI interface, which provides lock-free reading of device state via the ddi_get_soft_state() function. Signed-off-by: Tim Chase <[email protected]> Signed-off-by: Chunwei Chen <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2301
Diffstat (limited to 'include/sys/zfs_ioctl.h')
-rw-r--r-- include/sys/zfs_ioctl.h 9
1 files changed, 8 insertions, 1 deletions
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 0ab095c1a..c7bd789e8 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -371,8 +371,15 @@ enum zfsdev_state_type {
ZST_ALL,
};
+/*
+ * The zfsdev_state_t structure is managed as a singly-linked list
+ * from which items are never deleted. This allows for lock-free
+ * reading of the list so long as assignments to the zs_next and
+ * reads from zs_minor are performed atomically. Empty items are
+ * indicated by storing -1 into zs_minor.
+ */
typedef struct zfsdev_state {
- list_node_t zs_next; /* next zfsdev_state_t link */
+ struct zfsdev_state *zs_next; /* next zfsdev_state_t link */
struct file *zs_file; /* associated file struct */
minor_t zs_minor; /* made up minor number */
void *zs_onexit; /* onexit data */