aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys/zfs_vfsops.h
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2015-12-22 13:47:38 -0800
committer Brian Behlendorf <[email protected]> 2016-01-15 15:33:45 -0800
commit c96c36fa22ab97f6b3025b356bfca8d9e030d002 (patch)
tree cfa628fd0d582460925b75e54801d079984f1fda /include/sys/zfs_vfsops.h
parent 0720116d4dd7a62d2097863fc4c32c3cbd11aefa (diff)
Fix zsb->z_hold_mtx deadlock
The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to serialize access to a znode and its SA buffer while the object is being created or destroyed. This kind of locking would normally reside in the znode itself but in this case that's impossible because the znode and SA buffer may not yet exist. Therefore the locking is handled externally with an array of mutexes and AVL trees which contain per-object locks. In zfs_znode_hold_enter() a per-object lock is created as needed, inserted into the correct AVL tree and finally the per-object lock is held. In zfs_znode_hold_exit() the process is reversed. The per-object lock is released, removed from the AVL tree and destroyed if there are no waiters. This scheme has two important properties: 1) No memory allocations are performed while holding one of the z_hold_locks. This ensures evict(), which can be called from direct memory reclaim, will never block waiting on one of the z_hold_locks which just happens to have hashed to the same index. 2) All locks used to serialize access to an object are per-object and never shared. This minimizes lock contention without creating a large number of dedicated locks. On the downside it does require znode_lock_t structures to be frequently allocated and freed. However, because these are backed by a kmem cache and are very short-lived, this cost is minimal. Signed-off-by: Brian Behlendorf <[email protected]> Issue #4106
Diffstat (limited to 'include/sys/zfs_vfsops.h')
-rw-r--r-- include/sys/zfs_vfsops.h 5
1 files changed, 3 insertions, 2 deletions
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index a59114a1a..efaefdacc 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -112,8 +112,9 @@ typedef struct zfs_sb {
uint64_t z_groupquota_obj;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
- uint64_t z_hold_mtx_size; /* znode hold locks size */
- kmutex_t *z_hold_mtx; /* znode hold locks */
+ uint64_t z_hold_size; /* znode hold array size */
+ avl_tree_t *z_hold_trees; /* znode hold trees */
+ kmutex_t *z_hold_locks; /* znode hold locks */
} zfs_sb_t;
#define ZFS_SUPER_MAGIC 0x2fc12fc1