-rw-r--r--  include/sys/zfs_vfsops.h  |   5
-rw-r--r--  include/sys/zfs_znode.h   |  25
-rw-r--r--  module/zfs/zfs_vfsops.c   |  32
-rw-r--r--  module/zfs/zfs_znode.c    | 220
4 files changed, 218 insertions(+), 64 deletions(-)
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index a59114a1a..efaefdacc 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -112,8 +112,9 @@ typedef struct zfs_sb {
uint64_t z_groupquota_obj;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
- uint64_t z_hold_mtx_size; /* znode hold locks size */
- kmutex_t *z_hold_mtx; /* znode hold locks */
+ uint64_t z_hold_size; /* znode hold array size */
+ avl_tree_t *z_hold_trees; /* znode hold trees */
+ kmutex_t *z_hold_locks; /* znode hold locks */
} zfs_sb_t;
#define ZFS_SUPER_MAGIC 0x2fc12fc1
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 59ca085ef..c03bef5c7 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -220,6 +220,12 @@ typedef struct znode {
struct inode z_inode; /* generic vfs inode */
} znode_t;
+typedef struct znode_hold {
+ uint64_t zh_obj; /* object id */
+ kmutex_t zh_lock; /* lock serializing object access */
+ avl_node_t zh_node; /* avl tree linkage */
+ refcount_t zh_refcount; /* active consumer reference count */
+} znode_hold_t;
/*
* Range locking rules
@@ -273,24 +279,12 @@ typedef struct znode {
/*
* Macros for dealing with dmu_buf_hold
*/
-#define ZFS_OBJ_MTX_SZ 64
-#define ZFS_OBJ_MTX_MAX (1024 * 1024)
+#define ZFS_OBJ_MTX_SZ 64
+#define ZFS_OBJ_MTX_MAX (1024 * 1024)
+#define ZFS_OBJ_HASH(zsb, obj) ((obj) & ((zsb->z_hold_size) - 1))
extern unsigned int zfs_object_mutex_size;
-#define ZFS_OBJ_HASH(zsb, obj_num) \
- ((obj_num) & ((zsb->z_hold_mtx_size) - 1))
-#define ZFS_OBJ_MUTEX(zsb, obj_num) \
- (&(zsb)->z_hold_mtx[ZFS_OBJ_HASH(zsb, obj_num)])
-#define ZFS_OBJ_HOLD_ENTER(zsb, obj_num) \
- mutex_enter(ZFS_OBJ_MUTEX((zsb), (obj_num)))
-#define ZFS_OBJ_HOLD_TRYENTER(zsb, obj_num) \
- mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num)))
-#define ZFS_OBJ_HOLD_EXIT(zsb, obj_num) \
- mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num)))
-#define ZFS_OBJ_HOLD_OWNED(zsb, obj_num) \
- mutex_owned(ZFS_OBJ_MUTEX((zsb), (obj_num)))
-
/* Encode ZFS stored time values from a struct timespec */
#define ZFS_TIME_ENCODE(tp, stmp) \
{ \
@@ -326,6 +320,7 @@ extern void zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
+extern int zfs_znode_hold_compare(const void *, const void *);
extern int zfs_zget(zfs_sb_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
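The new ZFS_OBJ_HASH() macro picks a bucket by masking the object number with
(z_hold_size - 1). That mask is only equivalent to a modulo because
z_hold_size is always rounded down to a power of two (see the sizing code in
zfs_sb_create() and zfs_create_fs() below). A minimal userspace sketch of that
equivalence; everything here is illustrative, not kernel code:

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint64_t size = 64;	/* a power of two, like z_hold_size */
		uint64_t obj;

		/* (obj & (size - 1)) == (obj % size) only for powers of two */
		for (obj = 0; obj < 4096; obj++)
			assert((obj & (size - 1)) == (obj % size));

		return (0);
	}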
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 5e15faca4..7696071f1 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -663,7 +663,7 @@ zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
objset_t *os;
zfs_sb_t *zsb;
uint64_t zval;
- int i, error;
+ int i, size, error;
uint64_t sa_obj;
zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
@@ -685,8 +685,7 @@ zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
/*
* Initialize the zfs-specific filesystem structure.
- * Should probably make this a kmem cache, shuffle fields,
- * and just bzero up to z_hold_mtx[].
+ * Should probably make this a kmem cache, shuffle fields.
*/
zsb->z_sb = NULL;
zsb->z_parent = zsb;
@@ -795,12 +794,15 @@ zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
- zsb->z_hold_mtx_size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
- ZFS_OBJ_MTX_MAX);
- zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * zsb->z_hold_mtx_size,
- KM_SLEEP);
- for (i = 0; i != zsb->z_hold_mtx_size; i++)
- mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+ size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
+ zsb->z_hold_size = size;
+ zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
+ zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+ for (i = 0; i != size; i++) {
+ avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
+ sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
+ mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+ }
*zsbp = zsb;
return (0);
@@ -809,7 +811,6 @@ out:
dmu_objset_disown(os, zsb);
*zsbp = NULL;
- vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
kmem_free(zsb, sizeof (zfs_sb_t));
return (error);
}
@@ -901,7 +902,7 @@ EXPORT_SYMBOL(zfs_sb_setup);
void
zfs_sb_free(zfs_sb_t *zsb)
{
- int i;
+ int i, size = zsb->z_hold_size;
zfs_fuid_destroy(zsb);
@@ -911,9 +912,12 @@ zfs_sb_free(zfs_sb_t *zsb)
rrm_destroy(&zsb->z_teardown_lock);
rw_destroy(&zsb->z_teardown_inactive_lock);
rw_destroy(&zsb->z_fuid_lock);
- for (i = 0; i != zsb->z_hold_mtx_size; i++)
- mutex_destroy(&zsb->z_hold_mtx[i]);
- vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
+ for (i = 0; i != size; i++) {
+ avl_destroy(&zsb->z_hold_trees[i]);
+ mutex_destroy(&zsb->z_hold_locks[i]);
+ }
+ vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
+ vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
zfs_mntopts_free(zsb->z_mntopts);
kmem_free(zsb, sizeof (zfs_sb_t));
}
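Both zfs_sb_create() above and zfs_create_fs() below size the bucket arrays as
MIN(1 << (highbit64(zfs_object_mutex_size) - 1), ZFS_OBJ_MTX_MAX), i.e. the
tunable rounded down to a power of two, then clamped. A standalone sketch of
that arithmetic, with highbit64() reimplemented here for illustration (in the
kernel it is provided by the SPL):

	#include <stdint.h>
	#include <stdio.h>

	/* 1-based index of the highest set bit; 0 when no bits are set. */
	static int
	highbit64(uint64_t i)
	{
		int h = 0;

		while (i != 0) {
			h++;
			i >>= 1;
		}
		return (h);
	}

	int
	main(void)
	{
		uint64_t tunable[] = { 64, 100, 1000, 2097152 };
		int i;

		for (i = 0; i < 4; i++) {
			uint64_t size = 1ULL << (highbit64(tunable[i]) - 1);

			if (size > 1024 * 1024)	/* ZFS_OBJ_MTX_MAX clamp */
				size = 1024 * 1024;
			/* 64 -> 64, 100 -> 64, 1000 -> 512,
			   2097152 -> 1048576 */
			printf("%llu -> %llu\n",
			    (unsigned long long)tunable[i],
			    (unsigned long long)size);
		}
		return (0);
	}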
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 94e26fb13..03274a9b9 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -95,6 +95,7 @@
#ifdef _KERNEL
static kmem_cache_t *znode_cache = NULL;
+static kmem_cache_t *znode_hold_cache = NULL;
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
/*ARGSUSED*/
@@ -145,6 +146,27 @@ zfs_znode_cache_destructor(void *buf, void *arg)
ASSERT(zp->z_xattr_parent == NULL);
}
+static int
+zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
+{
+ znode_hold_t *zh = buf;
+
+ mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
+ refcount_create(&zh->zh_refcount);
+ zh->zh_obj = ZFS_NO_OBJECT;
+
+ return (0);
+}
+
+static void
+zfs_znode_hold_cache_destructor(void *buf, void *arg)
+{
+ znode_hold_t *zh = buf;
+
+ mutex_destroy(&zh->zh_lock);
+ refcount_destroy(&zh->zh_refcount);
+}
+
void
zfs_znode_init(void)
{
@@ -157,6 +179,11 @@ zfs_znode_init(void)
znode_cache = kmem_cache_create("zfs_znode_cache",
sizeof (znode_t), 0, zfs_znode_cache_constructor,
zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
+
+ ASSERT(znode_hold_cache == NULL);
+ znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
+ sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
+ zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
}
void
@@ -168,6 +195,124 @@ zfs_znode_fini(void)
if (znode_cache)
kmem_cache_destroy(znode_cache);
znode_cache = NULL;
+
+ if (znode_hold_cache)
+ kmem_cache_destroy(znode_hold_cache);
+ znode_hold_cache = NULL;
+}
+
+/*
+ * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
+ * serialize access to a znode and its SA buffer while the object is being
+ * created or destroyed. This kind of locking would normally reside in the
+ * znode itself but in this case that's impossible because the znode and SA
+ * buffer may not yet exist. Therefore the locking is handled externally
+ * with an array of mutexes and AVL trees which contain per-object locks.
+ *
+ * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
+ * into the correct AVL tree and finally the per-object lock is held. In
+ * zfs_znode_hold_exit() the process is reversed. The per-object lock is
+ * released, removed from the AVL tree, and freed once the last reference drops.
+ *
+ * This scheme has two important properties:
+ *
+ * 1) No memory allocations are performed while holding one of the z_hold_locks.
+ * This ensures evict(), which can be called from direct memory reclaim, will
+ * never block waiting on a z_hold_locks entry which just happens to have
+ * hashed to the same index.
+ *
+ * 2) All locks used to serialize access to an object are per-object and never
+ * shared. This minimizes lock contention without creating a large number
+ * of dedicated locks.
+ *
+ * On the downside it does require znode_hold_t structures to be frequently
+ * allocated and freed. However, because these are backed by a kmem cache
+ * and are very short lived, this cost is minimal.
+ */
+int
+zfs_znode_hold_compare(const void *a, const void *b)
+{
+ const znode_hold_t *zh_a = a;
+ const znode_hold_t *zh_b = b;
+
+ if (zh_a->zh_obj < zh_b->zh_obj)
+ return (-1);
+ else if (zh_a->zh_obj > zh_b->zh_obj)
+ return (1);
+ else
+ return (0);
+}
+
+boolean_t
+zfs_znode_held(zfs_sb_t *zsb, uint64_t obj)
+{
+ znode_hold_t *zh, search;
+ int i = ZFS_OBJ_HASH(zsb, obj);
+
+ search.zh_obj = obj;
+
+ mutex_enter(&zsb->z_hold_locks[i]);
+ zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
+ mutex_exit(&zsb->z_hold_locks[i]);
+
+ if (zh && MUTEX_HELD(&zh->zh_lock))
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+static znode_hold_t *
+zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
+{
+ znode_hold_t *zh, *zh_new, search;
+ int i = ZFS_OBJ_HASH(zsb, obj);
+ boolean_t found = B_FALSE;
+
+ zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
+ zh_new->zh_obj = obj;
+ search.zh_obj = obj;
+
+ mutex_enter(&zsb->z_hold_locks[i]);
+ zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
+ if (likely(zh == NULL)) {
+ zh = zh_new;
+ avl_add(&zsb->z_hold_trees[i], zh);
+ } else {
+ ASSERT3U(zh->zh_obj, ==, obj);
+ found = B_TRUE;
+ }
+ refcount_add(&zh->zh_refcount, NULL);
+ mutex_exit(&zsb->z_hold_locks[i]);
+
+ if (found == B_TRUE)
+ kmem_cache_free(znode_hold_cache, zh_new);
+
+ ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
+ ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
+ mutex_enter(&zh->zh_lock);
+
+ return (zh);
+}
+
+static void
+zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh)
+{
+ int i = ZFS_OBJ_HASH(zsb, zh->zh_obj);
+ boolean_t remove = B_FALSE;
+
+ ASSERT(zfs_znode_held(zsb, zh->zh_obj));
+ ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
+ mutex_exit(&zh->zh_lock);
+
+ mutex_enter(&zsb->z_hold_locks[i]);
+ if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
+ avl_remove(&zsb->z_hold_trees[i], zh);
+ remove = B_TRUE;
+ }
+ mutex_exit(&zsb->z_hold_locks[i]);
+
+ if (remove == B_TRUE)
+ kmem_cache_free(znode_hold_cache, zh);
}
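Together, zfs_znode_hold_enter() and zfs_znode_hold_exit() replace the old
ZFS_OBJ_HOLD_ENTER()/ZFS_OBJ_HOLD_EXIT() macro pairs, as the call-site changes
below show. A sketch of the calling pattern, assuming zsb and obj are in
scope:

	znode_hold_t *zh;

	/* Blocks until this thread owns the per-object lock for obj. */
	zh = zfs_znode_hold_enter(zsb, obj);

	/* ... create, look up, or destroy the znode / SA buffer ... */

	/* Drops the lock; frees the hold when the last reference drops. */
	zfs_znode_hold_exit(zsb, zh);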
int
@@ -222,7 +367,7 @@ static void
zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
- ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zsb, zp->z_id)));
+ ASSERT(zfs_znode_held(zsb, zp->z_id));
mutex_enter(&zp->z_lock);
@@ -244,8 +389,7 @@ zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
void
zfs_znode_dmu_fini(znode_t *zp)
{
- ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(ZTOZSB(zp), zp->z_id)) ||
- zp->z_unlinked ||
+ ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
sa_handle_destroy(zp->z_sa_hdl);
@@ -573,6 +717,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
sa_bulk_attr_t *sa_attrs;
int cnt = 0;
zfs_acl_locator_cb_t locate = { 0 };
+ znode_hold_t *zh;
if (zsb->z_replay) {
obj = vap->va_nodeid;
@@ -619,7 +764,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
}
- ZFS_OBJ_HOLD_ENTER(zsb, obj);
+ zh = zfs_znode_hold_enter(zsb, obj);
VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));
/*
@@ -793,7 +938,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
}
kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
- ZFS_OBJ_HOLD_EXIT(zsb, obj);
+ zfs_znode_hold_exit(zsb, zh);
}
/*
@@ -897,17 +1042,18 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
dmu_object_info_t doi;
dmu_buf_t *db;
znode_t *zp;
+ znode_hold_t *zh;
int err;
sa_handle_t *hdl;
*zpp = NULL;
again:
- ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
+ zh = zfs_znode_hold_enter(zsb, obj_num);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (err);
}
@@ -917,7 +1063,7 @@ again:
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (SET_ERROR(EINVAL));
}
@@ -956,7 +1102,7 @@ again:
if (igrab(ZTOI(zp)) == NULL) {
mutex_exit(&zp->z_lock);
sa_buf_rele(db, NULL);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
/* inode might need this to finish evict */
cond_resched();
goto again;
@@ -966,7 +1112,7 @@ again:
}
mutex_exit(&zp->z_lock);
sa_buf_rele(db, NULL);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (err);
}
@@ -987,7 +1133,7 @@ again:
} else {
*zpp = zp;
}
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (err);
}
@@ -1003,8 +1149,9 @@ zfs_rezget(znode_t *zp)
int err;
int count = 0;
uint64_t gen;
+ znode_hold_t *zh;
- ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
+ zh = zfs_znode_hold_enter(zsb, obj_num);
mutex_enter(&zp->z_acl_lock);
if (zp->z_acl_cached) {
@@ -1028,7 +1175,7 @@ zfs_rezget(znode_t *zp)
ASSERT(zp->z_sa_hdl == NULL);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (err);
}
@@ -1038,7 +1185,7 @@ zfs_rezget(znode_t *zp)
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (SET_ERROR(EINVAL));
}
@@ -1064,7 +1211,7 @@ zfs_rezget(znode_t *zp)
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (SET_ERROR(EIO));
}
@@ -1072,7 +1219,7 @@ zfs_rezget(znode_t *zp)
if (gen != zp->z_gen) {
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (SET_ERROR(EIO));
}
@@ -1080,7 +1227,7 @@ zfs_rezget(znode_t *zp)
zp->z_blksz = doi.doi_data_block_size;
zfs_inode_update(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ zfs_znode_hold_exit(zsb, zh);
return (0);
}
@@ -1092,15 +1239,16 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
objset_t *os = zsb->z_os;
uint64_t obj = zp->z_id;
uint64_t acl_obj = zfs_external_acl(zp);
+ znode_hold_t *zh;
- ZFS_OBJ_HOLD_ENTER(zsb, obj);
+ zh = zfs_znode_hold_enter(zsb, obj);
if (acl_obj) {
VERIFY(!zp->z_is_sa);
VERIFY(0 == dmu_object_free(os, acl_obj, tx));
}
VERIFY(0 == dmu_object_free(os, obj, tx));
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, obj);
+ zfs_znode_hold_exit(zsb, zh);
}
void
@@ -1108,13 +1256,14 @@ zfs_zinactive(znode_t *zp)
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
+ znode_hold_t *zh;
ASSERT(zp->z_sa_hdl);
/*
* Don't allow a zfs_zget() while we're trying to release this znode.
*/
- ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ zh = zfs_znode_hold_enter(zsb, z_id);
mutex_enter(&zp->z_lock);
@@ -1124,9 +1273,7 @@ zfs_zinactive(znode_t *zp)
*/
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
-
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
-
+ zfs_znode_hold_exit(zsb, zh);
zfs_rmnode(zp);
return;
}
@@ -1134,7 +1281,7 @@ zfs_zinactive(znode_t *zp)
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+ zfs_znode_hold_exit(zsb, zh);
}
static inline int
@@ -1624,6 +1771,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
uint64_t sense = ZFS_CASE_SENSITIVE;
uint64_t norm = 0;
nvpair_t *elem;
+ int size;
int error;
int i;
znode_t *rootzp = NULL;
@@ -1735,12 +1883,15 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
list_create(&zsb->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
- zsb->z_hold_mtx_size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
- ZFS_OBJ_MTX_MAX);
- zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * zsb->z_hold_mtx_size,
- KM_SLEEP);
- for (i = 0; i != zsb->z_hold_mtx_size; i++)
- mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+ size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
+ zsb->z_hold_size = size;
+ zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
+ zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+ for (i = 0; i != size; i++) {
+ avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
+ sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
+ mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+ }
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
cr, NULL, &acl_ids));
@@ -1760,10 +1911,13 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
error = zfs_create_share_dir(zsb, tx);
ASSERT(error == 0);
- for (i = 0; i != zsb->z_hold_mtx_size; i++)
- mutex_destroy(&zsb->z_hold_mtx[i]);
+ for (i = 0; i != size; i++) {
+ avl_destroy(&zsb->z_hold_trees[i]);
+ mutex_destroy(&zsb->z_hold_locks[i]);
+ }
- vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * zsb->z_hold_mtx_size);
+ vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
+ vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
kmem_free(sb, sizeof (struct super_block));
kmem_free(zsb, sizeof (zfs_sb_t));
}