author    Brian Behlendorf <[email protected]>  2011-03-08 12:17:35 -0800
committer Brian Behlendorf <[email protected]>  2011-03-08 12:44:06 -0800
commit    450dc149bd5afdddad724a6eff7ff741fa8fdf11 (patch)
tree      a648684e14f51b440f5cab7150ac3ff332de677a /module/zfs
parent    126400a1ca656d41dea9d2ad88afbec3ed32d391 (diff)
Range lock performance improvements
The original range lock implementation had to be modified by commit 8926ab7 because it was unsafe on Linux. In particular, calling cv_destroy() immediately after cv_broadcast() is dangerous because the waiters may still be asleep, so the following cv_destroy() would free memory which may still be in use. This was fixed by updating cv_destroy() to block on waiters, but that in turn introduced a deadlock. The deadlock was resolved with the use of a taskq to move the offending free outside the range lock. This worked well, but dispatching the free to a taskq resulted in a serious performance hit. This is somewhat ironic because at the time I felt using the taskq might improve things by making the free asynchronous.

This patch refines the original fix and moves the free from the taskq to a private free list. Items which must be freed are simply inserted into the list. Once the range lock is dropped it is safe to free the items, so the list is walked and all rl_t entries are freed.

This change improves small cached read performance by 26x. This was expected because for small reads the number of locking calls goes up significantly. More surprisingly, this change significantly improves large cached read performance as well. This is probably attributable to better cpu/memory locality: very likely the same processor which allocated the memory is now freeing it.

bs      ext3    zfs     zfs+fix  faster
----------------------------------------------
512     435     3       79       26x
1k      820     7       160      22x
2k      1536    14      305      21x
4k      2764    28      572      20x
8k      3788    50      1024     20x
16k     4300    86      1843     21x
32k     4505    138     2560     18x
64k     5324    252     3891     15x
128k    5427    276     4710     17x
256k    5427    413     5017     12x
512k    5427    497     5324     10x
1m      5427    521     5632     10x

Closes #142
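The deferred-free pattern described above is general and easy to get wrong, so a minimal user-space sketch may help. This is not the ZFS code itself: the pthread mutex, the singly linked list, and names such as struct node, tree, and range_unlock are illustrative stand-ins for zp->z_range_lock, the AVL tree, list_t, and rl_t.

	/*
	 * Sketch of the deferred-free pattern: unlink entries from shared
	 * state while holding the lock, but postpone the actual free()
	 * until after the lock is dropped. Compile with -lpthread.
	 */
	#include <pthread.h>
	#include <stdlib.h>

	struct node {
		int		id;
		struct node	*next;
	};

	static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct node *tree;	/* stand-in for the shared AVL tree */

	static void
	range_unlock(struct node *remove)
	{
		struct node *free_list = NULL;	/* private, per-call list */
		struct node **pp, *n;

		pthread_mutex_lock(&range_lock);
		for (pp = &tree; (n = *pp) != NULL; ) {
			if (n == remove) {
				*pp = n->next;		/* unlink from shared state */
				n->next = free_list;	/* defer, don't free here */
				free_list = n;
			} else {
				pp = &n->next;
			}
		}
		pthread_mutex_unlock(&range_lock);

		/* No other thread can reach these nodes now; freeing is safe. */
		while ((n = free_list) != NULL) {
			free_list = n->next;
			free(n);
		}
	}

	int
	main(void)
	{
		struct node *a = calloc(1, sizeof (*a));

		a->next = tree;
		tree = a;
		range_unlock(a);
		return (0);
	}

The key property is the ordering: nothing is freed while the lock is held, and by the time the private list is walked no waiter can still observe the unlinked entries, so no taskq dispatch is needed.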
Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/zfs_rlock.c | 21
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c
index 26ad58de8..f3ada1706 100644
--- a/module/zfs/zfs_rlock.c
+++ b/module/zfs/zfs_rlock.c
@@ -471,7 +471,7 @@ zfs_range_free(void *arg)
  * Unlock a reader lock
  */
 static void
-zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
+zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
 {
 	avl_tree_t *tree = &zp->z_range_avl;
 	rl_t *rl, *next = NULL;
@@ -493,7 +493,7 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
 		if (remove->r_read_wanted)
 			cv_broadcast(&remove->r_rd_cv);
-		taskq_dispatch(system_taskq, zfs_range_free, remove, 0);
+		list_insert_tail(free_list, remove);
 	} else {
 		ASSERT3U(remove->r_cnt, ==, 0);
 		ASSERT3U(remove->r_write_wanted, ==, 0);
@@ -526,8 +526,7 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
 			if (rl->r_read_wanted)
 				cv_broadcast(&rl->r_rd_cv);
-			taskq_dispatch(system_taskq,
-			    zfs_range_free, rl, 0);
+			list_insert_tail(free_list, rl);
 		}
 	}
@@ -543,10 +542,13 @@ void
 zfs_range_unlock(rl_t *rl)
 {
 	znode_t *zp = rl->r_zp;
+	list_t free_list;
+	rl_t *free_rl;
 	ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
 	ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
 	ASSERT(!rl->r_proxy);
+	list_create(&free_list, sizeof(rl_t), offsetof(rl_t, rl_node));
 	mutex_enter(&zp->z_range_lock);
 	if (rl->r_type == RL_WRITER) {
@@ -559,14 +561,21 @@ zfs_range_unlock(rl_t *rl)
 		if (rl->r_read_wanted)
 			cv_broadcast(&rl->r_rd_cv);
-		taskq_dispatch(system_taskq, zfs_range_free, rl, 0);
+		list_insert_tail(&free_list, rl);
 	} else {
 		/*
 		 * lock may be shared, let zfs_range_unlock_reader()
 		 * release the zp->z_range_lock lock and free the rl_t
 		 */
-		zfs_range_unlock_reader(zp, rl);
+		zfs_range_unlock_reader(zp, rl, &free_list);
 	}
+
+	while ((free_rl = list_head(&free_list)) != NULL) {
+		list_remove(&free_list, free_rl);
+		zfs_range_free(free_rl);
+	}
+
+	list_destroy(&free_list);
 }
/*