diff options
author | Brian Behlendorf <[email protected]> | 2011-02-04 14:38:11 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-02-10 09:27:21 -0800 |
commit | 8926ab7a50d60d855e4d49d2ed7bdef49dd56149 (patch) | |
tree | d87a6ca7624381f1a8c500ba7be772a3ba0499ca /module/zfs | |
parent | c0d35759c5ab1abaa6b72062cc4ecd0d86628de8 (diff) |
Move cv_destroy() outside zp->z_range_lock()
With the recent SPL change (d599e4fa) that forces cv_destroy()
to block until all waiters have been woken. It is now unsafe
to call cv_destroy() under the zp->z_range_lock() because it
is used as the condition variable mutex. If there are waiters
cv_destroy() will block until they wake up and aquire the mutex.
However, they will never aquire the mutex because cv_destroy()
will not return allowing it's caller to drop the lock. Deadlock.
To avoid this cv_destroy() is now run asynchronously in a taskq.
This solves two problems:
1) It is no longer run under the zp->z_range_lock so no deadlock.
2) Since cv_destroy() may now block we don't want this slowing
down zfs_range_unlock() and throttling the system.
This was not as much of an issue under OpenSolaris because their
cv_destroy() implementation does not do anything. They do however
risk a bad paging request if cv_destroy() returns, the memory holding
the condition variable is free'd, and then the waiters wake up and
try to reference it. It's a very small unlikely race, but it is
possible.
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/zfs_rlock.c | 59 |
1 files changed, 36 insertions, 23 deletions
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index 9362fb4e8..26ad58de8 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -453,6 +453,20 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) return (new); } +static void +zfs_range_free(void *arg) +{ + rl_t *rl = arg; + + if (rl->r_write_wanted) + cv_destroy(&rl->r_wr_cv); + + if (rl->r_read_wanted) + cv_destroy(&rl->r_rd_cv); + + kmem_free(rl, sizeof (rl_t)); +} + /* * Unlock a reader lock */ @@ -472,14 +486,14 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) */ if (remove->r_cnt == 1) { avl_remove(tree, remove); - if (remove->r_write_wanted) { + mutex_exit(&zp->z_range_lock); + if (remove->r_write_wanted) cv_broadcast(&remove->r_wr_cv); - cv_destroy(&remove->r_wr_cv); - } - if (remove->r_read_wanted) { + + if (remove->r_read_wanted) cv_broadcast(&remove->r_rd_cv); - cv_destroy(&remove->r_rd_cv); - } + + taskq_dispatch(system_taskq, zfs_range_free, remove, 0); } else { ASSERT3U(remove->r_cnt, ==, 0); ASSERT3U(remove->r_write_wanted, ==, 0); @@ -505,19 +519,21 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) rl->r_cnt--; if (rl->r_cnt == 0) { avl_remove(tree, rl); - if (rl->r_write_wanted) { + + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + taskq_dispatch(system_taskq, + zfs_range_free, rl, 0); } } + + mutex_exit(&zp->z_range_lock); + kmem_free(remove, sizeof (rl_t)); } - kmem_free(remove, sizeof (rl_t)); } /* @@ -537,22 +553,19 @@ zfs_range_unlock(rl_t *rl) /* writer locks can't be shared or split */ avl_remove(&zp->z_range_avl, rl); mutex_exit(&zp->z_range_lock); - if (rl->r_write_wanted) { + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + taskq_dispatch(system_taskq, zfs_range_free, rl, 0); } else { /* * lock may be shared, let zfs_range_unlock_reader() - * release the lock and free the rl_t + * release the zp->z_range_lock lock and free the rl_t */ zfs_range_unlock_reader(zp, rl); - mutex_exit(&zp->z_range_lock); } } |