diff options
author | John Poduska <[email protected]> | 2020-03-12 13:25:56 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2020-03-12 10:25:56 -0700 |
commit | e6b28efccc4853b4f2715c2d37ab05cead2e1c3e (patch) | |
tree | 106de4649fcdb96798b03a92309b1e9ffa9540ef | |
parent | 1e9231ada893d43bdbfa8ef3a11f1e423296981d (diff) |
Prevent race condition in dnode_dest (#10101)
dnode_special_close() waits for the refcount of dn_holds to go to zero
without holding the dn_mtx. dnode_rele_and_unlock() does the final
remove to dn_holds with dn_mtx being held:
refs = zfs_refcount_remove(&dn->dn_holds, tag);
mutex_exit(&dn->dn_mtx);
So, there is a race condition after the remove until dn_mtx is
dropped. During that time, dnode_destroy() can get called, which ends
up in dnode_dest() calling mutex_destroy() and a panic since the lock
is still held.
This change adds a condvar to wait for the final dnode_rele_and_unlock()
to release the dn_mtx before calling dnode_destroy().
Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matthew Ahrens <[email protected]>
Signed-off-by: John Poduska <[email protected]>
Closes #7814
Closes #10101
-rw-r--r-- | include/sys/dmu_impl.h | 1 | ||||
-rw-r--r-- | include/sys/dnode.h | 1 | ||||
-rw-r--r-- | module/zfs/dnode.c | 19 |
3 files changed, 15 insertions, 6 deletions
diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 8d0b96084..0c6273a3a 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -164,6 +164,7 @@ extern "C" { * dn_dirty_txg * dd_assigned_tx * dn_notxholds + * dn_nodnholds * dn_dirtyctx * dn_dirtyctx_firstset * (dn_phys copy fields?) diff --git a/include/sys/dnode.h b/include/sys/dnode.h index 3ea7aeb7f..14821bab2 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -332,6 +332,7 @@ struct dnode { uint64_t dn_assigned_txg; uint64_t dn_dirty_txg; /* txg dnode was last dirtied */ kcondvar_t dn_notxholds; + kcondvar_t dn_nodnholds; enum dnode_dirtycontext dn_dirtyctx; void *dn_dirtyctx_firstset; /* dbg: contents meaningless */ diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 5a3de232c..138f50630 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -119,6 +119,7 @@ dnode_cons(void *arg, void *unused, int kmflag) mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); + cv_init(&dn->dn_nodnholds, NULL, CV_DEFAULT, NULL); /* * Every dbuf has a reference, and dropping a tracked reference is @@ -183,6 +184,7 @@ dnode_dest(void *arg, void *unused) mutex_destroy(&dn->dn_mtx); mutex_destroy(&dn->dn_dbufs_mtx); cv_destroy(&dn->dn_notxholds); + cv_destroy(&dn->dn_nodnholds); zfs_refcount_destroy(&dn->dn_holds); zfs_refcount_destroy(&dn->dn_tx_holds); ASSERT(!list_link_active(&dn->dn_link)); @@ -1171,13 +1173,15 @@ dnode_special_close(dnode_handle_t *dnh) dnode_t *dn = dnh->dnh_dnode; /* - * Wait for final references to the dnode to clear. This can - * only happen if the arc is asynchronously evicting state that - * has a hold on this dnode while we are trying to evict this - * dnode. + * Ensure dnode_rele_and_unlock() has released dn_mtx, after final + * zfs_refcount_remove() */ - while (zfs_refcount_count(&dn->dn_holds) > 0) - delay(1); + mutex_enter(&dn->dn_mtx); + if (zfs_refcount_count(&dn->dn_holds) > 0) + cv_wait(&dn->dn_nodnholds, &dn->dn_mtx); + mutex_exit(&dn->dn_mtx); + ASSERT3U(zfs_refcount_count(&dn->dn_holds), ==, 0); + ASSERT(dn->dn_dbuf == NULL || dmu_buf_get_user(&dn->dn_dbuf->db) == NULL); zrl_add(&dnh->dnh_zrlock); @@ -1606,7 +1610,10 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) dnode_handle_t *dnh = dn->dn_handle; refs = zfs_refcount_remove(&dn->dn_holds, tag); + if (refs == 0) + cv_broadcast(&dn->dn_nodnholds); mutex_exit(&dn->dn_mtx); + /* dnode could get destroyed at this point, so don't use it anymore */ /* * It's unsafe to release the last hold on a dnode by dnode_rele() or |