diff options
author | George Wilson <[email protected]> | 2014-06-12 15:29:11 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-07-22 09:41:32 -0700 |
commit | 080b3100155c472f8c8a380db2458bf70bd13279 (patch) | |
tree | c5ee4ad94fd156b7db4440a8a90b8ad3d158459c | |
parent | 3c51c5cb1f1c6097ecbfd18aa1177ba0f67c215e (diff) |
Illumos #4756 Fix metaslab_group_preload deadlock
4756 metaslab_group_preload() could deadlock
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Reviewed by: Saso Kiselkov <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
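The metaslab_group_preload() function grabs the metaslab group lock
(mg_lock) and then later tries to grab the metaslab lock (ms_lock).
This lock ordering may lead to a deadlock, since other consumers that
need both locks grab the ms_lock first and the mg_lock second.
The fix drops the mg_lock around each taskq_dispatch() call in
metaslab_group_preload() and asserts in metaslab_preload() that the
mg_lock is not held on entry.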
References:
https://www.illumos.org/issues/4756
https://github.com/illumos/illumos-gate/commit/30beaff
Ported-by: Prakash Surya <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2488
-rw-r--r-- | cmd/ztest/ztest.c | 4 |
-rw-r--r-- | module/zfs/metaslab.c | 25 |
2 files changed, 25 insertions, 4 deletions
```diff
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 05c5dbe2c..0a0fa7f49 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
@@ -187,6 +187,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
+extern int metaslab_preload_limit;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -5734,6 +5735,7 @@ ztest_run(ztest_shared_t *zs)
 	kernel_init(FREAD | FWRITE);
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
+	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
 	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 46543a52b..6d6ff0914 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -1242,6 +1242,8 @@ metaslab_preload(void *arg)
 	metaslab_t *msp = arg;
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 
+	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+
 	mutex_enter(&msp->ms_lock);
 	metaslab_load_wait(msp);
 	if (!msp->ms_loaded)
@@ -1266,19 +1268,36 @@ metaslab_group_preload(metaslab_group_t *mg)
 		taskq_wait(mg->mg_taskq);
 		return;
 	}
-	mutex_enter(&mg->mg_lock);
 
+	mutex_enter(&mg->mg_lock);
 	/*
-	 * Prefetch the next potential metaslabs
+	 * Load the next potential metaslabs
 	 */
-	for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
+	msp = avl_first(t);
+	while (msp != NULL) {
+		metaslab_t *msp_next = AVL_NEXT(t, msp);
 
 		/* If we have reached our preload limit then we're done */
 		if (++m > metaslab_preload_limit)
 			break;
 
+		/*
+		 * We must drop the metaslab group lock here to preserve
+		 * lock ordering with the ms_lock (when grabbing both
+		 * the mg_lock and the ms_lock, the ms_lock must be taken
+		 * first). As a result, it is possible that the ordering
+		 * of the metaslabs within the avl tree may change before
+		 * we reacquire the lock. The metaslab cannot be removed from
+		 * the tree while we're in syncing context so it is safe to
+		 * drop the mg_lock here. If the metaslabs are reordered
+		 * nothing will break -- we just may end up loading a
+		 * less than optimal one.
+		 */
+		mutex_exit(&mg->mg_lock);
 		VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
 		    msp, TQ_PUSHPAGE) != 0);
+		mutex_enter(&mg->mg_lock);
+		msp = msp_next;
 	}
 	mutex_exit(&mg->mg_lock);
 }
```