summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Brian Behlendorf <[email protected]> 2013-04-25 16:29:22 -0700
committer: Brian Behlendorf <[email protected]> 2013-04-26 14:42:36 -0700
commit: 57f5a2008e2e6acf58934cf43c5fdca0faffa73e (patch)
tree: 6097764cffc614760d918d844479d85414c84075
parent: 0c15bf16f1050e710cad1783d15eea63cca792c0 (diff)
Fix txg_quiesce thread deadlock
Commit e95853a accidentally introduced a deadlock which can occur when the system is under memory pressure. What happens is that while the txg_quiesce thread is holding the tx->tx_cpu locks, it enters memory reclaim. In the context of this memory reclaim it then issues synchronous I/O to a ZVOL swap device. Because the txg_quiesce thread is holding the tx->tx_cpu locks, a new txg cannot be opened to handle the I/O. Deadlock.

The fix is straightforward. Move the memory allocation outside the critical region where the tx->tx_cpu locks are held. And, for good measure, change the offending allocation to KM_PUSHPAGE to ensure it never attempts to issue I/O during reclaim.

Signed-off-by: Brian Behlendorf <[email protected]>
Issue #1274
-rw-r--r--  module/zfs/dsl_pool.c  2
-rw-r--r--  module/zfs/txg.c  14
2 files changed, 8 insertions, 8 deletions
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 704f034e9..771b265c2 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -143,7 +143,7 @@ dsl_pool_txg_history_add(dsl_pool_t *dp, uint64_t txg)
{
txg_history_t *th, *rm;
- th = kmem_zalloc(sizeof(txg_history_t), KM_SLEEP);
+ th = kmem_zalloc(sizeof(txg_history_t), KM_PUSHPAGE);
mutex_init(&th->th_lock, NULL, MUTEX_DEFAULT, NULL);
th->th_kstat.txg = txg;
th->th_kstat.state = TXG_STATE_OPEN;
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index c7c3df3f8..7c820af4f 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -367,6 +367,13 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
tx->tx_open_txg++;
/*
+ * Now that we've incremented tx_open_txg, we can let threads
+ * enter the next transaction group.
+ */
+ for (c = 0; c < max_ncpus; c++)
+ mutex_exit(&tx->tx_cpu[c].tc_lock);
+
+ /*
* Measure how long the txg was open and replace the kstat.
*/
th = dsl_pool_txg_history_get(dp, txg);
@@ -376,13 +383,6 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
dsl_pool_txg_history_add(dp, tx->tx_open_txg);
/*
- * Now that we've incremented tx_open_txg, we can let threads
- * enter the next transaction group.
- */
- for (c = 0; c < max_ncpus; c++)
- mutex_exit(&tx->tx_cpu[c].tc_lock);
-
- /*
* Quiesce the transaction group by waiting for everyone to txg_exit().
*/
start = gethrtime();