Illumos #3642, #3643

3642 dsl_scan_active() should not issue I/O to determine if async destroying is active 3643 txg_delay should not hold the tc_lock Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: Adam Leventhal <[email protected]> Approved by: Gordon Ross <[email protected]> References: https://www.illumos.org/issues/3642 https://www.illumos.org/issues/3643 illumos/illumos-gate@4a92375985c37d61406d66cd2b10ee642eb1f5e7 Ported-by: Richard Yao <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Issue #1775 Porting Notes: 1. The alignment assumptions for the tx_cpu structure assume that a kmutex_t is 8 bytes. This isn't true under Linux but tc_pad[] was adjusted anyway for consistency since this structure was never carefully aligned in ZoL. If careful alignment does impact performance significantly this should be reworked to be portable.
author: George Wilson <[email protected]> 2013-04-23 09:31:42 -0800
committer: Brian Behlendorf <[email protected]> 2013-11-01 08:55:12 -0700
commit: 2696dfafd9ebce5e3aa227c630b13f2c5b26bce9 (patch)
tree: 9c1bd42bcdb8da5e7c20759e9f518092ff50330d /module/zfs
parent: 7ec09286b761ee1fb85178ff55daaf8f74d935be (diff)
3 files changed, 29 insertions, 11 deletions
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 35e9244c8..eee0df106 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -761,12 +761,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
 		zil_destroy_sync(dmu_objset_zil(os), tx);
 
 		if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+			dsl_scan_t *scn = dp->dp_scan;
+
 			spa_feature_incr(dp->dp_spa, async_destroy, tx);
 			dp->dp_bptree_obj = bptree_alloc(mos, tx);
 			VERIFY0(zap_add(mos,
 			    DMU_POOL_DIRECTORY_OBJECT,
 			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 			    &dp->dp_bptree_obj, tx));
+			ASSERT(!scn->scn_async_destroying);
+			scn->scn_async_destroying = B_TRUE;
 		}
 
 		used = ds->ds_dir->dd_phys->dd_used_bytes;
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index aa861a592..de82ab1ce 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -91,6 +91,15 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 	scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP);
 	scn->scn_dp = dp;
 
+	/*
+	 * It's possible that we're resuming a scan after a reboot so
+	 * make sure that the scan_async_destroying flag is initialized
+	 * appropriately.
+	 */
+	ASSERT(!scn->scn_async_destroying);
+	scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
+
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    "scrub_func", sizeof (uint64_t), 1, &f);
 	if (err == 0) {
@@ -1362,13 +1371,10 @@ dsl_scan_active(dsl_scan_t *scn)
 	if (spa_shutting_down(spa))
 		return (B_FALSE);
 
-	if (scn->scn_phys.scn_state == DSS_SCANNING)
+	if (scn->scn_phys.scn_state == DSS_SCANNING ||
+	    scn->scn_async_destroying)
 		return (B_TRUE);
 
-	if (spa_feature_is_active(spa,
-	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
-		return (B_TRUE);
-	}
 	if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
 		    &used, &comp, &uncomp);
@@ -1424,6 +1430,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 
 		if (err == 0 && spa_feature_is_active(spa,
 		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+			ASSERT(scn->scn_async_destroying);
 			scn->scn_is_bptree = B_TRUE;
 			scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
 			    NULL, ZIO_FLAG_MUSTSUCCEED);
@@ -1444,6 +1451,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 				VERIFY0(bptree_free(dp->dp_meta_objset,
 				    dp->dp_bptree_obj, tx));
 				dp->dp_bptree_obj = 0;
+				scn->scn_async_destroying = B_FALSE;
 			}
 		}
 		if (scn->scn_visited_this_txg) {
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index 7a3da8647..c0c0b295a 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -127,6 +127,8 @@ txg_init(dsl_pool_t *dp, uint64_t txg)
 		int i;
 
 		mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
+		mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_DEFAULT,
+		    NULL);
 		for (i = 0; i < TXG_SIZE; i++) {
 			cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
 			    NULL);
@@ -169,6 +171,7 @@ txg_fini(dsl_pool_t *dp)
 	for (c = 0; c < max_ncpus; c++) {
 		int i;
 
+		mutex_destroy(&tx->tx_cpu[c].tc_open_lock);
 		mutex_destroy(&tx->tx_cpu[c].tc_lock);
 		for (i = 0; i < TXG_SIZE; i++) {
 			cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
@@ -303,10 +306,12 @@ txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
 	tc = &tx->tx_cpu[CPU_SEQID];
 	kpreempt_enable();
 
-	mutex_enter(&tc->tc_lock);
-
+	mutex_enter(&tc->tc_open_lock);
 	txg = tx->tx_open_txg;
+
+	mutex_enter(&tc->tc_lock);
 	tc->tc_count[txg & TXG_MASK]++;
+	mutex_exit(&tc->tc_lock);
 
 	th->th_cpu = tc;
 	th->th_txg = txg;
@@ -319,7 +324,8 @@ txg_rele_to_quiesce(txg_handle_t *th)
 {
 	tx_cpu_t *tc = th->th_cpu;
 
-	mutex_exit(&tc->tc_lock);
+	ASSERT(!MUTEX_HELD(&tc->tc_lock));
+	mutex_exit(&tc->tc_open_lock);
 }
 
 void
@@ -356,10 +362,10 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	int c;
 
 	/*
-	 * Grab all tx_cpu locks so nobody else can get into this txg.
+	 * Grab all tc_open_locks so nobody else can get into this txg.
 	 */
 	for (c = 0; c < max_ncpus; c++)
-		mutex_enter(&tx->tx_cpu[c].tc_lock);
+		mutex_enter(&tx->tx_cpu[c].tc_open_lock);
 
 	ASSERT(txg == tx->tx_open_txg);
 	tx->tx_open_txg++;
@@ -372,7 +378,7 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	 * enter the next transaction group.
 	 */
 	for (c = 0; c < max_ncpus; c++)
-		mutex_exit(&tx->tx_cpu[c].tc_lock);
+		mutex_exit(&tx->tx_cpu[c].tc_open_lock);
 
 	/*
 	 * Quiesce the transaction group by waiting for everyone to txg_exit().
author	George Wilson <[email protected]>	2013-04-23 09:31:42 -0800
committer	Brian Behlendorf <[email protected]>	2013-11-01 08:55:12 -0700
commit	2696dfafd9ebce5e3aa227c630b13f2c5b26bce9 (patch)
tree	9c1bd42bcdb8da5e7c20759e9f518092ff50330d /module/zfs
parent	7ec09286b761ee1fb85178ff55daaf8f74d935be (diff)