author     George Wilson <[email protected]>        2013-04-23 09:31:42 -0800
committer  Brian Behlendorf <[email protected]>    2013-11-01 08:55:12 -0700
commit     2696dfafd9ebce5e3aa227c630b13f2c5b26bce9
tree       9c1bd42bcdb8da5e7c20759e9f518092ff50330d /module
parent     7ec09286b761ee1fb85178ff55daaf8f74d935be
Illumos #3642, #3643
3642 dsl_scan_active() should not issue I/O to determine if async
destroying is active
3643 txg_delay should not hold the tc_lock
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Adam Leventhal <[email protected]>
Approved by: Gordon Ross <[email protected]>
References:
https://www.illumos.org/issues/3642
https://www.illumos.org/issues/3643
illumos/illumos-gate@4a92375985c37d61406d66cd2b10ee642eb1f5e7
Ported-by: Richard Yao <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #1775
Porting Notes:
1. The alignment assumptions for the tx_cpu structure assume that
   a kmutex_t is 8 bytes. This isn't true under Linux, but tc_pad[]
   was adjusted anyway for consistency, since this structure was
   never carefully aligned in ZoL. If careful alignment turns out to
   significantly impact performance, this should be reworked to be
   portable.
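
For reference, the intent behind tc_pad[] can be expressed portably by deriving the padding from the member sizes instead of assuming an 8-byte kmutex_t. The sketch below is illustrative only: the member list is abbreviated, CACHE_LINE_SIZE is an assumed constant, and kmutex_t is a stand-in type, so this is not the layout ZoL actually ships.

/*
 * Illustrative sketch: pad a per-CPU structure out to a cache-line
 * boundary without hard-coding the size of kmutex_t.  The member list
 * is abbreviated and CACHE_LINE_SIZE is an assumption for this sketch;
 * the real tx_cpu_t in txg_impl.h has more fields.
 */
#include <stddef.h>
#include <stdint.h>

#define	CACHE_LINE_SIZE	64	/* assumed cache-line size */

typedef struct kmutex { long km_opaque[4]; } kmutex_t;	/* stand-in type */

typedef struct tx_cpu_sketch {
	kmutex_t	tc_open_lock;	/* serializes open-phase holds */
	kmutex_t	tc_lock;	/* protects tc_count[] */
	uint64_t	tc_count[4];	/* holds per txg slot (TXG_SIZE) */
	/* pad the tail so consecutive array elements start on fresh lines */
	char		tc_pad[CACHE_LINE_SIZE -
			    (2 * sizeof (kmutex_t) + 4 * sizeof (uint64_t)) %
			    CACHE_LINE_SIZE];
} tx_cpu_sketch_t;

/* Verify the intent at compile time instead of trusting a hand count. */
_Static_assert(sizeof (tx_cpu_sketch_t) % CACHE_LINE_SIZE == 0,
    "tx_cpu_sketch_t does not end on a cache-line boundary");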
Diffstat (limited to 'module')
-rw-r--r--  module/zfs/dsl_destroy.c |  4
-rw-r--r--  module/zfs/dsl_scan.c    | 18
-rw-r--r--  module/zfs/txg.c         | 18
3 files changed, 29 insertions, 11 deletions
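
The dsl_scan.c hunks below replace a check of the on-disk async_destroy feature state, which could require reading pool metadata, with an in-core flag (scn_async_destroying) that is seeded at scan init, set when a destroy goes async, and cleared once the bptree has been freed. A minimal model of that pattern, using hypothetical pool_async_destroy_* names rather than the real ZFS types, looks like this:

/*
 * Minimal model of the scn_async_destroying pattern: cache "is async
 * destroy in progress?" in memory so the hot-path query never touches
 * disk.  The pool_t type and pool_async_destroy_* names are invented
 * for this sketch; the real code lives in dsl_scan.c / dsl_destroy.c.
 */
#include <stdbool.h>

typedef struct pool {
	bool	p_async_destroying;	/* in-core mirror of on-disk state */
} pool_t;

/* At import/init time, seed the flag from the persisted feature state. */
void
pool_async_destroy_init(pool_t *p, bool feature_active_on_disk)
{
	p->p_async_destroying = feature_active_on_disk;
}

/* When a dataset destroy switches to async mode. */
void
pool_async_destroy_start(pool_t *p)
{
	p->p_async_destroying = true;
}

/* When the last queued blocks have been freed and the bptree is gone. */
void
pool_async_destroy_done(pool_t *p)
{
	p->p_async_destroying = false;
}

/*
 * Hot-path query, analogous to dsl_scan_active(): a plain memory read,
 * with no feature-flag or bpobj I/O needed to answer the common case.
 */
bool
pool_async_destroy_active(const pool_t *p)
{
	return (p->p_async_destroying);
}

In the diff itself the flag lives in dsl_scan_t, is seeded in dsl_scan_init(), set in dsl_destroy_head_sync_impl(), and cleared in dsl_scan_sync(); those updates happen under the pool's scan/sync machinery, which is presumably why the diff adds no extra locking around the flag.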
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index 35e9244c8..eee0df106 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -761,12 +761,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
 		zil_destroy_sync(dmu_objset_zil(os), tx);
 
 		if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+			dsl_scan_t *scn = dp->dp_scan;
+
 			spa_feature_incr(dp->dp_spa, async_destroy, tx);
 			dp->dp_bptree_obj = bptree_alloc(mos, tx);
 			VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
 			    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 			    &dp->dp_bptree_obj, tx));
+			ASSERT(!scn->scn_async_destroying);
+			scn->scn_async_destroying = B_TRUE;
 		}
 
 		used = ds->ds_dir->dd_phys->dd_used_bytes;
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index aa861a592..de82ab1ce 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -91,6 +91,15 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 	scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP);
 	scn->scn_dp = dp;
 
+	/*
+	 * It's possible that we're resuming a scan after a reboot so
+	 * make sure that the scan_async_destroying flag is initialized
+	 * appropriately.
+	 */
+	ASSERT(!scn->scn_async_destroying);
+	scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
+
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    "scrub_func", sizeof (uint64_t), 1, &f);
 	if (err == 0) {
@@ -1362,13 +1371,10 @@ dsl_scan_active(dsl_scan_t *scn)
 	if (spa_shutting_down(spa))
 		return (B_FALSE);
 
-	if (scn->scn_phys.scn_state == DSS_SCANNING)
+	if (scn->scn_phys.scn_state == DSS_SCANNING ||
+	    scn->scn_async_destroying)
 		return (B_TRUE);
 
-	if (spa_feature_is_active(spa,
-	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
-		return (B_TRUE);
-	}
 	if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
 		    &used, &comp, &uncomp);
@@ -1424,6 +1430,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 
 	if (err == 0 && spa_feature_is_active(spa,
 	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+		ASSERT(scn->scn_async_destroying);
 		scn->scn_is_bptree = B_TRUE;
 		scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
 		    NULL, ZIO_FLAG_MUSTSUCCEED);
@@ -1444,6 +1451,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 			VERIFY0(bptree_free(dp->dp_meta_objset,
 			    dp->dp_bptree_obj, tx));
 			dp->dp_bptree_obj = 0;
+			scn->scn_async_destroying = B_FALSE;
 		}
 	}
 	if (scn->scn_visited_this_txg) {
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index 7a3da8647..c0c0b295a 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -127,6 +127,8 @@ txg_init(dsl_pool_t *dp, uint64_t txg)
 		int i;
 
 		mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
+		mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_DEFAULT,
+		    NULL);
 		for (i = 0; i < TXG_SIZE; i++) {
 			cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
 			    NULL);
@@ -169,6 +171,7 @@ txg_fini(dsl_pool_t *dp)
 	for (c = 0; c < max_ncpus; c++) {
 		int i;
 
+		mutex_destroy(&tx->tx_cpu[c].tc_open_lock);
 		mutex_destroy(&tx->tx_cpu[c].tc_lock);
 		for (i = 0; i < TXG_SIZE; i++) {
 			cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
@@ -303,10 +306,12 @@ txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
 	tc = &tx->tx_cpu[CPU_SEQID];
 	kpreempt_enable();
 
-	mutex_enter(&tc->tc_lock);
-
+	mutex_enter(&tc->tc_open_lock);
 	txg = tx->tx_open_txg;
+
+	mutex_enter(&tc->tc_lock);
 	tc->tc_count[txg & TXG_MASK]++;
+	mutex_exit(&tc->tc_lock);
 	th->th_cpu = tc;
 	th->th_txg = txg;
 
@@ -319,7 +324,8 @@ txg_rele_to_quiesce(txg_handle_t *th)
 {
 	tx_cpu_t *tc = th->th_cpu;
 
-	mutex_exit(&tc->tc_lock);
+	ASSERT(!MUTEX_HELD(&tc->tc_lock));
+	mutex_exit(&tc->tc_open_lock);
 }
 
 void
@@ -356,10 +362,10 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	int c;
 
 	/*
-	 * Grab all tx_cpu locks so nobody else can get into this txg.
+	 * Grab all tc_open_locks so nobody else can get into this txg.
 	 */
 	for (c = 0; c < max_ncpus; c++)
-		mutex_enter(&tx->tx_cpu[c].tc_lock);
+		mutex_enter(&tx->tx_cpu[c].tc_open_lock);
 
 	ASSERT(txg == tx->tx_open_txg);
 	tx->tx_open_txg++;
@@ -372,7 +378,7 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
 	 * enter the next transaction group.
 	 */
 	for (c = 0; c < max_ncpus; c++)
-		mutex_exit(&tx->tx_cpu[c].tc_lock);
+		mutex_exit(&tx->tx_cpu[c].tc_open_lock);
 
 	/*
 	 * Quiesce the transaction group by waiting for everyone to txg_exit().
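
To make the txg.c changes easier to follow, here is a stripped-down pthreads model of the split: tc_open_lock is taken in txg_hold_open() and held until txg_rele_to_quiesce(), which is what txg_quiesce() relies on to close the open txg to new holds, while tc_lock now protects only the tc_count[] bookkeeping. Everything prefixed with m_ is invented for this sketch; only the protocol mirrors the diff.

/*
 * Stripped-down model of the tc_lock / tc_open_lock split in txg.c.
 * All m_* names are invented for this sketch; mutex/condvar
 * initialization (pthread_mutex_init() etc.) is omitted for brevity.
 */
#include <pthread.h>
#include <stdint.h>

#define	M_TXG_SIZE	4
#define	M_TXG_MASK	(M_TXG_SIZE - 1)

typedef struct m_tx_cpu {
	pthread_mutex_t	mc_open_lock;	/* held while a hold is being opened */
	pthread_mutex_t	mc_lock;	/* protects mc_count[] only */
	pthread_cond_t	mc_cv[M_TXG_SIZE];
	int		mc_count[M_TXG_SIZE];
} m_tx_cpu_t;

static uint64_t m_open_txg;		/* analogue of tx->tx_open_txg */

/* txg_hold_open(): block the quiescer, then bump this txg's hold count. */
static uint64_t
m_hold_open(m_tx_cpu_t *tc)
{
	uint64_t txg;

	pthread_mutex_lock(&tc->mc_open_lock);	/* kept held until rele */
	txg = m_open_txg;			/* stable while open_lock held */

	pthread_mutex_lock(&tc->mc_lock);
	tc->mc_count[txg & M_TXG_MASK]++;
	pthread_mutex_unlock(&tc->mc_lock);	/* count lock dropped at once */

	return (txg);
}

/* txg_rele_to_quiesce(): assignment finished, let the quiescer proceed. */
static void
m_rele_to_quiesce(m_tx_cpu_t *tc)
{
	pthread_mutex_unlock(&tc->mc_open_lock);
}

/* txg_exit()-style release: drop the hold count and wake the quiescer. */
static void
m_exit(m_tx_cpu_t *tc, uint64_t txg)
{
	pthread_mutex_lock(&tc->mc_lock);
	if (--tc->mc_count[txg & M_TXG_MASK] == 0)
		pthread_cond_broadcast(&tc->mc_cv[txg & M_TXG_MASK]);
	pthread_mutex_unlock(&tc->mc_lock);
}

/* txg_quiesce(): close the txg to new holds, then wait for it to drain. */
static void
m_quiesce(m_tx_cpu_t *cpus, int ncpus, uint64_t txg)
{
	int c;

	for (c = 0; c < ncpus; c++)		/* grab all open locks */
		pthread_mutex_lock(&cpus[c].mc_open_lock);
	m_open_txg++;				/* nobody can enter txg now */
	for (c = 0; c < ncpus; c++)
		pthread_mutex_unlock(&cpus[c].mc_open_lock);

	for (c = 0; c < ncpus; c++) {		/* wait for holders to exit */
		m_tx_cpu_t *tc = &cpus[c];

		pthread_mutex_lock(&tc->mc_lock);
		while (tc->mc_count[txg & M_TXG_MASK] != 0)
			pthread_cond_wait(&tc->mc_cv[txg & M_TXG_MASK],
			    &tc->mc_lock);
		pthread_mutex_unlock(&tc->mc_lock);
	}
}

With this split, a thread that only needs to update the hold counters takes the cheap tc_lock briefly, while the open/quiesce handshake is carried entirely by tc_open_lock, so the two paths no longer contend with each other.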