diff options
author | Adam Leventhal <ahl@delphix.com> | 2013-08-28 16:05:48 -0700 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2013-11-04 10:55:25 -0800 |
commit | 63fd3c6cfd264cab94dc186fe8cceecac8bc0d50 (patch) | |
tree | 2b4c580be7c82a4c5160ef99c0020c4752ba9eff /module | |
parent | c1fabe7961b100a7dfd77cddba1650d9a6580dc0 (diff) |
Illumos #3582, #3584
3582 zfs_delay() should support a variable resolution
3584 DTrace sdt probes for ZFS txg states
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@nexenta.com>
Reviewed by: Richard Elling <richard.elling@dey-sys.com>
Approved by: Garrett D'Amore <garrett@damore.org>
References:
https://www.illumos.org/issues/3582
illumos/illumos-gate@0689f76
Ported by: Ned Bass <bass6@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dsl_dir.c | 3 | ||||
-rw-r--r-- | module/zfs/dsl_pool.c | 18 | ||||
-rw-r--r-- | module/zfs/dsl_scan.c | 8 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 4 | ||||
-rw-r--r-- | module/zfs/txg.c | 27 |
5 files changed, 37 insertions, 23 deletions
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 572086b93..51a78f902 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -743,7 +743,8 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); } else { if (err == EAGAIN) { - txg_delay(dd->dd_pool, tx->tx_txg, 1); + txg_delay(dd->dd_pool, tx->tx_txg, + MSEC2NSEC(10), MSEC2NSEC(10)); err = SET_ERROR(ERESTART); } dsl_pool_memory_pressure(dd->dd_pool); diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index c28ed3e03..950738e98 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -58,6 +58,9 @@ kmutex_t zfs_write_limit_lock; static pgcnt_t old_physmem = 0; +hrtime_t zfs_throttle_delay = MSEC2NSEC(10); +hrtime_t zfs_throttle_resolution = MSEC2NSEC(10); + int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { @@ -512,12 +515,13 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) * Weight the throughput calculation towards the current value: * thru = 3/4 old_thru + 1/4 new_thru * - * Note: write_time is in nanosecs, so write_time/MICROSEC - * yields millisecs + * Note: write_time is in nanosecs while dp_throughput is expressed in + * bytes per millisecond. */ ASSERT(zfs_write_limit_min > 0); - if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) { - uint64_t throughput = data_written / (write_time / MICROSEC); + if (data_written > zfs_write_limit_min / 8 && + write_time > MSEC2NSEC(1)) { + uint64_t throughput = data_written / NSEC2MSEC(write_time); if (dp->dp_throughput) dp->dp_throughput = throughput / 4 + @@ -617,8 +621,10 @@ dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx) * the caller 1 clock tick. This will slow down the "fill" * rate until the sync process can catch up with us. */ - if (reserved && reserved > (write_limit - (write_limit >> 3))) - txg_delay(dp, tx->tx_txg, 1); + if (reserved && reserved > (write_limit - (write_limit >> 3))) { + txg_delay(dp, tx->tx_txg, zfs_throttle_delay, + zfs_throttle_resolution); + } return (0); } diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index de82ab1ce..6cc08ab47 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -409,7 +409,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb) zfs_resilver_min_time_ms : zfs_scan_min_time_ms; elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || - (elapsed_nanosecs / MICROSEC > mintime && + (NSEC2MSEC(elapsed_nanosecs) > mintime && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)) { if (zb) { @@ -1335,7 +1335,7 @@ dsl_scan_free_should_pause(dsl_scan_t *scn) elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time; return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout || - (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms && + (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms && txg_sync_waiting(scn->scn_dp)) || spa_shutting_down(scn->scn_dp->dp_spa)); } @@ -1459,7 +1459,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) "free_bpobj/bptree txg %llu", (longlong_t)scn->scn_visited_this_txg, (longlong_t) - (gethrtime() - scn->scn_sync_start_time) / MICROSEC, + NSEC2MSEC(gethrtime() - scn->scn_sync_start_time), (longlong_t)tx->tx_txg); scn->scn_visited_this_txg = 0; /* @@ -1507,7 +1507,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) zfs_dbgmsg("visited %llu blocks in %llums", (longlong_t)scn->scn_visited_this_txg, - (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC); + (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); if (!scn->scn_pausing) { /* finished with scan. */ diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 5b2784125..ffba5028f 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -490,8 +490,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; - spa->spa_deadman_synctime = zfs_deadman_synctime * - zfs_txg_synctime_ms * MICROSEC; + spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime * + zfs_txg_synctime_ms); refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); diff --git a/module/zfs/txg.c b/module/zfs/txg.c index c0c0b295a..c8a29e14f 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -236,7 +236,7 @@ txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp) } static void -txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time) +txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time) { CALLB_CPR_SAFE_BEGIN(cpr); @@ -373,6 +373,9 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg) spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, gethrtime()); spa_txg_history_add(dp->dp_spa, tx->tx_open_txg); + DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg); + DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg); + /* * Now that we've incremented tx_open_txg, we can let threads * enter the next transaction group. @@ -531,6 +534,7 @@ txg_sync_thread(dsl_pool_t *dp) txg = tx->tx_quiesced_txg; tx->tx_quiesced_txg = 0; tx->tx_syncing_txg = txg; + DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_quiesce_more_cv); dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", @@ -544,6 +548,7 @@ txg_sync_thread(dsl_pool_t *dp) mutex_enter(&tx->tx_sync_lock); tx->tx_synced_txg = txg; tx->tx_syncing_txg = 0; + DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_sync_done_cv); /* @@ -602,21 +607,22 @@ txg_quiesce_thread(dsl_pool_t *dp) */ dprintf("quiesce done, handing off txg %llu\n", txg); tx->tx_quiesced_txg = txg; + DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_sync_more_cv); cv_broadcast(&tx->tx_quiesce_done_cv); } } /* - * Delay this thread by 'ticks' if we are still in the open transaction - * group and there is already a waiting txg quiesing or quiesced. Abort - * the delay if this txg stalls or enters the quiesing state. + * Delay this thread by delay nanoseconds if we are still in the open + * transaction group and there is already a waiting txg quiesing or quiesced. + * Abort the delay if this txg stalls or enters the quiesing state. */ void -txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) +txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution) { tx_state_t *tx = &dp->dp_tx; - clock_t timeout = ddi_get_lbolt() + ticks; + hrtime_t start = gethrtime(); /* don't delay if this txg could transition to quiesing immediately */ if (tx->tx_open_txg > txg || @@ -629,10 +635,11 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks) return; } - while (ddi_get_lbolt() < timeout && - tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) - (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock, - timeout); + while (gethrtime() - start < delay && + tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) { + (void) cv_timedwait_hires(&tx->tx_quiesce_more_cv, + &tx->tx_sync_lock, delay, resolution, 0); + } DMU_TX_STAT_BUMP(dmu_tx_delay); |