diff options
author | Tony Nguyen <[email protected]> | 2019-08-28 15:56:54 -0600 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-08-28 14:56:54 -0700 |
commit | 8d042842815f33d2e4ab919a695139b11b7ed0c2 (patch) | |
tree | 556fe0036452cb823a06856bab1a7bfd61f0bc8d /module | |
parent | 07a328dde4937a49aa975e8dffea2e6f8754a097 (diff) |
Use smaller default slack/delta value for schedule_hrtimeout_range()
For interrupt coalescing, cv_timedwait_hires() uses a 100us slack/delta
for calls to schedule_hrtimeout_range(). This 100us slack can be costly
for small writes.
This change improves small write performance by passing resolution `res`
parameter to schedule_hrtimeout_range() to be used as delta/slack. A new
tunable `spl_schedule_hrtimeout_slack_us` is added to preserve old
behavior when desired.
Performance observations on 8K recordsize filesystem:
- 8K random writes at 1-64 threads, up to 60% improvement for one thread
and smaller gains as thread count increases. At >64 threads, 2-5%
decrease in performance was observed.
- 8K sequential writes, similar 60% improvement for one thread and
leveling out around 64 threads. At >64 threads, 5-10% decrease in
performance was observed.
- 128K sequential write sees 1-5 for the 128K. No observed regression at
high thread count.
Testing done on Ubuntu 18.04 with 4.15 kernel, 8vCPUs and SSD storage on
VMware ESX.
Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Closes #9217
Diffstat (limited to 'module')
-rw-r--r-- | module/spl/spl-condvar.c | 54 | ||||
-rw-r--r-- | module/zfs/mmp.c | 2 |
2 files changed, 38 insertions, 18 deletions
diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 19c575f77..664fae1e7 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -26,8 +26,10 @@ #include <sys/condvar.h> #include <sys/time.h> +#include <sys/sysmacros.h> #include <linux/hrtimer.h> #include <linux/compiler_compat.h> +#include <linux/mod_compat.h> #include <linux/sched.h> @@ -35,6 +37,34 @@ #include <linux/sched/signal.h> #endif +#define MAX_HRTIMEOUT_SLACK_US 1000 +unsigned int spl_schedule_hrtimeout_slack_us = 0; + +static int +param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp) +{ + unsigned long val; + int error; + + error = kstrtoul(buf, 0, &val); + if (error) + return (error); + + if (val > MAX_HRTIMEOUT_SLACK_US) + return (-EINVAL); + + error = param_set_uint(buf, kp); + if (error < 0) + return (error); + + return (0); +} + +module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack, + param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644); +MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us, + "schedule_hrtimeout_range() delta/slack value in us, default(0)"); + void __cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) { @@ -304,12 +334,13 @@ EXPORT_SYMBOL(__cv_timedwait_sig); */ static clock_t __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, - int state) + hrtime_t res, int state) { DEFINE_WAIT(wait); kmutex_t *m; hrtime_t time_left; ktime_t ktime_left; + u64 slack = 0; ASSERT(cvp); ASSERT(mp); @@ -336,13 +367,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, * race where 'cvp->cv_waiters > 0' but the list is empty. */ mutex_exit(mp); - /* - * Allow a 100 us range to give kernel an opportunity to coalesce - * interrupts - */ + ktime_left = ktime_set(0, time_left); - schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC, - HRTIMER_MODE_REL); + slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), + MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); + schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { @@ -369,19 +398,10 @@ static clock_t cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag, int state) { - if (res > 1) { - /* - * Align expiration to the specified resolution. - */ - if (flag & CALLOUT_FLAG_ROUNDUP) - tim += res - 1; - tim = (tim / res) * res; - } - if (!(flag & CALLOUT_FLAG_ABSOLUTE)) tim += gethrtime(); - return (__cv_timedwait_hires(cvp, mp, tim, state)); + return (__cv_timedwait_hires(cvp, mp, tim, res, state)); } clock_t diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index cd5603a1a..1ffd862da 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -672,7 +672,7 @@ mmp_thread(void *arg) CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv, - &mmp->mmp_thread_lock, next_time, USEC2NSEC(1), + &mmp->mmp_thread_lock, next_time, USEC2NSEC(100), CALLOUT_FLAG_ABSOLUTE); CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock); } |