summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: Tony Nguyen <[email protected]> 2019-08-28 15:56:54 -0600
committer: Brian Behlendorf <[email protected]> 2019-08-28 14:56:54 -0700
commit: 8d042842815f33d2e4ab919a695139b11b7ed0c2 (patch)
tree: 556fe0036452cb823a06856bab1a7bfd61f0bc8d /module
parent: 07a328dde4937a49aa975e8dffea2e6f8754a097 (diff)
Use smaller default slack/delta value for schedule_hrtimeout_range()
For interrupt coalescing, cv_timedwait_hires() uses a 100us slack/delta for calls to schedule_hrtimeout_range(). This 100us slack can be costly for small writes.

This change improves small write performance by passing the resolution parameter `res` to schedule_hrtimeout_range() to be used as the delta/slack. A new tunable, `spl_schedule_hrtimeout_slack_us`, is added to preserve the old behavior when desired.

Performance observations on an 8K recordsize filesystem:
- 8K random writes at 1-64 threads: up to 60% improvement for one thread, with smaller gains as thread count increases. At >64 threads, a 2-5% decrease in performance was observed.
- 8K sequential writes: a similar 60% improvement for one thread, leveling out around 64 threads. At >64 threads, a 5-10% decrease in performance was observed.
- 128K sequential writes: a 1-5% improvement. No regression was observed at high thread count.

Testing was done on Ubuntu 18.04 with a 4.15 kernel, 8 vCPUs, and SSD storage on VMware ESX.

Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Closes #9217
Diffstat (limited to 'module')
-rw-r--r-- module/spl/spl-condvar.c 54
-rw-r--r-- module/zfs/mmp.c 2
2 files changed, 38 insertions, 18 deletions
diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c
index 19c575f77..664fae1e7 100644
--- a/module/spl/spl-condvar.c
+++ b/module/spl/spl-condvar.c
@@ -26,8 +26,10 @@
#include <sys/condvar.h>
#include <sys/time.h>
+#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
+#include <linux/mod_compat.h>
#include <linux/sched.h>
@@ -35,6 +37,34 @@
#include <linux/sched/signal.h>
#endif
+#define MAX_HRTIMEOUT_SLACK_US 1000
+unsigned int spl_schedule_hrtimeout_slack_us = 0;
+
+static int
+param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
+{
+ unsigned long val;
+ int error;
+
+ error = kstrtoul(buf, 0, &val);
+ if (error)
+ return (error);
+
+ if (val > MAX_HRTIMEOUT_SLACK_US)
+ return (-EINVAL);
+
+ error = param_set_uint(buf, kp);
+ if (error < 0)
+ return (error);
+
+ return (0);
+}
+
+module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
+ param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
+MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
+ "schedule_hrtimeout_range() delta/slack value in us, default(0)");
+
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
@@ -304,12 +334,13 @@ EXPORT_SYMBOL(__cv_timedwait_sig);
*/
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
- int state)
+ hrtime_t res, int state)
{
DEFINE_WAIT(wait);
kmutex_t *m;
hrtime_t time_left;
ktime_t ktime_left;
+ u64 slack = 0;
ASSERT(cvp);
ASSERT(mp);
@@ -336,13 +367,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
* race where 'cvp->cv_waiters > 0' but the list is empty.
*/
mutex_exit(mp);
- /*
- * Allow a 100 us range to give kernel an opportunity to coalesce
- * interrupts
- */
+
ktime_left = ktime_set(0, time_left);
- schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC,
- HRTIMER_MODE_REL);
+ slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
+ MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
+ schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
@@ -369,19 +398,10 @@ static clock_t
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag, int state)
{
- if (res > 1) {
- /*
- * Align expiration to the specified resolution.
- */
- if (flag & CALLOUT_FLAG_ROUNDUP)
- tim += res - 1;
- tim = (tim / res) * res;
- }
-
if (!(flag & CALLOUT_FLAG_ABSOLUTE))
tim += gethrtime();
- return (__cv_timedwait_hires(cvp, mp, tim, state));
+ return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}
clock_t
diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
index cd5603a1a..1ffd862da 100644
--- a/module/zfs/mmp.c
+++ b/module/zfs/mmp.c
@@ -672,7 +672,7 @@ mmp_thread(void *arg)
CALLB_CPR_SAFE_BEGIN(&cpr);
(void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv,
- &mmp->mmp_thread_lock, next_time, USEC2NSEC(1),
+ &mmp->mmp_thread_lock, next_time, USEC2NSEC(100),
CALLOUT_FLAG_ABSOLUTE);
CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
}