diff options
author | Matthew Macy <[email protected]> | 2020-06-18 10:17:50 -0700 |
---|---|---|
committer | GitHub <[email protected]> | 2020-06-18 10:17:50 -0700 |
commit | 8056a75672a57c85b8e10c0c6bce138146f7d213 (patch) | |
tree | e1e3001b253620aae9f8f14d2eb49a880ceebac2 | |
parent | 7564073ed6344c12e6bc4ffabd130522d937fb93 (diff) |
Disambiguate condvar API contract
On Illumos callers of cv_timedwait and cv_timedwait_hires
can't distinguish between whether or not the cv was signaled
or the call timed out. Illumos handles this (for some definition
of handles) by calling cv_signal in the return path if we were
signaled but the return value indicates instead that we timed
out. This would make sense if it were possible to query the the
cv for its net signal disposition. However, this isn't possible
and, in spite of the fact that there are places in the code that
clearly take a different and incompatible path if a timeout value
is indicated, this distinction appears to be rather subtle to most
developers. This problem is further compounded by the fact that on
Linux, calling cv_signal in the return path wouldn't even do the
right thing unless there are other waiters.
Since it is possible for the caller to independently determine how
much time is remaining but it is not possible to query if the cv
was in fact signaled, prioritizing signalling over timeout seems
like a cleaner solution. In addition, judging from usage patterns
within the code itself, it is also less error prone.
Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Alexander Motin <[email protected]>
Signed-off-by: Matt Macy <[email protected]>
Closes #10471
-rw-r--r-- | include/os/freebsd/spl/sys/condvar.h | 36 | ||||
-rw-r--r-- | include/os/linux/spl/sys/condvar.h | 48 | ||||
-rw-r--r-- | include/sys/zfs_context.h | 4 | ||||
-rw-r--r-- | lib/libzpool/kernel.c | 4 | ||||
-rw-r--r-- | module/os/linux/spl/spl-condvar.c | 33 | ||||
-rw-r--r-- | module/zfs/zil.c | 4 |
6 files changed, 93 insertions, 36 deletions
diff --git a/include/os/freebsd/spl/sys/condvar.h b/include/os/freebsd/spl/sys/condvar.h index ff5f308e2..4375bd6a3 100644 --- a/include/os/freebsd/spl/sys/condvar.h +++ b/include/os/freebsd/spl/sys/condvar.h @@ -37,6 +37,31 @@ #include <sys/time.h> #include <sys/kmem.h> +/* + * cv_timedwait() is similar to cv_wait() except that it additionally expects + * a timeout value specified in ticks. When woken by cv_signal() or + * cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is + * returned. + * + * cv_timedwait_sig() behaves the same as cv_timedwait() but blocks + * interruptibly and can be woken by a signal (EINTR, ERESTART). When + * this occurs 0 is returned. + * + * cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait() + * and cv_timedwait_sig() which should be used when waiting for outstanding + * IO to complete. They are responsible for updating the iowait accounting + * when this is supported by the platform. + * + * cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution + * versions of cv_timedwait() and cv_timedwait_sig(). They expect the timeout + * to be specified as a hrtime_t allowing for timeouts of less than a tick. + * + * N.B. The return values differ slightly from the illumos implementation + * which returns the time remaining, instead of 1, when woken. They both + * return -1 on timeout. Consumers which need to know the time remaining + * are responsible for tracking it themselves. + */ + static __inline sbintime_t zfs_nstosbt(int64_t _ns) { @@ -120,7 +145,7 @@ cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t timo) #define cv_timedwait_io cv_timedwait #define cv_timedwait_sig_io cv_timedwait_sig -static inline clock_t +static inline int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag) { @@ -135,7 +160,6 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, if (hrtime >= tim) return (-1); - rc = cv_timedwait_sbt(cvp, mp, zfs_nstosbt(tim), zfs_nstosbt(res), C_ABSOLUTE); @@ -143,11 +167,10 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, return (-1); KASSERT(rc == 0, ("unexpected rc value %d", rc)); - hrtime = tim - gethrtime(); - return ((hrtime > 0) ? hrtime : -1); + return (1); } -static inline clock_t +static inline int cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag) { @@ -175,8 +198,7 @@ cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, return (0); default: KASSERT(rc == 0, ("unexpected rc value %d", rc)); - hrtime = tim - gethrtime(); - return ((hrtime > 0) ? hrtime : -1); + return (1); } } diff --git a/include/os/linux/spl/sys/condvar.h b/include/os/linux/spl/sys/condvar.h index f1438c4e2..22408824f 100644 --- a/include/os/linux/spl/sys/condvar.h +++ b/include/os/linux/spl/sys/condvar.h @@ -33,6 +33,32 @@ #include <sys/time.h> /* + * cv_timedwait() is similar to cv_wait() except that it additionally expects + * a timeout value specified in ticks. When woken by cv_signal() or + * cv_broadcast() it returns 1, otherwise when the timeout is reached -1 is + * returned. + * + * cv_timedwait_sig() behaves the same as cv_timedwait() but blocks + * interruptibly and can be woken by a signal (EINTR, ERESTART). When + * this occurs 0 is returned. + * + * cv_timedwait_io() and cv_timedwait_sig_io() are variants of cv_timedwait() + * and cv_timedwait_sig() which should be used when waiting for outstanding + * IO to complete. They are responsible for updating the iowait accounting + * when this is supported by the platform. + * + * cv_timedwait_hires() and cv_timedwait_sig_hires() are high resolution + * versions of cv_timedwait() and cv_timedwait_sig(). They expect the timeout + * to be specified as a hrtime_t allowing for timeouts of less than a tick. + * + * N.B. The return values differ slightly from the illumos implementation + * which returns the time remaining, instead of 1, when woken. They both + * return -1 on timeout. Consumers which need to know the time remaining + * are responsible for tracking it themselves. + */ + + +/* * The kcondvar_t struct is protected by mutex taken externally before * calling any of the wait/signal funs, and passed into the wait funs. */ @@ -56,12 +82,12 @@ extern void __cv_wait(kcondvar_t *, kmutex_t *); extern void __cv_wait_io(kcondvar_t *, kmutex_t *); extern int __cv_wait_io_sig(kcondvar_t *, kmutex_t *); extern int __cv_wait_sig(kcondvar_t *, kmutex_t *); -extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); -extern clock_t __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); -extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t); -extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t, +extern int __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t); +extern int __cv_timedwait_io(kcondvar_t *, kmutex_t *, clock_t); +extern int __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t); +extern int cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t, hrtime_t res, int flag); -extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t, +extern int cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t, hrtime_t res, int flag); extern void __cv_signal(kcondvar_t *); extern void __cv_broadcast(kcondvar_t *c); @@ -72,12 +98,16 @@ extern void __cv_broadcast(kcondvar_t *c); #define cv_wait_io(cvp, mp) __cv_wait_io(cvp, mp) #define cv_wait_io_sig(cvp, mp) __cv_wait_io_sig(cvp, mp) #define cv_wait_sig(cvp, mp) __cv_wait_sig(cvp, mp) -#define cv_wait_interruptible(cvp, mp) cv_wait_sig(cvp, mp) +#define cv_signal(cvp) __cv_signal(cvp) +#define cv_broadcast(cvp) __cv_broadcast(cvp) + +/* + * NB: There is no way to reliably distinguish between having been signalled + * and having timed out on Linux. If the client code needs to reliably + * distinguish between the two it should use the hires variant. + */ #define cv_timedwait(cvp, mp, t) __cv_timedwait(cvp, mp, t) #define cv_timedwait_io(cvp, mp, t) __cv_timedwait_io(cvp, mp, t) #define cv_timedwait_sig(cvp, mp, t) __cv_timedwait_sig(cvp, mp, t) -#define cv_timedwait_interruptible(cvp, mp, t) cv_timedwait_sig(cvp, mp, t) -#define cv_signal(cvp) __cv_signal(cvp) -#define cv_broadcast(cvp) __cv_broadcast(cvp) #endif /* _SPL_CONDVAR_H */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 72fcd74ca..7d5567db4 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -315,8 +315,8 @@ extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); -extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); -extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, +extern int cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); +extern int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag); extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index d19ecc18f..f19ebac12 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -344,7 +344,7 @@ cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) return (1); } -clock_t +int cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; @@ -378,7 +378,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) } /*ARGSUSED*/ -clock_t +int cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag) { diff --git a/module/os/linux/spl/spl-condvar.c b/module/os/linux/spl/spl-condvar.c index 3cc33da62..9d045e3e8 100644 --- a/module/os/linux/spl/spl-condvar.c +++ b/module/os/linux/spl/spl-condvar.c @@ -301,10 +301,10 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, * with a thread holding the mutex and call cv_destroy. */ mutex_enter(mp); - return (time_left > 0 ? time_left : -1); + return (time_left > 0 ? 1 : -1); } -clock_t +int __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) { return (__cv_timedwait_common(cvp, mp, exp_time, @@ -312,7 +312,7 @@ __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) } EXPORT_SYMBOL(__cv_timedwait); -clock_t +int __cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) { return (__cv_timedwait_common(cvp, mp, exp_time, @@ -320,11 +320,13 @@ __cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) } EXPORT_SYMBOL(__cv_timedwait_io); -clock_t +int __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) { - return (__cv_timedwait_common(cvp, mp, exp_time, - TASK_INTERRUPTIBLE, 0)); + int rc; + + rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0); + return (signal_pending(current) ? 0 : rc); } EXPORT_SYMBOL(__cv_timedwait_sig); @@ -341,6 +343,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, hrtime_t time_left; ktime_t ktime_left; u64 slack = 0; + int rc; ASSERT(cvp); ASSERT(mp); @@ -371,7 +374,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, ktime_left = ktime_set(0, time_left); slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); - schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); + rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { @@ -387,14 +390,13 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, atomic_dec(&cvp->cv_refs); mutex_enter(mp); - time_left = expire_time - gethrtime(); - return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1); + return (rc == -EINTR ? 1 : -1); } /* * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. */ -static clock_t +static int cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag, int state) { @@ -404,7 +406,7 @@ cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, return (__cv_timedwait_hires(cvp, mp, tim, res, state)); } -clock_t +int cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag) { @@ -413,12 +415,15 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, } EXPORT_SYMBOL(cv_timedwait_hires); -clock_t +int cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag) { - return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, - TASK_INTERRUPTIBLE)); + int rc; + + rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, + TASK_INTERRUPTIBLE); + return (signal_pending(current) ? 0 : rc); } EXPORT_SYMBOL(cv_timedwait_sig_hires); diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 34495af55..50b3b1434 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -2687,11 +2687,11 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw) * timeout is reached; responsibility (2) from * the comment above this function. */ - clock_t timeleft = cv_timedwait_hires(&zcw->zcw_cv, + int rc = cv_timedwait_hires(&zcw->zcw_cv, &zcw->zcw_lock, wakeup, USEC2NSEC(1), CALLOUT_FLAG_ABSOLUTE); - if (timeleft != -1 || zcw->zcw_done) + if (rc != -1 || zcw->zcw_done) continue; timedout = B_TRUE; |