aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChunwei Chen <[email protected]>2016-05-25 16:35:42 -0700
committerBrian Behlendorf <[email protected]>2016-05-31 11:44:15 -0700
commitf58040c0fc8bc6490fcc75db7fc3e709dfc3c656 (patch)
tree03aaebb8e0e152ade6edb7a9ae14f4b430314797
parentc60a51b640bab61c54f370752750841675730899 (diff)
Implement a proper rw_tryupgrade
Current rw_tryupgrade does rw_exit and then rw_tryenter(RW_RWITER), and then does rw_enter(RW_READER) if it fails. This violate the assumption that rw_tryupgrade should be atomic and could cause extra contention or even lock inversion. This patch we implement a proper rw_tryupgrade. For rwsem-spinlock, we take the spinlock to check rwsem->count and rwsem->wait_list. For normal rwsem, we use cmpxchg on rwsem->count to change the value from single reader to single writer. Signed-off-by: Chunwei Chen <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Tim Chase <[email protected]> Closes zfsonlinux/zfs#4692 Closes #554
-rw-r--r--config/spl-build.m425
-rw-r--r--include/linux/rwsem_compat.h17
-rw-r--r--include/sys/rwlock.h11
-rw-r--r--module/spl/spl-rwlock.c41
-rw-r--r--module/splat/splat-rwlock.c66
5 files changed, 147 insertions, 13 deletions
diff --git a/config/spl-build.m4 b/config/spl-build.m4
index 720506959..d705c6531 100644
--- a/config/spl-build.m4
+++ b/config/spl-build.m4
@@ -39,6 +39,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
SPL_AC_SHRINK_CONTROL_STRUCT
SPL_AC_RWSEM_SPINLOCK_IS_RAW
+ SPL_AC_RWSEM_ACTIVITY
SPL_AC_SCHED_RT_HEADER
SPL_AC_2ARGS_VFS_GETATTR
SPL_AC_USLEEP_RANGE
@@ -1317,6 +1318,30 @@ AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
])
dnl #
+dnl # 3.16 API Change
+dnl #
+dnl # rwsem-spinlock "->activity" changed to "->count"
+dnl #
+AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
+ AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/rwsem.h>
+ ],[
+ struct rw_semaphore dummy_semaphore __attribute__ ((unused));
+ dummy_semaphore.activity = 0;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
+ [struct rw_semaphore has member activity])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
+
+dnl #
dnl # 3.9 API change,
dnl # Moved things from linux/sched.h to linux/sched/rt.h
dnl #
diff --git a/include/linux/rwsem_compat.h b/include/linux/rwsem_compat.h
index 5841d7c28..9a4df2673 100644
--- a/include/linux/rwsem_compat.h
+++ b/include/linux/rwsem_compat.h
@@ -27,6 +27,23 @@
#include <linux/rwsem.h>
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+#define SPL_RWSEM_SINGLE_READER_VALUE (1)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
+#else
+#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS)
+#endif
+
+/* Linux 3.16 change activity to count for rwsem-spinlock */
+#ifdef HAVE_RWSEM_ACTIVITY
+#define RWSEM_COUNT(sem) sem->activity
+#else
+#define RWSEM_COUNT(sem) sem->count
+#endif
+
+int rwsem_tryupgrade(struct rw_semaphore *rwsem);
+
#if defined(RWSEM_SPINLOCK_IS_RAW)
#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl)
#define spl_rwsem_unlock_irqrestore(lk, fl) raw_spin_unlock_irqrestore(lk, fl)
diff --git a/include/sys/rwlock.h b/include/sys/rwlock.h
index 14d097b01..facebe3ba 100644
--- a/include/sys/rwlock.h
+++ b/include/sys/rwlock.h
@@ -223,13 +223,10 @@ RW_LOCK_HELD(krwlock_t *rwp)
if (RW_WRITE_HELD(rwp)) { \
_rc_ = 1; \
} else { \
- rw_exit(rwp); \
- if (rw_tryenter(rwp, RW_WRITER)) { \
- _rc_ = 1; \
- } else { \
- rw_enter(rwp, RW_READER); \
- _rc_ = 0; \
- } \
+ spl_rw_lockdep_off_maybe(rwp); \
+ if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \
+ spl_rw_set_owner(rwp); \
+ spl_rw_lockdep_on_maybe(rwp); \
} \
_rc_; \
})
diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c
index 98251c011..9b356a843 100644
--- a/module/spl/spl-rwlock.c
+++ b/module/spl/spl-rwlock.c
@@ -32,5 +32,46 @@
#define DEBUG_SUBSYSTEM S_RWLOCK
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+ int ret = 0;
+ unsigned long flags;
+ spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags);
+ if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE &&
+ list_empty(&rwsem->wait_list)) {
+ ret = 1;
+ RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE;
+ }
+ spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags);
+ return (ret);
+}
+#else
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+ typeof (rwsem->count) val;
+ val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
+ SPL_RWSEM_SINGLE_WRITER_VALUE);
+ return (val == SPL_RWSEM_SINGLE_READER_VALUE);
+}
+#endif
+
+int
+rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+ if (__rwsem_tryupgrade(rwsem)) {
+ rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
+ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ rwsem->owner = current;
+#endif
+ return (1);
+ }
+ return (0);
+}
+EXPORT_SYMBOL(rwsem_tryupgrade);
+
int spl_rw_init(void) { return 0; }
void spl_rw_fini(void) { }
diff --git a/module/splat/splat-rwlock.c b/module/splat/splat-rwlock.c
index abd6a0e60..c17eb07ba 100644
--- a/module/splat/splat-rwlock.c
+++ b/module/splat/splat-rwlock.c
@@ -55,8 +55,12 @@
#define SPLAT_RWLOCK_TEST5_DESC "Write downgrade"
#define SPLAT_RWLOCK_TEST6_ID 0x0706
-#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade"
-#define SPLAT_RWLOCK_TEST6_DESC "Read upgrade"
+#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade-1"
+#define SPLAT_RWLOCK_TEST6_DESC "rwsem->count value"
+
+#define SPLAT_RWLOCK_TEST7_ID 0x0707
+#define SPLAT_RWLOCK_TEST7_NAME "rw_tryupgrade-2"
+#define SPLAT_RWLOCK_TEST7_DESC "Read upgrade"
#define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL
#define SPLAT_RWLOCK_TEST_NAME "rwlock_test"
@@ -580,8 +584,55 @@ splat_rwlock_test6(struct file *file, void *arg)
splat_init_rw_priv(rwp, file);
rw_enter(&rwp->rw_rwlock, RW_READER);
- if (!RW_READ_HELD(&rwp->rw_rwlock)) {
+ if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
+ SPL_RWSEM_SINGLE_READER_VALUE) {
+ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+ "We assumed single reader rwsem->count "
+ "should be %ld, but is %ld\n",
+ SPL_RWSEM_SINGLE_READER_VALUE,
+ RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
+ rc = -ENOLCK;
+ goto out;
+ }
+ rw_exit(&rwp->rw_rwlock);
+
+ rw_enter(&rwp->rw_rwlock, RW_WRITER);
+ if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
+ SPL_RWSEM_SINGLE_WRITER_VALUE) {
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+ "We assumed single writer rwsem->count "
+ "should be %ld, but is %ld\n",
+ SPL_RWSEM_SINGLE_WRITER_VALUE,
+ RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
+ rc = -ENOLCK;
+ goto out;
+ }
+ rc = 0;
+ splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
+ "rwsem->count same as we assumed\n");
+out:
+ rw_exit(&rwp->rw_rwlock);
+ rw_destroy(&rwp->rw_rwlock);
+ kfree(rwp);
+
+ return rc;
+}
+
+static int
+splat_rwlock_test7(struct file *file, void *arg)
+{
+ rw_priv_t *rwp;
+ int rc;
+
+ rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL);
+ if (rwp == NULL)
+ return -ENOMEM;
+
+ splat_init_rw_priv(rwp, file);
+
+ rw_enter(&rwp->rw_rwlock, RW_READER);
+ if (!RW_READ_HELD(&rwp->rw_rwlock)) {
+ splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
"rwlock should be read lock: %d\n",
RW_READ_HELD(&rwp->rw_rwlock));
rc = -ENOLCK;
@@ -591,7 +642,7 @@ splat_rwlock_test6(struct file *file, void *arg)
/* With one reader upgrade should never fail. */
rc = rw_tryupgrade(&rwp->rw_rwlock);
if (!rc) {
- splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
+ splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
"rwlock failed upgrade from reader: %d\n",
RW_READ_HELD(&rwp->rw_rwlock));
rc = -ENOLCK;
@@ -599,7 +650,7 @@ splat_rwlock_test6(struct file *file, void *arg)
}
if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) {
- splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should "
+ splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should "
"have 0 (not %d) reader and 1 (not %d) writer\n",
RW_READ_HELD(&rwp->rw_rwlock),
RW_WRITE_HELD(&rwp->rw_rwlock));
@@ -607,7 +658,7 @@ splat_rwlock_test6(struct file *file, void *arg)
}
rc = 0;
- splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
+ splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "%s",
"rwlock properly upgraded\n");
out:
rw_exit(&rwp->rw_rwlock);
@@ -646,6 +697,8 @@ splat_rwlock_init(void)
SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5);
SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6);
+ SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
+ SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7);
return sub;
}
@@ -654,6 +707,7 @@ void
splat_rwlock_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
+ SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID);