summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author: Alexander Motin <[email protected]> 2014-07-18 08:53:38 -0800
committer: Brian Behlendorf <[email protected]> 2015-07-06 09:34:13 -0700
commit: e16b3fcc610fab2dcf3381486b2640dc2a2213cb (patch)
tree: 8571cd7ae1db3137b7f36ae93ff07f447a16fecc /include
parent: 4bda3bd0e72d582a785b6552ce16b99e04414fbe (diff)
Illumos 5008 - lock contention (rrw_exit) while running a read only load
5008 lock contention (rrw_exit) while running a read only load

Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Alex Reece <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Richard Yao <[email protected]>
Reviewed by: Saso Kiselkov <[email protected]>
Approved by: Garrett D'Amore <[email protected]>

Porting notes:

This patch ported perfectly cleanly to ZoL. During testing 100% cached small-block reads, extreme contention was noticed on rrl->rr_lock from rrw_exit() due to the frequent entering and leaving ZPL. Illumos picked up this patch from FreeBSD and it also helps under Linux.

On a 1-minute 4K cached read test with 10 fio processes pinned to a single socket on a 4-socket (10 threads per socket) NUMA system, contentions on rrl->rr_lock were reduced from 508799 to 43085.

Ported-by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #3555
Diffstat (limited to 'include')
-rw-r--r-- include/sys/rrwlock.h 25
-rw-r--r-- include/sys/zfs_vfsops.h 2
-rw-r--r-- include/sys/zfs_znode.h 4
3 files changed, 28 insertions, 3 deletions
diff --git a/include/sys/rrwlock.h b/include/sys/rrwlock.h
index d2bdff495..7a328fd68 100644
--- a/include/sys/rrwlock.h
+++ b/include/sys/rrwlock.h
@@ -83,6 +83,31 @@ void rrw_tsd_destroy(void *arg);
#define RRW_LOCK_HELD(x) \
(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
+/*
+ * A reader-mostly lock implementation, tuning above reader-writer locks
+ * for highly parallel read acquisitions, pessimizing write acquisitions.
+ *
+ * This should be a prime number. See comment in rrwlock.c near
+ * RRM_TD_LOCK() for details.
+ */
+#define RRM_NUM_LOCKS 17
+typedef struct rrmlock {
+ rrwlock_t locks[RRM_NUM_LOCKS];
+} rrmlock_t;
+
+void rrm_init(rrmlock_t *rrl, boolean_t track_all);
+void rrm_destroy(rrmlock_t *rrl);
+void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
+void rrm_enter_read(rrmlock_t *rrl, void *tag);
+void rrm_enter_write(rrmlock_t *rrl);
+void rrm_exit(rrmlock_t *rrl, void *tag);
+boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
+
+#define RRM_READ_HELD(x) rrm_held(x, RW_READER)
+#define RRM_WRITE_HELD(x) rrm_held(x, RW_WRITER)
+#define RRM_LOCK_HELD(x) \
+ (rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index c4db2a911..4073d82f8 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -67,7 +67,7 @@ typedef struct zfs_sb {
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_relatime; /* enable relatime mount option */
boolean_t z_unmounted; /* unmounted */
- rrwlock_t z_teardown_lock;
+ rrmlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all znodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 79ca4f7e9..7e73cf991 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -250,7 +250,7 @@ typedef struct znode {
/* Called on entry to each ZFS vnode and vfs operation */
#define ZFS_ENTER(zsb) \
{ \
- rrw_enter_read(&(zsb)->z_teardown_lock, FTAG); \
+ rrm_enter_read(&(zsb)->z_teardown_lock, FTAG); \
if ((zsb)->z_unmounted) { \
ZFS_EXIT(zsb); \
return (EIO); \
@@ -260,7 +260,7 @@ typedef struct znode {
/* Must be called before exiting the vop */
#define ZFS_EXIT(zsb) \
{ \
- rrw_exit(&(zsb)->z_teardown_lock, FTAG); \
+ rrm_exit(&(zsb)->z_teardown_lock, FTAG); \
}
/* Verifies the znode is valid */