author    Matthew Ahrens <[email protected]>     2013-05-16 14:18:06 -0700
committer Brian Behlendorf <[email protected]>  2013-11-05 12:14:48 -0800
commit    498877baf5038b32c1531e5ec96b435023200f4d (patch)
tree      741845c71f02bf2dca826e3a988c7125c8958117 /module
parent    03c6040bee6c87a9413b7da41d9f580f79a8ab62 (diff)
Illumos #3112, #3113, #3114

3112 ztest does not honor ZFS_DEBUG
3113 ztest should use watchpoints to protect frozen arc bufs
3114 some leaked nvlists in zfsdev_ioctl

Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Matt Amdur <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Approved by: Eric Schrock <[email protected]>

References:
  https://www.illumos.org/issues/3112
  https://www.illumos.org/issues/3113
  https://www.illumos.org/issues/3114
  illumos/illumos-gate@cd1c8b85eb30b568e9816221430c479ace7a559d

The /proc/self/cmd watchpoint interface is specific to Solaris.
Therefore, the #3113 implementation was reworked to use the more
portable mprotect(2) system call. When the pages are watched, they are
marked read-only for protection. Any write to the protected address
range immediately triggers a SIGSEGV. The pages are marked writable
again when they are unwatched.

Ported-by: Brian Behlendorf <[email protected]>
Issue #1489
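The reworked mechanism is easy to demonstrate outside of ZFS. Below is
a minimal, self-contained sketch of the same mprotect(2) watchpoint
idea; the handler name, the buffer, and the main() harness are
illustrative only and are not part of this patch.

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void
on_sigsegv(int sig, siginfo_t *si, void *unused)
{
	/* Any write to a watched page lands here, as in arc_buf_sigsegv(). */
	fprintf(stderr, "Got SIGSEGV at address: %p\n", si->si_addr);
	_exit(1);
}

int
main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	struct sigaction sa;
	char *buf;

	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sa.sa_sigaction = on_sigsegv;
	if (sigaction(SIGSEGV, &sa, NULL) == -1)
		abort();

	/* mprotect(2) operates on whole pages, so mmap a page-aligned buffer. */
	buf = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		abort();

	buf[0] = 'x';				/* ok: page is writable */
	mprotect(buf, pagesize, PROT_READ);	/* "watch" the buffer */
	buf[0] = 'y';				/* write traps to on_sigsegv() */
	return (0);
}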
Diffstat (limited to 'module')
-rw-r--r--  module/zfs/arc.c       60
-rw-r--r--  module/zfs/spa_misc.c  17
-rw-r--r--  module/zfs/zio.c       14
3 files changed, 80 insertions(+), 11 deletions(-)
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 04f87a810..5bba9a317 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -145,6 +145,11 @@
#include <sys/dmu_tx.h>
#include <zfs_fletcher.h>
+#ifndef _KERNEL
+/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
+boolean_t arc_watch = B_FALSE;
+#endif
+
static kmutex_t arc_reclaim_thr_lock;
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
static uint8_t arc_thread_exit;
@@ -569,6 +574,7 @@ static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
static int arc_evict_needed(arc_buf_contents_t type);
static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type);
+static void arc_buf_watch(arc_buf_t *buf);
static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
@@ -1060,6 +1066,37 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
buf->b_hdr->b_freeze_cksum);
mutex_exit(&buf->b_hdr->b_freeze_lock);
+ arc_buf_watch(buf);
+}
+
+#ifndef _KERNEL
+void
+arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
+{
+ panic("Got SIGSEGV at address: 0x%lx\n", (long) si->si_addr);
+}
+#endif
+
+/* ARGSUSED */
+static void
+arc_buf_unwatch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+ if (arc_watch) {
+ ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size,
+ PROT_READ | PROT_WRITE));
+ }
+#endif
+}
+
+/* ARGSUSED */
+static void
+arc_buf_watch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+ if (arc_watch)
+ ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size, PROT_READ));
+#endif
}
void
@@ -1080,6 +1117,8 @@ arc_buf_thaw(arc_buf_t *buf)
}
mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+ arc_buf_unwatch(buf);
}
void
@@ -1097,6 +1136,7 @@ arc_buf_freeze(arc_buf_t *buf)
buf->b_hdr->b_state == arc_anon);
arc_cksum_compute(buf, B_FALSE);
mutex_exit(hash_lock);
+
}
static void
@@ -1504,21 +1544,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
* the buffer is placed on l2arc_free_on_write to be freed later.
*/
static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
- void *data, size_t size)
+arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
if (HDR_L2_WRITING(hdr)) {
l2arc_data_free_t *df;
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
- df->l2df_data = data;
- df->l2df_size = size;
+ df->l2df_data = buf->b_data;
+ df->l2df_size = hdr->b_size;
df->l2df_func = free_func;
mutex_enter(&l2arc_free_on_write_mtx);
list_insert_head(l2arc_free_on_write, df);
mutex_exit(&l2arc_free_on_write_mtx);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
} else {
- free_func(data, size);
+ free_func(buf->b_data, hdr->b_size);
}
}
@@ -1534,16 +1575,15 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
arc_buf_contents_t type = buf->b_hdr->b_type;
arc_cksum_verify(buf);
+ arc_buf_unwatch(buf);
if (!recycle) {
if (type == ARC_BUFC_METADATA) {
- arc_buf_data_free(buf->b_hdr, zio_buf_free,
- buf->b_data, size);
+ arc_buf_data_free(buf, zio_buf_free);
arc_space_return(size, ARC_SPACE_DATA);
} else {
ASSERT(type == ARC_BUFC_DATA);
- arc_buf_data_free(buf->b_hdr,
- zio_data_buf_free, buf->b_data, size);
+ arc_buf_data_free(buf, zio_data_buf_free);
ARCSTAT_INCR(arcstat_data_size, -size);
atomic_add_64(&arc_size, -size);
}
@@ -2908,6 +2948,7 @@ arc_read_done(zio_t *zio)
}
arc_cksum_compute(buf, B_FALSE);
+ arc_buf_watch(buf);
if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
/*
@@ -3542,6 +3583,7 @@ arc_release(arc_buf_t *buf, void *tag)
}
hdr->b_datacnt -= 1;
arc_cksum_verify(buf);
+ arc_buf_unwatch(buf);
mutex_exit(hash_lock);
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 7bdfbf738..91e7fdf35 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1630,6 +1630,23 @@ spa_init(int mode)
spa_mode_global = mode;
+#ifndef _KERNEL
+ if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+ struct sigaction sa;
+
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = arc_buf_sigsegv;
+
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("could not enable watchpoints: "
+ "sigaction(SIGSEGV, ...) = ");
+ } else {
+ arc_watch = B_TRUE;
+ }
+ }
+#endif
+
fm_init();
refcount_init();
unique_init();
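Note that arc_watch is only enabled for writable pool handles
(spa_mode_global != FREAD) and only when dprintf_find_string("watch")
finds the token, i.e. when ZFS_DEBUG=watch is set in the environment.
A rough user-space equivalent of that token lookup is sketched below;
the exact parsing in the ZFS sources may differ, so treat the details
as an assumption.

#include <stdlib.h>
#include <string.h>

/*
 * Return nonzero if `flag' appears as a whole comma-separated token in
 * the ZFS_DEBUG environment variable, e.g. ZFS_DEBUG=watch or
 * ZFS_DEBUG=dprint,watch.
 */
static int
debug_flag_set(const char *flag)
{
	const char *env = getenv("ZFS_DEBUG");
	const char *p;
	size_t len = strlen(flag);

	if (env == NULL)
		return (0);
	for (p = env; (p = strstr(p, flag)) != NULL; p++) {
		if ((p == env || p[-1] == ',') &&
		    (p[len] == '\0' || p[len] == ','))
			return (1);
	}
	return (0);
}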
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 1f803a44b..f0c9c0f08 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -169,11 +169,21 @@ zio_init(void)
while (p2 & (p2 - 1))
p2 &= p2 - 1;
+#ifndef _KERNEL
+ /*
+ * If we are using watchpoints, put each buffer on its own page,
+ * to eliminate the performance overhead of trapping to the
+ * kernel when modifying a non-watched buffer that shares the
+ * page with a watched buffer.
+ */
+ if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
+ continue;
+#endif
if (size <= 4 * SPA_MINBLOCKSIZE) {
align = SPA_MINBLOCKSIZE;
- } else if (P2PHASE(size, PAGESIZE) == 0) {
+ } else if (IS_P2ALIGNED(size, PAGESIZE)) {
align = PAGESIZE;
- } else if (P2PHASE(size, p2 >> 2) == 0) {
+ } else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = p2 >> 2;
}
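The P2PHASE() to IS_P2ALIGNED() change is purely cosmetic: for a
power-of-two alignment, P2PHASE(size, align) == 0 and
IS_P2ALIGNED(size, align) test the same condition. The sketch below
uses the usual sysmacros.h-style definitions and also shows what the
p2 reduction loop above computes; the concrete sizes are illustrative.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define	P2PHASE(x, align)	((x) & ((align) - 1))
#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)

int
main(void)
{
	size_t size = 12288;	/* e.g. three 4K pages */
	size_t p2 = size;

	/* Clear the lowest set bit until only the highest remains. */
	while (p2 & (p2 - 1))
		p2 &= p2 - 1;

	assert(p2 == 8192);	/* largest power of two <= 12288 */
	assert(IS_P2ALIGNED(size, 4096));
	assert(P2PHASE(size, 4096) == 0);	/* the older spelling */
	return (0);
}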