aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
authorGeorge Wilson <[email protected]>2013-05-06 10:14:52 -0700
committerBrian Behlendorf <[email protected]>2013-05-06 12:39:34 -0700
commit55d85d5a8c45c4559a4a0e675c37b0c3afb19c2f (patch)
treede4e1e0564aa00bb1dcb8b91c15e9655f7f36248 /module/zfs
parent5853fe790d1df58c5dd85ea52c5e165b6d43013c (diff)
Illumos #3329, #3330, #3331, #3335
3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb() 3330 space_seg_t should have its own kmem_cache 3331 deferred frees should happen after sync_pass 1 3335 make SYNC_PASS_* constants tunable Reviewed by: Adam Leventhal <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Christopher Siden <[email protected]> Reviewed by: Eric Schrock <[email protected]> Reviewed by: Richard Lowe <[email protected]> Reviewed by: Dan McDonald <[email protected]> Approved by: Eric Schrock <[email protected]> References: illumos/illumos-gate@01f55e48fb4d524eaf70687728aa51b7762e2e97 https://www.illumos.org/issues/3329 https://www.illumos.org/issues/3330 https://www.illumos.org/issues/3331 https://www.illumos.org/issues/3335 Ported-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/metaslab.c5
-rw-r--r--module/zfs/spa.c2
-rw-r--r--module/zfs/spa_misc.c2
-rw-r--r--module/zfs/space_map.c42
-rw-r--r--module/zfs/zio.c22
5 files changed, 59 insertions, 14 deletions
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 03d5a1d9c..76dc4f637 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -899,8 +899,9 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
space_map_load_wait(sm);
if (!sm->sm_loaded) {
- int error = space_map_load(sm, sm_ops, SM_FREE,
- &msp->ms_smo,
+ space_map_obj_t *smo = &msp->ms_smo;
+
+ int error = space_map_load(sm, sm_ops, SM_FREE, smo,
spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
if (error) {
metaslab_group_sort(msp->ms_group, msp, 0);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 12c24879b..22fa7878c 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -6078,7 +6078,7 @@ spa_sync(spa_t *spa, uint64_t txg)
spa_errlog_sync(spa, txg);
dsl_pool_sync(dp, txg);
- if (pass <= SYNC_PASS_DEFERRED_FREE) {
+ if (pass < zfs_sync_pass_deferred_free) {
zio_t *zio = zio_root(spa, NULL, NULL, 0);
bplist_iterate(free_bpl, spa_free_sync_cb,
zio, tx);
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 476f9c02c..0ca9f3a7a 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1633,6 +1633,7 @@ spa_init(int mode)
fm_init();
refcount_init();
unique_init();
+ space_map_init();
zio_init();
dmu_init();
zil_init();
@@ -1655,6 +1656,7 @@ spa_fini(void)
zil_fini();
dmu_fini();
zio_fini();
+ space_map_fini();
unique_fini();
refcount_fini();
fm_fini();
diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c
index 9c0cdb6be..d99c7c0b3 100644
--- a/module/zfs/space_map.c
+++ b/module/zfs/space_map.c
@@ -29,6 +29,23 @@
#include <sys/zio.h>
#include <sys/space_map.h>
+static kmem_cache_t *space_seg_cache;
+
+void
+space_map_init(void)
+{
+ ASSERT(space_seg_cache == NULL);
+ space_seg_cache = kmem_cache_create("space_seg_cache",
+ sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+void
+space_map_fini(void)
+{
+ kmem_cache_destroy(space_seg_cache);
+ space_seg_cache = NULL;
+}
+
/*
* Space map routines.
* NOTE: caller is responsible for all locking.
@@ -121,7 +138,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
avl_remove(sm->sm_pp_root, ss_after);
}
ss_after->ss_start = ss_before->ss_start;
- kmem_free(ss_before, sizeof (*ss_before));
+ kmem_cache_free(space_seg_cache, ss_before);
ss = ss_after;
} else if (merge_before) {
ss_before->ss_end = end;
@@ -134,7 +151,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
avl_remove(sm->sm_pp_root, ss_after);
ss = ss_after;
} else {
- ss = kmem_alloc(sizeof (*ss), KM_PUSHPAGE);
+ ss = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
ss->ss_start = start;
ss->ss_end = end;
avl_insert(&sm->sm_root, ss, where);
@@ -181,7 +198,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
avl_remove(sm->sm_pp_root, ss);
if (left_over && right_over) {
- newseg = kmem_alloc(sizeof (*newseg), KM_PUSHPAGE);
+ newseg = kmem_cache_alloc(space_seg_cache, KM_PUSHPAGE);
newseg->ss_start = end;
newseg->ss_end = ss->ss_end;
ss->ss_end = start;
@@ -194,7 +211,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
ss->ss_start = end;
} else {
avl_remove(&sm->sm_root, ss);
- kmem_free(ss, sizeof (*ss));
+ kmem_cache_free(space_seg_cache, ss);
ss = NULL;
}
@@ -234,7 +251,7 @@ space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
if (func != NULL)
func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
- kmem_free(ss, sizeof (*ss));
+ kmem_cache_free(space_seg_cache, ss);
}
sm->sm_space = 0;
}
@@ -406,7 +423,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
spa_t *spa = dmu_objset_spa(os);
void *cookie = NULL;
space_seg_t *ss;
- uint64_t bufsize, start, size, run_len;
+ uint64_t bufsize, start, size, run_len, delta, sm_space;
uint64_t *entry, *entry_map, *entry_map_end;
ASSERT(MUTEX_HELD(sm->sm_lock));
@@ -435,11 +452,13 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
+ delta = 0;
+ sm_space = sm->sm_space;
while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
size = ss->ss_end - ss->ss_start;
start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
- sm->sm_space -= size;
+ delta += size;
size >>= sm->sm_shift;
while (size) {
@@ -461,7 +480,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
start += run_len;
size -= run_len;
}
- kmem_free(ss, sizeof (*ss));
+ kmem_cache_free(space_seg_cache, ss);
}
if (entry != entry_map) {
@@ -473,8 +492,15 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
smo->smo_objsize += size;
}
+ /*
+ * Ensure that the space_map's accounting wasn't changed
+ * while we were in the middle of writing it out.
+ */
+ VERIFY3U(sm->sm_space, ==, sm_space);
+
zio_buf_free(entry_map, bufsize);
+ sm->sm_space -= delta;
VERIFY3U(sm->sm_space, ==, 0);
}
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index a721903a3..fda76123e 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -85,6 +85,22 @@ extern vmem_t *zio_alloc_arena;
extern int zfs_mg_alloc_failures;
/*
+ * The following actions directly effect the spa's sync-to-convergence logic.
+ * The values below define the sync pass when we start performing the action.
+ * Care should be taken when changing these values as they directly impact
+ * spa_sync() performance. Tuning these values may introduce subtle performance
+ * pathologies and should only be done in the context of performance analysis.
+ * These tunables will eventually be removed and replaced with #defines once
+ * enough analysis has been done to determine optimal values.
+ *
+ * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that
+ * regular blocks are not deferred.
+ */
+int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */
+int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */
+int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */
+
+/*
* An allocating zio is one that either currently has the DVA allocate
* stage set or will have it later in its lifetime.
*/
@@ -771,7 +787,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!BP_IS_HOLE(bp));
ASSERT(spa_syncing_txg(spa) == txg);
- ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE);
+ ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
@@ -1068,7 +1084,7 @@ zio_write_bp_init(zio_t *zio)
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(!BP_GET_DEDUP(bp));
- if (pass > SYNC_PASS_DONT_COMPRESS)
+ if (pass >= zfs_sync_pass_dont_compress)
compress = ZIO_COMPRESS_OFF;
/* Make sure someone doesn't change their mind on overwrites */
@@ -1097,7 +1113,7 @@ zio_write_bp_init(zio_t *zio)
* There should only be a handful of blocks after pass 1 in any case.
*/
if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize &&
- pass > SYNC_PASS_REWRITE) {
+ pass >= zfs_sync_pass_rewrite) {
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
ASSERT(psize != 0);
zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;