aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/zio.c
diff options
context:
space:
mode:
author Don Brady <[email protected]> 2018-09-05 19:33:36 -0600
committer Brian Behlendorf <[email protected]> 2018-09-05 18:33:36 -0700
commit cc99f275a28c43fe450a66a7544f73c4935f7361 (patch)
tree f867e1d2cbb550a047c0f87986831252c41a2fd9 /module/zfs/zio.c
parent cfa37548ebc880580782b245f2d233ed540e7a01 (diff)
Pool allocation classes
Allocation Classes add the ability to have allocation classes in a pool that are dedicated to serving specific block categories, such as DDT data, metadata, and small file blocks. A pool can opt in to this feature by adding a 'special' or 'dedup' top-level VDEV. Reviewed-by: Pavel Zakharov <[email protected]> Reviewed-by: Richard Laager <[email protected]> Reviewed-by: Alek Pinchuk <[email protected]> Reviewed-by: Håkan Johansson <[email protected]> Reviewed-by: Andreas Dilger <[email protected]> Reviewed-by: DHE <[email protected]> Reviewed-by: Richard Elling <[email protected]> Reviewed-by: Gregor Kopka <[email protected]> Reviewed-by: Kash Pande <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Signed-off-by: Don Brady <[email protected]> Closes #5182
Diffstat (limited to 'module/zfs/zio.c')
-rw-r--r-- module/zfs/zio.c 98
1 files changed, 79 insertions, 19 deletions
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 654c81ef9..88bd7831e 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
#include <sys/sysmacros.h>
@@ -825,6 +826,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_bookmark = *zb;
if (pio != NULL) {
+ if (zio->io_metaslab_class == NULL)
+ zio->io_metaslab_class = pio->io_metaslab_class;
if (zio->io_logical == NULL)
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
@@ -1315,9 +1318,8 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
*/
if (flags & ZIO_FLAG_IO_ALLOCATING &&
(vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
- ASSERTV(metaslab_class_t *mc = spa_normal_class(pio->io_spa));
-
- ASSERT(mc->mc_alloc_throttle_enabled);
+ ASSERT(pio->io_metaslab_class != NULL);
+ ASSERT(pio->io_metaslab_class->mc_alloc_throttle_enabled);
ASSERT(type == ZIO_TYPE_WRITE);
ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(!(flags & ZIO_FLAG_IO_REPAIR));
@@ -1644,8 +1646,9 @@ zio_write_compress(zio_t *zio)
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg &&
BP_GET_PSIZE(bp) == psize &&
pass >= zfs_sync_pass_rewrite) {
- ASSERT(psize != 0);
+ VERIFY3U(psize, !=, 0);
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
+
zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;
zio->io_flags |= ZIO_FLAG_IO_REWRITE;
} else {
@@ -3266,7 +3269,7 @@ zio_io_to_allocate(spa_t *spa, int allocator)
* reserve then we throttle.
*/
ASSERT3U(zio->io_allocator, ==, allocator);
- if (!metaslab_class_throttle_reserve(spa_normal_class(spa),
+ if (!metaslab_class_throttle_reserve(zio->io_metaslab_class,
zio->io_prop.zp_copies, zio->io_allocator, zio, 0)) {
return (NULL);
}
@@ -3282,9 +3285,14 @@ zio_dva_throttle(zio_t *zio)
{
spa_t *spa = zio->io_spa;
zio_t *nio;
+ metaslab_class_t *mc;
+
+ /* locate an appropriate allocation class */
+ mc = spa_preferred_class(spa, zio->io_size, zio->io_prop.zp_type,
+ zio->io_prop.zp_level, zio->io_prop.zp_zpl_smallblk);
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE ||
- !spa_normal_class(zio->io_spa)->mc_alloc_throttle_enabled ||
+ !mc->mc_alloc_throttle_enabled ||
zio->io_child_type == ZIO_CHILD_GANG ||
zio->io_flags & ZIO_FLAG_NODATA) {
return (zio);
@@ -3306,17 +3314,15 @@ zio_dva_throttle(zio_t *zio)
zio->io_allocator = cityhash4(bm->zb_objset, bm->zb_object,
bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
mutex_enter(&spa->spa_alloc_locks[zio->io_allocator]);
-
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ zio->io_metaslab_class = mc;
avl_add(&spa->spa_alloc_trees[zio->io_allocator], zio);
-
- nio = zio_io_to_allocate(zio->io_spa, zio->io_allocator);
+ nio = zio_io_to_allocate(spa, zio->io_allocator);
mutex_exit(&spa->spa_alloc_locks[zio->io_allocator]);
-
return (nio);
}
-void
+static void
zio_allocate_dispatch(spa_t *spa, int allocator)
{
zio_t *zio;
@@ -3336,7 +3342,7 @@ static zio_t *
zio_dva_allocate(zio_t *zio)
{
spa_t *spa = zio->io_spa;
- metaslab_class_t *mc = spa_normal_class(spa);
+ metaslab_class_t *mc;
blkptr_t *bp = zio->io_bp;
int error;
int flags = 0;
@@ -3360,10 +3366,50 @@ zio_dva_allocate(zio_t *zio)
if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE)
flags |= METASLAB_ASYNC_ALLOC;
+ /*
+ * if not already chosen, locate an appropriate allocation class
+ */
+ mc = zio->io_metaslab_class;
+ if (mc == NULL) {
+ mc = spa_preferred_class(spa, zio->io_size,
+ zio->io_prop.zp_type, zio->io_prop.zp_level,
+ zio->io_prop.zp_zpl_smallblk);
+ zio->io_metaslab_class = mc;
+ }
+
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
&zio->io_alloc_list, zio, zio->io_allocator);
+ /*
+ * Fallback to normal class when an alloc class is full
+ */
+ if (error == ENOSPC && mc != spa_normal_class(spa)) {
+ /*
+ * If throttling, transfer reservation over to normal class.
+ * The io_allocator slot can remain the same even though we
+ * are switching classes.
+ */
+ if (mc->mc_alloc_throttle_enabled &&
+ (zio->io_flags & ZIO_FLAG_IO_ALLOCATING)) {
+ metaslab_class_throttle_unreserve(mc,
+ zio->io_prop.zp_copies, zio->io_allocator, zio);
+ zio->io_flags &= ~ZIO_FLAG_IO_ALLOCATING;
+
+ mc = spa_normal_class(spa);
+ VERIFY(metaslab_class_throttle_reserve(mc,
+ zio->io_prop.zp_copies, zio->io_allocator, zio,
+ flags | METASLAB_MUST_RESERVE));
+ } else {
+ mc = spa_normal_class(spa);
+ }
+ zio->io_metaslab_class = mc;
+
+ error = metaslab_alloc(spa, mc, zio->io_size, bp,
+ zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
+ &zio->io_alloc_list, zio, zio->io_allocator);
+ }
+
if (error != 0) {
zfs_dbgmsg("%s: metaslab allocation failure: zio %p, "
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
@@ -3431,6 +3477,15 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
ASSERT(txg > spa_syncing_txg(spa));
metaslab_trace_init(&io_alloc_list);
+
+ /*
+ * Block pointer fields are useful to metaslabs for stats and debugging.
+ * Fill in the obvious ones before calling into metaslab_alloc().
+ */
+ BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG);
+ BP_SET_PSIZE(new_bp, size);
+ BP_SET_LEVEL(new_bp, 0);
+
/*
* When allocating a zil block, we don't have information about
* the final destination of the block except the objset it's part
@@ -4144,13 +4199,15 @@ zio_ready(zio_t *zio)
if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(IO_IS_ALLOCATING(zio));
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(zio->io_metaslab_class != NULL);
+
/*
* We were unable to allocate anything, unreserve and
* issue the next I/O to allocate.
*/
metaslab_class_throttle_unreserve(
- spa_normal_class(zio->io_spa),
- zio->io_prop.zp_copies, zio->io_allocator, zio);
+ zio->io_metaslab_class, zio->io_prop.zp_copies,
+ zio->io_allocator, zio);
zio_allocate_dispatch(zio->io_spa, zio->io_allocator);
}
}
@@ -4233,14 +4290,15 @@ zio_dva_throttle_done(zio_t *zio)
ASSERT(zio->io_logical != NULL);
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE);
+ ASSERT(zio->io_metaslab_class != NULL);
mutex_enter(&pio->io_lock);
metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags,
pio->io_allocator, B_TRUE);
mutex_exit(&pio->io_lock);
- metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa),
- 1, pio->io_allocator, pio);
+ metaslab_class_throttle_unreserve(zio->io_metaslab_class, 1,
+ pio->io_allocator, pio);
/*
* Call into the pipeline to see if there is more work that
@@ -4259,7 +4317,6 @@ zio_done(zio_t *zio)
*/
const uint64_t psize = zio->io_size;
zio_t *pio, *pio_next;
- ASSERTV(metaslab_class_t *mc = spa_normal_class(zio->io_spa));
zio_link_t *zl = NULL;
/*
@@ -4278,7 +4335,8 @@ zio_done(zio_t *zio)
*/
if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
zio->io_child_type == ZIO_CHILD_VDEV) {
- ASSERT(mc->mc_alloc_throttle_enabled);
+ ASSERT(zio->io_metaslab_class != NULL);
+ ASSERT(zio->io_metaslab_class->mc_alloc_throttle_enabled);
zio_dva_throttle_done(zio);
}
@@ -4290,9 +4348,11 @@ zio_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(zio->io_bp != NULL);
+
metaslab_group_alloc_verify(zio->io_spa, zio->io_bp, zio,
zio->io_allocator);
- VERIFY(refcount_not_held(&mc->mc_alloc_slots[zio->io_allocator],
+ VERIFY(refcount_not_held(
+ &zio->io_metaslab_class->mc_alloc_slots[zio->io_allocator],
zio));
}