diff options
author | George Wilson <[email protected]> | 2020-09-18 14:13:47 -0500 |
---|---|---|
committer | GitHub <[email protected]> | 2020-09-18 12:13:47 -0700 |
commit | c494aa7f578d5cb844b770f679bd46495242edad (patch) | |
tree | 6abcc94bc41285c5157817062b600c81986b5e7e /module | |
parent | 908d43d0a9f736af62c0f4b179950bb1262dfd7d (diff) |
vdev_ashift should only be set once
== Motivation and Context
The new vdev ashift optimization prevents the removal of devices when
a zfs configuration is comprised of disks which have different logical
and physical block sizes. This is caused because we set 'spa_min_ashift'
in vdev_open and then later call 'vdev_ashift_optimize'. This would
result in an inconsistency between spa's ashift calculations and that
of the top-level vdev.
In addition, the optimization logical ignores the overridden ashift
value that would be provided by '-o ashift=<val>'.
== Description
This change reworks the vdev ashift optimization so that it's only
set the first time the device is configured. It still allows the
physical and logical ahsift values to be set every time the device
is opened but those values are only consulted on first open.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Cedric Berger <[email protected]>
Signed-off-by: George Wilson <[email protected]>
External-Issue: DLPX-71831
Closes #10932
Diffstat (limited to 'module')
-rw-r--r-- | module/os/freebsd/zfs/sysctl_os.c | 1 | ||||
-rw-r--r-- | module/os/freebsd/zfs/vdev_file.c | 14 | ||||
-rw-r--r-- | module/os/linux/zfs/vdev_file.c | 22 | ||||
-rw-r--r-- | module/zfs/arc.c | 2 | ||||
-rw-r--r-- | module/zfs/spa.c | 1 | ||||
-rw-r--r-- | module/zfs/spa_config.c | 4 | ||||
-rw-r--r-- | module/zfs/vdev.c | 96 | ||||
-rw-r--r-- | module/zfs/vdev_mirror.c | 2 |
8 files changed, 92 insertions, 50 deletions
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index 200bbf43d..b3cb7e7e4 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -121,6 +121,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS livelist condense"); SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache"); +SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file"); SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0, "ZFS VDEV mirror"); diff --git a/module/os/freebsd/zfs/vdev_file.c b/module/os/freebsd/zfs/vdev_file.c index 4d27751c8..cf762c5fd 100644 --- a/module/os/freebsd/zfs/vdev_file.c +++ b/module/os/freebsd/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -40,6 +40,9 @@ static taskq_t *vdev_file_taskq; +unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT; +unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT; + void vdev_file_init(void) { @@ -167,8 +170,8 @@ skip_open: } *max_psize = *psize = zfa.zfa_size; - *logical_ashift = SPA_MINBLOCKSHIFT; - *physical_ashift = SPA_MINBLOCKSHIFT; + *logical_ashift = vdev_file_logical_ashift; + *physical_ashift = vdev_file_physical_ashift; return (0); } @@ -326,3 +329,8 @@ vdev_ops_t vdev_disk_ops = { }; #endif + +ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW, + "Logical ashift for file-based devices"); +ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW, + "Physical ashift for file-based devices"); diff --git a/module/os/linux/zfs/vdev_file.c b/module/os/linux/zfs/vdev_file.c index a4e71ca40..423ce8581 100644 --- a/module/os/linux/zfs/vdev_file.c +++ b/module/os/linux/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -45,6 +45,17 @@ static taskq_t *vdev_file_taskq; +/* + * By default, the logical/physical ashift for file vdevs is set to + * SPA_MINBLOCKSHIFT (9). This allows all file vdevs to use 512B (1 << 9) + * blocksizes. Users may opt to change one or both of these for testing + * or performance reasons. Care should be taken as these values will + * impact the vdev_ashift setting which can only be set at vdev creation + * time. + */ +unsigned long vdev_file_logical_ashift = SPA_MINBLOCKSHIFT; +unsigned long vdev_file_physical_ashift = SPA_MINBLOCKSHIFT; + static void vdev_file_hold(vdev_t *vd) { @@ -159,8 +170,8 @@ skip_open: } *max_psize = *psize = zfa.zfa_size; - *logical_ashift = SPA_MINBLOCKSHIFT; - *physical_ashift = SPA_MINBLOCKSHIFT; + *logical_ashift = vdev_file_logical_ashift; + *physical_ashift = vdev_file_physical_ashift; return (0); } @@ -346,3 +357,8 @@ vdev_ops_t vdev_disk_ops = { }; #endif + +ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, ULONG, ZMOD_RW, + "Logical ashift for file-based devices"); +ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, ULONG, ZMOD_RW, + "Physical ashift for file-based devices"); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 7c21e9b42..53ebf629e 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -9434,8 +9434,6 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd) ASSERT(!l2arc_vdev_present(vd)); - vdev_ashift_optimize(vd); - /* * Create a new l2arc device entry. */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 8c662f6b0..532f04b91 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5762,7 +5762,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; - vdev_ashift_optimize(vd); vdev_metaslab_set_size(vd); vdev_expand(vd, txg); } diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 81059c69d..dacba127d 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -577,10 +577,8 @@ spa_config_update(spa_t *spa, int what) (tvd->vdev_islog && tvd->vdev_removing)) continue; - if (tvd->vdev_ms_array == 0) { - vdev_ashift_optimize(tvd); + if (tvd->vdev_ms_array == 0) vdev_metaslab_set_size(tvd); - } vdev_expand(tvd, txg); } } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 0a3b8bd83..a94101485 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1673,6 +1673,38 @@ vdev_set_deflate_ratio(vdev_t *vd) } /* + * Maximize performance by inflating the configured ashift for top level + * vdevs to be as close to the physical ashift as possible while maintaining + * administrator defined limits and ensuring it doesn't go below the + * logical ashift. + */ +static void +vdev_ashift_optimize(vdev_t *vd) +{ + ASSERT(vd == vd->vdev_top); + + if (vd->vdev_ashift < vd->vdev_physical_ashift) { + vd->vdev_ashift = MIN( + MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift), + MAX(zfs_vdev_min_auto_ashift, + vd->vdev_physical_ashift)); + } else { + /* + * If the logical and physical ashifts are the same, then + * we ensure that the top-level vdev's ashift is not smaller + * than our minimum ashift value. For the unusual case + * where logical ashift > physical ashift, we can't cap + * the calculated ashift based on max ashift as that + * would cause failures. + * We still check if we need to increase it to match + * the min ashift. + */ + vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift, + vd->vdev_ashift); + } +} + +/* * Prepare a virtual device for access. */ int @@ -1830,16 +1862,17 @@ vdev_open(vdev_t *vd) return (SET_ERROR(EINVAL)); } + /* + * We can always set the logical/physical ashift members since + * their values are only used to calculate the vdev_ashift when + * the device is first added to the config. These values should + * not be used for anything else since they may change whenever + * the device is reopened and we don't store them in the label. + */ vd->vdev_physical_ashift = MAX(physical_ashift, vd->vdev_physical_ashift); - vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift); - vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift); - - if (vd->vdev_logical_ashift > ASHIFT_MAX) { - vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_ASHIFT_TOO_BIG); - return (SET_ERROR(EDOM)); - } + vd->vdev_logical_ashift = MAX(logical_ashift, + vd->vdev_logical_ashift); if (vd->vdev_asize == 0) { /* @@ -1848,6 +1881,24 @@ vdev_open(vdev_t *vd) */ vd->vdev_asize = asize; vd->vdev_max_asize = max_asize; + + /* + * If the vdev_ashift was not overriden at creation time, + * then set it the logical ashift and optimize the ashift. + */ + if (vd->vdev_ashift == 0) { + vd->vdev_ashift = vd->vdev_logical_ashift; + + if (vd->vdev_logical_ashift > ASHIFT_MAX) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_ASHIFT_TOO_BIG); + return (SET_ERROR(EDOM)); + } + + if (vd->vdev_top == vd) { + vdev_ashift_optimize(vd); + } + } if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN || vd->vdev_ashift > ASHIFT_MAX)) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, @@ -2444,35 +2495,6 @@ vdev_metaslab_set_size(vdev_t *vd) ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT); } -/* - * Maximize performance by inflating the configured ashift for top level - * vdevs to be as close to the physical ashift as possible while maintaining - * administrator defined limits and ensuring it doesn't go below the - * logical ashift. - */ -void -vdev_ashift_optimize(vdev_t *vd) -{ - if (vd == vd->vdev_top) { - if (vd->vdev_ashift < vd->vdev_physical_ashift) { - vd->vdev_ashift = MIN( - MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift), - MAX(zfs_vdev_min_auto_ashift, - vd->vdev_physical_ashift)); - } else { - /* - * Unusual case where logical ashift > physical ashift - * so we can't cap the calculated ashift based on max - * ashift as that would cause failures. - * We still check if we need to increase it to match - * the min ashift. - */ - vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift, - vd->vdev_ashift); - } - } -} - void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) { diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 5e1060f12..71b5adbbd 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -391,7 +391,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); *physical_ashift = MAX(*physical_ashift, - vd->vdev_physical_ashift); + cvd->vdev_physical_ashift); } if (numerrors == vd->vdev_children) { |