aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ameer Hamza <[email protected]> 2023-07-20 22:23:52 +0500
committer GitHub <[email protected]> 2023-07-20 10:23:52 -0700
commit d9bb583c25d833e57c0842a81dca1bd50da5d9b1 (patch)
tree bff0987817a6d4cab358efac3ebce2aba2c72962
parent 929173ab42fa9482455c8d51ed64326a3d983e41 (diff)
spa_min_alloc should be GCD, not min
Since spa_min_alloc, unlike the ashifts, may not be a power of 2 (as in the case of DRAID), we should not select the minimal value among several vdevs: rounding to a multiple of it is unlikely to work for the other vdevs. Instead, using the greatest common divisor produces smaller yet more reasonable results.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Alexander Motin <[email protected]>
Signed-off-by: Ameer Hamza <[email protected]>
Closes #15067
-rw-r--r-- include/sys/spa_impl.h 1
-rw-r--r-- module/zfs/spa_misc.c 1
-rw-r--r-- module/zfs/vdev.c 36
-rw-r--r-- module/zfs/zio.c 22
4 files changed, 51 insertions, 9 deletions
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 44afa7632..588c72f6e 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -250,6 +250,7 @@ struct spa {
uint64_t spa_min_ashift; /* of vdevs in normal class */
uint64_t spa_max_ashift; /* of vdevs in normal class */
uint64_t spa_min_alloc; /* of vdevs in normal class */
+ uint64_t spa_gcd_alloc; /* of vdevs in normal class */
uint64_t spa_config_guid; /* config pool guid */
uint64_t spa_load_guid; /* spa_load initialized guid */
uint64_t spa_last_synced_guid; /* last synced guid */
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 06f640769..3b355e0de 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0;
spa->spa_min_alloc = INT_MAX;
+ spa->spa_gcd_alloc = INT_MAX;
/* Reset cached value */
spa->spa_dedup_dspace = ~0ULL;
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index b6f8c0ab3..f3812b843 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1399,6 +1399,36 @@ vdev_remove_parent(vdev_t *cvd)
vdev_free(mvd);
}
+/*
+ * Return the greatest common divisor of a and b (used for spa_gcd_alloc).
+ */
+static uint64_t
+vdev_gcd(uint64_t a, uint64_t b)
+{
+ while (b != 0) {
+ uint64_t t = b;
+ b = a % b;
+ a = t;
+ }
+ return (a);
+}
+
+/*
+ * Fold min_alloc into spa_min_alloc (minimum) and spa_gcd_alloc (GCD).
+ */
+static void
+vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
+{
+ if (min_alloc < spa->spa_min_alloc)
+ spa->spa_min_alloc = min_alloc;
+ if (spa->spa_gcd_alloc == INT_MAX) {
+ spa->spa_gcd_alloc = min_alloc;
+ } else {
+ spa->spa_gcd_alloc = vdev_gcd(min_alloc,
+ spa->spa_gcd_alloc);
+ }
+}
+
void
vdev_metaslab_group_create(vdev_t *vd)
{
@@ -1451,8 +1481,7 @@ vdev_metaslab_group_create(vdev_t *vd)
spa->spa_min_ashift = vd->vdev_ashift;
uint64_t min_alloc = vdev_get_min_alloc(vd);
- if (min_alloc < spa->spa_min_alloc)
- spa->spa_min_alloc = min_alloc;
+ vdev_spa_set_alloc(spa, min_alloc);
}
}
}
@@ -2213,8 +2242,7 @@ vdev_open(vdev_t *vd)
if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
uint64_t min_alloc = vdev_get_min_alloc(vd);
- if (min_alloc < spa->spa_min_alloc)
- spa->spa_min_alloc = min_alloc;
+ vdev_spa_set_alloc(spa, min_alloc);
}
/*
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 10279fde8..3f5e6a08d 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -1597,6 +1597,19 @@ zio_shrink(zio_t *zio, uint64_t size)
}
/*
+ * Round provided allocation size up to a value that can be allocated
+ * by at least some vdev(s) in the pool with minimum or no additional
+ * padding and without extra space usage on others.
+ */
+static uint64_t
+zio_roundup_alloc_size(spa_t *spa, uint64_t size)
+{
+ if (size > spa->spa_min_alloc)
+ return (roundup(size, spa->spa_gcd_alloc));
+ return (spa->spa_min_alloc);
+}
+
+/*
* ==========================================================================
* Prepare to read and write logical blocks
* ==========================================================================
@@ -1802,9 +1815,8 @@ zio_write_compress(zio_t *zio)
* in that we charge for the padding used to fill out
* the last sector.
*/
- ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
- size_t rounded = (size_t)roundup(psize,
- spa->spa_min_alloc);
+ size_t rounded = (size_t)zio_roundup_alloc_size(spa,
+ psize);
if (rounded >= lsize) {
compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize);
@@ -1847,8 +1859,8 @@ zio_write_compress(zio_t *zio)
* take this codepath because it will change the on-disk block
* and decryption will fail.
*/
- size_t rounded = MIN((size_t)roundup(psize,
- spa->spa_min_alloc), lsize);
+ size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
+ lsize);
if (rounded != psize) {
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);