about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2023-11-27 13:49:20 -0800
committer Brian Behlendorf <[email protected]> 2023-11-28 09:03:58 -0800
commit 89fcb8c6f969cbfa237849d9b33610124d361186 (patch)
tree 7cec349ed85eb6897d575e8aa65deb42bf2cb476
parent 55dd24c4ccee2da61d5396289ef560f9b7bc6a68 (diff)
Revert "Tune zio buffer caches and their alignments"
This reverts commit bd7a02c251d8c119937e847d5161b512913667e6 which can trigger an unlikely existing bio alignment issue on Linux. This change is good, but the underlying issue it exposes needs to be resolved before this can be re-applied.

Signed-off-by: Brian Behlendorf <[email protected]>
Issue #15533
-rw-r--r-- module/zfs/zio.c 89
1 files changed, 50 insertions, 39 deletions
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index a719e5492..3b3b40fa7 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -158,22 +158,23 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ /*
+ * For small buffers, we want a cache for each multiple of
+ * SPA_MINBLOCKSIZE. For larger buffers, we want a cache
+ * for each quarter-power of 2.
+ */
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
- size_t align, cflags, data_cflags;
- char name[32];
-
- /*
- * Create cache for each half-power of 2 size, starting from
- * SPA_MINBLOCKSIZE. It should give us memory space efficiency
- * of ~7/8, sufficient for transient allocations mostly using
- * these caches.
- */
size_t p2 = size;
+ size_t align = 0;
+ size_t data_cflags, cflags;
+
+ data_cflags = KMC_NODEBUG;
+ cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
+ KMC_NODEBUG : 0;
+
while (!ISP2(p2))
p2 &= p2 - 1;
- if (!IS_P2ALIGNED(size, p2 / 2))
- continue;
#ifndef _KERNEL
/*
@@ -184,37 +185,47 @@ zio_init(void)
*/
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue;
-#endif
-
- if (IS_P2ALIGNED(size, PAGESIZE))
+ /*
+ * Here's the problem - on 4K native devices in userland on
+ * Linux using O_DIRECT, buffers must be 4K aligned or I/O
+ * will fail with EINVAL, causing zdb (and others) to coredump.
+ * Since userland probably doesn't need optimized buffer caches,
+ * we just force 4K alignment on everything.
+ */
+ align = 8 * SPA_MINBLOCKSIZE;
+#else
+ if (size < PAGESIZE) {
+ align = SPA_MINBLOCKSIZE;
+ } else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = PAGESIZE;
- else
- align = 1 << (highbit64(size ^ (size - 1)) - 1);
+ }
+#endif
- cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
- KMC_NODEBUG : 0;
- data_cflags = KMC_NODEBUG;
- if (cflags == data_cflags) {
- /*
- * Resulting kmem caches would be identical.
- * Save memory by creating only one.
- */
- (void) snprintf(name, sizeof (name),
- "zio_buf_comb_%lu", (ulong_t)size);
- zio_buf_cache[c] = kmem_cache_create(name, size, align,
- NULL, NULL, NULL, NULL, NULL, cflags);
- zio_data_buf_cache[c] = zio_buf_cache[c];
- continue;
+ if (align != 0) {
+ char name[36];
+ if (cflags == data_cflags) {
+ /*
+ * Resulting kmem caches would be identical.
+ * Save memory by creating only one.
+ */
+ (void) snprintf(name, sizeof (name),
+ "zio_buf_comb_%lu", (ulong_t)size);
+ zio_buf_cache[c] = kmem_cache_create(name,
+ size, align, NULL, NULL, NULL, NULL, NULL,
+ cflags);
+ zio_data_buf_cache[c] = zio_buf_cache[c];
+ continue;
+ }
+ (void) snprintf(name, sizeof (name), "zio_buf_%lu",
+ (ulong_t)size);
+ zio_buf_cache[c] = kmem_cache_create(name, size,
+ align, NULL, NULL, NULL, NULL, NULL, cflags);
+
+ (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
+ (ulong_t)size);
+ zio_data_buf_cache[c] = kmem_cache_create(name, size,
+ align, NULL, NULL, NULL, NULL, NULL, data_cflags);
}
- (void) snprintf(name, sizeof (name), "zio_buf_%lu",
- (ulong_t)size);
- zio_buf_cache[c] = kmem_cache_create(name, size, align,
- NULL, NULL, NULL, NULL, NULL, cflags);
-
- (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
- (ulong_t)size);
- zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
- NULL, NULL, NULL, NULL, NULL, data_cflags);
}
while (--c != 0) {