diff options
author | Rich Ercolani <[email protected]> | 2021-08-30 17:13:46 -0400 |
---|---|---|
committer | Tony Hutter <[email protected]> | 2021-09-14 15:05:55 -0700 |
commit | 72a989cf60b4c7b9a46dc5854c0bd2561ce6b576 (patch) | |
tree | 8133d7ee2002161fe3f3c20c7020d3d7c5b15e5c | |
parent | 6bb6410570e6aeb19ae82d9287922927432f8c74 (diff) |
Fix cross-endian interoperability of zstd
It turns out that layouts of union bitfields are a pain, and the
current code results in an inconsistent layout between BE and LE
systems, leading to zstd-active datasets on one erroring out on
the other.
Switch everyone over to the LE layout, and add compatibility code
to read both.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matthew Ahrens <[email protected]>
Signed-off-by: Rich Ercolani <[email protected]>
Closes #12008
Closes #12022
-rw-r--r-- | cmd/zdb/zdb.c | 6 | ||||
-rw-r--r-- | include/sys/zstd/zstd.h | 148 | ||||
-rw-r--r-- | module/zstd/Makefile.in | 1 | ||||
-rw-r--r-- | module/zstd/include/sparc_compat.h | 4 | ||||
-rw-r--r-- | module/zstd/zfs_zstd.c | 14 | ||||
-rw-r--r-- | module/zstd/zstd_sparc.c | 11 |
6 files changed, 165 insertions, 19 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index c548936bc..e964e3ba8 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2218,7 +2218,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), " ZSTD:size=%u:version=%u:level=%u:EMBEDDED", - zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level); + zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), + zfs_get_hdrlevel(&zstd_hdr)); return; } @@ -2242,7 +2243,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen, (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), " ZSTD:size=%u:version=%u:level=%u:NORMAL", - zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level); + zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr), + zfs_get_hdrlevel(&zstd_hdr)); abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp)); } diff --git a/include/sys/zstd/zstd.h b/include/sys/zstd/zstd.h index 8fe4a2482..e87dda1b1 100644 --- a/include/sys/zstd/zstd.h +++ b/include/sys/zstd/zstd.h @@ -56,22 +56,25 @@ typedef struct zfs_zstd_header { /* * Version and compression level - * We use a union to be able to big endian encode a single 32 bit - * unsigned integer, but still access the individual bitmasked - * components easily. + * We used to use a union to reference compression level + * and version easily, but as it turns out, relying on the + * ordering of bitfields is not remotely portable. + * So now we have get/set functions in zfs_zstd.c for + * manipulating this in just the right way forever. */ - union { - uint32_t raw_version_level; - struct { - uint32_t version : 24; - uint8_t level; - }; - }; - + uint32_t raw_version_level; char data[]; } zfs_zstdhdr_t; /* + * Simple struct to pass the data from raw_version_level around. + */ +typedef struct zfs_zstd_meta { + uint8_t level; + uint32_t version; +} zfs_zstdmeta_t; + +/* * kstat helper macros */ #define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64) @@ -94,6 +97,129 @@ int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n); void zfs_zstd_cache_reap_now(void); +/* + * So, the reason we have all these complicated set/get functions is that + * originally, in the zstd "header" we wrote out to disk, we used a 32-bit + * bitfield to store the "level" (8 bits) and "version" (24 bits). + * + * Unfortunately, bitfields make few promises about how they're arranged in + * memory... + * + * By way of example, if we were using version 1.4.5 and level 3, it'd be + * level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00, + * Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow + * which data winds up where. + * + * As a consequence, we wound up with little endian platforms with a layout + * like this in memory: + * + * 0 8 16 24 32 + * +-------+-------+-------+-------+ + * | Vlow | Vmid | Vhigh | level | + * +-------+-------+-------+-------+ + * =A5 =28 =00 =03 + * + * ...and then, after being run through BE_32(), serializing this out to + * disk: + * + * 0 8 16 24 32 + * +-------+-------+-------+-------+ + * | level | Vhigh | Vmid | Vlow | + * +-------+-------+-------+-------+ + * =03 =00 =28 =A5 + * + * while on big-endian systems, since BE_32() is a noop there, both in + * memory and on disk, we wind up with: + * + * 0 8 16 24 32 + * +-------+-------+-------+-------+ + * | Vhigh | Vmid | Vlow | level | + * +-------+-------+-------+-------+ + * =00 =28 =A5 =03 + * + * (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the + * other two bytes of the "version" data.) + * + * So now we use the BF32_SET macros to get consistent behavior (the + * ondisk LE encoding, since x86 currently rules the world) across + * platforms, but the "get" behavior requires that we check each of the + * bytes in the aforementioned former-bitfield for 0x00, and from there, + * we can know which possible layout we're dealing with. (Only the two + * that have been observed in the wild are illustrated above, but handlers + * for all 4 positions of 0x00 are implemented. + */ + +static inline void +zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res) +{ + uint32_t raw = blob->raw_version_level; + uint8_t findme = 0xff; + int shift; + for (shift = 0; shift < 4; shift++) { + findme = BF32_GET(raw, 8*shift, 8); + if (findme == 0) + break; + } + switch (shift) { + case 0: + res->level = BF32_GET(raw, 24, 8); + res->version = BSWAP_32(raw); + res->version = BF32_GET(res->version, 8, 24); + break; + case 1: + res->level = BF32_GET(raw, 0, 8); + res->version = BSWAP_32(raw); + res->version = BF32_GET(res->version, 0, 24); + break; + case 2: + res->level = BF32_GET(raw, 24, 8); + res->version = BF32_GET(raw, 0, 24); + break; + case 3: + res->level = BF32_GET(raw, 0, 8); + res->version = BF32_GET(raw, 8, 24); + break; + default: + res->level = 0; + res->version = 0; + break; + } +} + +static inline uint8_t +zfs_get_hdrlevel(const zfs_zstdhdr_t *blob) +{ + uint8_t level = 0; + zfs_zstdmeta_t res; + zfs_get_hdrmeta(blob, &res); + level = res.level; + return (level); +} + +static inline uint32_t +zfs_get_hdrversion(const zfs_zstdhdr_t *blob) +{ + uint32_t version = 0; + zfs_zstdmeta_t res; + zfs_get_hdrmeta(blob, &res); + version = res.version; + return (version); + +} + +static inline void +zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version) +{ + BF32_SET(blob->raw_version_level, 0, 24, version); +} + +static inline void +zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level) +{ + BF32_SET(blob->raw_version_level, 24, 8, level); +} + + #ifdef __cplusplus } #endif diff --git a/module/zstd/Makefile.in b/module/zstd/Makefile.in index f67db710f..091f7cea3 100644 --- a/module/zstd/Makefile.in +++ b/module/zstd/Makefile.in @@ -33,6 +33,7 @@ $(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h $(MODULE)-objs += zfs_zstd.o $(MODULE)-objs += lib/zstd.o +$(MODULE)-objs += zstd_sparc.o all: mkdir -p lib diff --git a/module/zstd/include/sparc_compat.h b/module/zstd/include/sparc_compat.h new file mode 100644 index 000000000..14c1bdde9 --- /dev/null +++ b/module/zstd/include/sparc_compat.h @@ -0,0 +1,4 @@ +#if defined(__sparc) +uint64_t __bswapdi2(uint64_t in); +uint32_t __bswapsi2(uint32_t in); +#endif diff --git a/module/zstd/zfs_zstd.c b/module/zstd/zfs_zstd.c index a91ed1be9..2c698716c 100644 --- a/module/zstd/zfs_zstd.c +++ b/module/zstd/zfs_zstd.c @@ -380,6 +380,7 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level) return (1); } + /* Compress block using zstd */ size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, @@ -477,8 +478,8 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, * As soon as such incompatibility occurs, handling code needs to be * added, differentiating between the versions. */ - hdr->version = ZSTD_VERSION_NUMBER; - hdr->level = level; + zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER); + zfs_set_hdrlevel(hdr, level); hdr->raw_version_level = BE_32(hdr->raw_version_level); return (c_len + sizeof (*hdr)); @@ -504,6 +505,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, * not modify the original data that may be used again later. */ hdr_copy.raw_version_level = BE_32(hdr->raw_version_level); + uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy); /* * NOTE: We ignore the ZSTD version for now. As soon as any @@ -516,13 +518,13 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, * An invalid level is a strong indicator for data corruption! In such * case return an error so the upper layers can try to fix it. */ - if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) { + if (zstd_enum_to_level(curlevel, &zstd_level)) { ZSTDSTAT_BUMP(zstd_stat_dec_inval); return (1); } ASSERT3U(d_len, >=, s_len); - ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT); + ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT); /* Invalid compressed buffer size encoded at start */ if (c_len + sizeof (*hdr) > s_len) { @@ -553,7 +555,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len, } if (level) { - *level = hdr_copy.level; + *level = curlevel; } return (0); @@ -783,7 +785,7 @@ module_exit(zstd_fini); ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS"); ZFS_MODULE_LICENSE("Dual BSD/GPL"); -ZFS_MODULE_VERSION(ZSTD_VERSION_STRING); +ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a"); EXPORT_SYMBOL(zfs_zstd_compress); EXPORT_SYMBOL(zfs_zstd_decompress_level); diff --git a/module/zstd/zstd_sparc.c b/module/zstd/zstd_sparc.c new file mode 100644 index 000000000..463df99bd --- /dev/null +++ b/module/zstd/zstd_sparc.c @@ -0,0 +1,11 @@ +#ifdef __sparc__ +#include <stdint.h> +#include <sys/byteorder.h> +#include "include/sparc_compat.h" +uint64_t __bswapdi2(uint64_t in) { + return (BSWAP_64(in)); +} +uint32_t __bswapsi2(uint32_t in) { + return (BSWAP_32(in)); +} +#endif |