summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r-- module/Kbuild.in 1
-rw-r--r-- module/Makefile.bsd 12
-rw-r--r-- module/Makefile.in 2
-rw-r--r-- module/zcommon/zfeature_common.c 13
-rw-r--r-- module/zcommon/zfs_prop.c 89
-rw-r--r-- module/zfs/arc.c 76
-rw-r--r-- module/zfs/blkptr.c 2
-rw-r--r-- module/zfs/dbuf.c 8
-rw-r--r-- module/zfs/dmu.c 6
-rw-r--r-- module/zfs/dmu_objset.c 9
-rw-r--r-- module/zfs/dmu_recv.c 20
-rw-r--r-- module/zfs/dmu_send.c 22
-rw-r--r-- module/zfs/dsl_dataset.c 85
-rw-r--r-- module/zfs/spa_misc.c 1
-rw-r--r-- module/zfs/zfs_ioctl.c 36
-rw-r--r-- module/zfs/zio.c 18
-rw-r--r-- module/zfs/zio_compress.c 95
-rw-r--r-- module/zstd/Makefile.in 33
-rw-r--r-- module/zstd/README.md 60
-rw-r--r-- module/zstd/include/aarch64_compat.h 37
-rw-r--r-- module/zstd/include/limits.h 63
-rw-r--r-- module/zstd/include/stddef.h 62
-rw-r--r-- module/zstd/include/stdint.h 62
-rw-r--r-- module/zstd/include/stdio.h 54
-rw-r--r-- module/zstd/include/stdlib.h 58
-rw-r--r-- module/zstd/include/string.h 62
-rw-r--r-- module/zstd/zfs_zstd.c 737
27 files changed, 1652 insertions, 71 deletions
diff --git a/module/Kbuild.in b/module/Kbuild.in
index 031b5a9a8..1507965c5 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -9,6 +9,7 @@ ZFS_MODULES += nvpair/
ZFS_MODULES += unicode/
ZFS_MODULES += zcommon/
ZFS_MODULES += zfs/
+ZFS_MODULES += zstd/
# The rest is only relevant when run by kbuild
ifneq ($(KERNELRELEASE),)
diff --git a/module/Makefile.bsd b/module/Makefile.bsd
index c6ace9fb5..d0b4a5bd6 100644
--- a/module/Makefile.bsd
+++ b/module/Makefile.bsd
@@ -16,7 +16,10 @@ KMOD= openzfs
${SRCDIR}/os/freebsd/zfs \
${SRCDIR}/unicode \
${SRCDIR}/zcommon \
- ${SRCDIR}/zfs
+ ${SRCDIR}/zfs \
+ ${SRCDIR}/zstd \
+ ${SRCDIR}/zstd/lib
+
CFLAGS+= -I${.OBJDIR:H}/include
@@ -25,6 +28,7 @@ CFLAGS+= -I${INCDIR}/spl
CFLAGS+= -I${INCDIR}/os/freebsd
CFLAGS+= -I${INCDIR}/os/freebsd/spl
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
+CFLAGS+= -I${SRCDIR}/zstd/include
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
@@ -292,6 +296,10 @@ SRCS+= abd.c \
zthr.c \
zvol.c
+#zstd
+SRCS+= zfs_zstd.c \
+ zstd.c
+
beforeinstall:
.if ${MK_DEBUG_FILES} != "no"
mtree -eu \
@@ -347,3 +355,5 @@ CFLAGS.zfs_ioctl.c= -Wno-cast-qual
CFLAGS.zil.c= -Wno-cast-qual
CFLAGS.zio.c= -Wno-cast-qual
CFLAGS.zrlock.c= -Wno-cast-qual
+CFLAGS.zfs_zstd.c= -Wno-cast-qual -Wno-pointer-arith
+CFLAGS.zstd.c= -fno-tree-vectorize
diff --git a/module/Makefile.in b/module/Makefile.in
index 59b485d55..ead4ff136 100644
--- a/module/Makefile.in
+++ b/module/Makefile.in
@@ -2,7 +2,7 @@ include Kbuild
INSTALL_MOD_DIR ?= extra
-SUBDIR_TARGETS = icp lua
+SUBDIR_TARGETS = icp lua zstd
all: modules
distclean maintainer-clean: clean
diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c
index ed7967dc1..97ddacbab 100644
--- a/module/zcommon/zfeature_common.c
+++ b/module/zcommon/zfeature_common.c
@@ -25,6 +25,8 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#ifndef _KERNEL
@@ -576,6 +578,17 @@ zpool_feature_init(void)
"org.openzfs:device_rebuild", "device_rebuild",
"Support for sequential device rebuilds",
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
+
+ {
+ static const spa_feature_t zstd_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_ZSTD_COMPRESS,
+ "org.freebsd:zstd_compress", "zstd_compress",
+ "zstd compression algorithm support.",
+ ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN, zstd_deps);
+ }
}
#if defined(_KERNEL)
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 3a005b687..272e0e93c 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -23,6 +23,8 @@
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright 2016, Joyent, Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -125,6 +127,87 @@ zfs_prop_init(void)
{ "gzip-9", ZIO_COMPRESS_GZIP_9 },
{ "zle", ZIO_COMPRESS_ZLE },
{ "lz4", ZIO_COMPRESS_LZ4 },
+ { "zstd", ZIO_COMPRESS_ZSTD },
+ { "zstd-fast",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_DEFAULT) },
+
+ /*
+ * ZSTD 1-19 are synthetic. We store the compression level in a
+ * separate hidden property to avoid wasting a large amount of
+ * space in the ZIO_COMPRESS enum.
+ *
+ * The compression level is also stored within the header of the
+ * compressed block since we may need it for later recompression
+ * to avoid checksum errors (L2ARC).
+ *
+ * Note that the level here is defined as a bit-shifted mask
+ * on top of the method.
+ */
+ { "zstd-1", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_1) },
+ { "zstd-2", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_2) },
+ { "zstd-3", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_3) },
+ { "zstd-4", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_4) },
+ { "zstd-5", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_5) },
+ { "zstd-6", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_6) },
+ { "zstd-7", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_7) },
+ { "zstd-8", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_8) },
+ { "zstd-9", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_9) },
+ { "zstd-10", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_10) },
+ { "zstd-11", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_11) },
+ { "zstd-12", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_12) },
+ { "zstd-13", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_13) },
+ { "zstd-14", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_14) },
+ { "zstd-15", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_15) },
+ { "zstd-16", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_16) },
+ { "zstd-17", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_17) },
+ { "zstd-18", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_18) },
+ { "zstd-19", ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_19) },
+
+ /*
+ * The ZSTD-Fast levels are also synthetic.
+ */
+ { "zstd-fast-1",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1) },
+ { "zstd-fast-2",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_2) },
+ { "zstd-fast-3",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_3) },
+ { "zstd-fast-4",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_4) },
+ { "zstd-fast-5",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_5) },
+ { "zstd-fast-6",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_6) },
+ { "zstd-fast-7",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_7) },
+ { "zstd-fast-8",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_8) },
+ { "zstd-fast-9",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_9) },
+ { "zstd-fast-10",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_10) },
+ { "zstd-fast-20",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_20) },
+ { "zstd-fast-30",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_30) },
+ { "zstd-fast-40",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_40) },
+ { "zstd-fast-50",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_50) },
+ { "zstd-fast-60",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_60) },
+ { "zstd-fast-70",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_70) },
+ { "zstd-fast-80",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_80) },
+ { "zstd-fast-90",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_90) },
+ { "zstd-fast-100",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_100) },
+ { "zstd-fast-500",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_500) },
+ { "zstd-fast-1000",
+ ZIO_COMPLEVEL_ZSTD(ZIO_ZSTD_LEVEL_FAST_1000) },
{ NULL }
};
@@ -330,8 +413,10 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4", "COMPRESS",
- compress_table);
+ "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4 | "
+ "zstd | zstd-[1-19] | "
+ "zstd-fast-[1-10,20,30,40,50,60,70,80,90,100,500,1000]",
+ "COMPRESS", compress_table);
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"hidden | visible", "SNAPDIR", snapdir_table);
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 0512497d5..ff2621194 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -26,6 +26,8 @@
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2019, loli10K <[email protected]>. All rights reserved.
* Copyright (c) 2020, George Amanakis. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
* [1] Portions of this software were developed by Allan Jude
@@ -1362,6 +1364,12 @@ arc_hdr_get_compress(arc_buf_hdr_t *hdr)
HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF);
}
+/*
+ * Return the compression level recorded in the ARC buffer header that
+ * backs buf (the header's b_complevel field).
+ */
+uint8_t
+arc_get_complevel(arc_buf_t *buf)
+{
+ return (buf->b_hdr->b_complevel);
+}
+
static inline boolean_t
arc_buf_is_shared(arc_buf_t *buf)
{
@@ -1707,7 +1715,8 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
static arc_buf_hdr_t *
arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth,
- enum zio_compress compress, boolean_t protected, boolean_t prefetch)
+ enum zio_compress compress, uint8_t complevel, boolean_t protected,
+ boolean_t prefetch)
{
arc_buf_hdr_t *hdr;
@@ -1720,6 +1729,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
HDR_SET_LSIZE(hdr, size);
HDR_SET_PSIZE(hdr, psize);
arc_hdr_set_compress(hdr, compress);
+ hdr->b_complevel = complevel;
if (protected)
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
if (prefetch)
@@ -1779,9 +1789,8 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
tmpbuf = zio_buf_alloc(lsize);
abd = abd_get_from_buf(tmpbuf, lsize);
abd_take_ownership_of_buf(abd, B_TRUE);
-
csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
- hdr->b_l1hdr.b_pabd, tmpbuf, lsize);
+ hdr->b_l1hdr.b_pabd, tmpbuf, lsize, hdr->b_complevel);
ASSERT3U(csize, <=, psize);
abd_zero_off(abd, csize, psize - csize);
}
@@ -1867,7 +1876,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
- HDR_GET_LSIZE(hdr));
+ HDR_GET_LSIZE(hdr), &hdr->b_complevel);
if (ret != 0) {
abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
goto error;
@@ -2114,7 +2123,8 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
} else {
error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, buf->b_data,
- HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+ HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr),
+ &hdr->b_complevel);
/*
* Absent hardware errors or software bugs, this should
@@ -2865,10 +2875,10 @@ arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size)
arc_buf_t *
arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_t *buf = arc_alloc_compressed_buf(spa, arc_onloan_tag,
- psize, lsize, compression_type);
+ psize, lsize, compression_type, complevel);
arc_loaned_bytes_update(arc_buf_size(buf));
@@ -2879,10 +2889,11 @@ arc_buf_t *
arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj,
- byteorder, salt, iv, mac, ot, psize, lsize, compression_type);
+ byteorder, salt, iv, mac, ot, psize, lsize, compression_type,
+ complevel);
atomic_add_64(&arc_loaned_bytes, psize);
return (buf);
@@ -3249,7 +3260,7 @@ arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
- boolean_t protected, enum zio_compress compression_type,
+ boolean_t protected, enum zio_compress compression_type, uint8_t complevel,
arc_buf_contents_t type, boolean_t alloc_rdata)
{
arc_buf_hdr_t *hdr;
@@ -3272,6 +3283,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_flags = 0;
arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
arc_hdr_set_compress(hdr, compression_type);
+ hdr->b_complevel = complevel;
if (protected)
arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
@@ -3574,7 +3586,7 @@ arc_buf_t *
arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE);
+ B_FALSE, ZIO_COMPRESS_OFF, 0, type, B_FALSE);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
@@ -3590,7 +3602,7 @@ arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
*/
arc_buf_t *
arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
ASSERT3U(lsize, >, 0);
ASSERT3U(lsize, >=, psize);
@@ -3598,7 +3610,7 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE);
+ B_FALSE, compression_type, complevel, ARC_BUFC_DATA, B_FALSE);
arc_buf_t *buf = NULL;
VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
@@ -3624,7 +3636,7 @@ arc_buf_t *
arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
- enum zio_compress compression_type)
+ enum zio_compress compression_type, uint8_t complevel)
{
arc_buf_hdr_t *hdr;
arc_buf_t *buf;
@@ -3637,7 +3649,7 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
- compression_type, type, B_TRUE);
+ compression_type, complevel, type, B_TRUE);
hdr->b_crypt_hdr.b_dsobj = dsobj;
hdr->b_crypt_hdr.b_ot = ot;
@@ -5579,6 +5591,9 @@ arc_read_done(zio_t *zio)
} else {
hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
}
+ if (!HDR_L2_READING(hdr)) {
+ hdr->b_complevel = zio->io_prop.zp_complevel;
+ }
}
arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
@@ -5982,7 +5997,7 @@ top:
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type,
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), 0, type,
encrypted_read);
if (!embedded_bp) {
@@ -6549,7 +6564,7 @@ arc_release(arc_buf_t *buf, void *tag)
* buffer which will be freed in arc_write().
*/
nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
- compress, type, HDR_HAS_RABD(hdr));
+ compress, hdr->b_complevel, type, HDR_HAS_RABD(hdr));
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -6713,6 +6728,7 @@ arc_write_ready(zio_t *zio)
}
HDR_SET_PSIZE(hdr, psize);
arc_hdr_set_compress(hdr, compress);
+ hdr->b_complevel = zio->io_prop.zp_complevel;
if (zio->io_error != 0 || psize == 0)
goto out;
@@ -6902,6 +6918,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT(ARC_BUF_COMPRESSED(buf));
localprop.zp_encrypt = B_TRUE;
localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ localprop.zp_complevel = hdr->b_complevel;
localprop.zp_byteorder =
(hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
@@ -6920,6 +6937,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
} else if (ARC_BUF_COMPRESSED(buf)) {
ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ localprop.zp_complevel = hdr->b_complevel;
zio_flags |= ZIO_FLAG_RAW_COMPRESS;
}
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
@@ -8252,7 +8270,7 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
- HDR_GET_LSIZE(hdr));
+ HDR_GET_LSIZE(hdr), &hdr->b_complevel);
if (ret != 0) {
abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
@@ -8351,6 +8369,7 @@ l2arc_read_done(zio_t *zio)
(HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
+ zio->io_prop.zp_complevel = hdr->b_complevel;
valid_cksum = arc_cksum_is_equal(hdr, zio);
@@ -8763,7 +8782,18 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
cabd = abd_alloc_for_io(asize, ismd);
tmp = abd_borrow_buf(cabd, asize);
- psize = zio_compress_data(compress, to_write, tmp, size);
+ psize = zio_compress_data(compress, to_write, tmp, size,
+ hdr->b_complevel);
+
+ if (psize >= size) {
+ abd_return_buf(cabd, tmp, asize);
+ HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+ to_write = cabd;
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+ if (size != asize)
+ abd_zero_off(to_write, size, asize - size);
+ goto encrypt;
+ }
ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
if (psize < asize)
bzero((char *)tmp + psize, asize - psize);
@@ -8772,6 +8802,7 @@ l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
to_write = cabd;
}
+encrypt:
if (HDR_ENCRYPTED(hdr)) {
eabd = abd_alloc_for_io(asize, ismd);
@@ -9922,7 +9953,7 @@ l2arc_log_blk_read(l2arc_dev_t *dev,
abd_copy_from_buf_off(abd, this_lb, 0, asize);
if ((err = zio_decompress_data(
L2BLK_GET_COMPRESS((this_lbp)->lbp_prop),
- abd, this_lb, asize, sizeof (*this_lb))) != 0) {
+ abd, this_lb, asize, sizeof (*this_lb), NULL)) != 0) {
err = SET_ERROR(EINVAL);
goto cleanup;
}
@@ -10021,7 +10052,7 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
hdr = arc_buf_alloc_l2only(L2BLK_GET_LSIZE((le)->le_prop), type,
dev, le->le_dva, le->le_daddr,
L2BLK_GET_PSIZE((le)->le_prop), le->le_birth,
- L2BLK_GET_COMPRESS((le)->le_prop),
+ L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel,
L2BLK_GET_PROTECTED((le)->le_prop),
L2BLK_GET_PREFETCH((le)->le_prop));
asize = vdev_psize_to_asize(dev->l2ad_vdev,
@@ -10197,7 +10228,7 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
/* try to compress the buffer */
psize = zio_compress_data(ZIO_COMPRESS_LZ4,
- abd_buf->abd, tmpbuf, sizeof (*lb));
+ abd_buf->abd, tmpbuf, sizeof (*lb), 0);
/* a log block is never entirely zero */
ASSERT(psize != 0);
@@ -10354,6 +10385,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
L2BLK_SET_LSIZE((le)->le_prop, HDR_GET_LSIZE(hdr));
L2BLK_SET_PSIZE((le)->le_prop, HDR_GET_PSIZE(hdr));
L2BLK_SET_COMPRESS((le)->le_prop, HDR_GET_COMPRESS(hdr));
+ le->le_complevel = hdr->b_complevel;
L2BLK_SET_TYPE((le)->le_prop, hdr->b_type);
L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr)));
L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr)));
diff --git a/module/zfs/blkptr.c b/module/zfs/blkptr.c
index 73600e4ab..aa09ded8d 100644
--- a/module/zfs/blkptr.c
+++ b/module/zfs/blkptr.c
@@ -143,7 +143,7 @@ decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
uint8_t dstbuf[BPE_PAYLOAD_SIZE];
decode_embedded_bp_compressed(bp, dstbuf);
VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
- dstbuf, buf, psize, buflen));
+ dstbuf, buf, psize, buflen, NULL));
} else {
ASSERT3U(lsize, ==, psize);
decode_embedded_bp_compressed(bp, buf);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 83b2c3721..2de1f4e4c 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -24,6 +24,8 @@
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/zfs_context.h>
@@ -1095,11 +1097,13 @@ dbuf_alloc_arcbuf_from_arcbuf(dmu_buf_impl_t *db, arc_buf_t *data)
spa_t *spa = os->os_spa;
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
enum zio_compress compress_type;
+ uint8_t complevel;
int psize, lsize;
psize = arc_buf_size(data);
lsize = arc_buf_lsize(data);
compress_type = arc_get_compression(data);
+ complevel = arc_get_complevel(data);
if (arc_is_encrypted(data)) {
boolean_t byteorder;
@@ -1111,11 +1115,11 @@ dbuf_alloc_arcbuf_from_arcbuf(dmu_buf_impl_t *db, arc_buf_t *data)
arc_get_raw_params(data, &byteorder, salt, iv, mac);
data = arc_alloc_raw_buf(spa, db, dmu_objset_id(os),
byteorder, salt, iv, mac, dn->dn_type, psize, lsize,
- compress_type);
+ compress_type, complevel);
} else if (compress_type != ZIO_COMPRESS_OFF) {
ASSERT3U(type, ==, ARC_BUFC_DATA);
data = arc_alloc_compressed_buf(spa, db,
- psize, lsize, compress_type);
+ psize, lsize, compress_type, complevel);
} else {
data = arc_alloc_buf(spa, db, type, psize);
}
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 5cc7bfe11..06d6df618 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -26,6 +26,8 @@
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 Datto Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/dmu.h>
@@ -2067,6 +2069,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
(wp & WP_SPILL));
enum zio_checksum checksum = os->os_checksum;
enum zio_compress compress = os->os_compress;
+ uint8_t complevel = os->os_complevel;
enum zio_checksum dedup_checksum = os->os_dedup_checksum;
boolean_t dedup = B_FALSE;
boolean_t nopwrite = B_FALSE;
@@ -2123,6 +2126,8 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
} else {
compress = zio_compress_select(os->os_spa, dn->dn_compress,
compress);
+ complevel = zio_complevel_select(os->os_spa, compress,
+ complevel, complevel);
checksum = (dedup_checksum == ZIO_CHECKSUM_OFF) ?
zio_checksum_select(dn->dn_checksum, checksum) :
@@ -2181,6 +2186,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
}
zp->zp_compress = compress;
+ zp->zp_complevel = complevel;
zp->zp_checksum = checksum;
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level;
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index bf488384d..b1590d7db 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -30,6 +30,8 @@
* Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2018, loli10K <[email protected]>. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -192,8 +194,10 @@ compression_changed_cb(void *arg, uint64_t newval)
*/
ASSERT(newval != ZIO_COMPRESS_INHERIT);
- os->os_compress = zio_compress_select(os->os_spa, newval,
- ZIO_COMPRESS_ON);
+ os->os_compress = zio_compress_select(os->os_spa,
+ ZIO_COMPRESS_ALGO(newval), ZIO_COMPRESS_ON);
+ os->os_complevel = zio_complevel_select(os->os_spa, os->os_compress,
+ ZIO_COMPRESS_LEVEL(newval), ZIO_COMPLEVEL_DEFAULT);
}
static void
@@ -580,6 +584,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* It's the meta-objset. */
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
os->os_compress = ZIO_COMPRESS_ON;
+ os->os_complevel = ZIO_COMPLEVEL_DEFAULT;
os->os_encrypted = B_FALSE;
os->os_copies = spa_max_replication(spa);
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index 2f3507914..2eee19a28 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -25,6 +25,8 @@
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
* Copyright (c) 2018, loli10K <[email protected]>. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/dmu.h>
@@ -529,14 +531,18 @@ recv_begin_check_feature_flags_impl(uint64_t featureflags, spa_t *spa)
return (SET_ERROR(ENOTSUP));
/*
- * LZ4 compressed, embedded, mooched, large blocks, and large_dnodes
- * in the stream can only be used if those pool features are enabled
- * because we don't attempt to decompress / un-embed / un-mooch /
- * split up the blocks / dnodes during the receive process.
+ * LZ4 compressed, ZSTD compressed, embedded, mooched, large blocks,
+ * and large_dnodes in the stream can only be used if those pool
+ * features are enabled because we don't attempt to decompress /
+ * un-embed / un-mooch / split up the blocks / dnodes during the
+ * receive process.
*/
if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
!spa_feature_is_enabled(spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_ZSTD) &&
+ !spa_feature_is_enabled(spa, SPA_FEATURE_ZSTD_COMPRESS))
+ return (SET_ERROR(ENOTSUP));
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
!spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA))
return (SET_ERROR(ENOTSUP));
@@ -2457,7 +2463,7 @@ receive_read_record(dmu_recv_cookie_t *drc)
drrw->drr_object, byteorder, drrw->drr_salt,
drrw->drr_iv, drrw->drr_mac, drrw->drr_type,
drrw->drr_compressed_size, drrw->drr_logical_size,
- drrw->drr_compressiontype);
+ drrw->drr_compressiontype, 0);
} else if (DRR_WRITE_COMPRESSED(drrw)) {
ASSERT3U(drrw->drr_compressed_size, >, 0);
ASSERT3U(drrw->drr_logical_size, >=,
@@ -2466,7 +2472,7 @@ receive_read_record(dmu_recv_cookie_t *drc)
abuf = arc_loan_compressed_buf(
dmu_objset_spa(drc->drc_os),
drrw->drr_compressed_size, drrw->drr_logical_size,
- drrw->drr_compressiontype);
+ drrw->drr_compressiontype, 0);
} else {
abuf = arc_loan_buf(dmu_objset_spa(drc->drc_os),
is_meta, drrw->drr_logical_size);
@@ -2541,7 +2547,7 @@ receive_read_record(dmu_recv_cookie_t *drc)
drrs->drr_object, byteorder, drrs->drr_salt,
drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
drrs->drr_compressed_size, drrs->drr_length,
- drrs->drr_compressiontype);
+ drrs->drr_compressiontype, 0);
} else {
abuf = arc_loan_buf(dmu_objset_spa(drc->drc_os),
DMU_OT_IS_METADATA(drrs->drr_type),
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 403e85592..33e99c2e0 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -26,6 +26,8 @@
* Copyright 2014 HybridCluster. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/dmu.h>
@@ -863,6 +865,14 @@ send_do_embed(const blkptr_t *bp, uint64_t featureflags)
return (B_FALSE);
/*
+ * If we have not set the ZSTD feature flag, we can't send ZSTD
+ * compressed embedded blocks, as the receiver may not support them.
+ */
+ if ((BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD &&
+ !(featureflags & DMU_BACKUP_FEATURE_ZSTD)))
+ return (B_FALSE);
+
+ /*
* Embed type must be explicitly enabled.
*/
switch (BPE_GET_ETYPE(bp)) {
@@ -1954,6 +1964,7 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
/* raw send implies compressok */
if (dspp->compressok || dspp->rawok)
*featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
+
if (dspp->rawok && os->os_encrypted)
*featureflags |= DMU_BACKUP_FEATURE_RAW;
@@ -1964,6 +1975,17 @@ setup_featureflags(struct dmu_send_params *dspp, objset_t *os,
*featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
+ /*
+ * We specifically do not include DMU_BACKUP_FEATURE_EMBED_DATA here to
+ * allow sending ZSTD compressed datasets to a receiver that does not
+ * support ZSTD.
+ */
+ if ((*featureflags &
+ (DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_RAW)) != 0 &&
+ dsl_dataset_feature_is_active(to_ds, SPA_FEATURE_ZSTD_COMPRESS)) {
+ *featureflags |= DMU_BACKUP_FEATURE_ZSTD;
+ }
+
if (dspp->resumeobj != 0 || dspp->resumeoff != 0) {
*featureflags |= DMU_BACKUP_FEATURE_RESUMING;
}
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index c5143ac5a..1fcd83db7 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -28,6 +28,12 @@
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
+ * Copyright (c) 2020 The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ * under sponsorship from the FreeBSD Foundation.
*/
#include <sys/dmu_objset.h>
@@ -127,6 +133,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
int64_t delta;
+ spa_feature_t f;
dprintf_bp(bp, "ds=%p", ds);
@@ -156,7 +163,15 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
(void *)B_TRUE;
}
- spa_feature_t f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+
+ f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+ if (f != SPA_FEATURE_NONE) {
+ ASSERT3S(spa_feature_table[f].fi_type, ==,
+ ZFEATURE_TYPE_BOOLEAN);
+ ds->ds_feature_activation[f] = (void *)B_TRUE;
+ }
+
+ f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
if (f != SPA_FEATURE_NONE) {
ASSERT3S(spa_feature_table[f].fi_type, ==,
ZFEATURE_TYPE_BOOLEAN);
@@ -4507,6 +4522,74 @@ dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
+/*
+ * Argument bundle passed to the compression sync task callbacks.
+ *
+ * ddsca_name   - name of the dataset; the sync callback holds it by name.
+ * ddsca_source - property source supplied by the caller; stored here but
+ *                not read by the check/sync callbacks.
+ * ddsca_value  - requested compression value; the callbacks extract the
+ *                algorithm from it with ZIO_COMPRESS_ALGO().
+ */
+typedef struct dsl_dataset_set_compression_arg {
+ const char *ddsca_name;
+ zprop_source_t ddsca_source;
+ uint64_t ddsca_value;
+} dsl_dataset_set_compression_arg_t;
+
+/*
+ * Check callback for the compression sync task.
+ *
+ * Maps the algorithm part of the requested value to a pool feature with
+ * zio_compress_to_feature(). Fails with EINVAL when no feature corresponds
+ * to that algorithm and with ENOTSUP when the feature is not enabled on
+ * the pool; returns 0 otherwise.
+ */
+/* ARGSUSED */
+static int
+dsl_dataset_set_compression_check(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_set_compression_arg_t *ddsca = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+
+ /* Only the algorithm portion of the value selects the feature. */
+ uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);
+ spa_feature_t f = zio_compress_to_feature(compval);
+
+ if (f == SPA_FEATURE_NONE)
+ return (SET_ERROR(EINVAL));
+
+ if (!spa_feature_is_enabled(dp->dp_spa, f))
+ return (SET_ERROR(ENOTSUP));
+
+ return (0);
+}
+
+/*
+ * Sync callback for the compression sync task.
+ *
+ * Holds the dataset named in the argument and, if the feature matching the
+ * requested algorithm is not yet active on it, activates the feature and
+ * copies the activation value into ds_feature[]. The hold is released
+ * before returning.
+ */
+static void
+dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_set_compression_arg_t *ddsca = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dataset_t *ds = NULL;
+
+ /* Only the algorithm portion of the value selects the feature. */
+ uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);
+ spa_feature_t f = zio_compress_to_feature(compval);
+ ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN);
+
+ VERIFY0(dsl_dataset_hold(dp, ddsca->ddsca_name, FTAG, &ds));
+ /* Skip the activation when the feature is already active. */
+ if (zfeature_active(f, ds->ds_feature[f]) != B_TRUE) {
+ ds->ds_feature_activation[f] = (void *)B_TRUE;
+ dsl_dataset_activate_feature(ds->ds_object, f,
+ ds->ds_feature_activation[f], tx);
+ ds->ds_feature[f] = ds->ds_feature_activation[f];
+ }
+ dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Activate the feature required by the requested compression value on
+ * dataset dsname, by dispatching a sync task.
+ *
+ * Values whose algorithm is not ZIO_COMPRESS_ZSTD return 0 immediately
+ * without dispatching anything. Otherwise the result of dsl_sync_task()
+ * is returned.
+ */
+int
+dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
+ uint64_t compression)
+{
+ dsl_dataset_set_compression_arg_t ddsca;
+
+ /*
+ * The sync task is only required for zstd in order to activate
+ * the feature flag when the property is first set.
+ */
+ if (ZIO_COMPRESS_ALGO(compression) != ZIO_COMPRESS_ZSTD)
+ return (0);
+
+ ddsca.ddsca_name = dsname;
+ ddsca.ddsca_source = source;
+ ddsca.ddsca_value = compression;
+
+ return (dsl_sync_task(dsname, dsl_dataset_set_compression_check,
+ dsl_dataset_set_compression_sync, &ddsca, 0,
+ ZFS_SPACE_CHECK_EXTRA_RESERVED));
+}
+
/*
* Return (in *usedp) the amount of space referenced by "new" that was not
* referenced at the time the bookmark corresponds to. "New" may be a
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 4c884409a..41f0ddbde 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -62,6 +62,7 @@
#include <sys/btree.h>
#include <sys/zfeature.h>
#include <sys/qat.h>
+#include <sys/zstd/zstd.h>
/*
* SPA locking
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 463704c14..7f623bb04 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -38,6 +38,8 @@
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
/*
@@ -2464,6 +2466,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
case ZFS_PROP_REFRESERVATION:
err = dsl_dataset_set_refreservation(dsname, source, intval);
break;
+ case ZFS_PROP_COMPRESSION:
+ err = dsl_dataset_set_compression(dsname, source, intval);
+ /*
+ * Set err to -1 to force the zfs_set_prop_nvlist code down the
+ * default path to set the value in the nvlist.
+ */
+ if (err == 0)
+ err = -1;
+ break;
case ZFS_PROP_VOLSIZE:
err = zvol_set_volsize(dsname, intval);
break;
@@ -4355,7 +4366,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
const char *propname = nvpair_name(pair);
boolean_t issnap = (strchr(dsname, '@') != NULL);
zfs_prop_t prop = zfs_name_to_prop(propname);
- uint64_t intval;
+ uint64_t intval, compval;
int err;
if (prop == ZPROP_INVAL) {
@@ -4437,19 +4448,20 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
* we'll catch them later.
*/
if (nvpair_value_uint64(pair, &intval) == 0) {
- if (intval >= ZIO_COMPRESS_GZIP_1 &&
- intval <= ZIO_COMPRESS_GZIP_9 &&
+ compval = ZIO_COMPRESS_ALGO(intval);
+ if (compval >= ZIO_COMPRESS_GZIP_1 &&
+ compval <= ZIO_COMPRESS_GZIP_9 &&
zfs_earlier_version(dsname,
SPA_VERSION_GZIP_COMPRESSION)) {
return (SET_ERROR(ENOTSUP));
}
- if (intval == ZIO_COMPRESS_ZLE &&
+ if (compval == ZIO_COMPRESS_ZLE &&
zfs_earlier_version(dsname,
SPA_VERSION_ZLE_COMPRESSION))
return (SET_ERROR(ENOTSUP));
- if (intval == ZIO_COMPRESS_LZ4) {
+ if (compval == ZIO_COMPRESS_LZ4) {
spa_t *spa;
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
@@ -4462,6 +4474,20 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
}
spa_close(spa, FTAG);
}
+
+ if (compval == ZIO_COMPRESS_ZSTD) {
+ spa_t *spa;
+
+ if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+ return (err);
+
+ if (!spa_feature_is_enabled(spa,
+ SPA_FEATURE_ZSTD_COMPRESS)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+ spa_close(spa, FTAG);
+ }
}
break;
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 93d6b115c..2628cc029 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -23,6 +23,8 @@
* Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/sysmacros.h>
@@ -409,7 +411,8 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
if (zio->io_error == 0) {
void *tmp = abd_borrow_buf(data, size);
int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
- zio->io_abd, tmp, zio->io_size, size);
+ zio->io_abd, tmp, zio->io_size, size,
+ &zio->io_prop.zp_complevel);
abd_return_buf_copy(data, tmp, size);
if (zio_injection_enabled && ret == 0)
@@ -459,7 +462,8 @@ zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
*/
tmp = zio_buf_alloc(lsize);
ret = zio_decompress_data(BP_GET_COMPRESS(bp),
- zio->io_abd, tmp, zio->io_size, lsize);
+ zio->io_abd, tmp, zio->io_size, lsize,
+ &zio->io_prop.zp_complevel);
if (ret != 0) {
ret = SET_ERROR(EIO);
goto error;
@@ -1678,8 +1682,9 @@ zio_write_compress(zio_t *zio)
if (compress != ZIO_COMPRESS_OFF &&
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
void *cbuf = zio_buf_alloc(lsize);
- psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
- if (psize == 0 || psize == lsize) {
+ psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize,
+ zp->zp_complevel);
+ if (psize == 0 || psize >= lsize) {
compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize);
} else if (!zp->zp_dedup && !zp->zp_encrypt &&
@@ -1741,8 +1746,8 @@ zio_write_compress(zio_t *zio)
* to a hole.
*/
psize = zio_compress_data(ZIO_COMPRESS_EMPTY,
- zio->io_abd, NULL, lsize);
- if (psize == 0)
+ zio->io_abd, NULL, lsize, zp->zp_complevel);
+ if (psize == 0 || psize >= lsize)
compress = ZIO_COMPRESS_OFF;
} else {
ASSERT3U(psize, !=, 0);
@@ -2849,6 +2854,7 @@ zio_write_gang_block(zio_t *pio)
zp.zp_checksum = gio->io_prop.zp_checksum;
zp.zp_compress = ZIO_COMPRESS_OFF;
+ zp.zp_complevel = gio->io_prop.zp_complevel;
zp.zp_type = DMU_OT_NONE;
zp.zp_level = 0;
zp.zp_copies = gio->io_prop.zp_copies;
diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c
index 01c51347f..d91e82d9e 100644
--- a/module/zfs/zio_compress.c
+++ b/module/zfs/zio_compress.c
@@ -29,6 +29,8 @@
/*
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2019, Klara Inc.
+ * Copyright (c) 2019, Allan Jude
*/
#include <sys/zfs_context.h>
@@ -36,6 +38,7 @@
#include <sys/zfeature.h>
#include <sys/zio.h>
#include <sys/zio_compress.h>
+#include <sys/zstd/zstd.h>
/*
* If nonzero, every 1/X decompression attempts will fail, simulating
@@ -47,24 +50,42 @@ unsigned long zio_decompress_fail_fraction = 0;
* Compression vectors.
*/
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
- {"inherit", 0, NULL, NULL},
- {"on", 0, NULL, NULL},
- {"uncompressed", 0, NULL, NULL},
- {"lzjb", 0, lzjb_compress, lzjb_decompress},
- {"empty", 0, NULL, NULL},
- {"gzip-1", 1, gzip_compress, gzip_decompress},
- {"gzip-2", 2, gzip_compress, gzip_decompress},
- {"gzip-3", 3, gzip_compress, gzip_decompress},
- {"gzip-4", 4, gzip_compress, gzip_decompress},
- {"gzip-5", 5, gzip_compress, gzip_decompress},
- {"gzip-6", 6, gzip_compress, gzip_decompress},
- {"gzip-7", 7, gzip_compress, gzip_decompress},
- {"gzip-8", 8, gzip_compress, gzip_decompress},
- {"gzip-9", 9, gzip_compress, gzip_decompress},
- {"zle", 64, zle_compress, zle_decompress},
- {"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs}
+ {"inherit", 0, NULL, NULL, NULL},
+ {"on", 0, NULL, NULL, NULL},
+ {"uncompressed", 0, NULL, NULL, NULL},
+ {"lzjb", 0, lzjb_compress, lzjb_decompress, NULL},
+ {"empty", 0, NULL, NULL, NULL},
+ {"gzip-1", 1, gzip_compress, gzip_decompress, NULL},
+ {"gzip-2", 2, gzip_compress, gzip_decompress, NULL},
+ {"gzip-3", 3, gzip_compress, gzip_decompress, NULL},
+ {"gzip-4", 4, gzip_compress, gzip_decompress, NULL},
+ {"gzip-5", 5, gzip_compress, gzip_decompress, NULL},
+ {"gzip-6", 6, gzip_compress, gzip_decompress, NULL},
+ {"gzip-7", 7, gzip_compress, gzip_decompress, NULL},
+ {"gzip-8", 8, gzip_compress, gzip_decompress, NULL},
+ {"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
+ {"zle", 64, zle_compress, zle_decompress, NULL},
+ {"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
+ {"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zstd_compress, zstd_decompress,
+ zstd_decompress_level},
};
+uint8_t
+zio_complevel_select(spa_t *spa, enum zio_compress compress, uint8_t child,
+ uint8_t parent)
+{
+ uint8_t result;
+
+ if (!ZIO_COMPRESS_HASLEVEL(compress))
+ return (0);
+
+ result = child;
+ if (result == ZIO_COMPLEVEL_INHERIT)
+ result = parent;
+
+ return (result);
+}
+
enum zio_compress
zio_compress_select(spa_t *spa, enum zio_compress child,
enum zio_compress parent)
@@ -102,9 +123,11 @@ zio_compress_zeroed_cb(void *data, size_t len, void *private)
}
size_t
-zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
+zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len,
+ uint8_t level)
{
size_t c_len, d_len;
+ uint8_t complevel;
zio_compress_info_t *ci = &zio_compress_table[c];
ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
@@ -123,9 +146,24 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
/* Compress at least 12.5% */
d_len = s_len - (s_len >> 3);
+ complevel = ci->ci_level;
+
+ if (c == ZIO_COMPRESS_ZSTD) {
+ /* If we don't know the level, we can't compress it */
+ if (level == ZIO_COMPLEVEL_INHERIT)
+ return (s_len);
+
+ if (level == ZIO_COMPLEVEL_DEFAULT)
+ complevel = ZIO_ZSTD_LEVEL_DEFAULT;
+ else
+ complevel = level;
+
+ ASSERT3U(complevel, !=, ZIO_COMPLEVEL_INHERIT);
+ }
+
/* No compression algorithms can read from ABDs directly */
void *tmp = abd_borrow_buf_copy(src, s_len);
- c_len = ci->ci_compress(tmp, dst, s_len, d_len, ci->ci_level);
+ c_len = ci->ci_compress(tmp, dst, s_len, d_len, complevel);
abd_return_buf(src, tmp, s_len);
if (c_len > d_len)
@@ -137,21 +175,24 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
int
zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
- size_t s_len, size_t d_len)
+ size_t s_len, size_t d_len, uint8_t *level)
{
zio_compress_info_t *ci = &zio_compress_table[c];
if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL)
return (SET_ERROR(EINVAL));
+ if (ci->ci_decompress_level != NULL && level != NULL)
+ return (ci->ci_decompress_level(src, dst, s_len, d_len, level));
+
return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
}
int
zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
- size_t s_len, size_t d_len)
+ size_t s_len, size_t d_len, uint8_t *level)
{
void *tmp = abd_borrow_buf_copy(src, s_len);
- int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len);
+ int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len, level);
abd_return_buf(src, tmp, s_len);
/*
@@ -165,3 +206,15 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
return (ret);
}
+
+int
+zio_compress_to_feature(enum zio_compress comp)
+{
+ switch (comp) {
+ case ZIO_COMPRESS_ZSTD:
+ return (SPA_FEATURE_ZSTD_COMPRESS);
+ default:
+ /* fallthru */;
+ }
+ return (SPA_FEATURE_NONE);
+}
diff --git a/module/zstd/Makefile.in b/module/zstd/Makefile.in
new file mode 100644
index 000000000..a7f91a435
--- /dev/null
+++ b/module/zstd/Makefile.in
@@ -0,0 +1,33 @@
+ifneq ($(KBUILD_EXTMOD),)
+src = @abs_srcdir@
+obj = @abs_builddir@
+zstd_include = $(src)/include
+else
+zstd_include = $(srctree)/$(src)/include
+endif
+
+MODULE := zzstd
+
+obj-$(CONFIG_ZFS) := $(MODULE).o
+
+asflags-y := -I$(zstd_include)
+ccflags-y := -I$(zstd_include)
+
+# Zstd uses -O3 by default, so we should follow
+ccflags-y += -O3
+
+# -fno-tree-vectorize gets set for gcc in zstd/common/compiler.h
+# Set it for other compilers, too.
+$(obj)/lib/zstd.o: c_flags += -fno-tree-vectorize
+
+# Quiet warnings about frame size due to unused code in unmodified zstd lib
+$(obj)/lib/zstd.o: c_flags += -Wframe-larger-than=20480
+
+# Disable aarch64 neon SIMD instructions for kernel mode
+$(obj)/lib/zstd.o: c_flags += -include $(zstd_include)/aarch64_compat.h
+
+$(MODULE)-objs += zfs_zstd.o
+$(MODULE)-objs += lib/zstd.o
+
+all:
+ mkdir -p lib
diff --git a/module/zstd/README.md b/module/zstd/README.md
new file mode 100644
index 000000000..b08a41906
--- /dev/null
+++ b/module/zstd/README.md
@@ -0,0 +1,60 @@
+# ZSTD-On-ZFS Library Manual
+
+## Introduction
+
+This subtree contains the ZSTD library used in ZFS. It is heavily cut-down by
+dropping any unneeded files, and combined into a single file, but otherwise is
+intentionally unmodified. Please do not alter the file containing the zstd
+library, besides upgrading to a newer ZSTD release.
+
+Tree structure:
+
+* `zfs_zstd.c` is the actual `zzstd` kernel module.
+* `lib/` contains the unmodified, [_"amalgamated"_](https://github.com/facebook/zstd/blob/dev/contrib/single_file_libs/README.md)
+ version of the `Zstandard` library, generated from our template file
+* `zstd-in.c` is our template file for generating the library
+* `include/`: This directory contains supplemental includes for platform
+ compatibility, which are not expected to be used by ZFS elsewhere in the
+ future. Thus we keep them private to ZSTD.
+
+## Updating ZSTD
+
+To update ZSTD the following steps need to be taken:
+
+1. Grab the latest release of [ZSTD](https://github.com/facebook/zstd/releases).
+2. Update `module/zstd/zstd-in.c` if required. (see
+ `zstd/contrib/single_file_libs/zstd-in.c` in the zstd repository)
+3. Generate the "single-file-library" and place it in `module/zstd/lib/`.
+4. Copy the following files to `module/zstd/lib/`:
+ - `zstd/lib/zstd.h`
+ - `zstd/lib/common/zstd_errors.h`
+
+This can be done using a few shell commands from inside the zfs repo:
+
+~~~sh
+cd PATH/TO/ZFS
+
+url="https://github.com/facebook/zstd"
+release="$(curl -s "${url}"/releases/latest | grep -oP '(?<=v)[\d\.]+')"
+zstd="/tmp/zstd-${release}/"
+
+wget -O /tmp/zstd.tar.gz \
+ "${url}/releases/download/v${release}/zstd-${release}.tar.gz"
+tar -C /tmp -xzf /tmp/zstd.tar.gz
+
+cp ${zstd}/lib/zstd.h module/zstd/lib/
+cp ${zstd}/lib/common/zstd_errors.h module/zstd/lib/
+${zstd}/contrib/single_file_libs/combine.sh \
+ -r ${zstd}/lib -o module/zstd/lib/zstd.c module/zstd/zstd-in.c
+~~~
+
+
+## Altering ZSTD and breaking changes
+
+If a new ZSTD release breaks compatibility, or you need to make breaking
+changes to the way we handle ZSTD, backwards compatibility must be
+maintained.
+
+We already save the ZSTD version number within the block header to be used
+to add future compatibility checks and/or fixes. However, currently it is
+not actually used in such a way.
diff --git a/module/zstd/include/aarch64_compat.h b/module/zstd/include/aarch64_compat.h
new file mode 100644
index 000000000..088517d3d
--- /dev/null
+++ b/module/zstd/include/aarch64_compat.h
@@ -0,0 +1,37 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2018-2020, Sebastian Gottschall
+ */
+
+#ifdef _KERNEL
+#undef __aarch64__
+#endif
diff --git a/module/zstd/include/limits.h b/module/zstd/include/limits.h
new file mode 100644
index 000000000..3bf5b6776
--- /dev/null
+++ b/module/zstd/include/limits.h
@@ -0,0 +1,63 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_LIMITS_H
+#define _ZSTD_LIMITS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/limits.h>
+#elif defined(__linux__)
+#include <linux/limits.h>
+#include <linux/kernel.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <limits.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_LIMITS_H */
diff --git a/module/zstd/include/stddef.h b/module/zstd/include/stddef.h
new file mode 100644
index 000000000..3f46fb8b0
--- /dev/null
+++ b/module/zstd/include/stddef.h
@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_STDDEF_H
+#define _ZSTD_STDDEF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/types.h>
+#elif defined(__linux__)
+#include <linux/types.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <stddef.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDDEF_H */
diff --git a/module/zstd/include/stdint.h b/module/zstd/include/stdint.h
new file mode 100644
index 000000000..2d98a556c
--- /dev/null
+++ b/module/zstd/include/stdint.h
@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_STDINT_H
+#define _ZSTD_STDINT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/stdint.h>
+#elif defined(__linux__)
+#include <linux/types.h>
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <stdint.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDINT_H */
diff --git a/module/zstd/include/stdio.h b/module/zstd/include/stdio.h
new file mode 100644
index 000000000..5a7c6ec69
--- /dev/null
+++ b/module/zstd/include/stdio.h
@@ -0,0 +1,54 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_STDIO_H
+#define _ZSTD_STDIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _KERNEL
+
+#include_next <stdio.h>
+
+#endif /* !_KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDIO_H */
diff --git a/module/zstd/include/stdlib.h b/module/zstd/include/stdlib.h
new file mode 100644
index 000000000..c341a0c84
--- /dev/null
+++ b/module/zstd/include/stdlib.h
@@ -0,0 +1,58 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_STDLIB_H
+#define _ZSTD_STDLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef GCC_VERSION
+
+/*
+ * Define calloc, malloc, free to make building work. They are never really used
+ * in lib/zstd.c since allocation is done in zfs_zstd.c.
+ */
+#define calloc(n, sz) NULL
+#define malloc(sz) NULL
+#define free(ptr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STDLIB_H */
diff --git a/module/zstd/include/string.h b/module/zstd/include/string.h
new file mode 100644
index 000000000..78998d3c4
--- /dev/null
+++ b/module/zstd/include/string.h
@@ -0,0 +1,62 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2014-2019, Allan Jude
+ * Copyright (c) 2020, Brian Behlendorf
+ * Copyright (c) 2020, Michael Niewöhner
+ */
+
+#ifndef _ZSTD_STRING_H
+#define _ZSTD_STRING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+#if defined(__FreeBSD__)
+#include <sys/systm.h> /* memcpy, memset */
+#elif defined(__linux__)
+#include <linux/string.h> /* memcpy, memset */
+#else
+#error "Unsupported platform"
+#endif
+
+#else /* !_KERNEL */
+#include_next <string.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZSTD_STRING_H */
diff --git a/module/zstd/zfs_zstd.c b/module/zstd/zfs_zstd.c
new file mode 100644
index 000000000..b6dd7efcc
--- /dev/null
+++ b/module/zstd/zfs_zstd.c
@@ -0,0 +1,737 @@
+/*
+ * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016-2018, Klara Inc.
+ * Copyright (c) 2016-2018, Allan Jude
+ * Copyright (c) 2018-2020, Sebastian Gottschall
+ * Copyright (c) 2019-2020, Michael Niewöhner
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/zfs_context.h>
+#include <sys/zio_compress.h>
+#include <sys/spa.h>
+#include <sys/zstd/zstd.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "lib/zstd.h"
+#include "lib/zstd_errors.h"
+
+kstat_t *zstd_ksp = NULL; /* kstat handle: set by zstd_init, cleared by zstd_fini */
+
+typedef struct zstd_stats {
+ kstat_named_t zstd_stat_alloc_fail; /* pool allocator returned no buffer */
+ kstat_named_t zstd_stat_alloc_fallback; /* decompression resorted to the fallback buffer */
+ kstat_named_t zstd_stat_com_alloc_fail; /* no compression context could be created */
+ kstat_named_t zstd_stat_dec_alloc_fail; /* no decompression context could be created */
+ kstat_named_t zstd_stat_com_inval; /* compression requested with an invalid level */
+ kstat_named_t zstd_stat_dec_inval; /* block header carried an invalid level */
+ kstat_named_t zstd_stat_dec_header_inval; /* stored length does not fit the source buffer */
+ kstat_named_t zstd_stat_com_fail; /* compression error other than dstSize_tooSmall */
+ kstat_named_t zstd_stat_dec_fail; /* ZSTD reported a decompression error */
+} zstd_stats_t;
+
+static zstd_stats_t zstd_stats = { /* one entry per zstd_stats_t member, same order */
+ { "alloc_fail", KSTAT_DATA_UINT64 },
+ { "alloc_fallback", KSTAT_DATA_UINT64 },
+ { "compress_alloc_fail", KSTAT_DATA_UINT64 },
+ { "decompress_alloc_fail", KSTAT_DATA_UINT64 },
+ { "compress_level_invalid", KSTAT_DATA_UINT64 },
+ { "decompress_level_invalid", KSTAT_DATA_UINT64 },
+ { "decompress_header_invalid", KSTAT_DATA_UINT64 },
+ { "compress_failed", KSTAT_DATA_UINT64 },
+ { "decompress_failed", KSTAT_DATA_UINT64 },
+};
+
+/* Allocator type recorded in the kmem_type field of struct zstd_kmem */
+enum zstd_kmem_type {
+ ZSTD_KMEM_UNKNOWN = 0, /* lower bound asserted by zstd_free; not a valid type */
+ /* Standalone vmem_alloc allocation, released with vmem_free */
+ ZSTD_KMEM_DEFAULT,
+ /* Buffer cached in a pool slot by zstd_mempool_alloc */
+ ZSTD_KMEM_POOL,
+ /* Reserved fallback memory for decompression only */
+ ZSTD_KMEM_DCTX,
+ ZSTD_KMEM_COUNT, /* upper bound asserted by zstd_free; not a valid type */
+};
+
+/* One slot of a memory pool, caching a single reusable buffer */
+struct zstd_pool {
+ void *mem; /* cached buffer, NULL while the slot is empty */
+ size_t size; /* size of mem in bytes, 0 while the slot is empty */
+ kmutex_t barrier; /* held for as long as the buffer is handed out */
+ hrtime_t timeout; /* gethrestime_sec() value after which an idle buffer is freed */
+};
+
+/*
+ * Bookkeeping header stored at the start of every buffer handed to ZSTD.
+ * The allocators return the address right behind it and zstd_free steps back
+ * to it to find out how the buffer has to be released.
+ */
+struct zstd_kmem {
+ enum zstd_kmem_type kmem_type; /* selects the release path in zstd_free */
+ size_t kmem_size; /* size of the whole allocation including this header */
+ struct zstd_pool *pool; /* owning pool slot, NULL for non-pooled buffers */
+};
+
+/* Fallback memory structure used for decompression only if memory runs out */
+struct zstd_fallback_mem {
+ size_t mem_size; /* size of mem in bytes */
+ void *mem; /* buffer allocated once by create_fallback_mem */
+ kmutex_t barrier; /* taken by zstd_dctx_alloc, dropped by zstd_free */
+};
+
+struct zstd_levelmap { /* one row of the zstd_levels table */
+ int16_t zstd_level; /* level value passed to ZSTD */
+ enum zio_zstd_levels level; /* matching ZFS level enum value */
+};
+
+/*
+ * Custom allocator hooks handed to ZSTD
+ *
+ * Compression and decompression contexts use separate allocation routines:
+ * the one for decompression additionally provides fallback memory in case
+ * memory runs out. Both share the same release routine.
+ */
+static void zstd_free(void *opaque, void *ptr);
+static void *zstd_alloc(void *opaque, size_t size);
+static void *zstd_dctx_alloc(void *opaque, size_t size);
+
+/* Allocator set passed when creating compression contexts */
+static const ZSTD_customMem zstd_malloc = {
+	.customAlloc = zstd_alloc,
+	.customFree = zstd_free,
+	.opaque = NULL,
+};
+
+/* Allocator set passed when creating decompression contexts */
+static const ZSTD_customMem zstd_dctx_malloc = {
+	.customAlloc = zstd_dctx_alloc,
+	.customFree = zstd_free,
+	.opaque = NULL,
+};
+
+/*
+ * Level map for converting ZFS internal levels to ZSTD levels and vice versa.
+ *
+ * zstd_enum_to_level looks rows up by position, so the order of the entries
+ * must not change. The table is only ever read, hence it is const.
+ */
+static const struct zstd_levelmap zstd_levels[] = {
+	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
+	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
+	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
+	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
+	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
+	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
+	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
+	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
+	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
+	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
+	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
+	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
+	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
+	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
+	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
+	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
+	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
+	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
+	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
+	{-1, ZIO_ZSTD_LEVEL_FAST_1},
+	{-2, ZIO_ZSTD_LEVEL_FAST_2},
+	{-3, ZIO_ZSTD_LEVEL_FAST_3},
+	{-4, ZIO_ZSTD_LEVEL_FAST_4},
+	{-5, ZIO_ZSTD_LEVEL_FAST_5},
+	{-6, ZIO_ZSTD_LEVEL_FAST_6},
+	{-7, ZIO_ZSTD_LEVEL_FAST_7},
+	{-8, ZIO_ZSTD_LEVEL_FAST_8},
+	{-9, ZIO_ZSTD_LEVEL_FAST_9},
+	{-10, ZIO_ZSTD_LEVEL_FAST_10},
+	{-20, ZIO_ZSTD_LEVEL_FAST_20},
+	{-30, ZIO_ZSTD_LEVEL_FAST_30},
+	{-40, ZIO_ZSTD_LEVEL_FAST_40},
+	{-50, ZIO_ZSTD_LEVEL_FAST_50},
+	{-60, ZIO_ZSTD_LEVEL_FAST_60},
+	{-70, ZIO_ZSTD_LEVEL_FAST_70},
+	{-80, ZIO_ZSTD_LEVEL_FAST_80},
+	{-90, ZIO_ZSTD_LEVEL_FAST_90},
+	{-100, ZIO_ZSTD_LEVEL_FAST_100},
+	{-500, ZIO_ZSTD_LEVEL_FAST_500},
+	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
+};
+
+/*
+ * Number of slots in each memory pool. It is based on the number of CPUs
+ * plus some buffer: zstd_init replaces this default with boot_ncpus * 4.
+ */
+static int pool_count = 16;
+
+#define	ZSTD_POOL_MAX		pool_count
+/*
+ * Lifetime of an idle pool buffer in seconds. The value is parenthesized so
+ * that the macro expands as one operand inside any surrounding expression.
+ */
+#define	ZSTD_POOL_TIMEOUT	(60 * 2)
+
+static struct zstd_fallback_mem zstd_dctx_fallback;
+static struct zstd_pool *zstd_mempool_cctx;
+static struct zstd_pool *zstd_mempool_dctx;
+
+/*
+ * Try to get a cached allocated buffer from memory pool or allocate a new one
+ * if necessary. If an object is older than 2 minutes and does not fit the
+ * requested size, it will be released and a new cached entry will be allocated.
+ * If other pooled objects are detected without being used for 2 minutes, they
+ * will be released, too.
+ *
+ * The concept is that high frequency memory allocations of bigger objects are
+ * expensive. So if a lot of work is going on, allocations will be kept for a
+ * while and can be reused in that time frame.
+ *
+ * The scheduled release will be updated every time an object is reused.
+ *
+ * zstd_mempool is the array of ZSTD_POOL_MAX slots to search and size is the
+ * number of bytes required. Returns the buffer, which starts with a
+ * struct zstd_kmem describing how it has to be released, or NULL if the pool
+ * pointer is NULL or no memory could be obtained. A buffer taken from a slot
+ * is returned with that slot's barrier still held.
+ */
+static void *
+zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
+{
+ struct zstd_pool *pool;
+ struct zstd_kmem *mem = NULL;
+
+ if (!zstd_mempool) {
+ return (NULL);
+ }
+
+ /* Seek for preallocated memory slot and free obsolete slots */
+ for (int i = 0; i < ZSTD_POOL_MAX; i++) {
+ pool = &zstd_mempool[i];
+ /*
+ * This lock is simply a marker for a pool object being in use.
+ * If it's already held, the slot will be skipped.
+ *
+ * We need to take it before inspecting the slot to avoid race
+ * conditions caused by running in a threaded context.
+ *
+ * The lock is later released by zstd_mempool_free.
+ */
+ if (mutex_tryenter(&pool->barrier)) {
+ /*
+ * Check if the object fits the size, if so we take it and
+ * update the timestamp.
+ */
+ if (!mem && pool->mem && size <= pool->size) {
+ pool->timeout = gethrestime_sec() +
+ ZSTD_POOL_TIMEOUT;
+ mem = pool->mem;
+ continue; /* barrier stays held; keep scanning to free stale slots */
+ }
+
+ /* Free memory if unused object older than 2 minutes */
+ if (pool->mem && gethrestime_sec() > pool->timeout) {
+ vmem_free(pool->mem, pool->size);
+ pool->mem = NULL;
+ pool->size = 0;
+ pool->timeout = 0;
+ }
+
+ mutex_exit(&pool->barrier);
+ }
+ }
+
+ if (mem) {
+ return (mem);
+ }
+
+ /*
+ * If no preallocated slot was found, try to fill in a new one.
+ *
+ * We run a similar algorithm twice here to avoid pool fragmentation.
+ * The first one may generate holes in the list if objects get released.
+ * We always make sure that these holes get filled instead of adding new
+ * allocations constantly at the end.
+ */
+ for (int i = 0; i < ZSTD_POOL_MAX; i++) {
+ pool = &zstd_mempool[i];
+ if (mutex_tryenter(&pool->barrier)) {
+ /* Object is free, try to allocate new one */
+ if (!pool->mem) {
+ mem = vmem_alloc(size, KM_SLEEP);
+ pool->mem = mem;
+
+ if (pool->mem) {
+ /* Keep track for later release */
+ mem->pool = pool;
+ pool->size = size;
+ mem->kmem_type = ZSTD_KMEM_POOL;
+ mem->kmem_size = size;
+ }
+ }
+
+ if (size <= pool->size) {
+ /* Update timestamp */
+ pool->timeout = gethrestime_sec() +
+ ZSTD_POOL_TIMEOUT;
+
+ return (pool->mem); /* barrier stays held until zstd_mempool_free */
+ }
+
+ mutex_exit(&pool->barrier);
+ }
+ }
+
+ /*
+ * If the pool is full or the allocation failed, try lazy allocation
+ * instead. Such a buffer is not tied to any slot and is marked
+ * ZSTD_KMEM_DEFAULT, so zstd_free releases it with vmem_free.
+ */
+ if (!mem) {
+ mem = vmem_alloc(size, KM_NOSLEEP);
+ if (mem) {
+ mem->pool = NULL;
+ mem->kmem_type = ZSTD_KMEM_DEFAULT;
+ mem->kmem_size = size;
+ }
+ }
+
+ return (mem);
+}
+
+/*
+ * Mark a pooled object as released by dropping the barrier mutex of the slot
+ * recorded in its header. The buffer itself stays cached in the slot.
+ */
+static void
+zstd_mempool_free(struct zstd_kmem *z)
+{
+ mutex_exit(&z->pool->barrier);
+}
+
+/*
+ * Translate a ZFS level enum value into the level understood by ZSTD.
+ *
+ * On success the ZSTD level is stored in *zstd_level and 0 is returned.
+ * For a value outside both the regular and the fast range 1 is returned and
+ * *zstd_level is left untouched.
+ */
+static int
+zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
+{
+	int row;
+
+	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
+		/* Regular levels occupy the first rows of the table */
+		row = level - 1;
+	} else if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
+	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
+		/* Fast levels follow directly behind the regular ones */
+		row = level - ZIO_ZSTD_LEVEL_FAST_1 + ZIO_ZSTD_LEVEL_19;
+	} else {
+		/* Invalid/unknown zfs compression enum - should never happen */
+		return (1);
+	}
+
+	*zstd_level = zstd_levels[row].zstd_level;
+
+	return (0);
+}
+
+/*
+ * Compress a block using zstd
+ *
+ * s_start/s_len describe the input and d_start/d_len the output buffer;
+ * level is a ZFS zstd level enum value. The output consists of a
+ * zfs_zstdhdr_t followed by a "magicless" zstd frame.
+ *
+ * Returns the number of bytes placed in the output buffer including the
+ * header. s_len is returned whenever no compressed block was produced
+ * (invalid level, no compression context, error reported by ZSTD).
+ */
+size_t
+zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
+    int level)
+{
+	zfs_zstdhdr_t *hdr = (zfs_zstdhdr_t *)d_start;
+	ZSTD_CCtx *ctx;
+	int16_t zstd_level;
+	size_t frame_len;
+
+	/* An invalid level means the block is not compressed at all */
+	if (zstd_enum_to_level(level, &zstd_level)) {
+		ZSTDSTAT_BUMP(zstd_stat_com_inval);
+		return (s_len);
+	}
+
+	ASSERT3U(d_len, >=, sizeof (*hdr));
+	ASSERT3U(d_len, <=, s_len);
+	ASSERT3U(zstd_level, !=, 0);
+
+	/*
+	 * Without a context we are out of kernel memory. Returning s_len
+	 * gently disables compression in zio_compress_data.
+	 */
+	ctx = ZSTD_createCCtx_advanced(zstd_malloc);
+	if (!ctx) {
+		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
+		return (s_len);
+	}
+
+	/*
+	 * Configure the context:
+	 * - the requested compression level
+	 * - the "magicless" frame format, saving 4 header bytes
+	 * - no checksum and no content size, ZFS already takes care of both
+	 */
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, zstd_level);
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_checksumFlag, 0);
+	ZSTD_CCtx_setParameter(ctx, ZSTD_c_contentSizeFlag, 0);
+
+	/* The frame is written behind the header; the context is one-shot */
+	frame_len = ZSTD_compress2(ctx, hdr->data, d_len - sizeof (*hdr),
+	    s_start, s_len);
+	ZSTD_freeCCtx(ctx);
+
+	if (!ZSTD_isError(frame_len)) {
+		/*
+		 * Store the compressed size at the start. Decompression
+		 * needs it to ignore padding which might have been added
+		 * to the compressed buffer.
+		 */
+		hdr->c_len = BE_32(frame_len);
+
+		/*
+		 * The version has to fit into 24 bits. This allows a maximum
+		 * version 1677.72.15 which we don't expect to be ever reached.
+		 */
+		ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);
+
+		/*
+		 * Store the level as well. The original compression level may
+		 * be needed if compressed_arc is disabled, to match the
+		 * compression settings when writing this block to the L2ARC.
+		 *
+		 * The upper 24 bits hold the ZSTD version, so that handling
+		 * code differentiating between versions can be added as soon
+		 * as a new version changes the compressed data.
+		 *
+		 * Both fields are filled in host order first, then the
+		 * combined word is converted to big endian as a whole.
+		 */
+		hdr->version = ZSTD_VERSION_NUMBER;
+		hdr->level = level;
+		hdr->raw_version_level = BE_32(hdr->raw_version_level);
+
+		return (frame_len + sizeof (*hdr));
+	}
+
+	/*
+	 * Compression was aborted. Running out of destination space only
+	 * means the savings are too small, which is not a failure. Everything
+	 * else is counted as a compression failure.
+	 */
+	if (ZSTD_getErrorCode(frame_len) != ZSTD_error_dstSize_tooSmall) {
+		ZSTDSTAT_BUMP(zstd_stat_com_fail);
+	}
+
+	return (s_len);
+}
+
+/*
+ * Decompress a block using zstd and return its stored level
+ *
+ * s_start/s_len describe the compressed block (zfs_zstdhdr_t followed by a
+ * "magicless" zstd frame) and d_start/d_len the output buffer. If level is
+ * not NULL, the level enum value found in the header is stored there on
+ * success.
+ *
+ * Returns 0 on success and 1 on any failure (truncated or inconsistent
+ * header, invalid level, no decompression context, error reported by ZSTD).
+ */
+int
+zstd_decompress_level(void *s_start, void *d_start, size_t s_len, size_t d_len,
+    uint8_t *level)
+{
+	ZSTD_DCtx *dctx;
+	size_t result;
+	int16_t zstd_level;
+	uint32_t c_len;
+	const zfs_zstdhdr_t *hdr;
+	zfs_zstdhdr_t hdr_copy;
+
+	/* The header must not be read from a buffer too short to hold it */
+	if (s_len < sizeof (*hdr)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
+		return (1);
+	}
+
+	hdr = (const zfs_zstdhdr_t *)s_start;
+	c_len = BE_32(hdr->c_len);
+
+	/*
+	 * Make a copy instead of directly converting the header, since we must
+	 * not modify the original data that may be used again later.
+	 */
+	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
+
+	/*
+	 * NOTE: We ignore the ZSTD version for now. As soon as any
+	 * incompatibility occurs, it has to be handled accordingly.
+	 * The version can be accessed via `hdr_copy.version`.
+	 */
+
+	/*
+	 * Convert and check the level
+	 * An invalid level is a strong indicator for data corruption! In such
+	 * case return an error so the upper layers can try to fix it.
+	 */
+	if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
+		return (1);
+	}
+
+	ASSERT3U(d_len, >=, s_len);
+	ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);
+
+	/*
+	 * Invalid compressed buffer size encoded at start. The comparison is
+	 * written as a subtraction, which cannot wrap since s_len is known to
+	 * cover the header, whereas c_len + sizeof (*hdr) could wrap where
+	 * size_t is only 32 bits wide.
+	 */
+	if (c_len > s_len - sizeof (*hdr)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
+		return (1);
+	}
+
+	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
+	if (!dctx) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
+		return (1);
+	}
+
+	/* Set header type to "magicless" */
+	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);
+
+	/* Decompress the data and release the context */
+	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
+	ZSTD_freeDCtx(dctx);
+
+	/*
+	 * Returns 0 on success (decompression function returned non-negative)
+	 * and non-zero on failure (decompression function returned negative).
+	 */
+	if (ZSTD_isError(result)) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
+		return (1);
+	}
+
+	if (level) {
+		*level = hdr_copy.level;
+	}
+
+	return (0);
+}
+
+/*
+ * Decompress a data block using zstd
+ *
+ * Same as zstd_decompress_level without reporting the stored level. The level
+ * argument is not used.
+ */
+int
+zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
+    int level __maybe_unused)
+{
+	int err;
+
+	err = zstd_decompress_level(s_start, d_start, s_len, d_len, NULL);
+
+	return (err);
+}
+
+/*
+ * Allocator for zstd compression contexts
+ *
+ * The buffer comes from the compression memory pool and is enlarged by a
+ * struct zstd_kmem header. Returns the address right behind that header, or
+ * NULL if the pool allocator could not provide memory.
+ */
+static void *
+zstd_alloc(void *opaque __maybe_unused, size_t size)
+{
+	struct zstd_kmem *header;
+
+	header = zstd_mempool_alloc(zstd_mempool_cctx,
+	    sizeof (struct zstd_kmem) + size);
+	if (header == NULL) {
+		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
+		return (NULL);
+	}
+
+	/* The usable region starts directly behind the header */
+	return (header + 1);
+}
+
+/*
+ * Allocator for zstd decompression contexts using the memory pool with
+ * fallback to reserved memory if allocation fails
+ *
+ * Three sources are tried in order: the decompression memory pool, a
+ * standalone vmem_alloc and finally the reserved fallback buffer. Every
+ * buffer starts with a struct zstd_kmem header telling zstd_free how to
+ * release it. Returns the address right behind that header, or NULL if none
+ * of the sources can serve the request.
+ */
+static void *
+zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
+{
+	size_t nbytes = sizeof (struct zstd_kmem) + size;
+	struct zstd_kmem *z = NULL;
+	enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT;
+
+	z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes);
+	if (z) {
+		/* The header was already filled in by zstd_mempool_alloc */
+		return ((void*)z + (sizeof (struct zstd_kmem)));
+	}
+
+	/* Try harder, decompression shall not fail */
+	z = vmem_alloc(nbytes, KM_SLEEP);
+	if (z) {
+		z->pool = NULL;
+	}
+	ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
+
+	/* Fallback if everything fails */
+	if (!z) {
+		/*
+		 * Barrier since we only can handle it in a single thread. All
+		 * other following threads need to wait here until decompression
+		 * is completed. zstd_free will release this barrier later.
+		 */
+		mutex_enter(&zstd_dctx_fallback.barrier);
+		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
+
+		/*
+		 * The fallback buffer can only be handed out if it exists and
+		 * is large enough. Otherwise the barrier has to be dropped
+		 * right here: zstd_free is never called for a failed
+		 * allocation, so nobody else would release it.
+		 */
+		if (zstd_dctx_fallback.mem == NULL ||
+		    nbytes > zstd_dctx_fallback.mem_size) {
+			mutex_exit(&zstd_dctx_fallback.barrier);
+			return (NULL);
+		}
+
+		z = zstd_dctx_fallback.mem;
+		type = ZSTD_KMEM_DCTX;
+	}
+
+	z->kmem_type = type;
+	z->kmem_size = nbytes;
+
+	return ((void*)z + (sizeof (struct zstd_kmem)));
+}
+
+/*
+ * Release a buffer obtained from zstd_alloc or zstd_dctx_alloc
+ *
+ * ptr is the address that was handed to ZSTD. The struct zstd_kmem header
+ * located directly in front of it decides how the buffer is released.
+ */
+static void
+zstd_free(void *opaque __maybe_unused, void *ptr)
+{
+	struct zstd_kmem *z = (struct zstd_kmem *)ptr - 1;
+
+	ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT);
+	ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN);
+
+	switch (z->kmem_type) {
+	case ZSTD_KMEM_POOL:
+		/* Stays cached, only the pool slot is unlocked */
+		zstd_mempool_free(z);
+		break;
+	case ZSTD_KMEM_DCTX:
+		/* Let the next thread use the fallback memory */
+		mutex_exit(&zstd_dctx_fallback.barrier);
+		break;
+	case ZSTD_KMEM_DEFAULT:
+		/* Standalone allocation, give it back */
+		vmem_free(z, z->kmem_size);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Set up the fallback memory which ensures safe decompression: a zeroed
+ * buffer of the given size plus the barrier mutex guarding it.
+ */
+static void __init
+create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
+{
+	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
+	mem->mem = vmem_zalloc(size, KM_SLEEP);
+	mem->mem_size = size;
+}
+
+/*
+ * Allocate the zeroed slot arrays of the compression and the decompression
+ * memory pool and initialize the barrier mutex of every slot.
+ */
+static void __init
+zstd_mempool_init(void)
+{
+	const size_t pool_bytes = ZSTD_POOL_MAX * sizeof (struct zstd_pool);
+	int i = 0;
+
+	zstd_mempool_cctx = kmem_zalloc(pool_bytes, KM_SLEEP);
+	zstd_mempool_dctx = kmem_zalloc(pool_bytes, KM_SLEEP);
+
+	while (i < ZSTD_POOL_MAX) {
+		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
+		    MUTEX_DEFAULT, NULL);
+		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
+		    MUTEX_DEFAULT, NULL);
+		i++;
+	}
+}
+
+/*
+ * Initialize zstd-related memory handling: the memory pools and the
+ * fallback memory for decompression. Always returns 0.
+ */
+static int __init
+zstd_meminit(void)
+{
+	size_t fallback_size;
+
+	zstd_mempool_init();
+
+	/*
+	 * The fallback has to hold a decompression context as estimated by
+	 * ZSTD plus our own header, rounded up to whole pages. The expected
+	 * size on x64 with current ZSTD should be about 160 KB.
+	 */
+	fallback_size = P2ROUNDUP(ZSTD_estimateDCtxSize() +
+	    sizeof (struct zstd_kmem), PAGESIZE);
+	create_fallback_mem(&zstd_dctx_fallback, fallback_size);
+
+	return (0);
+}
+
+/*
+ * Tear down a single pool slot: destroy its barrier mutex, free the cached
+ * buffer and mark the slot as empty.
+ */
+static void __exit
+release_pool(struct zstd_pool *pool)
+{
+	void *cached = pool->mem;
+	size_t cached_size = pool->size;
+
+	mutex_destroy(&pool->barrier);
+
+	pool->size = 0;
+	pool->mem = NULL;
+	vmem_free(cached, cached_size);
+}
+
+/*
+ * Release every slot of both memory pools, then free the slot arrays
+ * themselves and reset the pool pointers.
+ */
+static void __exit
+zstd_mempool_deinit(void)
+{
+	const size_t pool_bytes = ZSTD_POOL_MAX * sizeof (struct zstd_pool);
+	int i = 0;
+
+	while (i < ZSTD_POOL_MAX) {
+		release_pool(&zstd_mempool_cctx[i]);
+		release_pool(&zstd_mempool_dctx[i]);
+		i++;
+	}
+
+	kmem_free(zstd_mempool_dctx, pool_bytes);
+	kmem_free(zstd_mempool_cctx, pool_bytes);
+	zstd_mempool_cctx = NULL;
+	zstd_mempool_dctx = NULL;
+}
+
+/*
+ * Module setup: size the memory pools, initialize memory handling and
+ * publish the statistics as kstat. Always returns 0; a kstat that could not
+ * be created is simply not installed.
+ */
+extern int __init
+zstd_init(void)
+{
+	/* Set pool size by using maximum sane thread count * 4 */
+	pool_count = (boot_ncpus * 4);
+	zstd_meminit();
+
+	/* Initialize kstat */
+	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (zstd_ksp == NULL) {
+		return (0);
+	}
+
+	/* The counters live in zstd_stats, the kstat only points to them */
+	zstd_ksp->ks_data = &zstd_stats;
+	kstat_install(zstd_ksp);
+
+	return (0);
+}
+
+/*
+ * Module teardown: remove the kstat, then release the fallback memory and
+ * the memory pools.
+ */
+extern void __exit
+zstd_fini(void)
+{
+	struct zstd_fallback_mem *fallback = &zstd_dctx_fallback;
+
+	/* Deinitialize kstat, if one was created */
+	if (zstd_ksp != NULL) {
+		kstat_delete(zstd_ksp);
+		zstd_ksp = NULL;
+	}
+
+	/* Release fallback memory together with its barrier */
+	vmem_free(fallback->mem, fallback->mem_size);
+	mutex_destroy(&zstd_dctx_fallback.barrier);
+
+	/* Deinit memory pool */
+	zstd_mempool_deinit();
+}
+
+#if defined(_KERNEL) /* module registration is compiled for kernel builds only */
+module_init(zstd_init); /* zstd_init is the module's init hook */
+module_exit(zstd_fini); /* zstd_fini is the module's exit hook */
+
+ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
+ZFS_MODULE_LICENSE("BSD");
+ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);
+
+EXPORT_SYMBOL(zstd_compress); /* the three non-static entry points of this file */
+EXPORT_SYMBOL(zstd_decompress_level);
+EXPORT_SYMBOL(zstd_decompress);
+#endif /* defined(_KERNEL) */