diff options
author | Matthew Ahrens <[email protected]> | 2014-11-03 12:15:08 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-05-11 12:23:16 -0700 |
commit | f1512ee61e2f22186ac16481a09d86112b2d6788 (patch) | |
tree | 6098f841f27955f13d4c2a53b2a4826b5f3bacda /module/zfs/zfs_ioctl.c | |
parent | 3df293404a102398445fc013b67250073db9004e (diff) |
Illumos 5027 - zfs large block support
5027 zfs large block support
Reviewed by: Alek Pinchuk <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Josef 'Jeff' Sipek <[email protected]>
Reviewed by: Richard Elling <[email protected]>
Reviewed by: Saso Kiselkov <[email protected]>
Reviewed by: Brian Behlendorf <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/5027
https://github.com/illumos/illumos-gate/commit/b515258
Porting Notes:
* Included in this patch is a tiny ISP2() cleanup in zio_init() from
Illumos 5255.
* Unlike the upstream Illumos commit this patch does not impose an
arbitrary 128K block size limit on volumes. Volumes, like filesystems,
are limited by the zfs_max_recordsize=1M module option.
* By default the maximum record size is limited to 1M by the module
option zfs_max_recordsize. This value may be safely increased up to
16M which is the largest block size supported by the on-disk format.
At the moment, 1M blocks clearly offer a significant performance
improvement but the benefits of going beyond this for the majority
of workloads are less clear.
* The illumos version of this patch increased DMU_MAX_ACCESS to 32M.
This was determined not to be large enough when using 16M blocks
because the zfs_make_xattrdir() function will fail (EFBIG) when
assigning a TX. This was immediately observed under Linux because
all newly created files must have a security xattr created and
that was failing. Therefore, we've set DMU_MAX_ACCESS to 64M.
* On 32-bit platforms a hard limit of 1M is set for blocks due
to the limited virtual address space. We should be able to relax
this one the ABD patches are merged.
Ported-by: Brian Behlendorf <[email protected]>
Closes #354
Diffstat (limited to 'module/zfs/zfs_ioctl.c')
-rw-r--r-- | module/zfs/zfs_ioctl.c | 54 |
1 files changed, 48 insertions, 6 deletions
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index caaa3a21c..f5137d09e 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2392,7 +2392,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; - int err; + int err = -1; if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) @@ -3790,8 +3790,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) * the SPA supports it. We ignore any errors here since * we'll catch them later. */ - if (nvpair_type(pair) == DATA_TYPE_UINT64 && - nvpair_value_uint64(pair, &intval) == 0) { + if (nvpair_value_uint64(pair, &intval) == 0) { if (intval >= ZIO_COMPRESS_GZIP_1 && intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, @@ -3842,6 +3841,42 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (SET_ERROR(ENOTSUP)); break; + case ZFS_PROP_RECORDSIZE: + /* Record sizes above 128k need the feature to be enabled */ + if (nvpair_value_uint64(pair, &intval) == 0 && + intval > SPA_OLD_MAXBLOCKSIZE) { + spa_t *spa; + + /* + * If this is a bootable dataset then + * the we don't allow large (>128K) blocks, + * because GRUB doesn't support them. + */ + if (zfs_is_bootfs(dsname) && + intval > SPA_OLD_MAXBLOCKSIZE) { + return (SET_ERROR(EDOM)); + } + + /* + * We don't allow setting the property above 1MB, + * unless the tunable has been changed. + */ + if (intval > zfs_max_recordsize || + intval > SPA_MAXBLOCKSIZE) + return (SET_ERROR(EDOM)); + + if ((err = spa_open(dsname, &spa, FTAG)) != 0) + return (err); + + if (!spa_feature_is_enabled(spa, + SPA_FEATURE_LARGE_BLOCKS)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } + spa_close(spa, FTAG); + } + break; + case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (SET_ERROR(ENOTSUP)); @@ -4221,7 +4256,7 @@ out: * zc_fromobj objsetid of incremental fromsnap (may be zero) * zc_guid if set, estimate size of stream only. zc_cookie is ignored. * output size in zc_objset_type. - * zc_flags if =1, WRITE_EMBEDDED records are permitted + * zc_flags lzc_send_flags * * outputs: * zc_objset_type estimated size, if zc_guid is set @@ -4233,6 +4268,7 @@ zfs_ioc_send(zfs_cmd_t *zc) offset_t off; boolean_t estimate = (zc->zc_guid != 0); boolean_t embedok = (zc->zc_flags & 0x1); + boolean_t large_block_ok = (zc->zc_flags & 0x2); if (zc->zc_obj != 0) { dsl_pool_t *dp; @@ -4294,7 +4330,8 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, - zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off); + zc->zc_fromobj, embedok, large_block_ok, + zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5160,6 +5197,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * innvl: { * "fd" -> file descriptor to write stream to (int32) * (optional) "fromsnap" -> full snap name to send an incremental from + * (optional) "largeblockok" -> (value ignored) + * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted * } @@ -5175,6 +5214,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) char *fromname = NULL; int fd; file_t *fp; + boolean_t largeblockok; boolean_t embedok; error = nvlist_lookup_int32(innvl, "fd", &fd); @@ -5183,13 +5223,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) (void) nvlist_lookup_string(innvl, "fromsnap", &fromname); + largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); if ((fp = getf(fd)) == NULL) return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off); + error = dmu_send(snapname, fromname, embedok, largeblockok, + fd, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; |