aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2019-06-19 14:54:02 -0700
committerBrian Behlendorf <[email protected]>2019-06-19 14:54:02 -0700
commit050d720c43b6285fc0c30e1e97591f6b796dbd68 (patch)
tree1186ac17585e51e426282f3fd3d114913084fcf3 /module
parentfb0be12d7b6db16eabfe8f91da12da7d7854ea9a (diff)
Remove dedupditto functionality
If dedup is in use, the `dedupditto` property can be set, causing ZFS to keep an extra copy of data that is referenced many times (>100x). The idea was that this data is more important than other data and thus we want to be really sure that it is not lost if the disk experiences a small amount of random corruption. ZFS (and system administrators) rely on the pool-level redundancy to protect their data (e.g. mirroring or RAIDZ). Since the user/sysadmin doesn't have control over what data will be offered extra redundancy by dedupditto, this extra redundancy is not very useful. The bulk of the data is still vulnerable to loss based on the pool-level redundancy. For example, if particle strikes corrupt 0.1% of blocks, you will either be saved by mirror/raidz, or you will be sad. This is true even if dedupditto saved another 0.01% of blocks from being corrupted. Therefore, the dedupditto functionality is rarely enabled (i.e. the property is rarely set), and it fulfills its promise of increased redundancy even more rarely. Additionally, this feature does not work as advertised (on existing releases), because scrub/resilver did not repair the extra (dedupditto) copy (see https://github.com/zfsonlinux/zfs/pull/8270). In summary, this seldom-used feature doesn't work, and even if it did it wouldn't provide useful data protection. It has a non-trivial maintenance burden (again see https://github.com/zfsonlinux/zfs/pull/8270). We should remove the dedupditto functionality. For backwards compatibility with the existing CLI, "zpool set dedupditto" will still "succeed" (exit code zero), but won't have any effect. For backwards compatibility with existing pools that had dedupditto enabled at some point, the code will still be able to understand dedupditto blocks and free them when appropriate. However, ZFS won't write any new dedupditto blocks. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Igor Kozhukhov <[email protected]> Reviewed-by: Alek Pinchuk <[email protected]> Issue #8270 Closes #8310
Diffstat (limited to 'module')
-rw-r--r--module/zcommon/zpool_prop.c4
-rw-r--r--module/zfs/ddt.c72
-rw-r--r--module/zfs/spa.c18
-rw-r--r--module/zfs/zio.c68
4 files changed, 13 insertions, 149 deletions
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c
index ac1c42b3f..539de5645 100644
--- a/module/zcommon/zpool_prop.c
+++ b/module/zcommon/zpool_prop.c
@@ -104,8 +104,6 @@ zpool_prop_init(void)
/* default number properties */
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
- zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
- PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT");
@@ -143,6 +141,8 @@ zpool_prop_init(void)
PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
+ zprop_register_hidden(ZPOOL_PROP_DEDUPDITTO, "dedupditto",
+ PROP_TYPE_NUMBER, PROP_DEFAULT, ZFS_TYPE_POOL, "DEDUPDITTO");
}
/*
diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c
index a38c2b24e..05424d875 100644
--- a/module/zfs/ddt.c
+++ b/module/zfs/ddt.c
@@ -552,65 +552,6 @@ ddt_get_pool_dedup_ratio(spa_t *spa)
return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
}
-int
-ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
-{
- spa_t *spa = ddt->ddt_spa;
- uint64_t total_refcnt = 0;
- uint64_t ditto = spa->spa_dedup_ditto;
- int total_copies = 0;
- int desired_copies = 0;
- int copies_needed = 0;
-
- for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
- ddt_phys_t *ddp = &dde->dde_phys[p];
- zio_t *zio = dde->dde_lead_zio[p];
- uint64_t refcnt = ddp->ddp_refcnt; /* committed refs */
- if (zio != NULL)
- refcnt += zio->io_parent_count; /* pending refs */
- if (ddp == ddp_willref)
- refcnt++; /* caller's ref */
- if (refcnt != 0) {
- total_refcnt += refcnt;
- total_copies += p;
- }
- }
-
- if (ditto == 0 || ditto > UINT32_MAX)
- ditto = UINT32_MAX;
-
- if (total_refcnt >= 1)
- desired_copies++;
- if (total_refcnt >= ditto)
- desired_copies++;
- if (total_refcnt >= ditto * ditto)
- desired_copies++;
-
- copies_needed = MAX(desired_copies, total_copies) - total_copies;
-
- /* encrypted blocks store their IV in DVA[2] */
- if (DDK_GET_CRYPT(&dde->dde_key))
- copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
-
- return (copies_needed);
-}
-
-int
-ddt_ditto_copies_present(ddt_entry_t *dde)
-{
- ddt_phys_t *ddp = &dde->dde_phys[DDT_PHYS_DITTO];
- dva_t *dva = ddp->ddp_dva;
- int copies = 0 - DVA_GET_GANG(dva);
-
- for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
- if (DVA_IS_VALID(dva))
- copies++;
-
- ASSERT(copies >= 0 && copies < SPA_DVAS_PER_BP);
-
- return (copies);
-}
-
size_t
ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
{
@@ -1088,8 +1029,11 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
continue;
}
if (p == DDT_PHYS_DITTO) {
- if (ddt_ditto_copies_needed(ddt, dde, NULL) == 0)
- ddt_phys_free(ddt, ddk, ddp, txg);
+ /*
+ * Note, we no longer create DDT-DITTO blocks, but we
+ * don't want to leak any written by older software.
+ */
+ ddt_phys_free(ddt, ddk, ddp, txg);
continue;
}
if (ddp->ddp_refcnt == 0)
@@ -1097,9 +1041,9 @@ ddt_sync_entry(ddt_t *ddt, ddt_entry_t *dde, dmu_tx_t *tx, uint64_t txg)
total_refcnt += ddp->ddp_refcnt;
}
- if (dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth != 0)
- nclass = DDT_CLASS_DITTO;
- else if (total_refcnt > 1)
+ /* We do not create new DDT-DITTO blocks. */
+ ASSERT0(dde->dde_phys[DDT_PHYS_DITTO].ddp_phys_birth);
+ if (total_refcnt > 1)
nclass = DDT_CLASS_DUPLICATE;
else
nclass = DDT_CLASS_UNIQUE;
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 5e0338d3e..23cf17a58 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -694,16 +694,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = SET_ERROR(E2BIG);
break;
- case ZPOOL_PROP_DEDUPDITTO:
- if (spa_version(spa) < SPA_VERSION_DEDUP)
- error = SET_ERROR(ENOTSUP);
- else
- error = nvpair_value_uint64(elem, &intval);
- if (error == 0 &&
- intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
- error = SET_ERROR(EINVAL);
- break;
-
default:
break;
}
@@ -712,6 +702,9 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
break;
}
+ (void) nvlist_remove_all(props,
+ zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO));
+
if (!error && reset_bootfs) {
error = nvlist_remove(props,
zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
@@ -3604,8 +3597,6 @@ spa_ld_get_props(spa_t *spa)
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
- spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
- &spa->spa_dedup_ditto);
spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim);
spa->spa_autoreplace = (autoreplace != 0);
}
@@ -7983,9 +7974,6 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
case ZPOOL_PROP_MULTIHOST:
spa->spa_multihost = intval;
break;
- case ZPOOL_PROP_DEDUPDITTO:
- spa->spa_dedup_ditto = intval;
- break;
default:
break;
}
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 78dff51af..e36a7893d 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -3118,35 +3118,6 @@ zio_ddt_child_write_done(zio_t *zio)
ddt_exit(ddt);
}
-static void
-zio_ddt_ditto_write_done(zio_t *zio)
-{
- int p = DDT_PHYS_DITTO;
- ASSERTV(zio_prop_t *zp = &zio->io_prop);
- blkptr_t *bp = zio->io_bp;
- ddt_t *ddt = ddt_select(zio->io_spa, bp);
- ddt_entry_t *dde = zio->io_private;
- ddt_phys_t *ddp = &dde->dde_phys[p];
- ddt_key_t *ddk = &dde->dde_key;
-
- ddt_enter(ddt);
-
- ASSERT(ddp->ddp_refcnt == 0);
- ASSERT(dde->dde_lead_zio[p] == zio);
- dde->dde_lead_zio[p] = NULL;
-
- if (zio->io_error == 0) {
- ASSERT(ZIO_CHECKSUM_EQUAL(bp->blk_cksum, ddk->ddk_cksum));
- ASSERT(zp->zp_copies < SPA_DVAS_PER_BP);
- ASSERT(zp->zp_copies == BP_GET_NDVAS(bp) - BP_IS_GANG(bp));
- if (ddp->ddp_phys_birth != 0)
- ddt_phys_free(ddt, ddk, ddp, zio->io_txg);
- ddt_phys_fill(ddp, bp);
- }
-
- ddt_exit(ddt);
-}
-
static zio_t *
zio_ddt_write(zio_t *zio)
{
@@ -3155,9 +3126,7 @@ zio_ddt_write(zio_t *zio)
uint64_t txg = zio->io_txg;
zio_prop_t *zp = &zio->io_prop;
int p = zp->zp_copies;
- int ditto_copies;
zio_t *cio = NULL;
- zio_t *dio = NULL;
ddt_t *ddt = ddt_select(spa, bp);
ddt_entry_t *dde;
ddt_phys_t *ddp;
@@ -3192,41 +3161,6 @@ zio_ddt_write(zio_t *zio)
return (zio);
}
- ditto_copies = ddt_ditto_copies_needed(ddt, dde, ddp);
- ASSERT(ditto_copies < SPA_DVAS_PER_BP);
-
- if (ditto_copies > ddt_ditto_copies_present(dde) &&
- dde->dde_lead_zio[DDT_PHYS_DITTO] == NULL) {
- zio_prop_t czp = *zp;
-
- czp.zp_copies = ditto_copies;
-
- /*
- * If we arrived here with an override bp, we won't have run
- * the transform stack, so we won't have the data we need to
- * generate a child i/o. So, toss the override bp and restart.
- * This is safe, because using the override bp is just an
- * optimization; and it's rare, so the cost doesn't matter.
- */
- if (zio->io_bp_override) {
- zio_pop_transforms(zio);
- zio->io_stage = ZIO_STAGE_OPEN;
- zio->io_pipeline = ZIO_WRITE_PIPELINE;
- zio->io_bp_override = NULL;
- BP_ZERO(bp);
- ddt_exit(ddt);
- return (zio);
- }
-
- dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
- zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL,
- NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
- ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
-
- zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL);
- dde->dde_lead_zio[DDT_PHYS_DITTO] = dio;
- }
-
if (ddp->ddp_phys_birth != 0 || dde->dde_lead_zio[p] != NULL) {
if (ddp->ddp_phys_birth != 0)
ddt_bp_fill(ddp, bp, txg);
@@ -3254,8 +3188,6 @@ zio_ddt_write(zio_t *zio)
if (cio)
zio_nowait(cio);
- if (dio)
- zio_nowait(dio);
return (zio);
}