aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2022-01-04 16:46:32 -0800
committerTony Hutter <[email protected]>2022-02-03 15:28:01 -0800
commit6575defc527ff78d2754f0d95815ea995724c2b2 (patch)
tree0d6ce5ddb782da9c9861a97ea186e2cfe8470a09 /module
parent5d8c081193d4fdfdd58a0edf4dc87aa18f75ed33 (diff)
Verify dRAID empty sectors
Verify that all empty sectors are zero filled before using them to calculate parity. Failure to do so can result in incorrect parity columns being generated and written to disk if the contents of an empty sector are non-zero. This was possible because the checksum only protects the data portions of the buffer, not the empty sector padding. This issue has been addressed by updating raidz_parity_verify() to check that all dRAID empty sectors are zero filled. Any sectors which are non-zero will be fixed, repair IO issued, and a checksum error logged. They can then be safely used to verify the parity. This specific type of damage is unlikely to occur since it requires a disk to have silently returned bad data, for an empty sector, while performing a scrub. However, if a pool were to have been damaged in this way, scrubbing the pool with this change applied will repair both the empty sector and parity columns as long as the data checksum is valid. Checksum errors will be reported in the `zpool status` output for any repairs which are made. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Mark Maybee <[email protected]> Reviewed-by: Brian Atkinson <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #12857
Diffstat (limited to 'module')
-rw-r--r--module/zfs/vdev_draid.c47
-rw-r--r--module/zfs/vdev_raidz.c16
2 files changed, 58 insertions, 5 deletions
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c
index b8f82d52e..2d83c1ac9 100644
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
@@ -842,6 +842,53 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr)
}
/*
+ * Verify that all empty sectors are zero filled before using them to
+ * calculate parity. Otherwise, silent corruption in an empty sector will
+ * result in bad parity being generated. That bad parity will then be
+ * considered authoritative and overwrite the good parity on disk. This
+ * is possible because the checksum is only calculated over the data,
+ * thus it cannot be used to detect damage in empty sectors.
+ */
+int
+vdev_draid_map_verify_empty(zio_t *zio, raidz_row_t *rr)
+{
+ uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift;
+ uint64_t parity_size = rr->rr_col[0].rc_size;
+ uint64_t skip_off = parity_size - skip_size;
+ uint64_t empty_off = 0;
+ int ret = 0;
+
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+ ASSERT3P(rr->rr_abd_empty, !=, NULL);
+ ASSERT3U(rr->rr_bigcols, >, 0);
+
+ void *zero_buf = kmem_zalloc(skip_size, KM_SLEEP);
+
+ for (int c = rr->rr_bigcols; c < rr->rr_cols; c++) {
+ raidz_col_t *rc = &rr->rr_col[c];
+
+ ASSERT3P(rc->rc_abd, !=, NULL);
+ ASSERT3U(rc->rc_size, ==, parity_size);
+
+ if (abd_cmp_buf_off(rc->rc_abd, zero_buf, skip_off,
+ skip_size) != 0) {
+ vdev_raidz_checksum_error(zio, rc, rc->rc_abd);
+ abd_zero_off(rc->rc_abd, skip_off, skip_size);
+ rc->rc_error = SET_ERROR(ECKSUM);
+ ret++;
+ }
+
+ empty_off += skip_size;
+ }
+
+ ASSERT3U(empty_off, ==, abd_get_size(rr->rr_abd_empty));
+
+ kmem_free(zero_buf, skip_size);
+
+ return (ret);
+}
+
+/*
* Given a logical address within a dRAID configuration, return the physical
* address on the first drive in the group that this address maps to
* (at position 'start' in permutation number 'perm').
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index 1feebf708..9a7cf6656 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1654,8 +1654,8 @@ vdev_raidz_io_start(zio_t *zio)
/*
* Report a checksum error for a child of a RAID-Z device.
*/
-static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
+void
+vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
@@ -1726,6 +1726,13 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
}
/*
+ * Verify any empty sectors are zero filled to ensure the parity
+ * is calculated correctly even if these non-data sectors are damaged.
+ */
+ if (rr->rr_nempty && rr->rr_abd_empty != NULL)
+ ret += vdev_draid_map_verify_empty(zio, rr);
+
+ /*
* Regenerates parity even for !tried||rc_error!=0 columns. This
* isn't harmful but it does have the side effect of fixing stuff
* we didn't realize was necessary (i.e. even if we return 0).
@@ -1739,7 +1746,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
continue;
if (abd_cmp(orig[c], rc->rc_abd) != 0) {
- raidz_checksum_error(zio, rc, orig[c]);
+ vdev_raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}
@@ -1799,7 +1806,6 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
(zio->io_flags & ZIO_FLAG_RESILVER)) {
int n = raidz_parity_verify(zio, rr);
unexpected_errors += n;
- ASSERT3U(parity_errors + n, <=, rr->rr_firstdatacol);
}
if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
@@ -1925,7 +1931,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
*/
if (rc->rc_error == 0 &&
c >= rr->rr_firstdatacol) {
- raidz_checksum_error(zio,
+ vdev_raidz_checksum_error(zio,
rc, rc->rc_orig_data);
rc->rc_error =
SET_ERROR(ECKSUM);