diff options
author | Brian Behlendorf <[email protected]> | 2022-01-04 16:46:32 -0800 |
---|---|---|
committer | Tony Hutter <[email protected]> | 2022-02-03 15:28:01 -0800 |
commit | 6575defc527ff78d2754f0d95815ea995724c2b2 (patch) | |
tree | 0d6ce5ddb782da9c9861a97ea186e2cfe8470a09 /module | |
parent | 5d8c081193d4fdfdd58a0edf4dc87aa18f75ed33 (diff) |
Verify dRAID empty sectors
Verify that all empty sectors are zero filled before using them to
calculate parity. Failure to do so can result in incorrect parity
columns being generated and written to disk if the contents of an
empty sector are non-zero. This was possible because the checksum
only protects the data portions of the buffer, not the empty sector
padding.
This issue has been addressed by updating raidz_parity_verify() to
check that all dRAID empty sectors are zero filled. Any sectors
which are non-zero will be fixed, repair IO issued, and a checksum
error logged. They can then be safely used to verify the parity.
This specific type of damage is unlikely to occur since it requires
a disk to have silently returned bad data, for an empty sector, while
performing a scrub. However, if a pool were to have been damaged
in this way, scrubbing the pool with this change applied will repair
both the empty sector and parity columns as long as the data checksum
is valid. Checksum errors will be reported in the `zpool status`
output for any repairs which are made.
Reviewed-by: Tony Hutter <[email protected]>
Reviewed-by: Mark Maybee <[email protected]>
Reviewed-by: Brian Atkinson <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #12857
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/vdev_draid.c | 47 | ||||
-rw-r--r-- | module/zfs/vdev_raidz.c | 16 |
2 files changed, 58 insertions, 5 deletions
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index b8f82d52e..2d83c1ac9 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -842,6 +842,53 @@ vdev_draid_map_alloc_empty(zio_t *zio, raidz_row_t *rr) } /* + * Verify that all empty sectors are zero filled before using them to + * calculate parity. Otherwise, silent corruption in an empty sector will + * result in bad parity being generated. That bad parity will then be + * considered authoritative and overwrite the good parity on disk. This + * is possible because the checksum is only calculated over the data, + * thus it cannot be used to detect damage in empty sectors. + */ +int +vdev_draid_map_verify_empty(zio_t *zio, raidz_row_t *rr) +{ + uint64_t skip_size = 1ULL << zio->io_vd->vdev_top->vdev_ashift; + uint64_t parity_size = rr->rr_col[0].rc_size; + uint64_t skip_off = parity_size - skip_size; + uint64_t empty_off = 0; + int ret = 0; + + ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); + ASSERT3P(rr->rr_abd_empty, !=, NULL); + ASSERT3U(rr->rr_bigcols, >, 0); + + void *zero_buf = kmem_zalloc(skip_size, KM_SLEEP); + + for (int c = rr->rr_bigcols; c < rr->rr_cols; c++) { + raidz_col_t *rc = &rr->rr_col[c]; + + ASSERT3P(rc->rc_abd, !=, NULL); + ASSERT3U(rc->rc_size, ==, parity_size); + + if (abd_cmp_buf_off(rc->rc_abd, zero_buf, skip_off, + skip_size) != 0) { + vdev_raidz_checksum_error(zio, rc, rc->rc_abd); + abd_zero_off(rc->rc_abd, skip_off, skip_size); + rc->rc_error = SET_ERROR(ECKSUM); + ret++; + } + + empty_off += skip_size; + } + + ASSERT3U(empty_off, ==, abd_get_size(rr->rr_abd_empty)); + + kmem_free(zero_buf, skip_size); + + return (ret); +} + +/* * Given a logical address within a dRAID configuration, return the physical * address on the first drive in the group that this address maps to * (at position 'start' in permutation number 'perm'). diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 1feebf708..9a7cf6656 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1654,8 +1654,8 @@ vdev_raidz_io_start(zio_t *zio) /* * Report a checksum error for a child of a RAID-Z device. */ -static void -raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) +void +vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) { vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; @@ -1726,6 +1726,13 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr) } /* + * Verify any empty sectors are zero filled to ensure the parity + * is calculated correctly even if these non-data sectors are damaged. + */ + if (rr->rr_nempty && rr->rr_abd_empty != NULL) + ret += vdev_draid_map_verify_empty(zio, rr); + + /* * Regenerates parity even for !tried||rc_error!=0 columns. This * isn't harmful but it does have the side effect of fixing stuff * we didn't realize was necessary (i.e. even if we return 0). @@ -1739,7 +1746,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr) continue; if (abd_cmp(orig[c], rc->rc_abd) != 0) { - raidz_checksum_error(zio, rc, orig[c]); + vdev_raidz_checksum_error(zio, rc, orig[c]); rc->rc_error = SET_ERROR(ECKSUM); ret++; } @@ -1799,7 +1806,6 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr) (zio->io_flags & ZIO_FLAG_RESILVER)) { int n = raidz_parity_verify(zio, rr); unexpected_errors += n; - ASSERT3U(parity_errors + n, <=, rr->rr_firstdatacol); } if (zio->io_error == 0 && spa_writeable(zio->io_spa) && @@ -1925,7 +1931,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) */ if (rc->rc_error == 0 && c >= rr->rr_firstdatacol) { - raidz_checksum_error(zio, + vdev_raidz_checksum_error(zio, rc, rc->rc_orig_data); rc->rc_error = SET_ERROR(ECKSUM); |