diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/vdev_draid.c | 240 | ||||
-rw-r--r-- | module/zfs/vdev_indirect.c | 1 | ||||
-rw-r--r-- | module/zfs/vdev_mirror.c | 5 | ||||
-rw-r--r-- | module/zfs/vdev_raidz.c | 197 | ||||
-rw-r--r-- | module/zfs/zfs_fm.c | 8 | ||||
-rw-r--r-- | module/zfs/zio.c | 4 |
6 files changed, 21 insertions, 434 deletions
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c index a4f48cf74..fb2143e94 100644 --- a/module/zfs/vdev_draid.c +++ b/module/zfs/vdev_draid.c @@ -632,236 +632,6 @@ vdev_draid_group_to_offset(vdev_t *vd, uint64_t group) return (group * vdc->vdc_groupsz); } - -static void -vdev_draid_map_free_vsd(zio_t *zio) -{ - raidz_map_t *rm = zio->io_vsd; - - ASSERT0(rm->rm_freed); - rm->rm_freed = B_TRUE; - - if (rm->rm_reports == 0) { - vdev_raidz_map_free(rm); - } -} - -/*ARGSUSED*/ -static void -vdev_draid_cksum_free(void *arg, size_t ignored) -{ - raidz_map_t *rm = arg; - - ASSERT3U(rm->rm_reports, >, 0); - - if (--rm->rm_reports == 0 && rm->rm_freed) - vdev_raidz_map_free(rm); -} - -static void -vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) -{ - raidz_map_t *rm = zcr->zcr_cbdata; - const size_t c = zcr->zcr_cbinfo; - uint64_t skip_size = zcr->zcr_sector; - uint64_t parity_size; - size_t x, offset, size; - - if (good_data == NULL) { - zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); - return; - } - - /* - * Detailed cksum reporting is currently only supported for single - * row draid mappings, this covers the vast majority of zios. Only - * a dRAID zio which spans groups will have multiple rows. - */ - if (rm->rm_nrows != 1) { - zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); - return; - } - - raidz_row_t *rr = rm->rm_row[0]; - const abd_t *good = NULL; - const abd_t *bad = rr->rr_col[c].rc_abd; - - if (c < rr->rr_firstdatacol) { - /* - * The first time through, calculate the parity blocks for - * the good data (this relies on the fact that the good - * data never changes for a given logical zio) - */ - if (rr->rr_col[0].rc_gdata == NULL) { - abd_t *bad_parity[VDEV_DRAID_MAXPARITY]; - - /* - * Set up the rr_col[]s to generate the parity for - * good_data, first saving the parity bufs and - * replacing them with buffers to hold the result. - */ - for (x = 0; x < rr->rr_firstdatacol; x++) { - bad_parity[x] = rr->rr_col[x].rc_abd; - rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata = - abd_alloc_sametype(rr->rr_col[x].rc_abd, - rr->rr_col[x].rc_size); - } - - /* - * Fill in the data columns from good_data being - * careful to pad short columns and empty columns - * with a skip sector. - */ - uint64_t good_size = abd_get_size((abd_t *)good_data); - - offset = 0; - for (; x < rr->rr_cols; x++) { - abd_free(rr->rr_col[x].rc_abd); - - if (offset == good_size) { - /* empty data column (small write) */ - rr->rr_col[x].rc_abd = - abd_get_zeros(skip_size); - } else if (x < rr->rr_bigcols) { - /* this is a "big column" */ - size = rr->rr_col[x].rc_size; - rr->rr_col[x].rc_abd = - abd_get_offset_size( - (abd_t *)good_data, offset, size); - offset += size; - } else { - /* short data column, add skip sector */ - size = rr->rr_col[x].rc_size -skip_size; - rr->rr_col[x].rc_abd = abd_alloc( - rr->rr_col[x].rc_size, B_TRUE); - abd_copy_off(rr->rr_col[x].rc_abd, - (abd_t *)good_data, 0, offset, - size); - abd_zero_off(rr->rr_col[x].rc_abd, - size, skip_size); - offset += size; - } - } - - /* - * Construct the parity from the good data. - */ - vdev_raidz_generate_parity_row(rm, rr); - - /* restore everything back to its original state */ - for (x = 0; x < rr->rr_firstdatacol; x++) - rr->rr_col[x].rc_abd = bad_parity[x]; - - offset = 0; - for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) { - abd_free(rr->rr_col[x].rc_abd); - rr->rr_col[x].rc_abd = abd_get_offset_size( - rr->rr_abd_copy, offset, - rr->rr_col[x].rc_size); - offset += rr->rr_col[x].rc_size; - } - } - - ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL); - good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0, - rr->rr_col[c].rc_size); - } else { - /* adjust good_data to point at the start of our column */ - parity_size = size = rr->rr_col[0].rc_size; - if (c >= rr->rr_bigcols) { - size -= skip_size; - zcr->zcr_length = size; - } - - /* empty column */ - if (size == 0) { - zfs_ereport_finish_checksum(zcr, NULL, NULL, B_TRUE); - return; - } - - offset = 0; - for (x = rr->rr_firstdatacol; x < c; x++) { - if (x < rr->rr_bigcols) { - offset += parity_size; - } else { - offset += parity_size - skip_size; - } - } - - good = abd_get_offset_size((abd_t *)good_data, offset, size); - } - - /* we drop the ereport if it ends up that the data was good */ - zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); - abd_free((abd_t *)good); -} - -/* - * Invoked indirectly by zfs_ereport_start_checksum(), called - * below when our read operation fails completely. The main point - * is to keep a copy of everything we read from disk, so that at - * vdev_draid_cksum_finish() time we can compare it with the good data. - */ -static void -vdev_draid_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) -{ - size_t c = (size_t)(uintptr_t)arg; - raidz_map_t *rm = zio->io_vsd; - - /* set up the report and bump the refcount */ - zcr->zcr_cbdata = rm; - zcr->zcr_cbinfo = c; - zcr->zcr_finish = vdev_draid_cksum_finish; - zcr->zcr_free = vdev_draid_cksum_free; - - rm->rm_reports++; - ASSERT3U(rm->rm_reports, >, 0); - - if (rm->rm_row[0]->rr_abd_copy != NULL) - return; - - /* - * It's the first time we're called for this raidz_map_t, so we need - * to copy the data aside; there's no guarantee that our zio's buffer - * won't be re-used for something else. - * - * Our parity data is already in separate buffers, so there's no need - * to copy them. Furthermore, all columns should have been expanded - * by vdev_draid_map_alloc_empty() when attempting reconstruction. - */ - for (int i = 0; i < rm->rm_nrows; i++) { - raidz_row_t *rr = rm->rm_row[i]; - size_t offset = 0; - size_t size = 0; - - for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { - ASSERT3U(rr->rr_col[c].rc_size, ==, - rr->rr_col[0].rc_size); - size += rr->rr_col[c].rc_size; - } - - rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE); - - for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { - raidz_col_t *col = &rr->rr_col[c]; - abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy, - offset, col->rc_size); - - abd_copy(tmp, col->rc_abd, col->rc_size); - abd_free(col->rc_abd); - - col->rc_abd = tmp; - offset += col->rc_size; - } - ASSERT3U(offset, ==, size); - } -} - -const zio_vsd_ops_t vdev_draid_vsd_ops = { - .vsd_free = vdev_draid_map_free_vsd, - .vsd_cksum_report = vdev_draid_cksum_report -}; - /* * Full stripe writes. When writing, all columns (D+P) are required. Parity * is calculated over all the columns, including empty zero filled sectors, @@ -1208,7 +978,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset, rr->rr_missingdata = 0; rr->rr_missingparity = 0; rr->rr_firstdatacol = vdc->vdc_nparity; - rr->rr_abd_copy = NULL; rr->rr_abd_empty = NULL; #ifdef ZFS_DEBUG rr->rr_offset = io_offset; @@ -1230,7 +999,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset, rc->rc_devidx = vdev_draid_permute_id(vdc, base, iter, c); rc->rc_offset = physical_offset; rc->rc_abd = NULL; - rc->rc_gdata = NULL; rc->rc_orig_data = NULL; rc->rc_error = 0; rc->rc_tried = 0; @@ -1328,9 +1096,6 @@ vdev_draid_map_alloc(zio_t *zio) if (nrows == 2) rm->rm_row[1] = rr[1]; - zio->io_vsd = rm; - zio->io_vsd_ops = &vdev_draid_vsd_ops; - return (rm); } @@ -2183,12 +1948,13 @@ static void vdev_draid_io_start(zio_t *zio) { vdev_t *vd __maybe_unused = zio->io_vd; - raidz_map_t *rm; ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops); ASSERT3U(zio->io_offset, ==, vdev_draid_get_astart(vd, zio->io_offset)); - rm = vdev_draid_map_alloc(zio); + raidz_map_t *rm = vdev_draid_map_alloc(zio); + zio->io_vsd = rm; + zio->io_vsd_ops = &vdev_raidz_vsd_ops; if (zio->io_type == ZIO_TYPE_WRITE) { for (int i = 0; i < rm->rm_nrows; i++) { diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 416f4c54d..bafb2c767 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -315,7 +315,6 @@ vdev_indirect_map_free(zio_t *zio) static const zio_vsd_ops_t vdev_indirect_vsd_ops = { .vsd_free = vdev_indirect_map_free, - .vsd_cksum_report = zio_vsd_default_cksum_report }; /* diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 71ca43cae..f360a18c0 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -174,7 +174,6 @@ vdev_mirror_map_free(zio_t *zio) static const zio_vsd_ops_t vdev_mirror_vsd_ops = { .vsd_free = vdev_mirror_map_free, - .vsd_cksum_report = zio_vsd_default_cksum_report }; static int @@ -379,8 +378,6 @@ vdev_mirror_map_init(zio_t *zio) } } - zio->io_vsd = mm; - zio->io_vsd_ops = &vdev_mirror_vsd_ops; return (mm); } @@ -629,6 +626,8 @@ vdev_mirror_io_start(zio_t *zio) int c, children; mm = vdev_mirror_map_init(zio); + zio->io_vsd = mm; + zio->io_vsd_ops = &vdev_mirror_vsd_ops; if (mm == NULL) { ASSERT(!spa_trust_config(zio->io_spa)); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 28280e15a..db753ec16 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -143,15 +143,10 @@ vdev_raidz_row_free(raidz_row_t *rr) if (rc->rc_size != 0) abd_free(rc->rc_abd); - if (rc->rc_gdata != NULL) - abd_free(rc->rc_gdata); if (rc->rc_orig_data != NULL) - zio_buf_free(rc->rc_orig_data, rc->rc_size); + abd_free(rc->rc_orig_data); } - if (rr->rr_abd_copy != NULL) - abd_free(rr->rr_abd_copy); - if (rr->rr_abd_empty != NULL) abd_free(rr->rr_abd_empty); @@ -172,175 +167,11 @@ vdev_raidz_map_free_vsd(zio_t *zio) { raidz_map_t *rm = zio->io_vsd; - ASSERT0(rm->rm_freed); - rm->rm_freed = B_TRUE; - - if (rm->rm_reports == 0) { - vdev_raidz_map_free(rm); - } -} - -/*ARGSUSED*/ -static void -vdev_raidz_cksum_free(void *arg, size_t ignored) -{ - raidz_map_t *rm = arg; - - ASSERT3U(rm->rm_reports, >, 0); - - if (--rm->rm_reports == 0 && rm->rm_freed) - vdev_raidz_map_free(rm); + vdev_raidz_map_free(rm); } -static void -vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) -{ - raidz_map_t *rm = zcr->zcr_cbdata; - const size_t c = zcr->zcr_cbinfo; - size_t x, offset; - - if (good_data == NULL) { - zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); - return; - } - - ASSERT3U(rm->rm_nrows, ==, 1); - raidz_row_t *rr = rm->rm_row[0]; - - const abd_t *good = NULL; - const abd_t *bad = rr->rr_col[c].rc_abd; - - if (c < rr->rr_firstdatacol) { - /* - * The first time through, calculate the parity blocks for - * the good data (this relies on the fact that the good - * data never changes for a given logical ZIO) - */ - if (rr->rr_col[0].rc_gdata == NULL) { - abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; - - /* - * Set up the rr_col[]s to generate the parity for - * good_data, first saving the parity bufs and - * replacing them with buffers to hold the result. - */ - for (x = 0; x < rr->rr_firstdatacol; x++) { - bad_parity[x] = rr->rr_col[x].rc_abd; - rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata = - abd_alloc_sametype(rr->rr_col[x].rc_abd, - rr->rr_col[x].rc_size); - } - - /* fill in the data columns from good_data */ - offset = 0; - for (; x < rr->rr_cols; x++) { - abd_free(rr->rr_col[x].rc_abd); - - rr->rr_col[x].rc_abd = - abd_get_offset_size((abd_t *)good_data, - offset, rr->rr_col[x].rc_size); - offset += rr->rr_col[x].rc_size; - } - - /* - * Construct the parity from the good data. - */ - vdev_raidz_generate_parity_row(rm, rr); - - /* restore everything back to its original state */ - for (x = 0; x < rr->rr_firstdatacol; x++) - rr->rr_col[x].rc_abd = bad_parity[x]; - - offset = 0; - for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) { - abd_free(rr->rr_col[x].rc_abd); - rr->rr_col[x].rc_abd = abd_get_offset_size( - rr->rr_abd_copy, offset, - rr->rr_col[x].rc_size); - offset += rr->rr_col[x].rc_size; - } - } - - ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL); - good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0, - rr->rr_col[c].rc_size); - } else { - /* adjust good_data to point at the start of our column */ - offset = 0; - for (x = rr->rr_firstdatacol; x < c; x++) - offset += rr->rr_col[x].rc_size; - - good = abd_get_offset_size((abd_t *)good_data, offset, - rr->rr_col[c].rc_size); - } - - /* we drop the ereport if it ends up that the data was good */ - zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); - abd_free((abd_t *)good); -} - -/* - * Invoked indirectly by zfs_ereport_start_checksum(), called - * below when our read operation fails completely. The main point - * is to keep a copy of everything we read from disk, so that at - * vdev_raidz_cksum_finish() time we can compare it with the good data. - */ -static void -vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) -{ - size_t c = (size_t)(uintptr_t)arg; - raidz_map_t *rm = zio->io_vsd; - - /* set up the report and bump the refcount */ - zcr->zcr_cbdata = rm; - zcr->zcr_cbinfo = c; - zcr->zcr_finish = vdev_raidz_cksum_finish; - zcr->zcr_free = vdev_raidz_cksum_free; - - rm->rm_reports++; - ASSERT3U(rm->rm_reports, >, 0); - ASSERT3U(rm->rm_nrows, ==, 1); - - if (rm->rm_row[0]->rr_abd_copy != NULL) - return; - - /* - * It's the first time we're called for this raidz_map_t, so we need - * to copy the data aside; there's no guarantee that our zio's buffer - * won't be re-used for something else. - * - * Our parity data is already in separate buffers, so there's no need - * to copy them. - */ - for (int i = 0; i < rm->rm_nrows; i++) { - raidz_row_t *rr = rm->rm_row[i]; - size_t offset = 0; - size_t size = 0; - - for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) - size += rr->rr_col[c].rc_size; - - rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE); - - for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { - raidz_col_t *col = &rr->rr_col[c]; - abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy, - offset, col->rc_size); - - abd_copy(tmp, col->rc_abd, col->rc_size); - - abd_free(col->rc_abd); - col->rc_abd = tmp; - - offset += col->rc_size; - } - ASSERT3U(offset, ==, size); - } -} - -static const zio_vsd_ops_t vdev_raidz_vsd_ops = { +const zio_vsd_ops_t vdev_raidz_vsd_ops = { .vsd_free = vdev_raidz_map_free_vsd, - .vsd_cksum_report = vdev_raidz_cksum_report }; /* @@ -414,7 +245,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, rr->rr_missingdata = 0; rr->rr_missingparity = 0; rr->rr_firstdatacol = nparity; - rr->rr_abd_copy = NULL; rr->rr_abd_empty = NULL; rr->rr_nempty = 0; #ifdef ZFS_DEBUG @@ -435,7 +265,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, rc->rc_devidx = col; rc->rc_offset = coff; rc->rc_abd = NULL; - rc->rc_gdata = NULL; rc->rc_orig_data = NULL; rc->rc_error = 0; rc->rc_tried = 0; @@ -1798,10 +1627,11 @@ vdev_raidz_io_start(zio_t *zio) vdev_t *vd = zio->io_vd; vdev_t *tvd = vd->vdev_top; vdev_raidz_t *vdrz = vd->vdev_tsd; - raidz_map_t *rm; - rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, + raidz_map_t *rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vdrz->vd_logical_width, vdrz->vd_nparity); + zio->io_vsd = rm; + zio->io_vsd_ops = &vdev_raidz_vsd_ops; /* * Until raidz expansion is implemented all maps for a raidz vdev @@ -1810,9 +1640,6 @@ vdev_raidz_io_start(zio_t *zio) ASSERT3U(rm->rm_nrows, ==, 1); raidz_row_t *rr = rm->rm_row[0]; - zio->io_vsd = rm; - zio->io_vsd_ops = &vdev_raidz_vsd_ops; - if (zio->io_type == ZIO_TYPE_WRITE) { vdev_raidz_io_start_write(zio, rr, tvd->vdev_ashift); } else { @@ -2008,7 +1835,7 @@ raidz_restore_orig_data(raidz_map_t *rm) for (int c = 0; c < rr->rr_cols; c++) { raidz_col_t *rc = &rr->rr_col[c]; if (rc->rc_need_orig_restore) { - abd_copy_from_buf(rc->rc_abd, + abd_copy(rc->rc_abd, rc->rc_orig_data, rc->rc_size); rc->rc_need_orig_restore = B_FALSE; } @@ -2049,9 +1876,9 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) if (rc->rc_devidx == ltgts[lt]) { if (rc->rc_orig_data == NULL) { rc->rc_orig_data = - zio_buf_alloc(rc->rc_size); - abd_copy_to_buf( - rc->rc_orig_data, + abd_alloc_linear( + rc->rc_size, B_TRUE); + abd_copy(rc->rc_orig_data, rc->rc_abd, rc->rc_size); } rc->rc_need_orig_restore = B_TRUE; @@ -2096,7 +1923,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity) if (rc->rc_error == 0 && c >= rr->rr_firstdatacol) { raidz_checksum_error(zio, - rc, rc->rc_gdata); + rc, rc->rc_orig_data); rc->rc_error = SET_ERROR(ECKSUM); } @@ -2431,7 +2258,7 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio) (void) zfs_ereport_start_checksum(zio->io_spa, cvd, &zio->io_bookmark, zio, rc->rc_offset, - rc->rc_size, (void *)(uintptr_t)c, &zbc); + rc->rc_size, &zbc); mutex_enter(&cvd->vdev_stat_lock); cvd->vdev_stat.vs_checksum_errors++; mutex_exit(&cvd->vdev_stat_lock); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 9e9f4a80b..f0f953405 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -1125,8 +1125,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, */ int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, - struct zio *zio, uint64_t offset, uint64_t length, void *arg, - zio_bad_cksum_t *info) + struct zio *zio, uint64_t offset, uint64_t length, zio_bad_cksum_t *info) { zio_cksum_report_t *report; @@ -1144,10 +1143,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, report = kmem_zalloc(sizeof (*report), KM_SLEEP); - if (zio->io_vsd != NULL) - zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); - else - zio_vsd_default_cksum_report(zio, report, arg); + zio_vsd_default_cksum_report(zio, report); /* copy the checksum failure information if it was provided */ if (info != NULL) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 74d1595a8..23d45ded6 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3919,7 +3919,7 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, /*ARGSUSED*/ void -zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) +zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr) { void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size); @@ -4257,7 +4257,7 @@ zio_checksum_verify(zio_t *zio) !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { (void) zfs_ereport_start_checksum(zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, - zio->io_offset, zio->io_size, NULL, &info); + zio->io_offset, zio->io_size, &info); mutex_enter(&zio->io_vd->vdev_stat_lock); zio->io_vd->vdev_stat.vs_checksum_errors++; mutex_exit(&zio->io_vd->vdev_stat_lock); |