diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/vdev_removal.c | 100 |
1 file changed, 89 insertions, 11 deletions
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 49b9ed3a1..5952a5d8f 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -80,6 +80,8 @@ typedef struct vdev_copy_arg { metaslab_t *vca_msp; uint64_t vca_outstanding_bytes; + uint64_t vca_read_error_bytes; + uint64_t vca_write_error_bytes; kcondvar_t vca_cv; kmutex_t vca_lock; } vdev_copy_arg_t; @@ -100,6 +102,14 @@ int zfs_remove_max_copy_bytes = 64 * 1024 * 1024; int zfs_remove_max_segment = SPA_MAXBLOCKSIZE; /* + * Ignore hard IO errors during device removal. When set, if a device + * encounters a hard IO error during the removal process, the removal will + * not be cancelled. This can result in a normally recoverable block + * becoming permanently damaged and is not recommended. + */ +int zfs_removal_ignore_errors = 0; + +/* * Allow a remap segment to span free chunks of at most this size. The main * impact of a larger span is that we will read and write larger, more * contiguous chunks, with more "unnecessary" data -- trading off bandwidth @@ -126,6 +136,7 @@ int zfs_removal_suspend_progress = 0; #define VDEV_REMOVAL_ZAP_OBJS "lzap" static void spa_vdev_remove_thread(void *arg); +static int spa_vdev_remove_cancel_impl(spa_t *spa); static void spa_sync_removing_state(spa_t *spa, dmu_tx_t *tx) @@ -802,6 +813,10 @@ spa_vdev_copy_segment_write_done(zio_t *zio) mutex_enter(&vca->vca_lock); vca->vca_outstanding_bytes -= zio->io_size; + + if (zio->io_error != 0) + vca->vca_write_error_bytes += zio->io_size; + cv_signal(&vca->vca_cv); mutex_exit(&vca->vca_lock); } @@ -813,6 +828,14 @@ spa_vdev_copy_segment_write_done(zio_t *zio) static void spa_vdev_copy_segment_read_done(zio_t *zio) { + vdev_copy_arg_t *vca = zio->io_private; + + if (zio->io_error != 0) { + mutex_enter(&vca->vca_lock); + vca->vca_read_error_bytes += zio->io_size; + mutex_exit(&vca->vca_lock); + } + zio_nowait(zio_unique_parent(zio)); } @@ -866,25 +889,45 @@ spa_vdev_copy_one_child(vdev_copy_arg_t *vca, zio_t 
*nzio, { ASSERT3U(spa_config_held(nzio->io_spa, SCL_ALL, RW_READER), !=, 0); + /* + * If the destination child is unwritable then there is no point + * in issuing the source reads which cannot be written. + */ + if (!vdev_writeable(dest_child_vd)) + return; + mutex_enter(&vca->vca_lock); vca->vca_outstanding_bytes += size; mutex_exit(&vca->vca_lock); abd_t *abd = abd_alloc_for_io(size, B_FALSE); - vdev_t *source_child_vd; + vdev_t *source_child_vd = NULL; if (source_vd->vdev_ops == &vdev_mirror_ops && dest_id != -1) { /* * Source and dest are both mirrors. Copy from the same * child id as we are copying to (wrapping around if there - * are more dest children than source children). + * are more dest children than source children). If the + * preferred source child is unreadable, select another. */ - source_child_vd = - source_vd->vdev_child[dest_id % source_vd->vdev_children]; + for (int i = 0; i < source_vd->vdev_children; i++) { + source_child_vd = source_vd->vdev_child[ + (dest_id + i) % source_vd->vdev_children]; + if (vdev_readable(source_child_vd)) + break; + } } else { source_child_vd = source_vd; } + /* + * There should always be at least one readable source child or + * the pool would be in a suspended state. Somehow selecting an + * unreadable child would result in IO errors, the removal process + * being cancelled, and the pool reverting to its pre-removal state. 
+ */ + ASSERT3P(source_child_vd, !=, NULL); + zio_t *write_zio = zio_vdev_child_io(nzio, NULL, dest_child_vd, dest_offset, abd, size, ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL, @@ -1361,6 +1404,8 @@ spa_vdev_remove_thread(void *arg) mutex_init(&vca.vca_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&vca.vca_cv, NULL, CV_DEFAULT, NULL); vca.vca_outstanding_bytes = 0; + vca.vca_read_error_bytes = 0; + vca.vca_write_error_bytes = 0; mutex_enter(&svr->svr_lock); @@ -1490,6 +1535,14 @@ spa_vdev_remove_thread(void *arg) dmu_tx_commit(tx); mutex_enter(&svr->svr_lock); } + + mutex_enter(&vca.vca_lock); + if (zfs_removal_ignore_errors == 0 && + (vca.vca_read_error_bytes > 0 || + vca.vca_write_error_bytes > 0)) { + svr->svr_thread_exit = B_TRUE; + } + mutex_exit(&vca.vca_lock); } mutex_exit(&svr->svr_lock); @@ -1511,6 +1564,21 @@ spa_vdev_remove_thread(void *arg) svr->svr_thread = NULL; cv_broadcast(&svr->svr_cv); mutex_exit(&svr->svr_lock); + + /* + * During the removal process an unrecoverable read or write + * error was encountered. The removal process must be + * cancelled or this damage may become permanent. + */ + if (zfs_removal_ignore_errors == 0 && + (vca.vca_read_error_bytes > 0 || + vca.vca_write_error_bytes > 0)) { + zfs_dbgmsg("canceling removal due to IO errors: " + "[read_error_bytes=%llu] [write_error_bytes=%llu]", + vca.vca_read_error_bytes, + vca.vca_write_error_bytes); + spa_vdev_remove_cancel_impl(spa); + } } else { ASSERT0(range_tree_space(svr->svr_allocd_segs)); vdev_remove_complete(spa); @@ -1689,14 +1757,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx) vd->vdev_id, (vd->vdev_path != NULL) ? 
vd->vdev_path : "-"); } -int -spa_vdev_remove_cancel(spa_t *spa) +static int +spa_vdev_remove_cancel_impl(spa_t *spa) { - spa_vdev_remove_suspend(spa); - - if (spa->spa_vdev_removal == NULL) - return (ENOTACTIVE); - uint64_t vdid = spa->spa_vdev_removal->svr_vdev_id; int error = dsl_sync_task(spa->spa_name, spa_vdev_remove_cancel_check, @@ -1713,6 +1776,17 @@ spa_vdev_remove_cancel(spa_t *spa) return (error); } +int +spa_vdev_remove_cancel(spa_t *spa) +{ + spa_vdev_remove_suspend(spa); + + if (spa->spa_vdev_removal == NULL) + return (ENOTACTIVE); + + return (spa_vdev_remove_cancel_impl(spa)); +} + /* * Called every sync pass of every txg if there's a svr. */ @@ -2162,6 +2236,10 @@ spa_removal_get_stats(spa_t *spa, pool_removal_stat_t *prs) } #if defined(_KERNEL) +module_param(zfs_removal_ignore_errors, int, 0644); +MODULE_PARM_DESC(zfs_removal_ignore_errors, + "Ignore hard IO errors when removing device"); + module_param(zfs_remove_max_segment, int, 0644); MODULE_PARM_DESC(zfs_remove_max_segment, "Largest contiguous segment to allocate when removing device"); |