summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: samwyc <[email protected]> 2022-10-20 04:18:13 +0530
committer: Brian Behlendorf <[email protected]> 2022-10-21 14:05:06 -0700
commit: fc1c0053f9c6fd8e894c3378b489fa817f5d0330 (patch)
tree: 473e6411129a95a4e7b9477036337d1004dedeff /module
parent: 7795975681736c7e76bb5303b28558dff8e4a14d (diff)
Fix sequential resilver drive failure race condition
This patch handles a race condition in which two drives fail nearly simultaneously and the vdev_rebuild_reset_wanted signal is missed by vdev_rebuild_thread. The flag is now checked again inside vdev_rebuild_complete_sync: if a reset is wanted, that function calls vdev_rebuild_reset_sync and returns instead of marking the rebuild complete.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Richard Yao <[email protected]>
Reviewed-by: Dipak Ghosh <[email protected]>
Reviewed-by: Akash B <[email protected]>
Signed-off-by: Samuel Wycliffe J <[email protected]>
Closes #14041
Closes #14050
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/vdev_rebuild.c 14
1 file changed, 13 insertions, 1 deletion
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 4d7de0c6c..9dfbe0cf6 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -22,6 +22,7 @@
*
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
+ * Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/vdev_impl.h>
@@ -134,6 +135,7 @@ int zfs_rebuild_scrub_enabled = 1;
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
*/
static void vdev_rebuild_thread(void *arg);
+static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);
/*
* Clear the per-vdev rebuild bytes value for a vdev tree.
@@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
mutex_enter(&vd->vdev_rebuild_lock);
+
+ /*
+ * Handle a second device failure if it occurs after all rebuild I/O
+ * has completed but before this sync task has been executed.
+ */
+ if (vd->vdev_rebuild_reset_wanted) {
+ mutex_exit(&vd->vdev_rebuild_lock);
+ vdev_rebuild_reset_sync(arg, tx);
+ return;
+ }
+
vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
vrp->vrp_end_time = gethrestime_sec();
@@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg)
ASSERT(vd->vdev_rebuilding);
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
- ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);
vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;