aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys/vdev.h
diff options
context:
space:
mode:
authorGeorge Wilson <[email protected]>2023-03-24 13:27:07 -0400
committerGitHub <[email protected]>2023-03-24 10:27:07 -0700
commit460d887c439079422b642da87a77dbb896f5e64a (patch)
tree7482bc18c921bd18d21ec5e5a79bc7c1025d87b8 /include/sys/vdev.h
parent2bd0490fafe5ebb811dee9b0eb5d5c245c7a1cbf (diff)
panic loop when removing slog device
There is a window in the slog removal code where a panic loop could ensue if the system crashes during that operation. The original design of slog removal did not persisted any state because the removal happened synchronously. This was changed by a later commit which persisted the vdev_removing flag and exposed this bug. If a slog removal is in progress and happens to crash after persisting the vdev_removing flag to the label but before the vdev is removed from the spa config, then the pool will continue to panic on import. Here's a sample of the panic: [ 134.387411] VERIFY0(0 == dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, &numbufs, &dbp)) failed (0 == 22) [ 134.393865] PANIC at dmu.c:1135:dmu_write() [ 134.396035] Kernel panic - not syncing: VERIFY0(0 == dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, &numbufs, &dbp)) failed (0 == 22) [ 134.397857] CPU: 2 PID: 5914 Comm: txg_sync Kdump: loaded Tainted: P OE 5.4.0-1100-dx2023020205-b3751f8c2-azure #106 [ 134.407938] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090008 12/07/2018 [ 134.407938] Call Trace: [ 134.407938] dump_stack+0x57/0x6d [ 134.407938] panic+0xfb/0x2d7 [ 134.407938] spl_panic+0xcf/0x102 [spl] [ 134.407938] ? traverse_impl+0x1ca/0x420 [zfs] [ 134.407938] ? dmu_object_alloc_impl+0x3b4/0x3c0 [zfs] [ 134.407938] ? dnode_hold+0x1b/0x20 [zfs] [ 134.407938] dmu_write+0xc3/0xd0 [zfs] [ 134.407938] ? space_map_alloc+0x55/0x80 [zfs] [ 134.407938] metaslab_sync+0x61a/0x830 [zfs] [ 134.407938] ? queued_spin_unlock+0x9/0x10 [zfs] [ 134.407938] vdev_sync+0x72/0x190 [zfs] [ 134.407938] spa_sync_iterate_to_convergence+0x160/0x250 [zfs] [ 134.407938] spa_sync+0x2f7/0x670 [zfs] [ 134.407938] txg_sync_thread+0x22d/0x2d0 [zfs] [ 134.407938] ? txg_dispatch_callbacks+0xf0/0xf0 [zfs] [ 134.407938] thread_generic_wrapper+0x83/0xa0 [spl] [ 134.407938] kthread+0x104/0x140 [ 134.407938] ? kasan_check_write.constprop.0+0x10/0x10 [spl] [ 134.407938] ? kthread_park+0x90/0x90 [ 134.457802] ret_from_fork+0x1f/0x40 This change no longer persists the vdev_removing flag when removing slog devices and also cleans up some code that was added which is not used. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: Mark Maybee <[email protected]> Signed-off-by: George Wilson <[email protected]> Closes #14652
Diffstat (limited to 'include/sys/vdev.h')
-rw-r--r--include/sys/vdev.h5
1 files changed, 2 insertions, 3 deletions
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 7a7c70dc1..d529bbcdd 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -186,9 +186,8 @@ extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1,
- VDEV_CONFIG_REMOVING = 1 << 2,
- VDEV_CONFIG_MOS = 1 << 3,
- VDEV_CONFIG_MISSING = 1 << 4
+ VDEV_CONFIG_MOS = 1 << 2,
+ VDEV_CONFIG_MISSING = 1 << 3
} vdev_config_flag_t;
extern void vdev_post_kobj_evt(vdev_t *vd);