diff options
author | Paul Dagnelie <[email protected]> | 2019-08-13 20:24:43 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-08-13 21:24:43 -0600 |
commit | dc04a8c757d7df91efbca05491174112540f6e7a (patch) | |
tree | a7f13ef90ddddd8c64e5af6f0ac9a4470ec4c27e /module | |
parent | 8e556c5ebc7b66caf2cdcc561b6644f9f8437a6d (diff) |
Prevent race in blkptr_verify against device removal
When we check the vdev of the blkptr in zfs_blkptr_verify, we can run
into a race condition where that vdev is temporarily unavailable. This
happens when a device removal operation and the old vdev_t has been
removed from the array, but the new indirect vdev has not yet been
inserted.
We hold the spa_config_lock while doing our sensitive verification.
To ensure that we don't deadlock, we only grab the lock if we don't
have config_writer held. In addition, I had to const the tags of the
refcounts and the spa_config_lock arguments.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Serapheim Dimitropoulos <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
Closes #9112
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/refcount.c | 19 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 4 | ||||
-rw-r--r-- | module/zfs/zio.c | 16 |
3 files changed, 23 insertions, 16 deletions
diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index 89528e6d3..a612b2f40 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc) } int64_t -zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder) { reference_t *ref = NULL; int64_t count; @@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_add(zfs_refcount_t *rc, void *holder) +zfs_refcount_add(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_add_many(rc, 1, holder)); } int64_t -zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) +zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, + const void *holder) { reference_t *ref; int64_t count; @@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder) } int64_t -zfs_refcount_remove(zfs_refcount_t *rc, void *holder) +zfs_refcount_remove(zfs_refcount_t *rc, const void *holder) { return (zfs_refcount_remove_many(rc, 1, holder)); } @@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) void zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, - void *current_holder, void *new_holder) + const void *current_holder, const void *new_holder) { reference_t *ref; boolean_t found = B_FALSE; @@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, } void -zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, - void *new_holder) +zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder, + const void *new_holder) { return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder, new_holder)); @@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, * might be held. */ boolean_t -zfs_refcount_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; @@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder) * since the reference might not be held. */ boolean_t -zfs_refcount_not_held(zfs_refcount_t *rc, void *holder) +zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder) { reference_t *ref; diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 3d6c0ee3e..d998fe225 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) +spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) { int wlocks_held = 0; @@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) } void -spa_config_exit(spa_t *spa, int locks, void *tag) +spa_config_exit(spa_t *spa, int locks, const void *tag) { for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index b740afde6..7b4ab341f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -881,8 +881,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags) return (zio_null(NULL, spa, NULL, done, private, flags)); } -void -zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) +static void +zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held) { if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) { zfs_panic_recover("blkptr at %p has invalid TYPE %llu", @@ -921,6 +921,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) if (!spa->spa_trust_config) return; + if (!config_held) + spa_config_enter(spa, SCL_VDEV, bp, RW_READER); + else + ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER)); /* * Pool-specific checks. * @@ -969,6 +973,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) bp, i, (longlong_t)offset); } } + if (!config_held) + spa_config_exit(spa, SCL_VDEV, bp); } boolean_t @@ -1008,7 +1014,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp, data, size, size, done, private, @@ -1101,7 +1107,7 @@ void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, B_FALSE); /* * The check for EMBEDDED is a performance optimization. We @@ -1171,7 +1177,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; - zfs_blkptr_verify(spa, bp); + zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER); if (BP_IS_EMBEDDED(bp)) return (zio_null(pio, spa, NULL, NULL, NULL, 0)); |