diff options
author | Matthew Ahrens <[email protected]> | 2022-12-22 11:48:49 -0800 |
---|---|---|
committer | GitHub <[email protected]> | 2022-12-22 11:48:49 -0800 |
commit | 018f26041d67447de28c293c35e1f664ce3c1abb (patch) | |
tree | 30d8ffa1ef9a5e61281da311a582139eed401765 /lib/libzfs/libzfs_pool.c | |
parent | 29e1b089c14bf0f09d16801652cd82376fa3feb2 (diff) |
deadlock between spa_errlog_lock and dp_config_rwlock
There is a lock order inversion deadlock between `spa_errlog_lock` and
`dp_config_rwlock`:
A thread in `spa_delete_dataset_errlog()` is running from a sync task.
It is holding the `dp_config_rwlock` for writer (see
`dsl_sync_task_sync()`), and waiting for the `spa_errlog_lock`.
A thread in `dsl_pool_config_enter()` is holding the `spa_errlog_lock`
(see `spa_get_errlog_size()`) and waiting for the `dp_config_rwlock` (as
reader).
Note that this was introduced by #12812.
This commit address this by defining the lock ordering to be
dp_config_rwlock first, then spa_errlog_lock / spa_errlist_lock.
spa_get_errlog() and spa_get_errlog_size() can acquire the locks in this
order, and then process_error_block() and get_head_and_birth_txg() can
verify that the dp_config_rwlock is already held.
Additionally, a buffer overrun in `spa_get_errlog()` is corrected. Many
code paths didn't check if `*count` got to zero, instead continuing to
overwrite past the beginning of the userspace buffer at `uaddr`.
Tested by having some errors in the pool (via `zinject -t data
/path/to/file`), one thread running `zpool iostat 0.001`, and another
thread runs `zfs destroy` (in a loop, although it hits the first time).
This reproduces the problem easily without the fix, and works with the
fix.
Reviewed-by: Mark Maybee <[email protected]>
Reviewed-by: George Amanakis <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #14239
Closes #14289
Diffstat (limited to 'lib/libzfs/libzfs_pool.c')
-rw-r--r-- | lib/libzfs/libzfs_pool.c | 46 |
1 files changed, 20 insertions, 26 deletions
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 7f7e19a09..2977986e1 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -4133,33 +4133,28 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; - uint64_t count; - zbookmark_phys_t *zb = NULL; - int i; + zbookmark_phys_t *buf; + uint64_t buflen = 10000; /* approx. 1MB of RAM */ + + if (fnvlist_lookup_uint64(zhp->zpool_config, + ZPOOL_CONFIG_ERRCOUNT) == 0) + return (0); /* - * Retrieve the raw error list from the kernel. If the number of errors - * has increased, allocate more space and continue until we get the - * entire list. + * Retrieve the raw error list from the kernel. If it doesn't fit, + * allocate a larger buffer and retry. */ - count = fnvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT); - if (count == 0) - return (0); - zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, - count * sizeof (zbookmark_phys_t)); - zc.zc_nvlist_dst_size = count; (void) strcpy(zc.zc_name, zhp->zpool_name); for (;;) { + buf = zfs_alloc(zhp->zpool_hdl, + buflen * sizeof (zbookmark_phys_t)); + zc.zc_nvlist_dst = (uintptr_t)buf; + zc.zc_nvlist_dst_size = buflen; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG, &zc) != 0) { - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); if (errno == ENOMEM) { - void *dst; - - count = zc.zc_nvlist_dst_size; - dst = zfs_alloc(zhp->zpool_hdl, count * - sizeof (zbookmark_phys_t)); - zc.zc_nvlist_dst = (uintptr_t)dst; + buflen *= 2; } else { return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "errors: List of " @@ -4177,18 +4172,17 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. */ - zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) + - zc.zc_nvlist_dst_size; - count -= zc.zc_nvlist_dst_size; + zbookmark_phys_t *zb = buf + zc.zc_nvlist_dst_size; + uint64_t zblen = buflen - zc.zc_nvlist_dst_size; - qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare); + qsort(zb, zblen, sizeof (zbookmark_phys_t), zbookmark_mem_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); /* * Fill in the nverrlistp with nvlist's of dataset and object numbers. */ - for (i = 0; i < count; i++) { + for (uint64_t i = 0; i < zblen; i++) { nvlist_t *nv; /* ignoring zb_blkid and zb_level for now */ @@ -4215,11 +4209,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) nvlist_free(nv); } - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); return (0); nomem: - free((void *)(uintptr_t)zc.zc_nvlist_dst); + free(buf); return (no_memory(zhp->zpool_hdl)); } |