aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmeer Hamza <[email protected]>2022-09-17 01:52:25 +0500
committerGitHub <[email protected]>2022-09-16 13:52:25 -0700
commit577d41d3b2e4b37f51270c399c85b2708e21238a (patch)
treed946e9b410d7009b542d4b754400fdc097e9a24d
parent8da218a7a2ee0d0c0a0741f1007ebce0bf22584a (diff)
zfs recv hangs if max recordsize is less than received recordsize
- Some optimizations for bqueue enqueue/dequeue. - Added a fix to prevent deadlock when both bqueue_enqueue_impl() and bqueue_dequeue() waits for signal to be triggered. Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Ryan Moeller <[email protected]> Signed-off-by: Ameer Hamza <[email protected]> Closes #13855
-rw-r--r--include/sys/bqueue.h14
-rw-r--r--include/sys/fs/zfs.h6
-rw-r--r--module/zfs/bqueue.c23
3 files changed, 23 insertions, 20 deletions
diff --git a/include/sys/bqueue.h b/include/sys/bqueue.h
index 797aecd79..b96219660 100644
--- a/include/sys/bqueue.h
+++ b/include/sys/bqueue.h
@@ -30,22 +30,22 @@ typedef struct bqueue {
kmutex_t bq_lock;
kcondvar_t bq_add_cv;
kcondvar_t bq_pop_cv;
- uint64_t bq_size;
- uint64_t bq_maxsize;
- uint64_t bq_fill_fraction;
+ size_t bq_size;
+ size_t bq_maxsize;
+ uint_t bq_fill_fraction;
size_t bq_node_offset;
} bqueue_t;
typedef struct bqueue_node {
list_node_t bqn_node;
- uint64_t bqn_size;
+ size_t bqn_size;
} bqueue_node_t;
-int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t);
+int bqueue_init(bqueue_t *, uint_t, size_t, size_t);
void bqueue_destroy(bqueue_t *);
-void bqueue_enqueue(bqueue_t *, void *, uint64_t);
-void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t);
+void bqueue_enqueue(bqueue_t *, void *, size_t);
+void bqueue_enqueue_flush(bqueue_t *, void *, size_t);
void *bqueue_dequeue(bqueue_t *);
boolean_t bqueue_empty(bqueue_t *);
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 8cbd0e602..dedee0e7b 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1758,9 +1758,9 @@ typedef enum {
* against the cost of COWing a giant block to modify one byte, and the
* large latency of reading or writing a large block.
*
- * Note that although blocks up to 16MB are supported, the recordsize
- * property can not be set larger than zfs_max_recordsize (default 1MB).
- * See the comment near zfs_max_recordsize in dsl_dataset.c for details.
+ * The recordsize property can not be set larger than zfs_max_recordsize
+ * (default 16MB on 64-bit and 1MB on 32-bit). See the comment near
+ * zfs_max_recordsize in dsl_dataset.c for details.
*
* Note that although the LSIZE field of the blkptr_t can store sizes up
* to 32MB, the dnode's dn_datablkszsec can only store sizes up to
diff --git a/module/zfs/bqueue.c b/module/zfs/bqueue.c
index 22539efc4..ec5ce4388 100644
--- a/module/zfs/bqueue.c
+++ b/module/zfs/bqueue.c
@@ -42,8 +42,7 @@ obj2node(bqueue_t *q, void *data)
* Return 0 on success, or -1 on failure.
*/
int
-bqueue_init(bqueue_t *q, uint64_t fill_fraction, uint64_t size,
- size_t node_offset)
+bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset)
{
if (fill_fraction == 0) {
return (-1);
@@ -78,22 +77,26 @@ bqueue_destroy(bqueue_t *q)
}
static void
-bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
- boolean_t flush)
+bqueue_enqueue_impl(bqueue_t *q, void *data, size_t item_size, boolean_t flush)
{
ASSERT3U(item_size, >, 0);
ASSERT3U(item_size, <=, q->bq_maxsize);
mutex_enter(&q->bq_lock);
obj2node(q, data)->bqn_size = item_size;
- while (q->bq_size + item_size > q->bq_maxsize) {
+ while (q->bq_size && q->bq_size + item_size > q->bq_maxsize) {
+ /*
+ * Wake up bqueue_dequeue() thread if already sleeping in order
+ * to prevent the deadlock condition
+ */
+ cv_signal(&q->bq_pop_cv);
cv_wait_sig(&q->bq_add_cv, &q->bq_lock);
}
q->bq_size += item_size;
list_insert_tail(&q->bq_list, data);
- if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
- cv_signal(&q->bq_pop_cv);
if (flush)
cv_broadcast(&q->bq_pop_cv);
+ else if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
+ cv_signal(&q->bq_pop_cv);
mutex_exit(&q->bq_lock);
}
@@ -103,7 +106,7 @@ bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
* > 0.
*/
void
-bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue(bqueue_t *q, void *data, size_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_FALSE);
}
@@ -117,7 +120,7 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
* destroy the condvar before the enqueuing thread is done.
*/
void
-bqueue_enqueue_flush(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue_flush(bqueue_t *q, void *data, size_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_TRUE);
}
@@ -130,7 +133,7 @@ void *
bqueue_dequeue(bqueue_t *q)
{
void *ret = NULL;
- uint64_t item_size;
+ size_t item_size;
mutex_enter(&q->bq_lock);
while (q->bq_size == 0) {
cv_wait_sig(&q->bq_pop_cv, &q->bq_lock);