aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--man/man4/zfs.42
-rw-r--r--module/zfs/vdev_rebuild.c27
2 files changed, 19 insertions, 10 deletions
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 031981f9b..88a044f63 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1769,7 +1769,7 @@ completes in order to verify the checksums of all blocks which have been
resilvered.
This is enabled by default and strongly recommended.
.
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
Maximum amount of I/O that can be concurrently issued for a sequential
resilver per leaf device, given in bytes.
.
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 1f56275c8..62aa61b3b 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -34,6 +34,7 @@
#include <sys/zio.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
+#include <sys/arc_impl.h>
#include <sys/zap.h>
/*
@@ -116,13 +117,12 @@ static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
* segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
* the queue depth short.
*
- * 32MB was selected as the default value to achieve good performance with
- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
- * rebuild was unable to saturate all of the drives using smaller values.
- * With a value of 32MB the sequential resilver write rate was measured at
- * 800MB/s sustained while rebuilding to a distributed spare.
+ * 64MB was observed to deliver the best performance and was set as the default.
+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
+ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
+ * Smaller values were unable to fully saturate the available pool I/O.
*/
-static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
+static uint64_t zfs_rebuild_vdev_limit = 64 << 20;
/*
* Automatically start a pool scrub when the last active sequential resilver
@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
{
vdev_t *vd = arg;
spa_t *spa = vd->vdev_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
int error = 0;
/*
@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
vr->vr_pass_bytes_scanned = 0;
vr->vr_pass_bytes_issued = 0;
- vr->vr_bytes_inflight_max = MAX(1ULL << 20,
- zfs_rebuild_vdev_limit * vd->vdev_children);
-
uint64_t update_est_time = gethrtime();
vdev_rebuild_update_bytes_est(vd, 0);
@@ -805,6 +803,17 @@ vdev_rebuild_thread(void *arg)
vr->vr_scan_msp = msp;
/*
+ * Calculate the max number of in-flight bytes for top-level
+ * vdev scanning operations (minimum 1MB, maximum 1/4 of
+ * arc_c_max shared by all top-level vdevs). Limits for the
+ * issuing phase are done per top-level vdev and are handled
+ * separately.
+ */
+ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
+ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
+ zfs_rebuild_vdev_limit * vd->vdev_children));
+
+ /*
* Removal of vdevs from the vdev tree may eliminate the need
* for the rebuild, in which case it should be canceled. The
* vdev_rebuild_cancel_wanted flag is set until the sync task