aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/vdev.c
diff options
context:
space:
mode:
authorAlan Somers <[email protected]>2021-01-11 17:00:19 -0700
committerBrian Behlendorf <[email protected]>2021-01-26 19:35:59 -0800
commita0e01997ec013fa24a570a36689235ee6c21d9e1 (patch)
tree3a4f7e1ad7e950e725b161d432283d22aff26eca /module/zfs/vdev.c
parentdfb44c500e0b05b2e3ff058e6e5cbf2431d4f80b (diff)
Parallelize vdev_load
metaslab_init is the slowest part of importing a mature pool, and it must be repeated hundreds of times for each top-level vdev. But its speed is dominated by a few serialized disk accesses. That can lead to import times of > 1 hour for pools with many top-level vdevs on spinny disks. Speed up the import by using a taskqueue to parallelize vdev_load across all top-level vdevs. This also requires adding mutex protection to metaslab_class_t.mc_histogram. The mc_histogram fields were unprotected when that code was first written in "Illumos 4976-4984 - metaslab improvements" (OpenZFS f3a7f6610f2df0217ba3b99099019417a954b673). The lock wasn't added until 3dfb57a35e8cbaa7c424611235d669f3c575ada1, though it's unclear exactly which fields it's supposed to protect. In any case, it wasn't until vdev_load was parallelized that any code attempted concurrent access to those fields. Sponsored by: Axcient Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Alan Somers <[email protected]> Closes #11470
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r--module/zfs/vdev.c42
1 files changed, 39 insertions, 3 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index f305da6f5..018e48c38 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1725,6 +1725,14 @@ vdev_probe(vdev_t *vd, zio_t *zio)
}
static void
+vdev_load_child(void *arg)
+{
+ vdev_t *vd = arg;
+
+ vd->vdev_load_error = vdev_load(vd);
+}
+
+static void
vdev_open_child(void *arg)
{
vdev_t *vd = arg;
@@ -3350,18 +3358,46 @@ vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj)
int
vdev_load(vdev_t *vd)
{
+ int children = vd->vdev_children;
int error = 0;
+ taskq_t *tq = NULL;
+
+ /*
+ * It's only worthwhile to use the taskq for the root vdev, because the
+ * slow part is metaslab_init, and that only happens for top-level
+ * vdevs.
+ */
+ if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) {
+ tq = taskq_create("vdev_load", children, minclsyspri,
+ children, children, TASKQ_PREPOPULATE);
+ }
/*
* Recursively load all children.
*/
for (int c = 0; c < vd->vdev_children; c++) {
- error = vdev_load(vd->vdev_child[c]);
- if (error != 0) {
- return (error);
+ vdev_t *cvd = vd->vdev_child[c];
+
+ if (tq == NULL || vdev_uses_zvols(cvd)) {
+ cvd->vdev_load_error = vdev_load(cvd);
+ } else {
+ VERIFY(taskq_dispatch(tq, vdev_load_child,
+ cvd, TQ_SLEEP) != TASKQID_INVALID);
}
}
+ if (tq != NULL) {
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ }
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ int error = vd->vdev_child[c]->vdev_load_error;
+
+ if (error != 0)
+ return (error);
+ }
+
vdev_set_deflate_ratio(vd);
/*