diff options
author | Alan Somers <[email protected]> | 2021-01-11 17:00:19 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2021-01-26 19:35:59 -0800 |
commit | a0e01997ec013fa24a570a36689235ee6c21d9e1 (patch) | |
tree | 3a4f7e1ad7e950e725b161d432283d22aff26eca /module/zfs/vdev.c | |
parent | dfb44c500e0b05b2e3ff058e6e5cbf2431d4f80b (diff) |
Parallelize vdev_load
metaslab_init is the slowest part of importing a mature pool, and it
must be repeated hundreds of times for each top-level vdev. But its
speed is dominated by a few serialized disk accesses. That can lead to
import times of > 1 hour for pools with many top-level vdevs on spinning
disks.
Speed up the import by using a taskqueue to parallelize vdev_load across
all top-level vdevs.
This also requires adding mutex protection to
metaslab_class_t.mc_histogram. The mc_histogram fields were
unprotected when that code was first written in "Illumos 4976-4984 -
metaslab improvements" (OpenZFS
f3a7f6610f2df0217ba3b99099019417a954b673). The lock wasn't added until
3dfb57a35e8cbaa7c424611235d669f3c575ada1, though it's unclear exactly
which fields it's supposed to protect. In any case, it wasn't until
vdev_load was parallelized that any code attempted concurrent access to
those fields.
Sponsored by: Axcient
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Alan Somers <[email protected]>
Closes #11470
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r-- | module/zfs/vdev.c | 42 |
1 files changed, 39 insertions, 3 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index f305da6f5..018e48c38 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1725,6 +1725,14 @@ vdev_probe(vdev_t *vd, zio_t *zio) } static void +vdev_load_child(void *arg) +{ + vdev_t *vd = arg; + + vd->vdev_load_error = vdev_load(vd); +} + +static void vdev_open_child(void *arg) { vdev_t *vd = arg; @@ -3350,18 +3358,46 @@ vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj) int vdev_load(vdev_t *vd) { + int children = vd->vdev_children; int error = 0; + taskq_t *tq = NULL; + + /* + * It's only worthwhile to use the taskq for the root vdev, because the + * slow part is metaslab_init, and that only happens for top-level + * vdevs. + */ + if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) { + tq = taskq_create("vdev_load", children, minclsyspri, + children, children, TASKQ_PREPOPULATE); + } /* * Recursively load all children. */ for (int c = 0; c < vd->vdev_children; c++) { - error = vdev_load(vd->vdev_child[c]); - if (error != 0) { - return (error); + vdev_t *cvd = vd->vdev_child[c]; + + if (tq == NULL || vdev_uses_zvols(cvd)) { + cvd->vdev_load_error = vdev_load(cvd); + } else { + VERIFY(taskq_dispatch(tq, vdev_load_child, + cvd, TQ_SLEEP) != TASKQID_INVALID); } } + if (tq != NULL) { + taskq_wait(tq); + taskq_destroy(tq); + } + + for (int c = 0; c < vd->vdev_children; c++) { + int error = vd->vdev_child[c]->vdev_load_error; + + if (error != 0) + return (error); + } + vdev_set_deflate_ratio(vd); /* |