author | Arne Jansen <[email protected]> | 2015-05-06 09:07:55 -0700
---|---|---
committer | Brian Behlendorf <[email protected]> | 2015-06-09 13:48:02 -0700
commit | 9c43027b3f18769f2ace16eaa222ac8b301501f4 (patch)
tree | 90a3577807c203ca9022234405b30e7a7fdcad2d /module/zfs/dmu_objset.c
parent | d050c627b5fdfaf72eac90bc07e03fcb2d8a123f (diff)
Illumos 5269 - zpool import slow
5269 zpool import slow
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/5269
https://github.com/illumos/illumos-gate/commit/12380e1e
Ported-by: DHE <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #3396
Diffstat (limited to 'module/zfs/dmu_objset.c')
-rw-r--r-- | module/zfs/dmu_objset.c | 221
1 file changed, 182 insertions, 39 deletions
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 888c251ce..6ef6adbd9 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -24,6 +24,7 @@
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -48,6 +49,7 @@
 #include <sys/sa.h>
 #include <sys/zfs_onexit.h>
 #include <sys/dsl_destroy.h>
+#include <sys/vdev.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -55,6 +57,16 @@
  */
 krwlock_t os_lock;
 
+/*
+ * Tunable to overwrite the maximum number of threads for the parallization
+ * of dmu_objset_find_dp, needed to speed up the import of pools with many
+ * datasets.
+ * Default is 4 times the number of leaf vdevs.
+ */
+int dmu_find_threads = 0;
+
+static void dmu_objset_find_dp_cb(void *arg);
+
 void
 dmu_objset_init(void)
 {
@@ -504,6 +516,25 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 	return (err);
 }
 
+static int
+dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
+{
+	int err;
+
+	err = dmu_objset_from_ds(ds, osp);
+	if (err != 0) {
+		dsl_dataset_disown(ds, tag);
+	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EINVAL));
+	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
+		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EROFS));
+	}
+	return (err);
+}
+
 /*
  * dsl_pool must not be held when this is called.
  * Upon successful return, there will be a longhold on the dataset,
@@ -525,21 +556,26 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
 		dsl_pool_rele(dp, FTAG);
 		return (err);
 	}
-
-	err = dmu_objset_from_ds(ds, osp);
+	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
 	dsl_pool_rele(dp, FTAG);
-	if (err != 0) {
-		dsl_dataset_disown(ds, tag);
-	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
-		dsl_dataset_disown(ds, tag);
-		return (SET_ERROR(EINVAL));
-	} else if (!readonly && ds->ds_is_snapshot) {
-		dsl_dataset_disown(ds, tag);
-		return (SET_ERROR(EROFS));
-	}
+
 	return (err);
 }
 
+int
+dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
+{
+	dsl_dataset_t *ds;
+	int err;
+
+	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+	if (err != 0)
+		return (err);
+
+	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+}
+
 void
 dmu_objset_rele(objset_t *os, void *tag)
 {
@@ -1618,30 +1654,41 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name,
 	return (0);
 }
 
-/*
- * Find objsets under and including ddobj, call func(ds) on each.
- */
-int
-dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
-    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+typedef struct dmu_objset_find_ctx {
+	taskq_t		*dc_tq;
+	dsl_pool_t	*dc_dp;
+	uint64_t	dc_ddobj;
+	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
+	void		*dc_arg;
+	int		dc_flags;
+	kmutex_t	*dc_error_lock;
+	int		*dc_error;
+} dmu_objset_find_ctx_t;
+
+static void
+dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
 {
+	dsl_pool_t *dp = dcp->dc_dp;
+	dmu_objset_find_ctx_t *child_dcp;
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 	zap_cursor_t zc;
 	zap_attribute_t *attr;
 	uint64_t thisobj;
-	int err;
+	int err = 0;
 
-	ASSERT(dsl_pool_config_held(dp));
+	/* don't process if there already was an error */
+	if (*dcp->dc_error != 0)
+		goto out;
 
-	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
 	if (err != 0)
-		return (err);
+		goto out;
 
 	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
 	if (dd->dd_myname[0] == '$') {
 		dsl_dir_rele(dd, FTAG);
-		return (0);
+		goto out;
 	}
 
 	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
@@ -1650,7 +1697,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 	/*
 	 * Iterate over all children.
 	 */
-	if (flags & DS_FIND_CHILDREN) {
+	if (dcp->dc_flags & DS_FIND_CHILDREN) {
 		for (zap_cursor_init(&zc, dp->dp_meta_objset,
 		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
 		    zap_cursor_retrieve(&zc, attr) == 0;
@@ -1659,24 +1706,22 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
 			    sizeof (uint64_t));
 			ASSERT3U(attr->za_num_integers, ==, 1);
 
-			err = dmu_objset_find_dp(dp, attr->za_first_integer,
-			    func, arg, flags);
-			if (err != 0)
-				break;
+			child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
+			*child_dcp = *dcp;
+			child_dcp->dc_ddobj = attr->za_first_integer;
+			if (dcp->dc_tq != NULL)
+				(void) taskq_dispatch(dcp->dc_tq,
+				    dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
+			else
+				dmu_objset_find_dp_impl(child_dcp);
 		}
 		zap_cursor_fini(&zc);
-
-		if (err != 0) {
-			dsl_dir_rele(dd, FTAG);
-			kmem_free(attr, sizeof (zap_attribute_t));
-			return (err);
-		}
 	}
 
 	/*
 	 * Iterate over all snapshots.
 	 */
-	if (flags & DS_FIND_SNAPSHOTS) {
+	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
 		dsl_dataset_t *ds;
 
 		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
@@ -1697,7 +1742,7 @@
 			    attr->za_first_integer, FTAG, &ds);
 			if (err != 0)
 				break;
-			err = func(dp, ds, arg);
+			err = dcp->dc_func(dp, ds, dcp->dc_arg);
 			dsl_dataset_rele(ds, FTAG);
 			if (err != 0)
 				break;
@@ -1710,17 +1755,115 @@
 	kmem_free(attr, sizeof (zap_attribute_t));
 
 	if (err != 0)
-		return (err);
+		goto out;
 
 	/*
 	 * Apply to self.
 	 */
 	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
 	if (err != 0)
-		return (err);
-	err = func(dp, ds, arg);
+		goto out;
+	err = dcp->dc_func(dp, ds, dcp->dc_arg);
 	dsl_dataset_rele(ds, FTAG);
-	return (err);
+
+out:
+	if (err != 0) {
+		mutex_enter(dcp->dc_error_lock);
+		/* only keep first error */
+		if (*dcp->dc_error == 0)
+			*dcp->dc_error = err;
+		mutex_exit(dcp->dc_error_lock);
+	}
+
+	kmem_free(dcp, sizeof (*dcp));
+}
+
+static void
+dmu_objset_find_dp_cb(void *arg)
+{
+	dmu_objset_find_ctx_t *dcp = arg;
+	dsl_pool_t *dp = dcp->dc_dp;
+
+	dsl_pool_config_enter(dp, FTAG);
+
+	dmu_objset_find_dp_impl(dcp);
+
+	dsl_pool_config_exit(dp, FTAG);
+}
+
+/*
+ * Find objsets under and including ddobj, call func(ds) on each.
+ * The order for the enumeration is completely undefined.
+ * func is called with dsl_pool_config held.
+ */
+int
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+{
+	int error = 0;
+	taskq_t *tq = NULL;
+	int ntasks;
+	dmu_objset_find_ctx_t *dcp;
+	kmutex_t err_lock;
+
+	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
+	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
+	dcp->dc_tq = NULL;
+	dcp->dc_dp = dp;
+	dcp->dc_ddobj = ddobj;
+	dcp->dc_func = func;
+	dcp->dc_arg = arg;
+	dcp->dc_flags = flags;
+	dcp->dc_error_lock = &err_lock;
+	dcp->dc_error = &error;
+
+	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
+		/*
+		 * In case a write lock is held we can't make use of
+		 * parallelism, as down the stack of the worker threads
+		 * the lock is asserted via dsl_pool_config_held.
+		 * In case of a read lock this is solved by getting a read
+		 * lock in each worker thread, which isn't possible in case
+		 * of a writer lock. So we fall back to the synchronous path
+		 * here.
+		 * In the future it might be possible to get some magic into
+		 * dsl_pool_config_held in a way that it returns true for
+		 * the worker threads so that a single lock held from this
+		 * thread suffices. For now, stay single threaded.
+		 */
+		dmu_objset_find_dp_impl(dcp);
+
+		return (error);
+	}
+
+	ntasks = dmu_find_threads;
+	if (ntasks == 0)
+		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
+	tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
+	    INT_MAX, 0);
+	if (tq == NULL) {
+		kmem_free(dcp, sizeof (*dcp));
+		return (SET_ERROR(ENOMEM));
+	}
+	dcp->dc_tq = tq;
+
+	/* dcp will be freed by task */
+	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
+
+	/*
+	 * PORTING: this code relies on the property of taskq_wait to wait
+	 * until no more tasks are queued and no more tasks are active. As
+	 * we always queue new tasks from within other tasks, task_wait
+	 * reliably waits for the full recursion to finish, even though we
+	 * enqueue new tasks after taskq_wait has been called.
+	 * On platforms other than illumos, taskq_wait may not have this
+	 * property.
+	 */
+	taskq_wait(tq);
+	taskq_destroy(tq);
+	mutex_destroy(&err_lock);
+
+	return (error);
 }
 
 /*
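
The change above leans on two properties spelled out in its comments: only the first error encountered by any worker is kept, and taskq_wait() does not return until the recursion has fully quiesced, even though tasks keep enqueueing more tasks (the PORTING note). The following is a minimal userspace sketch of that pattern, not ZFS code: POSIX threads stand in for the taskq, and every name in it (work_pool_t, find_arg_t, dispatch, find_node) is invented for illustration.

/*
 * Minimal userspace model of the fan-out used by the parallel
 * dmu_objset_find_dp(): each visited node dispatches its children as
 * new tasks, only the first error is recorded, and the caller waits
 * until the active count drains to zero.  POSIX threads stand in for
 * the kernel taskq; all names here are illustrative, not ZFS code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct work_pool {
	pthread_mutex_t	wp_lock;
	pthread_cond_t	wp_cv;
	int		wp_active;	/* tasks queued or running */
	int		wp_error;	/* first error only */
} work_pool_t;

typedef struct find_arg {
	work_pool_t	*fa_pool;
	int		fa_depth;	/* stands in for a dsl_dir object id */
} find_arg_t;

static void *find_node(void *);

/* Queue one child task; the count is raised before the thread exists. */
static void
dispatch(work_pool_t *wp, int depth)
{
	find_arg_t *fa = malloc(sizeof (*fa));
	pthread_t tid;

	fa->fa_pool = wp;
	fa->fa_depth = depth;

	pthread_mutex_lock(&wp->wp_lock);
	wp->wp_active++;
	pthread_mutex_unlock(&wp->wp_lock);

	(void) pthread_create(&tid, NULL, find_node, fa);
	(void) pthread_detach(tid);
}

/* Visit one node: fan out to its children, then record the first error. */
static void *
find_node(void *arg)
{
	find_arg_t *fa = arg;
	work_pool_t *wp = fa->fa_pool;
	int err = 0;

	if (fa->fa_depth < 3) {		/* pretend every dir has two children */
		dispatch(wp, fa->fa_depth + 1);
		dispatch(wp, fa->fa_depth + 1);
	}
	/* a real visitor, func(dp, ds, arg), would set err here */

	pthread_mutex_lock(&wp->wp_lock);
	if (err != 0 && wp->wp_error == 0)
		wp->wp_error = err;	/* only keep first error */
	if (--wp->wp_active == 0)
		pthread_cond_broadcast(&wp->wp_cv);
	pthread_mutex_unlock(&wp->wp_lock);

	free(fa);
	return (NULL);
}

int
main(void)
{
	work_pool_t wp = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};

	dispatch(&wp, 0);		/* root dataset directory */

	/*
	 * Stand-in for taskq_wait(): block until the whole recursion has
	 * quiesced, even though tasks keep enqueueing more tasks after
	 * the wait started.
	 */
	pthread_mutex_lock(&wp.wp_lock);
	while (wp.wp_active != 0)
		pthread_cond_wait(&wp.wp_cv, &wp.wp_lock);
	pthread_mutex_unlock(&wp.wp_lock);

	printf("walk complete, first error = %d\n", wp.wp_error);
	return (0);
}

A real taskq also caps concurrency, here at ntasks threads (four per leaf vdev by default), rather than spawning one thread per dataset as this toy does; that bound is the part the dmu_find_threads tunable lets an administrator override.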