From dea377c0d9d92bd7d10c0e2f006efa11ab28060a Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Tue, 15 Jul 2014 10:58:41 -0800 Subject: Illumos 4970-4974 - extreme rewind enhancements 4970 need controls on i/o issued by zpool import -XF 4971 zpool import -T should accept hex values 4972 zpool import -T implies extreme rewind, and thus a scrub 4973 spa_load_retry retries the same txg 4974 spa_load_verify() reads all data twice Reviewed by: Christopher Siden Reviewed by: Dan McDonald Reviewed by: George Wilson Approved by: Robert Mustacchi References: https://www.illumos.org/issues/4970 https://www.illumos.org/issues/4971 https://www.illumos.org/issues/4972 https://www.illumos.org/issues/4973 https://www.illumos.org/issues/4974 https://github.com/illumos/illumos-gate/commit/e42d205 Notes: This set of patches adds a set of tunable parameters for the "extreme rewind" mode of pool import which allows control over the traversal performed during such an import. Ported by: Tim Chase Signed-off-by: Brian Behlendorf Closes #2598 --- module/zfs/spa.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 13 deletions(-) (limited to 'module') diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 0824f9c7f..62887122d 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio) spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; + spa_t *spa = zio->io_spa; if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && @@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio) atomic_add_64(&sle->sle_data_count, 1); } zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); + spa->spa_scrub_inflight--; + cv_broadcast(&spa->spa_scrub_io_cv); + mutex_exit(&spa->spa_scrub_lock); } +/* + * Maximum number of concurrent scrub i/os to create while verifying + * a pool while importing it. + */ +int spa_load_verify_maxinflight = 10000; +int spa_load_verify_metadata = B_TRUE; +int spa_load_verify_data = B_TRUE; + /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { - if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { - zio_t *rio = arg; - size_t size = BP_GET_PSIZE(bp); - void *data = zio_data_buf_alloc(size); + zio_t *rio; + size_t size; + void *data; - zio_nowait(zio_read(rio, spa, bp, data, size, - spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | - ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); - } + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + return (0); + /* + * Note: normally this routine will not be called if + * spa_load_verify_metadata is not set. However, it may be useful + * to manually set the flag after the traversal has begun. + */ + if (!spa_load_verify_metadata) + return (0); + if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) + return (0); + + rio = arg; + size = BP_GET_PSIZE(bp); + data = zio_data_buf_alloc(size); + + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; + mutex_exit(&spa->spa_scrub_lock); + + zio_nowait(zio_read(rio, spa, bp, data, size, + spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); } @@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa) spa_load_error_t sle = { 0 }; zpool_rewind_policy_t policy; boolean_t verify_ok = B_FALSE; - int error; + int error = 0; zpool_get_rewind_policy(spa->spa_config, &policy); @@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa) rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); - error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio); + if (spa_load_verify_metadata) { + error = traverse_pool(spa, spa->spa_verify_min_txg, + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, + spa_load_verify_cb, rio); + } (void) zio_wait(rio); @@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig) spa_unload(spa); spa_deactivate(spa); - spa->spa_load_max_txg--; + spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; spa_activate(spa, mode); spa_async_suspend(spa); @@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig, spa_set_log_state(spa, SPA_LOG_CLEAR); } else { spa->spa_load_max_txg = max_request; + if (max_request != UINT64_MAX) + spa->spa_extreme_rewind = B_TRUE; } load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING, @@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs); /* asynchronous event notification */ EXPORT_SYMBOL(spa_event_notify); #endif + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(spa_load_verify_maxinflight, int, 0644); +MODULE_PARM_DESC(spa_load_verify_maxinflight, + "Max concurrent traversal I/Os while verifying pool during import -X"); + +module_param(spa_load_verify_metadata, int, 0644); +MODULE_PARM_DESC(spa_load_verify_metadata, + "Set to traverse metadata on pool import"); + +module_param(spa_load_verify_data, int, 0644); +MODULE_PARM_DESC(spa_load_verify_data, + "Set to traverse data on pool import"); +#endif -- cgit v1.2.3