summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2014-07-15 10:58:41 -0800
committerBrian Behlendorf <[email protected]>2014-08-26 16:29:57 -0700
commitdea377c0d9d92bd7d10c0e2f006efa11ab28060a (patch)
treeb8ef6185688103208cd903b7739a80073a35bce7
parent49ddb315066e372f31bda29a5c546a9eccc8b418 (diff)
Illumos 4970-4974 - extreme rewind enhancements
4970 need controls on i/o issued by zpool import -XF 4971 zpool import -T should accept hex values 4972 zpool import -T implies extreme rewind, and thus a scrub 4973 spa_load_retry retries the same txg 4974 spa_load_verify() reads all data twice Reviewed by: Christopher Siden <[email protected]> Reviewed by: Dan McDonald <[email protected]> Reviewed by: George Wilson <[email protected]> Approved by: Robert Mustacchi <[email protected]> References: https://www.illumos.org/issues/4970 https://www.illumos.org/issues/4971 https://www.illumos.org/issues/4972 https://www.illumos.org/issues/4973 https://www.illumos.org/issues/4974 https://github.com/illumos/illumos-gate/commit/e42d205 Notes: This set of patches adds a set of tunable parameters for the "extreme rewind" mode of pool import which allows control over the traversal performed during such an import. Ported by: Tim Chase <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2598
-rw-r--r--cmd/zpool/zpool_main.c4
-rw-r--r--man/man5/zfs-module-parameters.546
-rw-r--r--module/zfs/spa.c79
3 files changed, 114 insertions, 15 deletions
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 920acc367..952645ea5 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
*/
@@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)
case 'T':
errno = 0;
- txg = strtoull(optarg, &endptr, 10);
+ txg = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0') {
(void) fprintf(stderr,
gettext("invalid txg value\n"));
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index 4671c396f..3d7093b73 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -233,6 +233,52 @@ Default value: 24
.sp
.ne 2
.na
+\fBspa_load_verify_data\fR (int)
+.ad
+.RS 12n
+Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
+import. Use 0 to disable and 1 to enable.
+
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification. If this parameter is set to 0,
+the traversal skips non-metadata blocks. It can be toggled once the
+import has started to stop or start the traversal of non-metadata blocks.
+.sp
+Default value: 1
+.RE
+
+.sp
+.ne 2
+.na
+\fBspa_load_verify_metadata\fR (int)
+.ad
+.RS 12n
+Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
+pool import. Use 0 to disable and 1 to enable.
+
+An extreme rewind import normally performs a full traversal of all
+blocks in the pool for verification. If this parameter is set to 1,
+the traversal is not performed. It can be toggled once the import has
+started to stop or start the traversal.
+.sp
+Default value: 1
+.RE
+
+.sp
+.ne 2
+.na
+\fBspa_load_verify_maxinflight\fR (int)
+.ad
+.RS 12n
+Maximum concurrent I/Os during the traversal performed during an "extreme
+rewind" (\fB-X\fR) pool import.
+.sp
+Default value: 10000
+.RE
+
+.sp
+.ne 2
+.na
\fBzfetch_array_rd_sz\fR (ulong)
.ad
.RS 12n
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 0824f9c7f..62887122d 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
spa_load_error_t *sle = zio->io_private;
dmu_object_type_t type = BP_GET_TYPE(bp);
int error = zio->io_error;
+ spa_t *spa = zio->io_spa;
if (error) {
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
@@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
atomic_add_64(&sle->sle_data_count, 1);
}
zio_data_buf_free(zio->io_data, zio->io_size);
+
+ mutex_enter(&spa->spa_scrub_lock);
+ spa->spa_scrub_inflight--;
+ cv_broadcast(&spa->spa_scrub_io_cv);
+ mutex_exit(&spa->spa_scrub_lock);
}
+/*
+ * Maximum number of concurrent scrub i/os to create while verifying
+ * a pool while importing it.
+ */
+int spa_load_verify_maxinflight = 10000;
+int spa_load_verify_metadata = B_TRUE;
+int spa_load_verify_data = B_TRUE;
+
/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
- if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
- zio_t *rio = arg;
- size_t size = BP_GET_PSIZE(bp);
- void *data = zio_data_buf_alloc(size);
+ zio_t *rio;
+ size_t size;
+ void *data;
- zio_nowait(zio_read(rio, spa, bp, data, size,
- spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
- }
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ return (0);
+ /*
+ * Note: normally this routine will not be called if
+ * spa_load_verify_metadata is not set. However, it may be useful
+ * to manually set the flag after the traversal has begun.
+ */
+ if (!spa_load_verify_metadata)
+ return (0);
+ if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
+ return (0);
+
+ rio = arg;
+ size = BP_GET_PSIZE(bp);
+ data = zio_data_buf_alloc(size);
+
+ mutex_enter(&spa->spa_scrub_lock);
+ while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+ spa->spa_scrub_inflight++;
+ mutex_exit(&spa->spa_scrub_lock);
+
+ zio_nowait(zio_read(rio, spa, bp, data, size,
+ spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
+ ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
return (0);
}
@@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
spa_load_error_t sle = { 0 };
zpool_rewind_policy_t policy;
boolean_t verify_ok = B_FALSE;
- int error;
+ int error = 0;
zpool_get_rewind_policy(spa->spa_config, &policy);
@@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
- error = traverse_pool(spa, spa->spa_verify_min_txg,
- TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
+ if (spa_load_verify_metadata) {
+ error = traverse_pool(spa, spa->spa_verify_min_txg,
+ TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
+ spa_load_verify_cb, rio);
+ }
(void) zio_wait(rio);
@@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_unload(spa);
spa_deactivate(spa);
- spa->spa_load_max_txg--;
+ spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;
spa_activate(spa, mode);
spa_async_suspend(spa);
@@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
spa->spa_load_max_txg = max_request;
+ if (max_request != UINT64_MAX)
+ spa->spa_extreme_rewind = B_TRUE;
}
load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
@@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);
#endif
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(spa_load_verify_maxinflight, int, 0644);
+MODULE_PARM_DESC(spa_load_verify_maxinflight,
+ "Max concurrent traversal I/Os while verifying pool during import -X");
+
+module_param(spa_load_verify_metadata, int, 0644);
+MODULE_PARM_DESC(spa_load_verify_metadata,
+ "Set to traverse metadata on pool import");
+
+module_param(spa_load_verify_data, int, 0644);
+MODULE_PARM_DESC(spa_load_verify_data,
+ "Set to traverse data on pool import");
+#endif