summaryrefslogtreecommitdiffstats
path: root/cmd
diff options
context:
space:
mode:
authorGeorge Wilson <[email protected]>2013-05-02 16:36:32 -0700
committerBrian Behlendorf <[email protected]>2013-05-03 16:53:52 -0700
commit5853fe790d1df58c5dd85ea52c5e165b6d43013c (patch)
treea17c4c13e3dba02ef842b408b9b2dfc76387422c /cmd
parent5165473737e488447edfe25209c68704e08b3a2d (diff)
Illumos #3306, #3321
3306 zdb should be able to issue reads in parallel 3321 'zpool reopen' command should be documented in the man page and help Reviewed by: Adam Leventhal <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Christopher Siden <[email protected]> Approved by: Garrett D'Amore <[email protected]> References: illumos/illumos-gate@31d7e8fa33fae995f558673adb22641b5aa8b6e1 https://www.illumos.org/issues/3306 https://www.illumos.org/issues/3321 The vdev_file.c implementation in this patch diverges significantly from the upstream version. For consistenty with the vdev_disk.c code the upstream version leverages the Illumos bio interfaces. This makes sense for Illumos but not for ZoL for two reasons. 1) The vdev_disk.c code in ZoL has been rewritten to use the Linux block device interfaces which differ significantly from those in Illumos. Therefore, updating the vdev_file.c to use the Illumos interfaces doesn't get you consistency with vdev_disk.c. 2) Using the upstream patch as is would requiring implementing compatibility code for those Solaris block device interfaces in user and kernel space. That additional complexity could lead to confusion and doesn't buy us anything. For these reasons I've opted to simply move the existing vn_rdwr() as is in to the taskq function. This has the advantage of being low risk and easy to understand. Moving the vn_rdwr() function in to its own taskq thread also neatly avoids the possibility of a stack overflow. Finally, because of the additional work which is being handled by the free taskq the number of threads has been increased. The thread count under Illumos defaults to 100 but was decreased to 2 in commit 08d08e due to contention. We increase it to 8 until the contention can be address by porting Illumos #3581. Ported-by: Brian Behlendorf <[email protected]> Closes #1354
Diffstat (limited to 'cmd')
-rw-r--r--cmd/zdb/zdb.c106
-rw-r--r--cmd/zpool/zpool_main.c27
2 files changed, 98 insertions, 35 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 70b4ba268..0114c4168 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -86,6 +86,7 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
+uint64_t max_inflight = 200;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -108,13 +109,14 @@ usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
- "poolname [object...]\n"
- " %s [-divPA] [-e -p path...] dataset [object...]\n"
- " %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
+ "[-U config] [-M inflight I/Os] poolname [object...]\n"
+ " %s [-divPA] [-e -p path...] [-U config] dataset "
+ "[object...]\n"
+ " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
"poolname [vdev [metaslab...]]\n"
" %s -R [-A] [-e [-p path...]] poolname "
"vdev:offset:size[:flags]\n"
- " %s -S [-PA] [-e [-p path...]] poolname\n"
+ " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
" %s -l [-uA] device\n"
" %s -C [-A] [-U config]\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
@@ -161,6 +163,8 @@ usage(void)
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
+ (void) fprintf(stderr, " -M <number of inflight I/Os> -- "
+ "specify the maximum number of checksumming I/Os [default is 200]");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -2005,6 +2009,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
+static void
+zdb_blkptr_done(zio_t *zio)
+{
+ spa_t *spa = zio->io_spa;
+ blkptr_t *bp = zio->io_bp;
+ int ioerr = zio->io_error;
+ zdb_cb_t *zcb = zio->io_private;
+ zbookmark_t *zb = &zio->io_bookmark;
+
+ zio_data_buf_free(zio->io_data, zio->io_size);
+
+ mutex_enter(&spa->spa_scrub_lock);
+ spa->spa_scrub_inflight--;
+ cv_broadcast(&spa->spa_scrub_io_cv);
+
+ if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+ char blkbuf[BP_SPRINTF_LEN];
+
+ zcb->zcb_haderrors = 1;
+ zcb->zcb_errors[ioerr]++;
+
+ if (dump_opt['b'] >= 2)
+ sprintf_blkptr(blkbuf, bp);
+ else
+ blkbuf[0] = '\0';
+
+ (void) printf("zdb_blkptr_cb: "
+ "Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+ ioerr,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf);
+ }
+ mutex_exit(&spa->spa_scrub_lock);
+}
+
/* ARGSUSED */
static int
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
@@ -2026,39 +2069,23 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
- int ioerr;
size_t size = BP_GET_PSIZE(bp);
- void *data = malloc(size);
+ void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
/* If it's an intent log block, failure is expected. */
if (zb->zb_level == ZB_ZIL_LEVEL)
flags |= ZIO_FLAG_SPECULATIVE;
- ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
-
- free(data);
+ mutex_enter(&spa->spa_scrub_lock);
+ while (spa->spa_scrub_inflight > max_inflight)
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+ spa->spa_scrub_inflight++;
+ mutex_exit(&spa->spa_scrub_lock);
- if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
- zcb->zcb_haderrors = 1;
- zcb->zcb_errors[ioerr]++;
+ zio_nowait(zio_read(NULL, spa, bp, data, size,
+ zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
- if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, bp);
- else
- blkbuf[0] = '\0';
-
- (void) printf("zdb_blkptr_cb: "
- "Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
- ioerr,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf);
- }
}
zcb->zcb_readfails = 0;
@@ -2266,6 +2293,18 @@ dump_block_stats(spa_t *spa)
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
+ /*
+ * If we've traversed the data blocks then we need to wait for those
+ * I/Os to complete. We leverage "The Godfather" zio to wait on
+ * all async I/Os to complete.
+ */
+ if (dump_opt['c']) {
+ (void) zio_wait(spa->spa_async_zio_root);
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+ ZIO_FLAG_GODFATHER);
+ }
+
if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
@@ -3026,7 +3065,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
+ while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -3055,6 +3094,15 @@ main(int argc, char **argv)
case 'v':
verbose++;
break;
+ case 'M':
+ max_inflight = strtoull(optarg, NULL, 0);
+ if (max_inflight == 0) {
+ (void) fprintf(stderr, "maximum number "
+ "of inflight I/Os must be greater "
+ "than 0\n");
+ usage();
+ }
+ break;
case 'p':
if (searchdirs == NULL) {
searchdirs = umem_alloc(sizeof (char *),
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 320069873..37aa2894d 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -246,7 +246,7 @@ get_usage(zpool_help_t idx) {
case HELP_REMOVE:
return (gettext("\tremove <pool> <device> ...\n"));
case HELP_REOPEN:
- return (""); /* Undocumented command */
+ return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
@@ -3612,22 +3612,37 @@ zpool_do_reguid(int argc, char **argv)
* zpool reopen <pool>
*
* Reopen the pool so that the kernel can update the sizes of all vdevs.
- *
- * NOTE: This command is currently undocumented. If the command is ever
- * exposed then the appropriate usage() messages will need to be made.
*/
int
zpool_do_reopen(int argc, char **argv)
{
+ int c;
int ret = 0;
zpool_handle_t *zhp;
char *pool;
+ /* check options */
+ while ((c = getopt(argc, argv, "")) != -1) {
+ switch (c) {
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
argc--;
argv++;
- if (argc != 1)
- return (2);
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing pool name\n"));
+ usage(B_FALSE);
+ }
+
+ if (argc > 1) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
pool = argv[0];
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)