diff options
author | Don Brady <[email protected]> | 2018-09-05 19:33:36 -0600 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-09-05 18:33:36 -0700 |
commit | cc99f275a28c43fe450a66a7544f73c4935f7361 (patch) | |
tree | f867e1d2cbb550a047c0f87986831252c41a2fd9 /cmd/ztest | |
parent | cfa37548ebc880580782b245f2d233ed540e7a01 (diff) |
Pool allocation classes
Allocation classes let a pool dedicate top-level VDEVs to serving
specific block categories, such as DDT data, metadata, and small
file blocks. A pool can opt in to this feature by adding a
'special' or 'dedup' top-level VDEV.
Reviewed-by: Pavel Zakharov <[email protected]>
Reviewed-by: Richard Laager <[email protected]>
Reviewed-by: Alek Pinchuk <[email protected]>
Reviewed-by: Håkan Johansson <[email protected]>
Reviewed-by: Andreas Dilger <[email protected]>
Reviewed-by: DHE <[email protected]>
Reviewed-by: Richard Elling <[email protected]>
Reviewed-by: Gregor Kopka <[email protected]>
Reviewed-by: Kash Pande <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matthew Ahrens <[email protected]>
Signed-off-by: Don Brady <[email protected]>
Closes #5182
Diffstat (limited to 'cmd/ztest')
-rw-r--r-- | cmd/ztest/ztest.c | 187 |
1 files changed, 170 insertions, 17 deletions
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 71d5ed646..83d057a74 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -20,11 +20,12 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017, Intel Corporation. */ /* @@ -149,6 +150,12 @@ typedef struct ztest_shared_hdr { static ztest_shared_hdr_t *ztest_shared_hdr; +enum ztest_class_state { + ZTEST_VDEV_CLASS_OFF, + ZTEST_VDEV_CLASS_ON, + ZTEST_VDEV_CLASS_RND +}; + typedef struct ztest_shared_opts { char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; @@ -171,6 +178,7 @@ typedef struct ztest_shared_opts { uint64_t zo_maxloops; uint64_t zo_metaslab_force_ganging; int zo_mmp_test; + int zo_special_vdevs; } ztest_shared_opts_t; static const ztest_shared_opts_t ztest_opts_defaults = { @@ -194,6 +202,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { .zo_time = 300, /* 5 minutes */ .zo_maxloops = 50, /* max loops during spa_freeze() */ .zo_metaslab_force_ganging = 32 << 10, + .zo_special_vdevs = ZTEST_VDEV_CLASS_RND, }; extern uint64_t metaslab_force_ganging; @@ -342,6 +351,7 @@ ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_vdev_attach_detach; ztest_func_t ztest_vdev_LUN_growth; ztest_func_t ztest_vdev_add_remove; +ztest_func_t ztest_vdev_class_add; ztest_func_t ztest_vdev_aux_add_remove; ztest_func_t ztest_split_pool; ztest_func_t ztest_reguid; @@ -398,6 +408,7 @@ ztest_info_t ztest_info[] = { ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes), ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely), ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime), + 
ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime), ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime), ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes), ZTI_INIT(ztest_remap_blocks, 1, &zopt_sometimes), @@ -666,6 +677,7 @@ usage(boolean_t requested) "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" "\t[-P passtime (default: %llu sec)] time per pass\n" "\t[-B alt_ztest (default: <none>)] alternate ztest path\n" + "\t[-C vdev class state (default: random)] special=on|off|random\n" "\t[-o variable=value] ... set global variable to an unsigned\n" "\t 32-bit integer value\n" "\t[-G dump zfs_dbgmsg buffer before exiting due to an error\n" @@ -691,6 +703,46 @@ usage(boolean_t requested) exit(requested ? 0 : 1); } + +static void +ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo) +{ + char name[32]; + char *value; + int state = ZTEST_VDEV_CLASS_RND; + + (void) strlcpy(name, input, sizeof (name)); + + value = strchr(name, '='); + if (value == NULL) { + (void) fprintf(stderr, "missing value in property=value " + "'-C' argument (%s)\n", input); + usage(B_FALSE); + } + *(value) = '\0'; + value++; + + if (strcmp(value, "on") == 0) { + state = ZTEST_VDEV_CLASS_ON; + } else if (strcmp(value, "off") == 0) { + state = ZTEST_VDEV_CLASS_OFF; + } else if (strcmp(value, "random") == 0) { + state = ZTEST_VDEV_CLASS_RND; + } else { + (void) fprintf(stderr, "invalid property value '%s'\n", value); + usage(B_FALSE); + } + + if (strcmp(name, "special") == 0) { + zo->zo_special_vdevs = state; + } else { + (void) fprintf(stderr, "invalid property name '%s'\n", name); + usage(B_FALSE); + } + if (zo->zo_verbose >= 3) + (void) printf("%s vdev state is '%s'\n", name, value); +} + static void process_options(int argc, char **argv) { @@ -704,7 +756,7 @@ process_options(int argc, char **argv) bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); while ((opt = getopt(argc, argv, - "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:G")) != EOF) { + 
"v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:C:o:G")) != EOF) { value = 0; switch (opt) { case 'v': @@ -795,6 +847,9 @@ process_options(int argc, char **argv) case 'B': (void) strlcpy(altdir, optarg, sizeof (altdir)); break; + case 'C': + ztest_parse_name_value(optarg, zo); + break; case 'o': if (set_global_var(optarg) != 0) usage(B_FALSE); @@ -1022,13 +1077,16 @@ make_vdev_mirror(char *path, char *aux, char *pool, size_t size, static nvlist_t * make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, - int log, int r, int m, int t) + const char *class, int r, int m, int t) { nvlist_t *root, **child; int c; + boolean_t log; ASSERT(t > 0); + log = (class != NULL && strcmp(class, "log") == 0); + child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < t; c++) { @@ -1036,6 +1094,12 @@ make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, r, m); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, log) == 0); + + if (class != NULL && class[0] != '\0') { + ASSERT(m > 1 || log); /* expecting a mirror */ + VERIFY(nvlist_add_string(child[c], + ZPOOL_CONFIG_ALLOCATION_BIAS, class) == 0); + } } VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); @@ -1075,6 +1139,8 @@ ztest_random_spa_version(uint64_t initial_version) static int ztest_random_blocksize(void) { + ASSERT(ztest_spa->spa_max_ashift != 0); + /* * Choose a block size >= the ashift. * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. @@ -2722,7 +2788,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Attempt to create using a bad file. */ - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); + nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); VERIFY3U(ENOENT, ==, spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2730,7 +2796,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Attempt to create using a bad mirror. 
*/ - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); + nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1); VERIFY3U(ENOENT, ==, spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2740,7 +2806,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) * what's in the nvroot; we should fail with EEXIST. */ (void) pthread_rwlock_rdlock(&ztest_name_lock); - nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); + nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); @@ -2816,7 +2882,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) (void) spa_destroy(name); nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, - 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); + NULL, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); /* * If we're configuring a RAIDZ device then make sure that the @@ -2990,10 +3056,16 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) * If we have slogs then remove them 1/4 of the time. */ if (spa_has_slogs(spa) && ztest_random(4) == 0) { + metaslab_group_t *mg; + /* - * Grab the guid from the head of the log class rotor. + * find the first real slog in log allocation class */ - guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; + mg = spa_log_class(spa)->mc_rotor; + while (!mg->mg_vd->vdev_islog) + mg = mg->mg_next; + + guid = mg->mg_vd->vdev_guid; spa_config_exit(spa, SCL_VDEV, FTAG); @@ -3024,12 +3096,11 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) spa_config_exit(spa, SCL_VDEV, FTAG); /* - * Make 1/4 of the devices be log devices. + * Make 1/4 of the devices be log devices */ nvroot = make_vdev_root(NULL, NULL, NULL, - ztest_opts.zo_vdev_size, 0, - ztest_random(4) == 0, ztest_opts.zo_raidz, - zs->zs_mirrors, 1); + ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ? 
+ "log" : NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1); error = spa_vdev_add(spa, nvroot); nvlist_free(nvroot); @@ -3048,6 +3119,83 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) mutex_exit(&ztest_vdev_lock); } +/* ARGSUSED */ +void +ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) +{ + ztest_shared_t *zs = ztest_shared; + spa_t *spa = ztest_spa; + uint64_t leaves; + nvlist_t *nvroot; + const char *class = (ztest_random(2) == 0) ? + VDEV_ALLOC_BIAS_SPECIAL : VDEV_ALLOC_BIAS_DEDUP; + int error; + + /* + * By default add a special vdev 50% of the time + */ + if ((ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_OFF) || + (ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_RND && + ztest_random(2) == 0)) { + return; + } + + mutex_enter(&ztest_vdev_lock); + + /* Only test with mirrors */ + if (zs->zs_mirrors < 2) { + mutex_exit(&ztest_vdev_lock); + return; + } + + /* requires feature@allocation_classes */ + if (!spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES)) { + mutex_exit(&ztest_vdev_lock); + return; + } + + leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; + + spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); + ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; + spa_config_exit(spa, SCL_VDEV, FTAG); + + nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, + class, ztest_opts.zo_raidz, zs->zs_mirrors, 1); + + error = spa_vdev_add(spa, nvroot); + nvlist_free(nvroot); + + if (error == ENOSPC) + ztest_record_enospc("spa_vdev_add"); + else if (error != 0) + fatal(0, "spa_vdev_add() = %d", error); + + /* + * 50% of the time allow small blocks in the special class + */ + if (error == 0 && + spa_special_class(spa)->mc_groups == 1 && ztest_random(2) == 0) { + if (ztest_opts.zo_verbose >= 3) + (void) printf("Enabling special VDEV small blocks\n"); + (void) ztest_dsl_prop_set_uint64(zd->zd_name, + ZFS_PROP_SPECIAL_SMALL_BLOCKS, 32768, B_FALSE); + } + + mutex_exit(&ztest_vdev_lock); + + if 
(ztest_opts.zo_verbose >= 3) { + metaslab_class_t *mc; + + if (strcmp(class, VDEV_ALLOC_BIAS_SPECIAL) == 0) + mc = spa_special_class(spa); + else + mc = spa_dedup_class(spa); + (void) printf("Added a %s mirrored vdev (of %d)\n", + class, (int)mc->mc_groups); + } +} + /* * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. */ @@ -3114,7 +3262,7 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) * Add a new device. */ nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, - (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); + (ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1); error = spa_vdev_add(spa, nvroot); switch (error) { @@ -3316,11 +3464,15 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * Locate this vdev. */ oldvd = rvd->vdev_child[top]; + + /* pick a child from the mirror */ if (zs->zs_mirrors >= 1) { ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); ASSERT(oldvd->vdev_children >= zs->zs_mirrors); oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; } + + /* pick a child out of the raidz group */ if (ztest_opts.zo_raidz > 1) { ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); @@ -3422,7 +3574,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) * Build the nvlist describing newpath. */ root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? 
newsize : 0, - ashift, 0, 0, 0, 1); + ashift, NULL, 0, 0, 1); error = spa_vdev_attach(spa, oldguid, root, replacing); @@ -3688,7 +3840,7 @@ ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) return; } ASSERT(psize > 0); - newsize = psize + psize / 8; + newsize = psize + MAX(psize / 8, SPA_MAXBLOCKSIZE); ASSERT3U(newsize, >, psize); if (ztest_opts.zo_verbose >= 6) { @@ -7027,6 +7179,7 @@ make_random_props(void) nvlist_t *props; VERIFY0(nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + if (ztest_random(2) == 0) return (props); @@ -7113,7 +7266,7 @@ ztest_init(ztest_shared_t *zs) zs->zs_splits = 0; zs->zs_mirrors = ztest_opts.zo_mirrors; nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, - 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); + NULL, ztest_opts.zo_raidz, zs->zs_mirrors, 1); props = make_random_props(); /* |