diff options
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/arc.c | 2 | ||||
-rw-r--r-- | module/zfs/spa.c | 1 | ||||
-rw-r--r-- | module/zfs/spa_config.c | 4 | ||||
-rw-r--r-- | module/zfs/vdev.c | 83 | ||||
-rw-r--r-- | module/zfs/vdev_indirect.c | 5 | ||||
-rw-r--r-- | module/zfs/vdev_mirror.c | 6 | ||||
-rw-r--r-- | module/zfs/vdev_missing.c | 3 | ||||
-rw-r--r-- | module/zfs/vdev_raidz.c | 6 | ||||
-rw-r--r-- | module/zfs/vdev_root.c | 3 |
9 files changed, 92 insertions, 21 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c index fc62af7c7..bd1a993dc 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -9283,6 +9283,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd) ASSERT(!l2arc_vdev_present(vd)); + vdev_ashift_optimize(vd); + /* * Create a new l2arc device entry. */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index e358404db..1e3728d93 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5747,6 +5747,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, for (int c = 0; error == 0 && c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; + vdev_ashift_optimize(vd); vdev_metaslab_set_size(vd); vdev_expand(vd, txg); } diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 95dd19844..cc65a00d9 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -576,8 +576,10 @@ spa_config_update(spa_t *spa, int what) (tvd->vdev_islog && tvd->vdev_removing)) continue; - if (tvd->vdev_ms_array == 0) + if (tvd->vdev_ms_array == 0) { + vdev_ashift_optimize(tvd); vdev_metaslab_set_size(tvd); + } vdev_expand(tvd, txg); } } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index a51e427f8..1844a5653 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -110,6 +110,9 @@ int zfs_vdev_standard_sm_blksz = (1 << 17); */ int zfs_nocacheflush = 0; +uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX; +uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN; + /*PRINTFLIKE2*/ void vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) 
@@ -1176,6 +1179,8 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) mvd->vdev_max_asize = cvd->vdev_max_asize; mvd->vdev_psize = cvd->vdev_psize; mvd->vdev_ashift = cvd->vdev_ashift; + mvd->vdev_logical_ashift = cvd->vdev_logical_ashift; + mvd->vdev_physical_ashift = cvd->vdev_physical_ashift; mvd->vdev_state = cvd->vdev_state; mvd->vdev_crtxg = cvd->vdev_crtxg; @@ -1207,7 +1212,8 @@ vdev_remove_parent(vdev_t *cvd) mvd->vdev_ops == &vdev_replacing_ops || mvd->vdev_ops == &vdev_spare_ops); cvd->vdev_ashift = mvd->vdev_ashift; - + cvd->vdev_logical_ashift = mvd->vdev_logical_ashift; + cvd->vdev_physical_ashift = mvd->vdev_physical_ashift; vdev_remove_child(mvd, cvd); vdev_remove_child(pvd, mvd); @@ -1677,7 +1683,8 @@ vdev_open(vdev_t *vd) uint64_t osize = 0; uint64_t max_osize = 0; uint64_t asize, max_asize, psize; - uint64_t ashift = 0; + uint64_t logical_ashift = 0; + uint64_t physical_ashift = 0; ASSERT(vd->vdev_open_thread == curthread || spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -1707,8 +1714,8 @@ vdev_open(vdev_t *vd) return (SET_ERROR(ENXIO)); } - error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); - + error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, + &logical_ashift, &physical_ashift); /* * Physical volume size should never be larger than its max size, unless * the disk has shrunk while we were reading it or the device is buggy @@ -1823,6 +1830,17 @@ vdev_open(vdev_t *vd) return (SET_ERROR(EINVAL)); } + vd->vdev_physical_ashift = + MAX(physical_ashift, vd->vdev_physical_ashift); + vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift); + vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift); + + if (vd->vdev_logical_ashift > ASHIFT_MAX) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_ASHIFT_TOO_BIG); + return (SET_ERROR(EDOM)); + } + if (vd->vdev_asize == 0) { /* * This is the first-ever open, so use the computed values. 
@@ -1830,9 +1848,6 @@ vdev_open(vdev_t *vd) */ vd->vdev_asize = asize; vd->vdev_max_asize = max_asize; - if (vd->vdev_ashift == 0) { - vd->vdev_ashift = ashift; /* use detected value */ - } if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN || vd->vdev_ashift > ASHIFT_MAX)) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, @@ -1841,16 +1856,17 @@ vdev_open(vdev_t *vd) } } else { /* - * Detect if the alignment requirement has increased. - * We don't want to make the pool unavailable, just - * post an event instead. + * Make sure the alignment required hasn't increased. */ - if (ashift > vd->vdev_top->vdev_ashift && + if (vd->vdev_ashift > vd->vdev_top->vdev_ashift && vd->vdev_ops->vdev_op_leaf) { zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT, spa, vd, NULL, NULL, 0, 0); - } + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (SET_ERROR(EDOM)); + } vd->vdev_max_asize = max_asize; } @@ -2428,6 +2444,35 @@ vdev_metaslab_set_size(vdev_t *vd) ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT); } +/* + * Maximize performance by inflating the configured ashift for top level + * vdevs to be as close to the physical ashift as possible while maintaining + * administrator defined limits and ensuring it doesn't go below the + * logical ashift. + */ +void +vdev_ashift_optimize(vdev_t *vd) +{ + if (vd == vd->vdev_top) { + if (vd->vdev_ashift < vd->vdev_physical_ashift) { + vd->vdev_ashift = MIN( + MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift), + MAX(zfs_vdev_min_auto_ashift, + vd->vdev_physical_ashift)); + } else { + /* + * Unusual case where logical ashift > physical ashift + * so we can't cap the calculated ashift based on max + * ashift as that would cause failures. + * We still check if we need to increase it to match + * the min ashift. 
+ */ + vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift, + vd->vdev_ashift); + } + } +} + void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) { @@ -4083,6 +4128,11 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) 1ULL << tvd->vdev_ms_shift); } + vs->vs_configured_ashift = vd->vdev_top != NULL + ? vd->vdev_top->vdev_ashift : vd->vdev_ashift; + vs->vs_logical_ashift = vd->vdev_logical_ashift; + vs->vs_physical_ashift = vd->vdev_physical_ashift; + /* * Report fragmentation and rebuild progress for top-level, * non-auxiliary, concrete devices. @@ -5028,4 +5078,13 @@ ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW, "Disable cache flushes"); + +ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift, + param_set_min_auto_ashift, param_get_ulong, ZMOD_RW, + "Minimum ashift used when creating new top-level vdevs"); + +ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, + param_set_max_auto_ashift, param_get_ulong, ZMOD_RW, + "Maximum ashift used when optimizing for logical -> physical sector " + "size on new top-level vdevs"); /* END CSTYLED */ diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index 4cd83d79e..6a944f4e8 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -950,11 +950,12 @@ vdev_indirect_close(vdev_t *vd) /* ARGSUSED */ static int vdev_indirect_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { *psize = *max_psize = vd->vdev_asize + VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; - *ashift = vd->vdev_ashift; + *logical_ashift = vd->vdev_ashift; + *physical_ashift = vd->vdev_physical_ashift; return (0); } diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 094530e9b..5e1060f12 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -366,7 +366,7 @@ vdev_mirror_map_init(zio_t *zio) static int 
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { int numerrors = 0; int lasterror = 0; @@ -389,7 +389,9 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; - *ashift = MAX(*ashift, cvd->vdev_ashift); + *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); + *physical_ashift = MAX(*physical_ashift, + cvd->vdev_physical_ashift); } if (numerrors == vd->vdev_children) { diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c index 205b23eba..ce90df6e8 100644 --- a/module/zfs/vdev_missing.c +++ b/module/zfs/vdev_missing.c @@ -45,7 +45,7 @@ /* ARGSUSED */ static int vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) + uint64_t *ashift, uint64_t *pshift) { /* * Really this should just fail. But then the root vdev will be in the @@ -56,6 +56,7 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, *psize = 0; *max_psize = 0; *ashift = 0; + *pshift = 0; return (0); } diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index be3466673..8d4962805 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1554,7 +1554,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) static int vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { vdev_t *cvd; uint64_t nparity = vd->vdev_nparity; @@ -1583,7 +1583,9 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; - *ashift = MAX(*ashift, cvd->vdev_ashift); + *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); + *physical_ashift = MAX(*physical_ashift, + cvd->vdev_physical_ashift); } *asize 
*= vd->vdev_children; diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c index ce79f7c73..9e8aac7d0 100644 --- a/module/zfs/vdev_root.c +++ b/module/zfs/vdev_root.c @@ -82,7 +82,7 @@ too_many_errors(vdev_t *vd, uint64_t numerrors) static int vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *ashift, uint64_t *pshift) { spa_t *spa = vd->vdev_spa; int lasterror = 0; @@ -116,6 +116,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, *asize = 0; *max_asize = 0; *ashift = 0; + *pshift = 0; return (0); } |