aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/arc.c2
-rw-r--r--module/zfs/spa.c1
-rw-r--r--module/zfs/spa_config.c4
-rw-r--r--module/zfs/vdev.c83
-rw-r--r--module/zfs/vdev_indirect.c5
-rw-r--r--module/zfs/vdev_mirror.c6
-rw-r--r--module/zfs/vdev_missing.c3
-rw-r--r--module/zfs/vdev_raidz.c6
-rw-r--r--module/zfs/vdev_root.c3
9 files changed, 92 insertions, 21 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index fc62af7c7..bd1a993dc 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -9283,6 +9283,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
ASSERT(!l2arc_vdev_present(vd));
+ vdev_ashift_optimize(vd);
+
/*
* Create a new l2arc device entry.
*/
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index e358404db..1e3728d93 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -5747,6 +5747,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
for (int c = 0; error == 0 && c < rvd->vdev_children; c++) {
vdev_t *vd = rvd->vdev_child[c];
+ vdev_ashift_optimize(vd);
vdev_metaslab_set_size(vd);
vdev_expand(vd, txg);
}
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
index 95dd19844..cc65a00d9 100644
--- a/module/zfs/spa_config.c
+++ b/module/zfs/spa_config.c
@@ -576,8 +576,10 @@ spa_config_update(spa_t *spa, int what)
(tvd->vdev_islog && tvd->vdev_removing))
continue;
- if (tvd->vdev_ms_array == 0)
+ if (tvd->vdev_ms_array == 0) {
+ vdev_ashift_optimize(tvd);
vdev_metaslab_set_size(tvd);
+ }
vdev_expand(tvd, txg);
}
}
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index a51e427f8..1844a5653 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -110,6 +110,9 @@ int zfs_vdev_standard_sm_blksz = (1 << 17);
*/
int zfs_nocacheflush = 0;
+uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX;
+uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
+
/*PRINTFLIKE2*/
void
vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
@@ -1176,6 +1179,8 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
mvd->vdev_max_asize = cvd->vdev_max_asize;
mvd->vdev_psize = cvd->vdev_psize;
mvd->vdev_ashift = cvd->vdev_ashift;
+ mvd->vdev_logical_ashift = cvd->vdev_logical_ashift;
+ mvd->vdev_physical_ashift = cvd->vdev_physical_ashift;
mvd->vdev_state = cvd->vdev_state;
mvd->vdev_crtxg = cvd->vdev_crtxg;
@@ -1207,7 +1212,8 @@ vdev_remove_parent(vdev_t *cvd)
mvd->vdev_ops == &vdev_replacing_ops ||
mvd->vdev_ops == &vdev_spare_ops);
cvd->vdev_ashift = mvd->vdev_ashift;
-
+ cvd->vdev_logical_ashift = mvd->vdev_logical_ashift;
+ cvd->vdev_physical_ashift = mvd->vdev_physical_ashift;
vdev_remove_child(mvd, cvd);
vdev_remove_child(pvd, mvd);
@@ -1677,7 +1683,8 @@ vdev_open(vdev_t *vd)
uint64_t osize = 0;
uint64_t max_osize = 0;
uint64_t asize, max_asize, psize;
- uint64_t ashift = 0;
+ uint64_t logical_ashift = 0;
+ uint64_t physical_ashift = 0;
ASSERT(vd->vdev_open_thread == curthread ||
spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -1707,8 +1714,8 @@ vdev_open(vdev_t *vd)
return (SET_ERROR(ENXIO));
}
- error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
-
+ error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
+ &logical_ashift, &physical_ashift);
/*
* Physical volume size should never be larger than its max size, unless
* the disk has shrunk while we were reading it or the device is buggy
@@ -1823,6 +1830,17 @@ vdev_open(vdev_t *vd)
return (SET_ERROR(EINVAL));
}
+ vd->vdev_physical_ashift =
+ MAX(physical_ashift, vd->vdev_physical_ashift);
+ vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
+ vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
+
+ if (vd->vdev_logical_ashift > ASHIFT_MAX) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_ASHIFT_TOO_BIG);
+ return (SET_ERROR(EDOM));
+ }
+
if (vd->vdev_asize == 0) {
/*
* This is the first-ever open, so use the computed values.
@@ -1830,9 +1848,6 @@ vdev_open(vdev_t *vd)
*/
vd->vdev_asize = asize;
vd->vdev_max_asize = max_asize;
- if (vd->vdev_ashift == 0) {
- vd->vdev_ashift = ashift; /* use detected value */
- }
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
vd->vdev_ashift > ASHIFT_MAX)) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
@@ -1841,16 +1856,17 @@ vdev_open(vdev_t *vd)
}
} else {
/*
- * Detect if the alignment requirement has increased.
- * We don't want to make the pool unavailable, just
- * post an event instead.
+ * Make sure the alignment required hasn't increased.
*/
- if (ashift > vd->vdev_top->vdev_ashift &&
+ if (vd->vdev_ashift > vd->vdev_top->vdev_ashift &&
vd->vdev_ops->vdev_op_leaf) {
zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
spa, vd, NULL, NULL, 0, 0);
- }
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_BAD_LABEL);
+ return (SET_ERROR(EDOM));
+ }
vd->vdev_max_asize = max_asize;
}
@@ -2428,6 +2444,35 @@ vdev_metaslab_set_size(vdev_t *vd)
ASSERT3U(vd->vdev_ms_shift, >=, SPA_MAXBLOCKSHIFT);
}
+/*
+ * Maximize performance by inflating the configured ashift for top level
+ * vdevs to be as close to the physical ashift as possible while maintaining
+ * administrator defined limits and ensuring it doesn't go below the
+ * logical ashift.
+ */
+void
+vdev_ashift_optimize(vdev_t *vd)
+{
+ if (vd == vd->vdev_top) {
+ if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+ vd->vdev_ashift = MIN(
+ MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
+ MAX(zfs_vdev_min_auto_ashift,
+ vd->vdev_physical_ashift));
+ } else {
+ /*
+ * Unusual case where logical ashift > physical ashift
+ * so we can't cap the calculated ashift based on max
+ * ashift as that would cause failures.
+ * We still check if we need to increase it to match
+ * the min ashift.
+ */
+ vd->vdev_ashift = MAX(zfs_vdev_min_auto_ashift,
+ vd->vdev_ashift);
+ }
+ }
+}
+
void
vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
{
@@ -4083,6 +4128,11 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
1ULL << tvd->vdev_ms_shift);
}
+ vs->vs_configured_ashift = vd->vdev_top != NULL
+ ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
+ vs->vs_logical_ashift = vd->vdev_logical_ashift;
+ vs->vs_physical_ashift = vd->vdev_physical_ashift;
+
/*
* Report fragmentation and rebuild progress for top-level,
* non-auxiliary, concrete devices.
@@ -5028,4 +5078,13 @@ ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW,
"Disable cache flushes");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift,
+ param_set_min_auto_ashift, param_get_ulong, ZMOD_RW,
+ "Minimum ashift used when creating new top-level vdevs");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift,
+ param_set_max_auto_ashift, param_get_ulong, ZMOD_RW,
+ "Maximum ashift used when optimizing for logical -> physical sector "
+ "size on new top-level vdevs");
/* END CSTYLED */
diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c
index 4cd83d79e..6a944f4e8 100644
--- a/module/zfs/vdev_indirect.c
+++ b/module/zfs/vdev_indirect.c
@@ -950,11 +950,12 @@ vdev_indirect_close(vdev_t *vd)
/* ARGSUSED */
static int
vdev_indirect_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
+ uint64_t *logical_ashift, uint64_t *physical_ashift)
{
*psize = *max_psize = vd->vdev_asize +
VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
- *ashift = vd->vdev_ashift;
+ *logical_ashift = vd->vdev_ashift;
+ *physical_ashift = vd->vdev_physical_ashift;
return (0);
}
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
index 094530e9b..5e1060f12 100644
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -366,7 +366,7 @@ vdev_mirror_map_init(zio_t *zio)
static int
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+ uint64_t *logical_ashift, uint64_t *physical_ashift)
{
int numerrors = 0;
int lasterror = 0;
@@ -389,7 +389,9 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
- *ashift = MAX(*ashift, cvd->vdev_ashift);
+ *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+ *physical_ashift = MAX(*physical_ashift,
+ vd->vdev_physical_ashift);
}
if (numerrors == vd->vdev_children) {
diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c
index 205b23eba..ce90df6e8 100644
--- a/module/zfs/vdev_missing.c
+++ b/module/zfs/vdev_missing.c
@@ -45,7 +45,7 @@
/* ARGSUSED */
static int
vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
+ uint64_t *ashift, uint64_t *pshift)
{
/*
* Really this should just fail. But then the root vdev will be in the
@@ -56,6 +56,7 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
*psize = 0;
*max_psize = 0;
*ashift = 0;
+ *pshift = 0;
return (0);
}
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index be3466673..8d4962805 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1554,7 +1554,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
static int
vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+ uint64_t *logical_ashift, uint64_t *physical_ashift)
{
vdev_t *cvd;
uint64_t nparity = vd->vdev_nparity;
@@ -1583,7 +1583,9 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
- *ashift = MAX(*ashift, cvd->vdev_ashift);
+ *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+ *physical_ashift = MAX(*physical_ashift,
+ cvd->vdev_physical_ashift);
}
*asize *= vd->vdev_children;
diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c
index ce79f7c73..9e8aac7d0 100644
--- a/module/zfs/vdev_root.c
+++ b/module/zfs/vdev_root.c
@@ -82,7 +82,7 @@ too_many_errors(vdev_t *vd, uint64_t numerrors)
static int
vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
- uint64_t *ashift)
+ uint64_t *ashift, uint64_t *pshift)
{
spa_t *spa = vd->vdev_spa;
int lasterror = 0;
@@ -116,6 +116,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
*asize = 0;
*max_asize = 0;
*ashift = 0;
+ *pshift = 0;
return (0);
}