about | summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r-- module/zfs/spa.c 2
-rw-r--r-- module/zfs/vdev.c 149
-rw-r--r-- module/zfs/vdev_disk.c 9
-rw-r--r-- module/zfs/vdev_label.c 107
-rw-r--r-- module/zfs/zio.c 9
5 files changed, 234 insertions, 42 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 9246495ee..c23fd7a3a 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -3350,6 +3350,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
== 0);
vdev_get_stats(vd, vs);
+ vdev_config_generate_stats(vd, l2cache[i]);
+
}
}
}
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 7114c2efc..137390173 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2764,50 +2764,124 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
return (B_TRUE);
}
-/*
- * Get statistics for the given vdev.
- */
-void
-vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+static void
+vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
{
- spa_t *spa = vd->vdev_spa;
- vdev_t *rvd = spa->spa_root_vdev;
- int c, t;
+ int t;
+ for (t = 0; t < ZIO_TYPES; t++) {
+ vs->vs_ops[t] += cvs->vs_ops[t];
+ vs->vs_bytes[t] += cvs->vs_bytes[t];
+ }
- ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+ cvs->vs_scan_removing = cvd->vdev_removing;
+}
- mutex_enter(&vd->vdev_stat_lock);
- bcopy(&vd->vdev_stat, vs, sizeof (*vs));
- vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
- vs->vs_state = vd->vdev_state;
- vs->vs_rsize = vdev_get_min_asize(vd);
- if (vd->vdev_ops->vdev_op_leaf)
- vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
- if (vd->vdev_aux == NULL && vd == vd->vdev_top && !vd->vdev_ishole) {
- vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
+/*
+ * Accumulate a child's extended stats (cvsx) into the parent's (vsx)
+ */
+static void
+vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx)
+{
+ int t, b;
+ for (t = 0; t < ZIO_TYPES; t++) {
+ for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+ vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b];
+ vsx->vsx_total_histo[t][b] +=
+ cvsx->vsx_total_histo[t][b];
+ }
+ }
+
+ for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
+ for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+ vsx->vsx_queue_histo[t][b] +=
+ cvsx->vsx_queue_histo[t][b];
+ }
+ vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t];
+ vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t];
}
+}
+/*
+ * Get statistics for the given vdev.
+ */
+static void
+vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+ int c, t;
/*
* If we're getting stats on the root vdev, aggregate the I/O counts
* over all top-level vdevs (i.e. the direct children of the root).
*/
- if (vd == rvd) {
- for (c = 0; c < rvd->vdev_children; c++) {
- vdev_t *cvd = rvd->vdev_child[c];
+ if (!vd->vdev_ops->vdev_op_leaf) {
+ if (vs) {
+ memset(vs->vs_ops, 0, sizeof (vs->vs_ops));
+ memset(vs->vs_bytes, 0, sizeof (vs->vs_bytes));
+ }
+ if (vsx)
+ memset(vsx, 0, sizeof (*vsx));
+
+ for (c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
vdev_stat_t *cvs = &cvd->vdev_stat;
+ vdev_stat_ex_t *cvsx = &cvd->vdev_stat_ex;
- for (t = 0; t < ZIO_TYPES; t++) {
- vs->vs_ops[t] += cvs->vs_ops[t];
- vs->vs_bytes[t] += cvs->vs_bytes[t];
- }
- cvs->vs_scan_removing = cvd->vdev_removing;
+ vdev_get_stats_ex_impl(cvd, cvs, cvsx);
+ if (vs)
+ vdev_get_child_stat(cvd, vs, cvs);
+ if (vsx)
+ vdev_get_child_stat_ex(cvd, vsx, cvsx);
+
+ }
+ } else {
+ /*
+ * We're a leaf. Just copy our ZIO active queue stats in. The
+ * other leaf stats are updated in vdev_stat_update().
+ */
+ if (!vsx)
+ return;
+
+ memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
+
+ for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
+ vsx->vsx_active_queue[t] =
+ vd->vdev_queue.vq_class[t].vqc_active;
+ vsx->vsx_pend_queue[t] = avl_numnodes(
+ &vd->vdev_queue.vq_class[t].vqc_queued_tree);
+ }
+ }
+}
+
+void
+vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+ mutex_enter(&vd->vdev_stat_lock);
+ if (vs) {
+ bcopy(&vd->vdev_stat, vs, sizeof (*vs));
+ vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
+ vs->vs_state = vd->vdev_state;
+ vs->vs_rsize = vdev_get_min_asize(vd);
+ if (vd->vdev_ops->vdev_op_leaf)
+ vs->vs_rsize += VDEV_LABEL_START_SIZE +
+ VDEV_LABEL_END_SIZE;
+ vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+ if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
+ !vd->vdev_ishole) {
+ vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
}
}
+
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
+ vdev_get_stats_ex_impl(vd, vs, vsx);
mutex_exit(&vd->vdev_stat_lock);
}
void
+vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+{
+ return (vdev_get_stats_ex(vd, vs, NULL));
+}
+
+void
vdev_clear_stats(vdev_t *vd)
{
mutex_enter(&vd->vdev_stat_lock);
@@ -2840,6 +2914,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
vdev_t *pvd;
uint64_t txg = zio->io_txg;
vdev_stat_t *vs = &vd->vdev_stat;
+ vdev_stat_ex_t *vsx = &vd->vdev_stat_ex;
zio_type_t type = zio->io_type;
int flags = zio->io_flags;
@@ -2890,8 +2965,24 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
vs->vs_self_healed += psize;
}
- vs->vs_ops[type]++;
- vs->vs_bytes[type] += psize;
+ /*
+ * The bytes/ops/histograms are recorded at the leaf level and
+ * aggregated into the higher level vdevs in vdev_get_stats().
+ */
+ if (vd->vdev_ops->vdev_op_leaf) {
+
+ vs->vs_ops[type]++;
+ vs->vs_bytes[type] += psize;
+
+ if (zio->io_delta && zio->io_delay) {
+ vsx->vsx_queue_histo[zio->io_priority]
+ [HISTO(zio->io_delta - zio->io_delay)]++;
+ vsx->vsx_disk_histo[type]
+ [HISTO(zio->io_delay)]++;
+ vsx->vsx_total_histo[type]
+ [HISTO(zio->io_delta)]++;
+ }
+ }
mutex_exit(&vd->vdev_stat_lock);
return;
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index 9b51ecc1d..4e362226a 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -100,9 +100,9 @@ vdev_disk_error(zio_t *zio)
{
#ifdef ZFS_DEBUG
printk("ZFS: zio error=%d type=%d offset=%llu size=%llu "
- "flags=%x delay=%llu\n", zio->io_error, zio->io_type,
+ "flags=%x\n", zio->io_error, zio->io_type,
(u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
- zio->io_flags, (u_longlong_t)zio->io_delay);
+ zio->io_flags);
#endif
}
@@ -410,7 +410,6 @@ vdev_disk_dio_put(dio_request_t *dr)
vdev_disk_dio_free(dr);
if (zio) {
- zio->io_delay = jiffies_64 - zio->io_delay;
zio->io_error = error;
ASSERT3S(zio->io_error, >=, 0);
if (zio->io_error)
@@ -588,8 +587,6 @@ retry:
/* Extra reference to protect dio_request during vdev_submit_bio */
vdev_disk_dio_get(dr);
- if (zio)
- zio->io_delay = jiffies_64;
/* Submit all bio's associated with this dio */
for (i = 0; i < dr->dr_bio_count; i++)
@@ -630,7 +627,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
int rc = bio->bi_error;
#endif
- zio->io_delay = jiffies_64 - zio->io_delay;
zio->io_error = -rc;
if (rc && (rc == -EOPNOTSUPP))
zio->io_vd->vdev_nowritecache = B_TRUE;
@@ -660,7 +656,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
bio->bi_end_io = vdev_disk_io_flush_completion;
bio->bi_private = zio;
bio->bi_bdev = bdev;
- zio->io_delay = jiffies_64;
vdev_submit_bio(VDEV_WRITE_FLUSH_FUA, bio);
invalidate_bdev(bdev);
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index 3dc3d0d9d..1400aee7b 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -208,6 +208,107 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
}
/*
+ * Generate the nvlist representing this vdev's stats
+ */
+void
+vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
+{
+ nvlist_t *nvx;
+ vdev_stat_t *vs;
+ vdev_stat_ex_t *vsx;
+
+ vs = kmem_alloc(sizeof (*vs), KM_SLEEP);
+ vsx = kmem_alloc(sizeof (*vsx), KM_SLEEP);
+
+ vdev_get_stats_ex(vd, vs, vsx);
+ fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t *)vs, sizeof (*vs) / sizeof (uint64_t));
+
+ kmem_free(vs, sizeof (*vs));
+
+ /*
+ * Add extended stats into a special extended stats nvlist. This keeps
+ * all the extended stats nicely grouped together. The extended stats
+ * nvlist is then added to the main nvlist.
+ */
+ nvx = fnvlist_alloc();
+
+ /* ZIOs in flight to disk */
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SCRUB]);
+
+ /* ZIOs pending */
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SCRUB]);
+
+ /* Histograms */
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+ vsx->vsx_total_histo[ZIO_TYPE_READ],
+ ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+ vsx->vsx_total_histo[ZIO_TYPE_WRITE],
+ ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+ vsx->vsx_disk_histo[ZIO_TYPE_READ],
+ ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+ vsx->vsx_disk_histo[ZIO_TYPE_WRITE],
+ ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB]));
+
+ /* Add extended stats nvlist to main nvlist */
+ fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx);
+
+ kmem_free(vsx, sizeof (*vsx));
+}
+
+/*
* Generate the nvlist representing this vdev's config.
*/
nvlist_t *
@@ -215,7 +316,6 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
vdev_config_flag_t flags)
{
nvlist_t *nv = NULL;
-
nv = fnvlist_alloc();
fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
@@ -306,12 +406,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
}
if (getstats) {
- vdev_stat_t vs;
pool_scan_stat_t ps;
- vdev_get_stats(vd, &vs);
- fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
- (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
+ vdev_config_generate_stats(vd, nv);
/* provide either current or previous scan information */
if (spa_scan_get_stats(spa, &ps) == 0) {
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 2d16e632d..523a924d6 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -39,6 +39,7 @@
#include <sys/ddt.h>
#include <sys/blkptr.h>
#include <sys/zfeature.h>
+#include <sys/time.h>
/*
* ==========================================================================
@@ -2694,6 +2695,8 @@ zio_vdev_io_start(zio_t *zio)
uint64_t align;
spa_t *spa = zio->io_spa;
+ zio->io_delay = 0;
+
ASSERT(zio->io_error == 0);
ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0);
@@ -2799,6 +2802,7 @@ zio_vdev_io_start(zio_t *zio)
}
}
+ zio->io_delay = gethrtime();
vd->vdev_ops->vdev_op_io_start(zio);
return (ZIO_PIPELINE_STOP);
}
@@ -2815,6 +2819,9 @@ zio_vdev_io_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
+ if (zio->io_delay)
+ zio->io_delay = gethrtime() - zio->io_delay;
+
if (vd != NULL && vd->vdev_ops->vdev_op_leaf) {
vdev_queue_io_done(zio);
@@ -3217,7 +3224,7 @@ zio_done(zio_t *zio)
* 30 seconds to complete, post an error described the I/O delay.
* We ignore these errors if the device is currently unavailable.
*/
- if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) {
+ if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) {
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
zio->io_vd, zio, 0, 0);