diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/spa.c | 2 | ||||
-rw-r--r-- | module/zfs/vdev.c | 149 | ||||
-rw-r--r-- | module/zfs/vdev_disk.c | 9 | ||||
-rw-r--r-- | module/zfs/vdev_label.c | 107 | ||||
-rw-r--r-- | module/zfs/zio.c | 9 |
5 files changed, 234 insertions, 42 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 9246495ee..c23fd7a3a 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3350,6 +3350,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config) ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vdev_get_stats(vd, vs); + vdev_config_generate_stats(vd, l2cache[i]); + } } } diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 7114c2efc..137390173 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2764,50 +2764,124 @@ vdev_accessible(vdev_t *vd, zio_t *zio) return (B_TRUE); } -/* - * Get statistics for the given vdev. - */ -void -vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) +static void +vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs) { - spa_t *spa = vd->vdev_spa; - vdev_t *rvd = spa->spa_root_vdev; - int c, t; + int t; + for (t = 0; t < ZIO_TYPES; t++) { + vs->vs_ops[t] += cvs->vs_ops[t]; + vs->vs_bytes[t] += cvs->vs_bytes[t]; + } - ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); + cvs->vs_scan_removing = cvd->vdev_removing; +} - mutex_enter(&vd->vdev_stat_lock); - bcopy(&vd->vdev_stat, vs, sizeof (*vs)); - vs->vs_timestamp = gethrtime() - vs->vs_timestamp; - vs->vs_state = vd->vdev_state; - vs->vs_rsize = vdev_get_min_asize(vd); - if (vd->vdev_ops->vdev_op_leaf) - vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; - vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; - if (vd->vdev_aux == NULL && vd == vd->vdev_top && !vd->vdev_ishole) { - vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation; +/* + * Get extended stats + */ +static void +vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx) +{ + int t, b; + for (t = 0; t < ZIO_TYPES; t++) { + for (b = 0; b < VDEV_HISTO_BUCKETS; b++) { + vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b]; + vsx->vsx_total_histo[t][b] += + cvsx->vsx_total_histo[t][b]; + } + } + + for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) { + for (b = 0; b < VDEV_HISTO_BUCKETS; b++) { + vsx->vsx_queue_histo[t][b] += + cvsx->vsx_queue_histo[t][b]; + } + vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t]; + vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t]; } +} +/* + * Get statistics for the given vdev. + */ +static void +vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) +{ + int c, t; /* * If we're getting stats on the root vdev, aggregate the I/O counts * over all top-level vdevs (i.e. the direct children of the root). */ - if (vd == rvd) { - for (c = 0; c < rvd->vdev_children; c++) { - vdev_t *cvd = rvd->vdev_child[c]; + if (!vd->vdev_ops->vdev_op_leaf) { + if (vs) { + memset(vs->vs_ops, 0, sizeof (vs->vs_ops)); + memset(vs->vs_bytes, 0, sizeof (vs->vs_bytes)); + } + if (vsx) + memset(vsx, 0, sizeof (*vsx)); + + for (c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; vdev_stat_t *cvs = &cvd->vdev_stat; + vdev_stat_ex_t *cvsx = &cvd->vdev_stat_ex; - for (t = 0; t < ZIO_TYPES; t++) { - vs->vs_ops[t] += cvs->vs_ops[t]; - vs->vs_bytes[t] += cvs->vs_bytes[t]; - } - cvs->vs_scan_removing = cvd->vdev_removing; + vdev_get_stats_ex_impl(cvd, cvs, cvsx); + if (vs) + vdev_get_child_stat(cvd, vs, cvs); + if (vsx) + vdev_get_child_stat_ex(cvd, vsx, cvsx); + + } + } else { + /* + * We're a leaf. Just copy our ZIO active queue stats in. The + * other leaf stats are updated in vdev_stat_update(). + */ + if (!vsx) + return; + + memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex)); + + for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) { + vsx->vsx_active_queue[t] = + vd->vdev_queue.vq_class[t].vqc_active; + vsx->vsx_pend_queue[t] = avl_numnodes( + &vd->vdev_queue.vq_class[t].vqc_queued_tree); + } + } +} + +void +vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) +{ + mutex_enter(&vd->vdev_stat_lock); + if (vs) { + bcopy(&vd->vdev_stat, vs, sizeof (*vs)); + vs->vs_timestamp = gethrtime() - vs->vs_timestamp; + vs->vs_state = vd->vdev_state; + vs->vs_rsize = vdev_get_min_asize(vd); + if (vd->vdev_ops->vdev_op_leaf) + vs->vs_rsize += VDEV_LABEL_START_SIZE + + VDEV_LABEL_END_SIZE; + vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; + if (vd->vdev_aux == NULL && vd == vd->vdev_top && + !vd->vdev_ishole) { + vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation; } } + + ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0); + vdev_get_stats_ex_impl(vd, vs, vsx); mutex_exit(&vd->vdev_stat_lock); } void +vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) +{ + return (vdev_get_stats_ex(vd, vs, NULL)); +} + +void vdev_clear_stats(vdev_t *vd) { mutex_enter(&vd->vdev_stat_lock); @@ -2840,6 +2914,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize) vdev_t *pvd; uint64_t txg = zio->io_txg; vdev_stat_t *vs = &vd->vdev_stat; + vdev_stat_ex_t *vsx = &vd->vdev_stat_ex; zio_type_t type = zio->io_type; int flags = zio->io_flags; @@ -2890,8 +2965,24 @@ vdev_stat_update(zio_t *zio, uint64_t psize) vs->vs_self_healed += psize; } - vs->vs_ops[type]++; - vs->vs_bytes[type] += psize; + /* + * The bytes/ops/histograms are recorded at the leaf level and + * aggregated into the higher level vdevs in vdev_get_stats(). + */ + if (vd->vdev_ops->vdev_op_leaf) { + + vs->vs_ops[type]++; + vs->vs_bytes[type] += psize; + + if (zio->io_delta && zio->io_delay) { + vsx->vsx_queue_histo[zio->io_priority] + [HISTO(zio->io_delta - zio->io_delay)]++; + vsx->vsx_disk_histo[type] + [HISTO(zio->io_delay)]++; + vsx->vsx_total_histo[type] + [HISTO(zio->io_delta)]++; + } + } mutex_exit(&vd->vdev_stat_lock); return; diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 9b51ecc1d..4e362226a 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -100,9 +100,9 @@ vdev_disk_error(zio_t *zio) { #ifdef ZFS_DEBUG printk("ZFS: zio error=%d type=%d offset=%llu size=%llu " - "flags=%x delay=%llu\n", zio->io_error, zio->io_type, + "flags=%x\n", zio->io_error, zio->io_type, (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size, - zio->io_flags, (u_longlong_t)zio->io_delay); + zio->io_flags); #endif } @@ -410,7 +410,6 @@ vdev_disk_dio_put(dio_request_t *dr) vdev_disk_dio_free(dr); if (zio) { - zio->io_delay = jiffies_64 - zio->io_delay; zio->io_error = error; ASSERT3S(zio->io_error, >=, 0); if (zio->io_error) @@ -588,8 +587,6 @@ retry: /* Extra reference to protect dio_request during vdev_submit_bio */ vdev_disk_dio_get(dr); - if (zio) - zio->io_delay = jiffies_64; /* Submit all bio's associated with this dio */ for (i = 0; i < dr->dr_bio_count; i++) @@ -630,7 +627,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc) int rc = bio->bi_error; #endif - zio->io_delay = jiffies_64 - zio->io_delay; zio->io_error = -rc; if (rc && (rc == -EOPNOTSUPP)) zio->io_vd->vdev_nowritecache = B_TRUE; @@ -660,7 +656,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) bio->bi_end_io = vdev_disk_io_flush_completion; bio->bi_private = zio; bio->bi_bdev = bdev; - zio->io_delay = jiffies_64; vdev_submit_bio(VDEV_WRITE_FLUSH_FUA, bio); invalidate_bdev(bdev); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 3dc3d0d9d..1400aee7b 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -208,6 +208,107 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, } /* + * Generate the nvlist representing this vdev's stats + */ +void +vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv) +{ + nvlist_t *nvx; + vdev_stat_t *vs; + vdev_stat_ex_t *vsx; + + vs = kmem_alloc(sizeof (*vs), KM_SLEEP); + vsx = kmem_alloc(sizeof (*vsx), KM_SLEEP); + + vdev_get_stats_ex(vd, vs, vsx); + fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t *)vs, sizeof (*vs) / sizeof (uint64_t)); + + kmem_free(vs, sizeof (*vs)); + + /* + * Add extended stats into a special extended stats nvlist. This keeps + * all the extended stats nicely grouped together. The extended stats + * nvlist is then added to the main nvlist. + */ + nvx = fnvlist_alloc(); + + /* ZIOs in flight to disk */ + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, + vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_READ]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, + vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_WRITE]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, + vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_READ]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, + vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_WRITE]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, + vsx->vsx_active_queue[ZIO_PRIORITY_SCRUB]); + + /* ZIOs pending */ + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, + vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_READ]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, + vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_WRITE]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, + vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_READ]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, + vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_WRITE]); + + fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, + vsx->vsx_pend_queue[ZIO_PRIORITY_SCRUB]); + + /* Histograms */ + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + vsx->vsx_total_histo[ZIO_TYPE_READ], + ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + vsx->vsx_total_histo[ZIO_TYPE_WRITE], + ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + vsx->vsx_disk_histo[ZIO_TYPE_READ], + ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + vsx->vsx_disk_histo[ZIO_TYPE_WRITE], + ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, + vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ], + ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, + vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE], + ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, + vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ], + ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, + vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE], + ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE])); + + fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, + vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB], + ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB])); + + /* Add extended stats nvlist to main nvlist */ + fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx); + + kmem_free(vsx, sizeof (*vsx)); +} + +/* * Generate the nvlist representing this vdev's config. */ nvlist_t * @@ -215,7 +316,6 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vdev_config_flag_t flags) { nvlist_t *nv = NULL; - nv = fnvlist_alloc(); fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type); @@ -306,12 +406,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, } if (getstats) { - vdev_stat_t vs; pool_scan_stat_t ps; - vdev_get_stats(vd, &vs); - fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)); + vdev_config_generate_stats(vd, nv); /* provide either current or previous scan information */ if (spa_scan_get_stats(spa, &ps) == 0) { diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 2d16e632d..523a924d6 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -39,6 +39,7 @@ #include <sys/ddt.h> #include <sys/blkptr.h> #include <sys/zfeature.h> +#include <sys/time.h> /* * ========================================================================== @@ -2694,6 +2695,8 @@ zio_vdev_io_start(zio_t *zio) uint64_t align; spa_t *spa = zio->io_spa; + zio->io_delay = 0; + ASSERT(zio->io_error == 0); ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0); @@ -2799,6 +2802,7 @@ zio_vdev_io_start(zio_t *zio) } } + zio->io_delay = gethrtime(); vd->vdev_ops->vdev_op_io_start(zio); return (ZIO_PIPELINE_STOP); } @@ -2815,6 +2819,9 @@ zio_vdev_io_done(zio_t *zio) ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + if (zio->io_delay) + zio->io_delay = gethrtime() - zio->io_delay; + if (vd != NULL && vd->vdev_ops->vdev_op_leaf) { vdev_queue_io_done(zio); @@ -3217,7 +3224,7 @@ zio_done(zio_t *zio) * 30 seconds to complete, post an error described the I/O delay. * We ignore these errors if the device is currently unavailable. */ - if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) { + if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) { if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, zio->io_vd, zio, 0, 0); |