summaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2013-03-21 14:47:36 -0800
committerBrian Behlendorf <[email protected]>2013-08-12 16:46:50 -0700
commitcb682a173a84813b2aeb5d18f58cff1a07531fb3 (patch)
tree7139dec4e92bccbf18744b8543abd1e408846dff /module
parent570d6edf1d94917aab49c5755027d05b3c7bcd43 (diff)
Illumos #3618 ::zio dcmd does not show timestamp data
3618 ::zio dcmd does not show timestamp data Reviewed by: Adam Leventhal <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Christopher Siden <[email protected]> Reviewed by: Garrett D'Amore <[email protected]> Approved by: Dan McDonald <[email protected]> References: http://www.illumos.org/issues/3618 illumos/illumos-gate@c55e05cb35da47582b7afd38734d2f0d9c6deb40 Notes on porting to ZFS on Linux: The original changeset mostly deals with mdb ::zio dcmd. However, in order to provide the requested functionality it modifies vdev and zio structures to keep the timing data in nanoseconds instead of ticks. It is these changes that are ported over in the commit in hand. One visible change of this commit is that the default value of 'zfs_vdev_time_shift' tunable is changed: zfs_vdev_time_shift = 6 to zfs_vdev_time_shift = 29 The original value of 6 was inherited from OpenSolaris and was subotimal - since it shifted the raw tick value - it didn't compensate for different tick frequencies on Linux and OpenSolaris. The former has HZ=1000, while the latter HZ=100. (Which itself led to other interesting performance anomalies under non-trivial load. The deadline scheduler delays the IO according to its priority - the lower priority the further the deadline is set. The delay is measured in units of "shifted ticks". Since the HZ value was 10 times higher, the delay units were 10 times shorter. Thus really low priority IO like resilver (delay is 10 units) and scrub (delay is 20 units) were scheduled much sooner than intended. The overall effect is that resilver and scrub IO consumed more bandwidth at the expense of the other IO.) Now that the bookkeeping is done is nanoseconds the shift behaves correctly for any tick frequency (HZ). Ported-by: Cyril Plisko <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #1643
Diffstat (limited to 'module')
-rw-r--r--module/zfs/vdev.c8
-rw-r--r--module/zfs/vdev_queue.c13
2 files changed, 12 insertions, 9 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index d6b55ee48..662a877f8 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3219,10 +3219,10 @@ vdev_deadman(vdev_t *vd)
* the spa_deadman_synctime we log a zevent.
*/
fio = avl_first(&vq->vq_pending_tree);
- delta = ddi_get_lbolt64() - fio->io_timestamp;
- if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
- zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
- "delta %llu, last io %llu",
+ delta = gethrtime() - fio->io_timestamp;
+ if (delta > spa_deadman_synctime(spa)) {
+ zfs_dbgmsg("SLOW IO: zio timestamp %lluns, "
+ "delta %lluns, last io %lluns",
fio->io_timestamp, delta,
vq->vq_io_complete_ts);
zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 3f2793ba4..b2cc6b87f 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -44,8 +44,11 @@
int zfs_vdev_max_pending = 10;
int zfs_vdev_min_pending = 4;
-/* deadline = pri + ddi_get_lbolt64() >> time_shift) */
-int zfs_vdev_time_shift = 6;
+/*
+ * The deadlines are grouped into buckets based on zfs_vdev_time_shift:
+ * deadline = pri + gethrtime() >> time_shift)
+ */
+int zfs_vdev_time_shift = 29; /* each bucket is 0.537 seconds */
/* exponential I/O issue ramp-up rate */
int zfs_vdev_ramp_rate = 2;
@@ -396,7 +399,7 @@ vdev_queue_io(zio_t *zio)
mutex_enter(&vq->vq_lock);
- zio->io_timestamp = ddi_get_lbolt64();
+ zio->io_timestamp = gethrtime();
zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
zio->io_priority;
@@ -430,8 +433,8 @@ vdev_queue_io_done(zio_t *zio)
avl_remove(&vq->vq_pending_tree, zio);
- zio->io_delta = ddi_get_lbolt64() - zio->io_timestamp;
- vq->vq_io_complete_ts = ddi_get_lbolt64();
+ zio->io_delta = gethrtime() - zio->io_timestamp;
+ vq->vq_io_complete_ts = gethrtime();
vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
for (i = 0; i < zfs_vdev_ramp_rate; i++) {