summaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
authorGeorge.Wilson <[email protected]>2013-04-29 15:49:23 -0700
committerBrian Behlendorf <[email protected]>2013-05-01 17:05:52 -0700
commitcc92e9d0c3e67a7e66c844466f85696a087bf60a (patch)
tree97f27cdfb4a662dee0de0d916e7c92cc0e03a605 /include/sys
parent57f5a2008e2e6acf58934cf43c5fdca0faffa73e (diff)
3246 ZFS I/O deadman thread
Reviewed by: Matt Ahrens <[email protected]> Reviewed by: Eric Schrock <[email protected]> Reviewed by: Christopher Siden <[email protected]> Approved by: Garrett D'Amore <[email protected]> NOTES: This patch has been reworked from the original in the following ways to accomidate Linux ZFS implementation *) Usage of the cyclic interface was replaced by the delayed taskq interface. This avoids the need to implement new compatibility code and allows us to rely on the existing taskq implementation. *) An extern for zfs_txg_synctime_ms was added to sys/dsl_pool.h because declaring externs in source files as was done in the original patch is just plain wrong. *) Instead of panicing the system when the deadman triggers a zevent describing the blocked vdev and the first pending I/O is posted. If the panic behavior is desired Linux provides other generic methods to panic the system when threads are observed to hang. *) For reference, to delay zios by 30 seconds for testing you can use zinject as follows: 'zinject -d <vdev> -D30 <pool>' References: illumos/illumos-gate@283b84606b6fc326692c03273de1774e8c122f9a https://www.illumos.org/issues/3246 Ported-by: Brian Behlendorf <[email protected]> Closes #1396
Diffstat (limited to 'include/sys')
-rw-r--r--include/sys/dsl_pool.h2
-rw-r--r--include/sys/fm/fs/zfs.h5
-rw-r--r--include/sys/spa.h2
-rw-r--r--include/sys/spa_impl.h4
-rw-r--r--include/sys/vdev.h1
-rw-r--r--include/sys/vdev_impl.h2
-rw-r--r--include/sys/zfs_context.h9
-rw-r--r--include/sys/zfs_ioctl.h12
-rw-r--r--include/sys/zio.h9
9 files changed, 42 insertions, 4 deletions
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index ff5df1414..96a2296bf 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -41,6 +41,8 @@
extern "C" {
#endif
+extern int zfs_txg_synctime_ms;
+
struct objset;
struct dsl_dir;
struct dsl_dataset;
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index d5c6004c2..741b99e62 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -73,6 +73,8 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT "vdev_ashift"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS "vdev_complete_ts"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS "vdev_delta_ts"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
@@ -88,6 +90,9 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE "zio_stage"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE "zio_pipeline"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay"
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP "zio_timestamp"
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DEADLINE "zio_deadline"
+#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA "zio_delta"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 821172297..ca9fb2438 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -570,6 +570,7 @@ extern int spa_offline_log(spa_t *spa);
/* Log claim callback */
extern void spa_claim_notify(zio_t *zio);
+extern void spa_deadman(void *);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
@@ -604,6 +605,7 @@ extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
+extern uint64_t spa_deadman_synctime(spa_t *spa);
/* Miscellaneous support routines */
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 65edc97f5..5bd0e0a9c 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -227,6 +227,10 @@ struct spa {
uint64_t spa_feat_for_write_obj; /* required to write to pool */
uint64_t spa_feat_for_read_obj; /* required to read from pool */
uint64_t spa_feat_desc_obj; /* Feature descriptions */
+ taskqid_t spa_deadman_tqid; /* Task id */
+ uint64_t spa_deadman_calls; /* number of deadman calls */
+ uint64_t spa_sync_starttime; /* starting time fo spa_sync */
+ uint64_t spa_deadman_synctime; /* deadman expiration timer */
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 8f297a917..f49086a47 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -78,6 +78,7 @@ extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
+extern void vdev_deadman(vdev_t *vd);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 964ee2407..711408e6a 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -105,6 +105,8 @@ struct vdev_queue {
avl_tree_t vq_read_tree;
avl_tree_t vq_write_tree;
avl_tree_t vq_pending_tree;
+ uint64_t vq_io_complete_ts;
+ uint64_t vq_io_delta_ts;
list_t vq_io_list;
kmutex_t vq_lock;
};
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 53080f382..a23bfdcf8 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -25,6 +25,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -400,6 +401,8 @@ extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
(taskq_create(a, b, maxclsyspri, d, e, f))
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
+extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *, uint_t,
+ clock_t);
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
taskq_ent_t *);
extern int taskq_empty_ent(taskq_ent_t *);
@@ -407,6 +410,7 @@ extern void taskq_init_ent(taskq_ent_t *);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, kthread_t *);
+extern int taskq_cancel_id(taskq_t *, taskqid_t);
extern void system_taskq_init(void);
extern void system_taskq_fini(void);
@@ -523,6 +527,11 @@ extern vnode_t *rootdir;
extern void delay(clock_t ticks);
+#define SEC_TO_TICK(sec) ((sec) * hz)
+#define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz))
+#define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz))
+#define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz))
+
#define gethrestime_sec() time(NULL)
#define gethrestime(t) \
do {\
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index 740d8eda4..c0cb4705d 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -236,6 +236,8 @@ typedef struct zinject_record {
uint32_t zi_iotype;
int32_t zi_duration;
uint64_t zi_timer;
+ uint32_t zi_cmd;
+ uint32_t zi_pad;
} zinject_record_t;
#define ZINJECT_NULL 0x1
@@ -245,6 +247,16 @@ typedef struct zinject_record {
#define ZEVENT_NONBLOCK 0x1
#define ZEVENT_SIZE 1024
+typedef enum zinject_type {
+ ZINJECT_UNINITIALIZED,
+ ZINJECT_DATA_FAULT,
+ ZINJECT_DEVICE_FAULT,
+ ZINJECT_LABEL_FAULT,
+ ZINJECT_IGNORED_WRITES,
+ ZINJECT_PANIC,
+ ZINJECT_DELAY_IO,
+} zinject_type_t;
+
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 64efde016..03530330c 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -21,8 +21,6 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
@@ -411,7 +409,10 @@ struct zio {
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
- uint64_t io_deadline;
+ uint64_t io_deadline; /* expires at timestamp + deadline */
+ uint64_t io_timestamp; /* submitted at (ticks) */
+ uint64_t io_delta; /* vdev queue service delta (ticks) */
+ uint64_t io_delay; /* vdev disk service delta (ticks) */
avl_node_t io_offset_node;
avl_node_t io_deadline_node;
avl_tree_t *io_vdev_tree;
@@ -423,7 +424,6 @@ struct zio {
enum zio_flag io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
- uint64_t io_delay;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
@@ -560,6 +560,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
+extern uint64_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions