summaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
Diffstat (limited to 'include/sys')
-rw-r--r--include/sys/Makefile.am1
-rw-r--r--include/sys/dsl_pool.h1
-rw-r--r--include/sys/fs/zfs.h20
-rw-r--r--include/sys/mmp.h63
-rw-r--r--include/sys/spa.h5
-rw-r--r--include/sys/spa_impl.h2
-rw-r--r--include/sys/uberblock.h3
-rw-r--r--include/sys/uberblock_impl.h6
-rw-r--r--include/sys/vdev.h2
-rw-r--r--include/sys/vdev_impl.h7
10 files changed, 108 insertions, 2 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index 956643801..be606b8c6 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -36,6 +36,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/efi_partition.h \
$(top_srcdir)/include/sys/metaslab.h \
$(top_srcdir)/include/sys/metaslab_impl.h \
+ $(top_srcdir)/include/sys/mmp.h \
$(top_srcdir)/include/sys/mntent.h \
$(top_srcdir)/include/sys/multilist.h \
$(top_srcdir)/include/sys/nvpair.h \
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index 674dd25b5..d2dabda6d 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -38,6 +38,7 @@
#include <sys/bpobj.h>
#include <sys/bptree.h>
#include <sys/rrwlock.h>
+#include <sys/mmp.h>
#ifdef __cplusplus
extern "C" {
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 9fbcfbef3..13b25a695 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -223,6 +223,7 @@ typedef enum {
ZPOOL_PROP_MAXBLOCKSIZE,
ZPOOL_PROP_TNAME,
ZPOOL_PROP_MAXDNODESIZE,
+ ZPOOL_PROP_MULTIHOST,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -651,6 +652,11 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
+#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
+
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -744,7 +750,8 @@ typedef enum vdev_aux {
VDEV_AUX_EXTERNAL, /* external diagnosis or forced fault */
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */
- VDEV_AUX_EXTERNAL_PERSIST /* persistent forced fault */
+ VDEV_AUX_EXTERNAL_PERSIST, /* persistent forced fault */
+ VDEV_AUX_ACTIVE, /* vdev active on a different host */
} vdev_aux_t;
/*
@@ -765,6 +772,16 @@ typedef enum pool_state {
} pool_state_t;
/*
+ * mmp state. The following states provide additional detail describing
+ * why a pool couldn't be safely imported.
+ */
+typedef enum mmp_state {
+ MMP_STATE_ACTIVE = 0, /* In active use */
+ MMP_STATE_INACTIVE, /* Inactive and safe to import */
+ MMP_STATE_NO_HOSTID /* System hostid is not set */
+} mmp_state_t;
+
+/*
* Scan Functions.
*/
typedef enum pool_scan_func {
@@ -1126,6 +1143,7 @@ typedef enum {
#define ZFS_IMPORT_MISSING_LOG 0x4
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
+#define ZFS_IMPORT_SKIP_MMP 0x20
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
diff --git a/include/sys/mmp.h b/include/sys/mmp.h
new file mode 100644
index 000000000..4da612d6a
--- /dev/null
+++ b/include/sys/mmp.h
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2017 by Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_MMP_H
+#define _SYS_MMP_H
+
+#include <sys/spa.h>
+#include <sys/zfs_context.h>
+#include <sys/uberblock_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MMP_MIN_INTERVAL 100 /* ms */
+#define MMP_DEFAULT_INTERVAL 1000 /* ms */
+#define MMP_DEFAULT_IMPORT_INTERVALS 10
+#define MMP_DEFAULT_FAIL_INTERVALS 5
+
+typedef struct mmp_thread {
+ kmutex_t mmp_thread_lock; /* protect thread mgmt fields */
+ kcondvar_t mmp_thread_cv;
+ kthread_t *mmp_thread;
+ uint8_t mmp_thread_exiting;
+ kmutex_t mmp_io_lock; /* protect below */
+ hrtime_t mmp_last_write; /* last successful MMP write */
+ uint64_t mmp_delay; /* decaying avg ns between MMP writes */
+ uberblock_t mmp_ub; /* last ub written by sync */
+ zio_t *mmp_zio_root; /* root of mmp write zios */
+} mmp_thread_t;
+
+
+extern void mmp_init(struct spa *spa);
+extern void mmp_fini(struct spa *spa);
+extern void mmp_thread_start(struct spa *spa);
+extern void mmp_thread_stop(struct spa *spa);
+extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
+
+/* Global tuning */
+extern ulong_t zfs_multihost_interval;
+extern uint_t zfs_multihost_fail_intervals;
+extern uint_t zfs_multihost_import_intervals;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MMP_H */
diff --git a/include/sys/spa.h b/include/sys/spa.h
index dd86aad40..de942ad2b 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -729,6 +729,7 @@ typedef struct spa_stats {
spa_stats_history_t txg_history;
spa_stats_history_t tx_assign_histogram;
spa_stats_history_t io_history;
+ spa_stats_history_t mmp_history;
} spa_stats_t;
typedef enum txg_state {
@@ -758,6 +759,8 @@ extern txg_stat_t *spa_txg_history_init_io(spa_t *, uint64_t,
struct dsl_pool *);
extern void spa_txg_history_fini_io(spa_t *, txg_stat_t *);
extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs);
+extern void spa_mmp_history_add(uint64_t txg, uint64_t timestamp,
+ uint64_t mmp_delay, vdev_t *vd, int label);
/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
@@ -860,6 +863,8 @@ extern boolean_t spa_has_pending_synctask(spa_t *spa);
extern int spa_maxblocksize(spa_t *spa);
extern int spa_maxdnodesize(spa_t *spa);
extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);
+extern boolean_t spa_multihost(spa_t *spa);
+extern unsigned long spa_get_hostid(void);
extern int spa_mode(spa_t *spa);
extern uint64_t zfs_strtonum(const char *str, char **nptr);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 9142242cb..06de24421 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -275,6 +275,8 @@ struct spa {
spa_stats_t spa_stats; /* assorted spa statistics */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
+ uint64_t spa_multihost; /* multihost aware (mmp) */
+ mmp_thread_t spa_mmp; /* multihost mmp thread */
/*
* spa_refcount & spa_config_lock must be the last elements
diff --git a/include/sys/uberblock.h b/include/sys/uberblock.h
index 21e7ae0de..044e43838 100644
--- a/include/sys/uberblock.h
+++ b/include/sys/uberblock.h
@@ -40,7 +40,8 @@ extern "C" {
typedef struct uberblock uberblock_t;
extern int uberblock_verify(uberblock_t *);
-extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t);
+extern boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg,
+ uint64_t mmp_delay);
#ifdef __cplusplus
}
diff --git a/include/sys/uberblock_impl.h b/include/sys/uberblock_impl.h
index 6ab6aa313..9fdc70b91 100644
--- a/include/sys/uberblock_impl.h
+++ b/include/sys/uberblock_impl.h
@@ -43,6 +43,7 @@ extern "C" {
*/
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
#define UBERBLOCK_SHIFT 10 /* up to 1K */
+#define MMP_MAGIC 0xa11cea11 /* all-see-all */
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
@@ -54,6 +55,11 @@ struct uberblock {
/* highest SPA_VERSION supported by software that wrote this txg */
uint64_t ub_software_version;
+
+ /* Maybe missing in uberblocks we read, but always written */
+ uint64_t ub_mmp_magic; /* MMP_MAGIC */
+ uint64_t ub_mmp_delay; /* nanosec since last MMP write */
+ uint64_t ub_mmp_seq; /* reserved for sequence number */
};
#ifdef __cplusplus
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 63b4904c5..7157ef43f 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -155,6 +155,8 @@ extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv);
+extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
+ offset, uint64_t size, zio_done_func_t *done, void *private, int flags);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 835d2dbbf..7c5e54b08 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -238,6 +238,7 @@ struct vdev {
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
uint64_t vdev_leaf_zap;
+ hrtime_t vdev_mmp_pending; /* 0 if write finished */
/*
* For DTrace to work in userland (libzpool) context, these fields must
@@ -268,6 +269,12 @@ struct vdev {
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
+/*
+ * MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock
+ * ring when MMP is enabled.
+ */
+#define MMP_BLOCKS_PER_LABEL 1
+
/* The largest uberblock we support is 8k. */
#define MAX_UBERBLOCK_SHIFT (13)
#define VDEV_UBERBLOCK_SHIFT(vd) \