aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
Diffstat (limited to 'include/sys')
-rw-r--r--include/sys/Makefile.am1
-rw-r--r--include/sys/fs/zfs.h69
-rw-r--r--include/sys/metaslab.h2
-rw-r--r--include/sys/metaslab_impl.h25
-rw-r--r--include/sys/spa.h62
-rw-r--r--include/sys/spa_impl.h1
-rw-r--r--include/sys/sysevent/eventdefs.h5
-rw-r--r--include/sys/txg.h7
-rw-r--r--include/sys/vdev.h2
-rw-r--r--include/sys/vdev_impl.h30
-rw-r--r--include/sys/vdev_initialize.h2
-rw-r--r--include/sys/vdev_trim.h52
-rw-r--r--include/sys/zfs_context.h12
-rw-r--r--include/sys/zfs_debug.h1
-rw-r--r--include/sys/zio.h13
-rw-r--r--include/sys/zio_impl.h5
-rw-r--r--include/sys/zio_priority.h1
17 files changed, 265 insertions, 25 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index e6c82d113..31ffdfb4a 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -100,6 +100,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/vdev_raidz.h \
$(top_srcdir)/include/sys/vdev_raidz_impl.h \
$(top_srcdir)/include/sys/vdev_removal.h \
+ $(top_srcdir)/include/sys/vdev_trim.h \
$(top_srcdir)/include/sys/xvattr.h \
$(top_srcdir)/include/sys/zap.h \
$(top_srcdir)/include/sys/zap_impl.h \
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index e49a58f43..bdc25ee9f 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -244,6 +244,7 @@ typedef enum {
ZPOOL_PROP_MULTIHOST,
ZPOOL_PROP_CHECKPOINT,
ZPOOL_PROP_LOAD_GUID,
+ ZPOOL_PROP_AUTOTRIM,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -635,6 +636,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE "vdev_async_r_active_queue"
#define ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE "vdev_async_w_active_queue"
#define ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE "vdev_async_scrub_active_queue"
+#define ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE "vdev_async_trim_active_queue"
/* Queue sizes */
#define ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE "vdev_sync_r_pend_queue"
@@ -642,6 +644,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE "vdev_async_r_pend_queue"
#define ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE "vdev_async_w_pend_queue"
#define ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE "vdev_async_scrub_pend_queue"
+#define ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE "vdev_async_trim_pend_queue"
/* Latency read/write histogram stats */
#define ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO "vdev_tot_r_lat_histo"
@@ -653,6 +656,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO "vdev_async_r_lat_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo"
#define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo"
+#define ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO "vdev_trim_histo"
/* Request size histograms */
#define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo"
@@ -660,11 +664,13 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO "vdev_async_ind_r_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo"
#define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo"
+#define ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO "vdev_ind_trim_histo"
#define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo"
#define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo"
#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo"
+#define ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO "vdev_agg_trim_histo"
/* Number of slow IOs */
#define ZPOOL_CONFIG_VDEV_SLOW_IOS "vdev_slow_ios"
@@ -777,6 +783,7 @@ typedef struct zpool_load_policy {
#define VDEV_ALLOC_BIAS_SPECIAL "special"
#define VDEV_ALLOC_BIAS_DEDUP "dedup"
+/* vdev initialize state */
#define VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET \
"com.delphix:next_offset_to_initialize"
#define VDEV_LEAF_ZAP_INITIALIZE_STATE \
@@ -784,6 +791,20 @@ typedef struct zpool_load_policy {
#define VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME \
"com.delphix:vdev_initialize_action_time"
+/* vdev TRIM state */
+#define VDEV_LEAF_ZAP_TRIM_LAST_OFFSET \
+ "org.zfsonlinux:next_offset_to_trim"
+#define VDEV_LEAF_ZAP_TRIM_STATE \
+ "org.zfsonlinux:vdev_trim_state"
+#define VDEV_LEAF_ZAP_TRIM_ACTION_TIME \
+ "org.zfsonlinux:vdev_trim_action_time"
+#define VDEV_LEAF_ZAP_TRIM_RATE \
+ "org.zfsonlinux:vdev_trim_rate"
+#define VDEV_LEAF_ZAP_TRIM_PARTIAL \
+ "org.zfsonlinux:vdev_trim_partial"
+#define VDEV_LEAF_ZAP_TRIM_SECURE \
+ "org.zfsonlinux:vdev_trim_secure"
+
/*
* This is needed in userland to report the minimum necessary device size.
*/
@@ -915,6 +936,7 @@ typedef enum zio_type {
ZIO_TYPE_FREE,
ZIO_TYPE_CLAIM,
ZIO_TYPE_IOCTL,
+ ZIO_TYPE_TRIM,
ZIO_TYPES
} zio_type_t;
@@ -982,8 +1004,14 @@ typedef enum zpool_errata {
/*
* Vdev statistics. Note: all fields should be 64-bit because this
- * is passed between kernel and userland as an nvlist uint64 array.
+ * is passed between kernel and user land as an nvlist uint64 array.
+ *
+ * The vs_ops[] and vs_bytes[] arrays must always be an array size of 6 in
+ * order to keep subsequent members at their known fixed offsets. When
+ * adding a new field it must be added to the end of the structure.
*/
+#define VS_ZIO_TYPES 6
+
typedef struct vdev_stat {
hrtime_t vs_timestamp; /* time since vdev load */
uint64_t vs_state; /* vdev state */
@@ -993,8 +1021,8 @@ typedef struct vdev_stat {
uint64_t vs_dspace; /* deflated capacity */
uint64_t vs_rsize; /* replaceable dev size */
uint64_t vs_esize; /* expandable dev size */
- uint64_t vs_ops[ZIO_TYPES]; /* operation count */
- uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
+ uint64_t vs_ops[VS_ZIO_TYPES]; /* operation count */
+ uint64_t vs_bytes[VS_ZIO_TYPES]; /* bytes read/written */
uint64_t vs_read_errors; /* read errors */
uint64_t vs_write_errors; /* write errors */
uint64_t vs_checksum_errors; /* checksum errors */
@@ -1010,6 +1038,12 @@ typedef struct vdev_stat {
uint64_t vs_checkpoint_space; /* checkpoint-consumed space */
uint64_t vs_resilver_deferred; /* resilver deferred */
uint64_t vs_slow_ios; /* slow IOs */
+ uint64_t vs_trim_errors; /* trimming errors */
+ uint64_t vs_trim_notsup; /* TRIM not supported by device */
+ uint64_t vs_trim_bytes_done; /* bytes trimmed */
+ uint64_t vs_trim_bytes_est; /* total bytes to trim */
+ uint64_t vs_trim_state; /* vdev_trim_state_t */
+ uint64_t vs_trim_action_time; /* time_t */
} vdev_stat_t;
/*
@@ -1068,13 +1102,23 @@ typedef struct vdev_stat_ex {
* Initialize functions.
*/
typedef enum pool_initialize_func {
- POOL_INITIALIZE_DO,
+ POOL_INITIALIZE_START,
POOL_INITIALIZE_CANCEL,
POOL_INITIALIZE_SUSPEND,
POOL_INITIALIZE_FUNCS
} pool_initialize_func_t;
/*
+ * TRIM functions.
+ */
+typedef enum pool_trim_func {
+ POOL_TRIM_START,
+ POOL_TRIM_CANCEL,
+ POOL_TRIM_SUSPEND,
+ POOL_TRIM_FUNCS
+} pool_trim_func_t;
+
+/*
* DDT statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
@@ -1126,6 +1170,14 @@ typedef enum {
VDEV_INITIALIZE_COMPLETE
} vdev_initializing_state_t;
+typedef enum {
+ VDEV_TRIM_NONE,
+ VDEV_TRIM_ACTIVE,
+ VDEV_TRIM_CANCELED,
+ VDEV_TRIM_SUSPENDED,
+ VDEV_TRIM_COMPLETE,
+} vdev_trim_state_t;
+
/*
* nvlist name constants. Facilitate restricting snapshot iteration range for
* the "list next snapshot" ioctl
@@ -1224,6 +1276,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_CHECKPOINT, /* 0x5a4d */
ZFS_IOC_POOL_DISCARD_CHECKPOINT, /* 0x5a4e */
ZFS_IOC_POOL_INITIALIZE, /* 0x5a4f */
+ ZFS_IOC_POOL_TRIM, /* 0x5a50 */
/*
* Linux - 3/64 numbers reserved.
@@ -1327,6 +1380,14 @@ typedef enum {
#define ZPOOL_INITIALIZE_VDEVS "initialize_vdevs"
/*
+ * The following are names used when invoking ZFS_IOC_POOL_TRIM.
+ */
+#define ZPOOL_TRIM_COMMAND "trim_command"
+#define ZPOOL_TRIM_VDEVS "trim_vdevs"
+#define ZPOOL_TRIM_RATE "trim_rate"
+#define ZPOOL_TRIM_SECURE "trim_secure"
+
+/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
#define ZFS_ONLINE_CHECKREMOVE 0x1
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index a513a6470..2790d06c7 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -120,6 +120,8 @@ void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int, int,
boolean_t);
void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *, int);
void metaslab_recalculate_weight_and_sort(metaslab_t *);
+void metaslab_disable(metaslab_t *);
+void metaslab_enable(metaslab_t *, boolean_t);
#ifdef __cplusplus
}
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 676c5dd46..ca1104c14 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -69,7 +69,7 @@ typedef enum trace_alloc_type {
TRACE_ENOSPC = -6ULL,
TRACE_CONDENSING = -7ULL,
TRACE_VDEV_ERROR = -8ULL,
- TRACE_INITIALIZING = -9ULL
+ TRACE_DISABLED = -9ULL,
} trace_alloc_type_t;
#define METASLAB_WEIGHT_PRIMARY (1ULL << 63)
@@ -272,10 +272,10 @@ struct metaslab_group {
uint64_t mg_fragmentation;
uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
- int mg_ms_initializing;
- boolean_t mg_initialize_updating;
- kmutex_t mg_ms_initialize_lock;
- kcondvar_t mg_ms_initialize_cv;
+ int mg_ms_disabled;
+ boolean_t mg_disabled_updating;
+ kmutex_t mg_ms_disabled_lock;
+ kcondvar_t mg_ms_disabled_cv;
};
/*
@@ -389,11 +389,24 @@ struct metaslab {
range_tree_t *ms_defer[TXG_DEFER_SIZE];
range_tree_t *ms_checkpointing; /* to add to the checkpoint */
+ /*
+ * The ms_trim tree is the set of allocatable segments which are
+ * eligible for trimming. (When the metaslab is loaded, it's a
+ * subset of ms_allocatable.) It's kept in-core as long as the
+ * autotrim property is set and is not vacated when the metaslab
+ * is unloaded. Its purpose is to aggregate freed ranges to
+ * facilitate efficient trimming.
+ */
+ range_tree_t *ms_trim;
+
boolean_t ms_condensing; /* condensing? */
boolean_t ms_condense_wanted;
uint64_t ms_condense_checked_txg;
- uint64_t ms_initializing; /* leaves initializing this ms */
+ /*
+ * The number of consumers which have disabled the metaslab.
+ */
+ uint64_t ms_disabled;
/*
* We must always hold the ms_lock when modifying ms_loaded
diff --git a/include/sys/spa.h b/include/sys/spa.h
index febf0e8f2..343977b30 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -738,6 +738,24 @@ typedef enum spa_import_type {
SPA_IMPORT_ASSEMBLE
} spa_import_type_t;
+/*
+ * Send TRIM commands in-line during normal pool operation while deleting.
+ * OFF: no
+ * ON: yes
+ */
+typedef enum {
+ SPA_AUTOTRIM_OFF = 0, /* default */
+ SPA_AUTOTRIM_ON
+} spa_autotrim_t;
+
+/*
+ * Reason TRIM command was issued, used internally for accounting purposes.
+ */
+typedef enum trim_type {
+ TRIM_TYPE_MANUAL = 0,
+ TRIM_TYPE_AUTO = 1,
+} trim_type_t;
+
/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
@@ -764,15 +782,17 @@ extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
-#define SPA_ASYNC_CONFIG_UPDATE 0x01
-#define SPA_ASYNC_REMOVE 0x02
-#define SPA_ASYNC_PROBE 0x04
-#define SPA_ASYNC_RESILVER_DONE 0x08
-#define SPA_ASYNC_RESILVER 0x10
-#define SPA_ASYNC_AUTOEXPAND 0x20
-#define SPA_ASYNC_REMOVE_DONE 0x40
-#define SPA_ASYNC_REMOVE_STOP 0x80
-#define SPA_ASYNC_INITIALIZE_RESTART 0x100
+#define SPA_ASYNC_CONFIG_UPDATE 0x01
+#define SPA_ASYNC_REMOVE 0x02
+#define SPA_ASYNC_PROBE 0x04
+#define SPA_ASYNC_RESILVER_DONE 0x08
+#define SPA_ASYNC_RESILVER 0x10
+#define SPA_ASYNC_AUTOEXPAND 0x20
+#define SPA_ASYNC_REMOVE_DONE 0x40
+#define SPA_ASYNC_REMOVE_STOP 0x80
+#define SPA_ASYNC_INITIALIZE_RESTART 0x100
+#define SPA_ASYNC_TRIM_RESTART 0x200
+#define SPA_ASYNC_AUTOTRIM_RESTART 0x400
/*
* Controls the behavior of spa_vdev_remove().
@@ -790,6 +810,8 @@ extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern boolean_t spa_vdev_remove_active(spa_t *spa);
extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
nvlist_t *vdev_errlist);
+extern int spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type,
+ uint64_t rate, boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
@@ -887,6 +909,7 @@ typedef struct spa_stats {
spa_history_kstat_t io_history;
spa_history_list_t mmp_history;
spa_history_kstat_t state; /* pool state */
+ spa_history_kstat_t iostats;
} spa_stats_t;
typedef enum txg_state {
@@ -905,6 +928,22 @@ typedef struct txg_stat {
uint64_t ndirty;
} txg_stat_t;
+/* Assorted pool IO kstats */
+typedef struct spa_iostats {
+ kstat_named_t trim_extents_written;
+ kstat_named_t trim_bytes_written;
+ kstat_named_t trim_extents_skipped;
+ kstat_named_t trim_bytes_skipped;
+ kstat_named_t trim_extents_failed;
+ kstat_named_t trim_bytes_failed;
+ kstat_named_t autotrim_extents_written;
+ kstat_named_t autotrim_bytes_written;
+ kstat_named_t autotrim_extents_skipped;
+ kstat_named_t autotrim_bytes_skipped;
+ kstat_named_t autotrim_extents_failed;
+ kstat_named_t autotrim_bytes_failed;
+} spa_iostats_t;
+
extern void spa_stats_init(spa_t *spa);
extern void spa_stats_destroy(spa_t *spa);
extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb,
@@ -922,6 +961,10 @@ extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
int error);
+extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
+ uint64_t extents_written, uint64_t bytes_written,
+ uint64_t extents_skipped, uint64_t bytes_skipped,
+ uint64_t extents_failed, uint64_t bytes_failed);
/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
@@ -1005,6 +1048,7 @@ extern objset_t *spa_meta_objset(spa_t *spa);
extern uint64_t spa_deadman_synctime(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);
extern uint64_t spa_dirty_data(spa_t *spa);
+extern spa_autotrim_t spa_get_autotrim(spa_t *spa);
/* Miscellaneous support routines */
extern void spa_load_failed(spa_t *spa, const char *fmt, ...);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index c3aaad611..66032d9aa 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -378,6 +378,7 @@ struct spa {
uint64_t spa_deadman_ziotime; /* deadman zio expiration */
uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
+ uint64_t spa_autotrim; /* automatic background trim? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
spa_keystore_t spa_keystore; /* loaded crypto keys */
diff --git a/include/sys/sysevent/eventdefs.h b/include/sys/sysevent/eventdefs.h
index aa13bd505..2067b355a 100644
--- a/include/sys/sysevent/eventdefs.h
+++ b/include/sys/sysevent/eventdefs.h
@@ -118,6 +118,11 @@ extern "C" {
#define ESC_ZFS_BOOTFS_VDEV_ATTACH "bootfs_vdev_attach"
#define ESC_ZFS_POOL_REGUID "pool_reguid"
#define ESC_ZFS_HISTORY_EVENT "history_event"
+#define ESC_ZFS_TRIM_START "trim_start"
+#define ESC_ZFS_TRIM_FINISH "trim_finish"
+#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
+#define ESC_ZFS_TRIM_RESUME "trim_resume"
+#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
/*
* datalink subclass definitions.
diff --git a/include/sys/txg.h b/include/sys/txg.h
index ed0e7297c..760d5208b 100644
--- a/include/sys/txg.h
+++ b/include/sys/txg.h
@@ -90,10 +90,11 @@ extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg);
/*
* Wait until the given transaction group, or one after it, is
* the open transaction group. Try to make this happen as soon
- * as possible (eg. kick off any necessary syncs immediately).
- * If txg == 0, wait for the next open txg.
+ * as possible (eg. kick off any necessary syncs immediately) when
+ * should_quiesce is set. If txg == 0, wait for the next open txg.
*/
-extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg);
+extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg,
+ boolean_t should_quiesce);
/*
* Returns TRUE if we are "backed up" waiting for the syncing
diff --git a/include/sys/vdev.h b/include/sys/vdev.h
index 2091892b2..67ca0d116 100644
--- a/include/sys/vdev.h
+++ b/include/sys/vdev.h
@@ -95,6 +95,8 @@ extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd, char *tag);
+extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
+ range_seg_t *physical_rs);
extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index c115a5e10..f6f7bbb4b 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -145,6 +145,7 @@ struct vdev_queue {
avl_tree_t vq_active_tree;
avl_tree_t vq_read_offset_tree;
avl_tree_t vq_write_offset_tree;
+ avl_tree_t vq_trim_offset_tree;
uint64_t vq_last_offset;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
hrtime_t vq_io_delta_ts;
@@ -260,6 +261,7 @@ struct vdev {
/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
+ /* Initialize related */
boolean_t vdev_initialize_exit_wanted;
vdev_initializing_state_t vdev_initialize_state;
list_node_t vdev_initialize_node;
@@ -274,10 +276,34 @@ struct vdev {
uint64_t vdev_initialize_bytes_done;
time_t vdev_initialize_action_time; /* start and end time */
- /* for limiting outstanding I/Os */
+ /* TRIM related */
+ boolean_t vdev_trim_exit_wanted;
+ boolean_t vdev_autotrim_exit_wanted;
+ vdev_trim_state_t vdev_trim_state;
+ list_node_t vdev_trim_node;
+ kmutex_t vdev_autotrim_lock;
+ kcondvar_t vdev_autotrim_cv;
+ kthread_t *vdev_autotrim_thread;
+ /* Protects vdev_trim_thread and vdev_trim_state. */
+ kmutex_t vdev_trim_lock;
+ kcondvar_t vdev_trim_cv;
+ kthread_t *vdev_trim_thread;
+ uint64_t vdev_trim_offset[TXG_SIZE];
+ uint64_t vdev_trim_last_offset;
+ uint64_t vdev_trim_bytes_est;
+ uint64_t vdev_trim_bytes_done;
+ uint64_t vdev_trim_rate; /* requested rate (bytes/sec) */
+ uint64_t vdev_trim_partial; /* requested partial TRIM */
+ uint64_t vdev_trim_secure; /* requested secure TRIM */
+ time_t vdev_trim_action_time; /* start and end time */
+
+ /* for limiting outstanding I/Os (initialize and TRIM) */
kmutex_t vdev_initialize_io_lock;
kcondvar_t vdev_initialize_io_cv;
uint64_t vdev_initialize_inflight;
+ kmutex_t vdev_trim_io_lock;
+ kcondvar_t vdev_trim_io_cv;
+ uint64_t vdev_trim_inflight[2];
/*
* Values stored in the config for an indirect or removing vdev.
@@ -343,6 +369,8 @@ struct vdev {
uint64_t vdev_not_present; /* not present during import */
uint64_t vdev_unspare; /* unspare when resilvering done */
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
+ boolean_t vdev_has_trim; /* TRIM is supported */
+ boolean_t vdev_has_securetrim; /* secure TRIM is supported */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
boolean_t vdev_splitting; /* split or repair in progress */
diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h
index 319fb9bc0..81d39ebeb 100644
--- a/include/sys/vdev_initialize.h
+++ b/include/sys/vdev_initialize.h
@@ -39,8 +39,6 @@ extern void vdev_initialize_stop_all(vdev_t *vd,
vdev_initializing_state_t tgt_state);
extern void vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list);
extern void vdev_initialize_restart(vdev_t *vd);
-extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
- range_seg_t *physical_rs);
#ifdef __cplusplus
}
diff --git a/include/sys/vdev_trim.h b/include/sys/vdev_trim.h
new file mode 100644
index 000000000..1e5401766
--- /dev/null
+++ b/include/sys/vdev_trim.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2019 Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_VDEV_TRIM_H
+#define _SYS_VDEV_TRIM_H
+
+#include <sys/spa.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern unsigned int zfs_trim_metaslab_skip;
+
+extern void vdev_trim(vdev_t *vd, uint64_t rate, boolean_t partial,
+ boolean_t secure);
+extern void vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt, list_t *vd_list);
+extern void vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state);
+extern void vdev_trim_stop_wait(spa_t *spa, list_t *vd_list);
+extern void vdev_trim_restart(vdev_t *vd);
+extern void vdev_autotrim(spa_t *spa);
+extern void vdev_autotrim_stop_all(spa_t *spa);
+extern void vdev_autotrim_stop_wait(vdev_t *vd);
+extern void vdev_autotrim_restart(spa_t *spa);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_TRIM_H */
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index 260b8a458..87ddde30a 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -579,6 +579,8 @@ typedef struct vsecattr {
#define CRCREAT 0
+#define F_FREESP 11
+
extern int fop_getattr(vnode_t *vp, vattr_t *vap);
#define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp)
@@ -587,6 +589,16 @@ extern int fop_getattr(vnode_t *vp, vattr_t *vap);
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
+#if defined(HAVE_FILE_FALLOCATE) && \
+ defined(FALLOC_FL_PUNCH_HOLE) && \
+ defined(FALLOC_FL_KEEP_SIZE)
+#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) \
+ fallocate((vp)->v_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \
+ (flck)->l_start, (flck)->l_len)
+#else
+#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) (0)
+#endif
+
#define VN_RELE(vp) vn_close(vp)
extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h
index 7564ae0e4..7968a01cd 100644
--- a/include/sys/zfs_debug.h
+++ b/include/sys/zfs_debug.h
@@ -54,6 +54,7 @@ extern int zfs_dbgmsg_enable;
#define ZFS_DEBUG_METASLAB_VERIFY (1 << 8)
#define ZFS_DEBUG_SET_ERROR (1 << 9)
#define ZFS_DEBUG_INDIRECT_REMAP (1 << 10)
+#define ZFS_DEBUG_TRIM (1 << 11)
extern void __zfs_dbgmsg(char *buf);
extern void __dprintf(boolean_t dprint, const char *file, const char *func,
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 4b7ad3e22..e69bf9208 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -416,6 +416,14 @@ typedef zio_t *zio_pipe_stage_t(zio_t *zio);
#define ZIO_REEXECUTE_NOW 0x01
#define ZIO_REEXECUTE_SUSPEND 0x02
+/*
+ * The io_trim flags are used to specify the type of TRIM to perform. They
+ * only apply to ZIO_TYPE_TRIM zios and are distinct from io_flags.
+ */
+enum trim_flag {
+ ZIO_TRIM_SECURE = 1 << 0,
+};
+
typedef struct zio_alloc_list {
list_t zal_list;
uint64_t zal_size;
@@ -434,6 +442,7 @@ struct zio {
zio_prop_t io_prop;
zio_type_t io_type;
enum zio_child io_child_type;
+ enum trim_flag io_trim_flags;
int io_cmd;
zio_priority_t io_priority;
uint8_t io_reexecute;
@@ -549,6 +558,10 @@ extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, enum zio_flag flags);
+extern zio_t *zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
+ zio_done_func_t *done, void *private, zio_priority_t priority,
+ enum zio_flag flags, enum trim_flag trim_flags);
+
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 344048c6a..fbbe06eb0 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -250,6 +250,11 @@ enum zio_stage {
ZIO_STAGE_VDEV_IO_START | \
ZIO_STAGE_VDEV_IO_ASSESS)
+#define ZIO_TRIM_PIPELINE \
+ (ZIO_INTERLOCK_STAGES | \
+ ZIO_STAGE_ISSUE_ASYNC | \
+ ZIO_VDEV_IO_STAGES)
+
#define ZIO_BLOCKING_STAGES \
(ZIO_STAGE_DVA_ALLOCATE | \
ZIO_STAGE_DVA_CLAIM | \
diff --git a/include/sys/zio_priority.h b/include/sys/zio_priority.h
index d8e6a1745..0b422904e 100644
--- a/include/sys/zio_priority.h
+++ b/include/sys/zio_priority.h
@@ -30,6 +30,7 @@ typedef enum zio_priority {
ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
ZIO_PRIORITY_REMOVAL, /* reads/writes for vdev removal */
ZIO_PRIORITY_INITIALIZING, /* initializing I/O */
+ ZIO_PRIORITY_TRIM, /* trim I/O (discard) */
ZIO_PRIORITY_NUM_QUEUEABLE,
ZIO_PRIORITY_NOW, /* non-queued i/os (e.g. free) */
} zio_priority_t;