diff options
Diffstat (limited to 'include/sys')
-rw-r--r-- | include/sys/Makefile.am | 1 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 69 | ||||
-rw-r--r-- | include/sys/metaslab.h | 2 | ||||
-rw-r--r-- | include/sys/metaslab_impl.h | 25 | ||||
-rw-r--r-- | include/sys/spa.h | 62 | ||||
-rw-r--r-- | include/sys/spa_impl.h | 1 | ||||
-rw-r--r-- | include/sys/sysevent/eventdefs.h | 5 | ||||
-rw-r--r-- | include/sys/txg.h | 7 | ||||
-rw-r--r-- | include/sys/vdev.h | 2 | ||||
-rw-r--r-- | include/sys/vdev_impl.h | 30 | ||||
-rw-r--r-- | include/sys/vdev_initialize.h | 2 | ||||
-rw-r--r-- | include/sys/vdev_trim.h | 52 | ||||
-rw-r--r-- | include/sys/zfs_context.h | 12 | ||||
-rw-r--r-- | include/sys/zfs_debug.h | 1 | ||||
-rw-r--r-- | include/sys/zio.h | 13 | ||||
-rw-r--r-- | include/sys/zio_impl.h | 5 | ||||
-rw-r--r-- | include/sys/zio_priority.h | 1 |
17 files changed, 265 insertions, 25 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index e6c82d113..31ffdfb4a 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -100,6 +100,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/vdev_raidz.h \ $(top_srcdir)/include/sys/vdev_raidz_impl.h \ $(top_srcdir)/include/sys/vdev_removal.h \ + $(top_srcdir)/include/sys/vdev_trim.h \ $(top_srcdir)/include/sys/xvattr.h \ $(top_srcdir)/include/sys/zap.h \ $(top_srcdir)/include/sys/zap_impl.h \ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index e49a58f43..bdc25ee9f 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -244,6 +244,7 @@ typedef enum { ZPOOL_PROP_MULTIHOST, ZPOOL_PROP_CHECKPOINT, ZPOOL_PROP_LOAD_GUID, + ZPOOL_PROP_AUTOTRIM, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -635,6 +636,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE "vdev_async_r_active_queue" #define ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE "vdev_async_w_active_queue" #define ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE "vdev_async_scrub_active_queue" +#define ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE "vdev_async_trim_active_queue" /* Queue sizes */ #define ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE "vdev_sync_r_pend_queue" @@ -642,6 +644,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE "vdev_async_r_pend_queue" #define ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE "vdev_async_w_pend_queue" #define ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE "vdev_async_scrub_pend_queue" +#define ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE "vdev_async_trim_pend_queue" /* Latency read/write histogram stats */ #define ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO "vdev_tot_r_lat_histo" @@ -653,6 +656,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO "vdev_async_r_lat_histo" #define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo" #define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo" +#define ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO "vdev_trim_histo" /* Request size histograms */ #define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo" @@ -660,11 +664,13 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO "vdev_async_ind_r_histo" #define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo" #define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo" +#define ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO "vdev_ind_trim_histo" #define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo" #define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo" #define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo" #define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo" #define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo" +#define ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO "vdev_agg_trim_histo" /* Number of slow IOs */ #define ZPOOL_CONFIG_VDEV_SLOW_IOS "vdev_slow_ios" @@ -777,6 +783,7 @@ typedef struct zpool_load_policy { #define VDEV_ALLOC_BIAS_SPECIAL "special" #define VDEV_ALLOC_BIAS_DEDUP "dedup" +/* vdev initialize state */ #define VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET \ "com.delphix:next_offset_to_initialize" #define VDEV_LEAF_ZAP_INITIALIZE_STATE \ @@ -784,6 +791,20 @@ typedef struct zpool_load_policy { #define VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME \ "com.delphix:vdev_initialize_action_time" +/* vdev TRIM state */ +#define VDEV_LEAF_ZAP_TRIM_LAST_OFFSET \ + "org.zfsonlinux:next_offset_to_trim" +#define VDEV_LEAF_ZAP_TRIM_STATE \ + "org.zfsonlinux:vdev_trim_state" +#define VDEV_LEAF_ZAP_TRIM_ACTION_TIME \ + "org.zfsonlinux:vdev_trim_action_time" +#define VDEV_LEAF_ZAP_TRIM_RATE \ + "org.zfsonlinux:vdev_trim_rate" +#define VDEV_LEAF_ZAP_TRIM_PARTIAL \ + "org.zfsonlinux:vdev_trim_partial" +#define VDEV_LEAF_ZAP_TRIM_SECURE \ + "org.zfsonlinux:vdev_trim_secure" + /* * This is needed in userland to report the minimum necessary device size. */ @@ -915,6 +936,7 @@ typedef enum zio_type { ZIO_TYPE_FREE, ZIO_TYPE_CLAIM, ZIO_TYPE_IOCTL, + ZIO_TYPE_TRIM, ZIO_TYPES } zio_type_t; @@ -982,8 +1004,14 @@ typedef enum zpool_errata { /* * Vdev statistics. Note: all fields should be 64-bit because this - * is passed between kernel and userland as an nvlist uint64 array. + * is passed between kernel and user land as an nvlist uint64 array. + * + * The vs_ops[] and vs_bytes[] arrays must always be an array size of 6 in + * order to keep subsequent members at their known fixed offsets. When + * adding a new field it must be added to the end the structure. */ +#define VS_ZIO_TYPES 6 + typedef struct vdev_stat { hrtime_t vs_timestamp; /* time since vdev load */ uint64_t vs_state; /* vdev state */ @@ -993,8 +1021,8 @@ typedef struct vdev_stat { uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ uint64_t vs_esize; /* expandable dev size */ - uint64_t vs_ops[ZIO_TYPES]; /* operation count */ - uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ + uint64_t vs_ops[VS_ZIO_TYPES]; /* operation count */ + uint64_t vs_bytes[VS_ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ uint64_t vs_write_errors; /* write errors */ uint64_t vs_checksum_errors; /* checksum errors */ @@ -1010,6 +1038,12 @@ typedef struct vdev_stat { uint64_t vs_checkpoint_space; /* checkpoint-consumed space */ uint64_t vs_resilver_deferred; /* resilver deferred */ uint64_t vs_slow_ios; /* slow IOs */ + uint64_t vs_trim_errors; /* trimming errors */ + uint64_t vs_trim_notsup; /* supported by device */ + uint64_t vs_trim_bytes_done; /* bytes trimmed */ + uint64_t vs_trim_bytes_est; /* total bytes to trim */ + uint64_t vs_trim_state; /* vdev_trim_state_t */ + uint64_t vs_trim_action_time; /* time_t */ } vdev_stat_t; /* @@ -1068,13 +1102,23 @@ typedef struct vdev_stat_ex { * Initialize functions. */ typedef enum pool_initialize_func { - POOL_INITIALIZE_DO, + POOL_INITIALIZE_START, POOL_INITIALIZE_CANCEL, POOL_INITIALIZE_SUSPEND, POOL_INITIALIZE_FUNCS } pool_initialize_func_t; /* + * TRIM functions. + */ +typedef enum pool_trim_func { + POOL_TRIM_START, + POOL_TRIM_CANCEL, + POOL_TRIM_SUSPEND, + POOL_TRIM_FUNCS +} pool_trim_func_t; + +/* * DDT statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ @@ -1126,6 +1170,14 @@ typedef enum { VDEV_INITIALIZE_COMPLETE } vdev_initializing_state_t; +typedef enum { + VDEV_TRIM_NONE, + VDEV_TRIM_ACTIVE, + VDEV_TRIM_CANCELED, + VDEV_TRIM_SUSPENDED, + VDEV_TRIM_COMPLETE, +} vdev_trim_state_t; + /* * nvlist name constants. Facilitate restricting snapshot iteration range for * the "list next snapshot" ioctl @@ -1224,6 +1276,7 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_CHECKPOINT, /* 0x5a4d */ ZFS_IOC_POOL_DISCARD_CHECKPOINT, /* 0x5a4e */ ZFS_IOC_POOL_INITIALIZE, /* 0x5a4f */ + ZFS_IOC_POOL_TRIM, /* 0x5a50 */ /* * Linux - 3/64 numbers reserved. @@ -1327,6 +1380,14 @@ typedef enum { #define ZPOOL_INITIALIZE_VDEVS "initialize_vdevs" /* + * The following are names used when invoking ZFS_IOC_POOL_TRIM. + */ +#define ZPOOL_TRIM_COMMAND "trim_command" +#define ZPOOL_TRIM_VDEVS "trim_vdevs" +#define ZPOOL_TRIM_RATE "trim_rate" +#define ZPOOL_TRIM_SECURE "trim_secure" + +/* * Flags for ZFS_IOC_VDEV_SET_STATE */ #define ZFS_ONLINE_CHECKREMOVE 0x1 diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index a513a6470..2790d06c7 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -120,6 +120,8 @@ void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int, int, boolean_t); void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *, int); void metaslab_recalculate_weight_and_sort(metaslab_t *); +void metaslab_disable(metaslab_t *); +void metaslab_enable(metaslab_t *, boolean_t); #ifdef __cplusplus } diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index 676c5dd46..ca1104c14 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -69,7 +69,7 @@ typedef enum trace_alloc_type { TRACE_ENOSPC = -6ULL, TRACE_CONDENSING = -7ULL, TRACE_VDEV_ERROR = -8ULL, - TRACE_INITIALIZING = -9ULL + TRACE_DISABLED = -9ULL, } trace_alloc_type_t; #define METASLAB_WEIGHT_PRIMARY (1ULL << 63) @@ -272,10 +272,10 @@ struct metaslab_group { uint64_t mg_fragmentation; uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; - int mg_ms_initializing; - boolean_t mg_initialize_updating; - kmutex_t mg_ms_initialize_lock; - kcondvar_t mg_ms_initialize_cv; + int mg_ms_disabled; + boolean_t mg_disabled_updating; + kmutex_t mg_ms_disabled_lock; + kcondvar_t mg_ms_disabled_cv; }; /* @@ -389,11 +389,24 @@ struct metaslab { range_tree_t *ms_defer[TXG_DEFER_SIZE]; range_tree_t *ms_checkpointing; /* to add to the checkpoint */ + /* + * The ms_trim tree is the set of allocatable segments which are + * eligible for trimming. (When the metaslab is loaded, it's a + * subset of ms_allocatable.) It's kept in-core as long as the + * autotrim property is set and is not vacated when the metaslab + * is unloaded. Its purpose is to aggregate freed ranges to + * facilitate efficient trimming. + */ + range_tree_t *ms_trim; + boolean_t ms_condensing; /* condensing? */ boolean_t ms_condense_wanted; uint64_t ms_condense_checked_txg; - uint64_t ms_initializing; /* leaves initializing this ms */ + /* + * The number of consumers which have disabled the metaslab. + */ + uint64_t ms_disabled; /* * We must always hold the ms_lock when modifying ms_loaded diff --git a/include/sys/spa.h b/include/sys/spa.h index febf0e8f2..343977b30 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -738,6 +738,24 @@ typedef enum spa_import_type { SPA_IMPORT_ASSEMBLE } spa_import_type_t; +/* + * Send TRIM commands in-line during normal pool operation while deleting. + * OFF: no + * ON: yes + */ +typedef enum { + SPA_AUTOTRIM_OFF = 0, /* default */ + SPA_AUTOTRIM_ON +} spa_autotrim_t; + +/* + * Reason TRIM command was issued, used internally for accounting purposes. + */ +typedef enum trim_type { + TRIM_TYPE_MANUAL = 0, + TRIM_TYPE_AUTO = 1, +} trim_type_t; + /* state manipulation functions */ extern int spa_open(const char *pool, spa_t **, void *tag); extern int spa_open_rewind(const char *pool, spa_t **, void *tag, @@ -764,15 +782,17 @@ extern void spa_inject_delref(spa_t *spa); extern void spa_scan_stat_init(spa_t *spa); extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps); -#define SPA_ASYNC_CONFIG_UPDATE 0x01 -#define SPA_ASYNC_REMOVE 0x02 -#define SPA_ASYNC_PROBE 0x04 -#define SPA_ASYNC_RESILVER_DONE 0x08 -#define SPA_ASYNC_RESILVER 0x10 -#define SPA_ASYNC_AUTOEXPAND 0x20 -#define SPA_ASYNC_REMOVE_DONE 0x40 -#define SPA_ASYNC_REMOVE_STOP 0x80 -#define SPA_ASYNC_INITIALIZE_RESTART 0x100 +#define SPA_ASYNC_CONFIG_UPDATE 0x01 +#define SPA_ASYNC_REMOVE 0x02 +#define SPA_ASYNC_PROBE 0x04 +#define SPA_ASYNC_RESILVER_DONE 0x08 +#define SPA_ASYNC_RESILVER 0x10 +#define SPA_ASYNC_AUTOEXPAND 0x20 +#define SPA_ASYNC_REMOVE_DONE 0x40 +#define SPA_ASYNC_REMOVE_STOP 0x80 +#define SPA_ASYNC_INITIALIZE_RESTART 0x100 +#define SPA_ASYNC_TRIM_RESTART 0x200 +#define SPA_ASYNC_AUTOTRIM_RESTART 0x400 /* * Controls the behavior of spa_vdev_remove(). @@ -790,6 +810,8 @@ extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); extern boolean_t spa_vdev_remove_active(spa_t *spa); extern int spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, nvlist_t *vdev_errlist); +extern int spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, + uint64_t rate, boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist); extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath); extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru); extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, @@ -887,6 +909,7 @@ typedef struct spa_stats { spa_history_kstat_t io_history; spa_history_list_t mmp_history; spa_history_kstat_t state; /* pool state */ + spa_history_kstat_t iostats; } spa_stats_t; typedef enum txg_state { @@ -905,6 +928,22 @@ typedef struct txg_stat { uint64_t ndirty; } txg_stat_t; +/* Assorted pool IO kstats */ +typedef struct spa_iostats { + kstat_named_t trim_extents_written; + kstat_named_t trim_bytes_written; + kstat_named_t trim_extents_skipped; + kstat_named_t trim_bytes_skipped; + kstat_named_t trim_extents_failed; + kstat_named_t trim_bytes_failed; + kstat_named_t autotrim_extents_written; + kstat_named_t autotrim_bytes_written; + kstat_named_t autotrim_extents_skipped; + kstat_named_t autotrim_bytes_skipped; + kstat_named_t autotrim_extents_failed; + kstat_named_t autotrim_bytes_failed; +} spa_iostats_t; + extern void spa_stats_init(spa_t *spa); extern void spa_stats_destroy(spa_t *spa); extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, @@ -922,6 +961,10 @@ extern int spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error, extern void spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id, int error); +extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type, + uint64_t extents_written, uint64_t bytes_written, + uint64_t extents_skipped, uint64_t bytes_skipped, + uint64_t extents_failed, uint64_t bytes_failed); /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); @@ -1005,6 +1048,7 @@ extern objset_t *spa_meta_objset(spa_t *spa); extern uint64_t spa_deadman_synctime(spa_t *spa); extern uint64_t spa_deadman_ziotime(spa_t *spa); extern uint64_t spa_dirty_data(spa_t *spa); +extern spa_autotrim_t spa_get_autotrim(spa_t *spa); /* Miscellaneous support routines */ extern void spa_load_failed(spa_t *spa, const char *fmt, ...); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index c3aaad611..66032d9aa 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -378,6 +378,7 @@ struct spa { uint64_t spa_deadman_ziotime; /* deadman zio expiration */ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ + uint64_t spa_autotrim; /* automatic background trim? */ uint64_t spa_errata; /* errata issues detected */ spa_stats_t spa_stats; /* assorted spa statistics */ spa_keystore_t spa_keystore; /* loaded crypto keys */ diff --git a/include/sys/sysevent/eventdefs.h b/include/sys/sysevent/eventdefs.h index aa13bd505..2067b355a 100644 --- a/include/sys/sysevent/eventdefs.h +++ b/include/sys/sysevent/eventdefs.h @@ -118,6 +118,11 @@ extern "C" { #define ESC_ZFS_BOOTFS_VDEV_ATTACH "bootfs_vdev_attach" #define ESC_ZFS_POOL_REGUID "pool_reguid" #define ESC_ZFS_HISTORY_EVENT "history_event" +#define ESC_ZFS_TRIM_START "trim_start" +#define ESC_ZFS_TRIM_FINISH "trim_finish" +#define ESC_ZFS_TRIM_CANCEL "trim_cancel" +#define ESC_ZFS_TRIM_RESUME "trim_resume" +#define ESC_ZFS_TRIM_SUSPEND "trim_suspend" /* * datalink subclass definitions. diff --git a/include/sys/txg.h b/include/sys/txg.h index ed0e7297c..760d5208b 100644 --- a/include/sys/txg.h +++ b/include/sys/txg.h @@ -90,10 +90,11 @@ extern void txg_wait_synced(struct dsl_pool *dp, uint64_t txg); /* * Wait until the given transaction group, or one after it, is * the open transaction group. Try to make this happen as soon - * as possible (eg. kick off any necessary syncs immediately). - * If txg == 0, wait for the next open txg. + * as possible (eg. kick off any necessary syncs immediately) when + * should_quiesce is set. If txg == 0, wait for the next open txg. */ -extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg); +extern void txg_wait_open(struct dsl_pool *dp, uint64_t txg, + boolean_t should_quiesce); /* * Returns TRUE if we are "backed up" waiting for the syncing diff --git a/include/sys/vdev.h b/include/sys/vdev.h index 2091892b2..67ca0d116 100644 --- a/include/sys/vdev.h +++ b/include/sys/vdev.h @@ -95,6 +95,8 @@ extern void vdev_metaslab_set_size(vdev_t *); extern void vdev_expand(vdev_t *vd, uint64_t txg); extern void vdev_split(vdev_t *vd); extern void vdev_deadman(vdev_t *vd, char *tag); +extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs, + range_seg_t *physical_rs); extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx); extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index c115a5e10..f6f7bbb4b 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -145,6 +145,7 @@ struct vdev_queue { avl_tree_t vq_active_tree; avl_tree_t vq_read_offset_tree; avl_tree_t vq_write_offset_tree; + avl_tree_t vq_trim_offset_tree; uint64_t vq_last_offset; hrtime_t vq_io_complete_ts; /* time last i/o completed */ hrtime_t vq_io_delta_ts; @@ -260,6 +261,7 @@ struct vdev { /* pool checkpoint related */ space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */ + /* Initialize related */ boolean_t vdev_initialize_exit_wanted; vdev_initializing_state_t vdev_initialize_state; list_node_t vdev_initialize_node; @@ -274,10 +276,34 @@ struct vdev { uint64_t vdev_initialize_bytes_done; time_t vdev_initialize_action_time; /* start and end time */ - /* for limiting outstanding I/Os */ + /* TRIM related */ + boolean_t vdev_trim_exit_wanted; + boolean_t vdev_autotrim_exit_wanted; + vdev_trim_state_t vdev_trim_state; + list_node_t vdev_trim_node; + kmutex_t vdev_autotrim_lock; + kcondvar_t vdev_autotrim_cv; + kthread_t *vdev_autotrim_thread; + /* Protects vdev_trim_thread and vdev_trim_state. */ + kmutex_t vdev_trim_lock; + kcondvar_t vdev_trim_cv; + kthread_t *vdev_trim_thread; + uint64_t vdev_trim_offset[TXG_SIZE]; + uint64_t vdev_trim_last_offset; + uint64_t vdev_trim_bytes_est; + uint64_t vdev_trim_bytes_done; + uint64_t vdev_trim_rate; /* requested rate (bytes/sec) */ + uint64_t vdev_trim_partial; /* requested partial TRIM */ + uint64_t vdev_trim_secure; /* requested secure TRIM */ + time_t vdev_trim_action_time; /* start and end time */ + + /* for limiting outstanding I/Os (initialize and TRIM) */ kmutex_t vdev_initialize_io_lock; kcondvar_t vdev_initialize_io_cv; uint64_t vdev_initialize_inflight; + kmutex_t vdev_trim_io_lock; + kcondvar_t vdev_trim_io_cv; + uint64_t vdev_trim_inflight[2]; /* * Values stored in the config for an indirect or removing vdev. @@ -343,6 +369,8 @@ struct vdev { uint64_t vdev_not_present; /* not present during import */ uint64_t vdev_unspare; /* unspare when resilvering done */ boolean_t vdev_nowritecache; /* true if flushwritecache failed */ + boolean_t vdev_has_trim; /* TRIM is supported */ + boolean_t vdev_has_securetrim; /* secure TRIM is supported */ boolean_t vdev_checkremove; /* temporary online test */ boolean_t vdev_forcefault; /* force online fault */ boolean_t vdev_splitting; /* split or repair in progress */ diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h index 319fb9bc0..81d39ebeb 100644 --- a/include/sys/vdev_initialize.h +++ b/include/sys/vdev_initialize.h @@ -39,8 +39,6 @@ extern void vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state); extern void vdev_initialize_stop_wait(spa_t *spa, list_t *vd_list); extern void vdev_initialize_restart(vdev_t *vd); -extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs, - range_seg_t *physical_rs); #ifdef __cplusplus } diff --git a/include/sys/vdev_trim.h b/include/sys/vdev_trim.h new file mode 100644 index 000000000..1e5401766 --- /dev/null +++ b/include/sys/vdev_trim.h @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2019 Lawrence Livermore National Security, LLC. + */ + +#ifndef _SYS_VDEV_TRIM_H +#define _SYS_VDEV_TRIM_H + +#include <sys/spa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern unsigned int zfs_trim_metaslab_skip; + +extern void vdev_trim(vdev_t *vd, uint64_t rate, boolean_t partial, + boolean_t secure); +extern void vdev_trim_stop(vdev_t *vd, vdev_trim_state_t tgt, list_t *vd_list); +extern void vdev_trim_stop_all(vdev_t *vd, vdev_trim_state_t tgt_state); +extern void vdev_trim_stop_wait(spa_t *spa, list_t *vd_list); +extern void vdev_trim_restart(vdev_t *vd); +extern void vdev_autotrim(spa_t *spa); +extern void vdev_autotrim_stop_all(spa_t *spa); +extern void vdev_autotrim_stop_wait(vdev_t *vd); +extern void vdev_autotrim_restart(spa_t *spa); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VDEV_TRIM_H */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 260b8a458..87ddde30a 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -579,6 +579,8 @@ typedef struct vsecattr { #define CRCREAT 0 +#define F_FREESP 11 + extern int fop_getattr(vnode_t *vp, vattr_t *vap); #define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp) @@ -587,6 +589,16 @@ extern int fop_getattr(vnode_t *vp, vattr_t *vap); #define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) +#if defined(HAVE_FILE_FALLOCATE) && \ + defined(FALLOC_FL_PUNCH_HOLE) && \ + defined(FALLOC_FL_KEEP_SIZE) +#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) \ + fallocate((vp)->v_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, \ + (flck)->l_start, (flck)->l_len) +#else +#define VOP_SPACE(vp, cmd, flck, fl, off, cr, ct) (0) +#endif + #define VN_RELE(vp) vn_close(vp) extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h index 7564ae0e4..7968a01cd 100644 --- a/include/sys/zfs_debug.h +++ b/include/sys/zfs_debug.h @@ -54,6 +54,7 @@ extern int zfs_dbgmsg_enable; #define ZFS_DEBUG_METASLAB_VERIFY (1 << 8) #define ZFS_DEBUG_SET_ERROR (1 << 9) #define ZFS_DEBUG_INDIRECT_REMAP (1 << 10) +#define ZFS_DEBUG_TRIM (1 << 11) extern void __zfs_dbgmsg(char *buf); extern void __dprintf(boolean_t dprint, const char *file, const char *func, diff --git a/include/sys/zio.h b/include/sys/zio.h index 4b7ad3e22..e69bf9208 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -416,6 +416,14 @@ typedef zio_t *zio_pipe_stage_t(zio_t *zio); #define ZIO_REEXECUTE_NOW 0x01 #define ZIO_REEXECUTE_SUSPEND 0x02 +/* + * The io_trim flags are used to specify the type of TRIM to perform. They + * only apply to ZIO_TYPE_TRIM zios are distinct from io_flags. + */ +enum trim_flag { + ZIO_TRIM_SECURE = 1 << 0, +}; + typedef struct zio_alloc_list { list_t zal_list; uint64_t zal_size; @@ -434,6 +442,7 @@ struct zio { zio_prop_t io_prop; zio_type_t io_type; enum zio_child io_child_type; + enum trim_flag io_trim_flags; int io_cmd; zio_priority_t io_priority; uint8_t io_reexecute; @@ -549,6 +558,10 @@ extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, zio_done_func_t *done, void *private, enum zio_flag flags); +extern zio_t *zio_trim(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, + zio_done_func_t *done, void *private, zio_priority_t priority, + enum zio_flag flags, enum trim_flag trim_flags); + extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, struct abd *data, int checksum, zio_done_func_t *done, void *private, zio_priority_t priority, diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 344048c6a..fbbe06eb0 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -250,6 +250,11 @@ enum zio_stage { ZIO_STAGE_VDEV_IO_START | \ ZIO_STAGE_VDEV_IO_ASSESS) +#define ZIO_TRIM_PIPELINE \ + (ZIO_INTERLOCK_STAGES | \ + ZIO_STAGE_ISSUE_ASYNC | \ + ZIO_VDEV_IO_STAGES) + #define ZIO_BLOCKING_STAGES \ (ZIO_STAGE_DVA_ALLOCATE | \ ZIO_STAGE_DVA_CLAIM | \ diff --git a/include/sys/zio_priority.h b/include/sys/zio_priority.h index d8e6a1745..0b422904e 100644 --- a/include/sys/zio_priority.h +++ b/include/sys/zio_priority.h @@ -30,6 +30,7 @@ typedef enum zio_priority { ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */ ZIO_PRIORITY_REMOVAL, /* reads/writes for vdev removal */ ZIO_PRIORITY_INITIALIZING, /* initializing I/O */ + ZIO_PRIORITY_TRIM, /* trim I/O (discard) */ ZIO_PRIORITY_NUM_QUEUEABLE, ZIO_PRIORITY_NOW, /* non-queued i/os (e.g. free) */ } zio_priority_t; |