diff options
Diffstat (limited to 'include/sys')
-rw-r--r-- | include/sys/Makefile.am | 3 | ||||
-rw-r--r-- | include/sys/dmu.h | 1 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 2 | ||||
-rw-r--r-- | include/sys/metaslab.h | 13 | ||||
-rw-r--r-- | include/sys/metaslab_impl.h | 28 | ||||
-rw-r--r-- | include/sys/range_tree.h | 8 | ||||
-rw-r--r-- | include/sys/spa.h | 5 | ||||
-rw-r--r-- | include/sys/spa_impl.h | 11 | ||||
-rw-r--r-- | include/sys/spa_log_spacemap.h | 79 | ||||
-rw-r--r-- | include/sys/space_map.h | 9 | ||||
-rw-r--r-- | include/sys/vdev_impl.h | 4 | ||||
-rw-r--r-- | include/sys/zfs_debug.h | 3 |
12 files changed, 155 insertions, 11 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index af45eb3bc..c5d64f9fd 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -13,7 +13,6 @@ COMMON_H = \ $(top_srcdir)/include/sys/bptree.h \ $(top_srcdir)/include/sys/bqueue.h \ $(top_srcdir)/include/sys/cityhash.h \ - $(top_srcdir)/include/sys/spa_checkpoint.h \ $(top_srcdir)/include/sys/dataset_kstats.h \ $(top_srcdir)/include/sys/dbuf.h \ $(top_srcdir)/include/sys/ddt.h \ @@ -63,6 +62,8 @@ COMMON_H = \ $(top_srcdir)/include/sys/sha2.h \ $(top_srcdir)/include/sys/skein.h \ $(top_srcdir)/include/sys/spa_boot.h \ + $(top_srcdir)/include/sys/spa_checkpoint.h \ + $(top_srcdir)/include/sys/spa_log_spacemap.h \ $(top_srcdir)/include/sys/space_map.h \ $(top_srcdir)/include/sys/space_reftree.h \ $(top_srcdir)/include/sys/spa.h \ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 3f7350554..65da78eb5 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -382,6 +382,7 @@ typedef struct dmu_buf { #define DMU_POOL_OBSOLETE_BPOBJ "com.delphix:obsolete_bpobj" #define DMU_POOL_CONDENSING_INDIRECT "com.delphix:condensing_indirect" #define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint" +#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap" /* * Allocate an object from this objset. The range of object numbers diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index a9dd8e466..2cd133b1f 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -770,6 +770,8 @@ typedef struct zpool_load_policy { "com.delphix:obsolete_counts_are_precise" #define VDEV_TOP_ZAP_POOL_CHECKPOINT_SM \ "com.delphix:pool_checkpoint_sm" +#define VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS \ + "com.delphix:ms_unflushed_phys_txgs" #define VDEV_TOP_ZAP_ALLOCATION_BIAS \ "org.zfsonlinux:allocation_bias" diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h index 330902529..973f15d75 100644 --- a/include/sys/metaslab.h +++ b/include/sys/metaslab.h @@ -49,9 +49,17 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t, metaslab_t **); void metaslab_fini(metaslab_t *); +void metaslab_set_unflushed_txg(metaslab_t *, uint64_t, dmu_tx_t *); +void metaslab_set_estimated_condensed_size(metaslab_t *, uint64_t, dmu_tx_t *); +uint64_t metaslab_unflushed_txg(metaslab_t *); +uint64_t metaslab_estimated_condensed_size(metaslab_t *); +int metaslab_sort_by_flushed(const void *, const void *); +uint64_t metaslab_unflushed_changes_memused(metaslab_t *); + int metaslab_load(metaslab_t *); void metaslab_potentially_unload(metaslab_t *, uint64_t); void metaslab_unload(metaslab_t *); +boolean_t metaslab_flush(metaslab_t *, dmu_tx_t *); uint64_t metaslab_allocated_space(metaslab_t *); @@ -108,6 +116,9 @@ uint64_t metaslab_class_get_space(metaslab_class_t *); uint64_t metaslab_class_get_dspace(metaslab_class_t *); uint64_t metaslab_class_get_deferred(metaslab_class_t *); +void metaslab_space_update(vdev_t *, metaslab_class_t *, + int64_t, int64_t, int64_t); + metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *, int); void metaslab_group_destroy(metaslab_group_t *); void metaslab_group_activate(metaslab_group_t *); @@ -124,6 +135,8 @@ void metaslab_recalculate_weight_and_sort(metaslab_t *); void metaslab_disable(metaslab_t *); void metaslab_enable(metaslab_t *, boolean_t); +extern int metaslab_debug_load; + #ifdef __cplusplus } #endif diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index ca1104c14..29bc8cd5e 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_METASLAB_IMPL_H @@ -357,7 +357,7 @@ struct metaslab { * write to metaslab data on-disk (i.e flushing entries to * the metaslab's space map). It helps coordinate readers of * the metaslab's space map [see spa_vdev_remove_thread()] - * with writers [see metaslab_sync()]. + * with writers [see metaslab_sync() or metaslab_flush()]. * * Note that metaslab_load(), even though a reader, uses * a completely different mechanism to deal with the reading @@ -401,7 +401,6 @@ struct metaslab { boolean_t ms_condensing; /* condensing? */ boolean_t ms_condense_wanted; - uint64_t ms_condense_checked_txg; /* * The number of consumers which have disabled the metaslab. @@ -414,6 +413,8 @@ struct metaslab { */ boolean_t ms_loaded; boolean_t ms_loading; + kcondvar_t ms_flush_cv; + boolean_t ms_flushing; /* * The following histograms count entries that are in the @@ -499,6 +500,22 @@ struct metaslab { metaslab_group_t *ms_group; /* metaslab group */ avl_node_t ms_group_node; /* node in metaslab group tree */ txg_node_t ms_txg_node; /* per-txg dirty metaslab links */ + avl_node_t ms_spa_txg_node; /* node in spa_metaslabs_by_txg */ + + /* + * Allocs and frees that are committed to the vdev log spacemap but + * not yet to this metaslab's spacemap. + */ + range_tree_t *ms_unflushed_allocs; + range_tree_t *ms_unflushed_frees; + + /* + * We have flushed entries up to but not including this TXG. In + * other words, all changes from this TXG and onward should not + * be in this metaslab's space map and must be read from the + * log space maps. + */ + uint64_t ms_unflushed_txg; /* updated every time we are done syncing the metaslab's space map */ uint64_t ms_synced_length; @@ -506,6 +523,11 @@ struct metaslab { boolean_t ms_new; }; +typedef struct metaslab_unflushed_phys { + /* on-disk counterpart of ms_unflushed_txg */ + uint64_t msp_unflushed_txg; +} metaslab_unflushed_phys_t; + #ifdef __cplusplus } #endif diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h index ae1a0c323..fce1df68d 100644 --- a/include/sys/range_tree.h +++ b/include/sys/range_tree.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, 2017 by Delphix. All rights reserved. + * Copyright (c) 2013, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_RANGE_TREE_H @@ -95,6 +95,7 @@ range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size); void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs, uint64_t newstart, uint64_t newsize); uint64_t range_tree_space(range_tree_t *rt); +uint64_t range_tree_numsegs(range_tree_t *rt); boolean_t range_tree_is_empty(range_tree_t *rt); void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst); void range_tree_stat_verify(range_tree_t *rt); @@ -112,6 +113,11 @@ void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg); void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg); range_seg_t *range_tree_first(range_tree_t *rt); +void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, + range_tree_t *removefrom, range_tree_t *addto); +void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom, + range_tree_t *addto); + void rt_avl_create(range_tree_t *rt, void *arg); void rt_avl_destroy(range_tree_t *rt, void *arg); void rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg); diff --git a/include/sys/spa.h b/include/sys/spa.h index a7e4d154f..50ca15be5 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -42,6 +42,7 @@ #include <sys/fs/zfs.h> #include <sys/spa_checksum.h> #include <sys/dmu.h> +#include <sys/space_map.h> #ifdef __cplusplus extern "C" { @@ -1075,6 +1076,7 @@ extern boolean_t spa_suspended(spa_t *spa); extern uint64_t spa_bootfs(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); +extern space_map_t *spa_syncing_log_sm(spa_t *spa); extern uint64_t spa_deadman_synctime(spa_t *spa); extern uint64_t spa_deadman_ziotime(spa_t *spa); extern uint64_t spa_dirty_data(spa_t *spa); @@ -1125,6 +1127,7 @@ extern boolean_t spa_trust_config(spa_t *spa); extern uint64_t spa_missing_tvds_allowed(spa_t *spa); extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing); extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa); +extern uint64_t spa_total_metaslabs(spa_t *spa); extern boolean_t spa_multihost(spa_t *spa); extern unsigned long spa_get_hostid(void); extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index d49b970c9..ff69286cc 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -34,6 +34,7 @@ #include <sys/spa.h> #include <sys/spa_checkpoint.h> +#include <sys/spa_log_spacemap.h> #include <sys/vdev.h> #include <sys/vdev_removal.h> #include <sys/metaslab.h> @@ -307,6 +308,14 @@ struct spa { spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */ zthr_t *spa_checkpoint_discard_zthr; + space_map_t *spa_syncing_log_sm; /* current log space map */ + avl_tree_t spa_sm_logs_by_txg; + kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */ + avl_tree_t spa_metaslabs_by_flushed; + spa_unflushed_stats_t spa_unflushed_stats; + list_t spa_log_summary; + uint64_t spa_log_flushall_txg; + char *spa_root; /* alternate root directory */ uint64_t spa_ena; /* spa-wide ereport ENA */ int spa_last_open_failed; /* error if last open failed */ diff --git a/include/sys/spa_log_spacemap.h b/include/sys/spa_log_spacemap.h new file mode 100644 index 000000000..b2ed77fac --- /dev/null +++ b/include/sys/spa_log_spacemap.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, 2019 by Delphix. All rights reserved. + */ + +#ifndef _SYS_SPA_LOG_SPACEMAP_H +#define _SYS_SPA_LOG_SPACEMAP_H + +#include <sys/avl.h> + +typedef struct log_summary_entry { + uint64_t lse_start; /* start TXG */ + uint64_t lse_mscount; /* # of metaslabs needed to be flushed */ + uint64_t lse_blkcount; /* blocks held by this entry */ + list_node_t lse_node; +} log_summary_entry_t; + +typedef struct spa_unflushed_stats { + /* used for memory heuristic */ + uint64_t sus_memused; /* current memory used for unflushed trees */ + + /* used for block heuristic */ + uint64_t sus_blocklimit; /* max # of log blocks allowed */ + uint64_t sus_nblocks; /* # of blocks in log space maps currently */ +} spa_unflushed_stats_t; + +typedef struct spa_log_sm { + uint64_t sls_sm_obj; /* space map object ID */ + uint64_t sls_txg; /* txg logged on the space map */ + uint64_t sls_nblocks; /* number of blocks in this log */ + uint64_t sls_mscount; /* # of metaslabs flushed in the log's txg */ + avl_node_t sls_node; /* node in spa_sm_logs_by_txg */ +} spa_log_sm_t; + +int spa_ld_log_spacemaps(spa_t *); + +void spa_generate_syncing_log_sm(spa_t *, dmu_tx_t *); +void spa_flush_metaslabs(spa_t *, dmu_tx_t *); +void spa_sync_close_syncing_log_sm(spa_t *); + +void spa_cleanup_old_sm_logs(spa_t *, dmu_tx_t *); + +uint64_t spa_log_sm_blocklimit(spa_t *); +void spa_log_sm_set_blocklimit(spa_t *); +uint64_t spa_log_sm_nblocks(spa_t *); +uint64_t spa_log_sm_memused(spa_t *); + +void spa_log_sm_decrement_mscount(spa_t *, uint64_t); +void spa_log_sm_increment_current_mscount(spa_t *); + +void spa_log_summary_add_flushed_metaslab(spa_t *); +void spa_log_summary_decrement_mscount(spa_t *, uint64_t); +void spa_log_summary_decrement_blkcount(spa_t *, uint64_t); + +boolean_t spa_flush_all_logs_requested(spa_t *); + +extern int zfs_keep_log_spacemaps_at_export; + +#endif /* _SYS_SPA_LOG_SPACEMAP_H */ diff --git a/include/sys/space_map.h b/include/sys/space_map.h index 7731a352f..81f56076a 100644 --- a/include/sys/space_map.h +++ b/include/sys/space_map.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_SPACE_MAP_H @@ -72,6 +72,11 @@ typedef struct space_map_phys { * bucket, smp_histogram[i], contains the number of free regions * whose size is: * 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1) + * + * Note that, if log space map feature is enabled, histograms of + * space maps that belong to metaslabs will take into account any + * unflushed changes for their metaslabs, even though the actual + * space map doesn't have entries for these changes. */ uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; } space_map_phys_t; @@ -209,6 +214,8 @@ void space_map_histogram_add(space_map_t *sm, range_tree_t *rt, uint64_t space_map_object(space_map_t *sm); int64_t space_map_allocated(space_map_t *sm); uint64_t space_map_length(space_map_t *sm); +uint64_t space_map_entries(space_map_t *sm, range_tree_t *rt); +uint64_t space_map_nblocks(space_map_t *sm); void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype, uint64_t vdev_id, dmu_tx_t *tx); diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index f6f7bbb4b..c179191e3 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ @@ -535,7 +535,7 @@ extern void vdev_set_min_asize(vdev_t *vd); /* * Global variables */ -extern int vdev_standard_sm_blksz; +extern int zfs_vdev_standard_sm_blksz; /* zdb uses this tunable, so it must be declared here to make lint happy. */ extern int zfs_vdev_cache_size; diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h index 7968a01cd..78d5efc8a 100644 --- a/include/sys/zfs_debug.h +++ b/include/sys/zfs_debug.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2019 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_DEBUG_H @@ -55,6 +55,7 @@ extern int zfs_dbgmsg_enable; #define ZFS_DEBUG_SET_ERROR (1 << 9) #define ZFS_DEBUG_INDIRECT_REMAP (1 << 10) #define ZFS_DEBUG_TRIM (1 << 11) +#define ZFS_DEBUG_LOG_SPACEMAP (1 << 12) extern void __zfs_dbgmsg(char *buf); extern void __dprintf(boolean_t dprint, const char *file, const char *func, |