aboutsummaryrefslogtreecommitdiffstats
path: root/include/sys
diff options
context:
space:
mode:
Diffstat (limited to 'include/sys')
-rw-r--r--include/sys/Makefile.am3
-rw-r--r--include/sys/dmu.h1
-rw-r--r--include/sys/fs/zfs.h2
-rw-r--r--include/sys/metaslab.h13
-rw-r--r--include/sys/metaslab_impl.h28
-rw-r--r--include/sys/range_tree.h8
-rw-r--r--include/sys/spa.h5
-rw-r--r--include/sys/spa_impl.h11
-rw-r--r--include/sys/spa_log_spacemap.h79
-rw-r--r--include/sys/space_map.h9
-rw-r--r--include/sys/vdev_impl.h4
-rw-r--r--include/sys/zfs_debug.h3
12 files changed, 155 insertions, 11 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index af45eb3bc..c5d64f9fd 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -13,7 +13,6 @@ COMMON_H = \
$(top_srcdir)/include/sys/bptree.h \
$(top_srcdir)/include/sys/bqueue.h \
$(top_srcdir)/include/sys/cityhash.h \
- $(top_srcdir)/include/sys/spa_checkpoint.h \
$(top_srcdir)/include/sys/dataset_kstats.h \
$(top_srcdir)/include/sys/dbuf.h \
$(top_srcdir)/include/sys/ddt.h \
@@ -63,6 +62,8 @@ COMMON_H = \
$(top_srcdir)/include/sys/sha2.h \
$(top_srcdir)/include/sys/skein.h \
$(top_srcdir)/include/sys/spa_boot.h \
+ $(top_srcdir)/include/sys/spa_checkpoint.h \
+ $(top_srcdir)/include/sys/spa_log_spacemap.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 3f7350554..65da78eb5 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -382,6 +382,7 @@ typedef struct dmu_buf {
#define DMU_POOL_OBSOLETE_BPOBJ "com.delphix:obsolete_bpobj"
#define DMU_POOL_CONDENSING_INDIRECT "com.delphix:condensing_indirect"
#define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint"
+#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap"
/*
* Allocate an object from this objset. The range of object numbers
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index a9dd8e466..2cd133b1f 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -770,6 +770,8 @@ typedef struct zpool_load_policy {
"com.delphix:obsolete_counts_are_precise"
#define VDEV_TOP_ZAP_POOL_CHECKPOINT_SM \
"com.delphix:pool_checkpoint_sm"
+#define VDEV_TOP_ZAP_MS_UNFLUSHED_PHYS_TXGS \
+ "com.delphix:ms_unflushed_phys_txgs"
#define VDEV_TOP_ZAP_ALLOCATION_BIAS \
"org.zfsonlinux:allocation_bias"
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index 330902529..973f15d75 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -49,9 +49,17 @@ int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
metaslab_t **);
void metaslab_fini(metaslab_t *);
+void metaslab_set_unflushed_txg(metaslab_t *, uint64_t, dmu_tx_t *);
+void metaslab_set_estimated_condensed_size(metaslab_t *, uint64_t, dmu_tx_t *);
+uint64_t metaslab_unflushed_txg(metaslab_t *);
+uint64_t metaslab_estimated_condensed_size(metaslab_t *);
+int metaslab_sort_by_flushed(const void *, const void *);
+uint64_t metaslab_unflushed_changes_memused(metaslab_t *);
+
int metaslab_load(metaslab_t *);
void metaslab_potentially_unload(metaslab_t *, uint64_t);
void metaslab_unload(metaslab_t *);
+boolean_t metaslab_flush(metaslab_t *, dmu_tx_t *);
uint64_t metaslab_allocated_space(metaslab_t *);
@@ -108,6 +116,9 @@ uint64_t metaslab_class_get_space(metaslab_class_t *);
uint64_t metaslab_class_get_dspace(metaslab_class_t *);
uint64_t metaslab_class_get_deferred(metaslab_class_t *);
+void metaslab_space_update(vdev_t *, metaslab_class_t *,
+ int64_t, int64_t, int64_t);
+
metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *, int);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
@@ -124,6 +135,8 @@ void metaslab_recalculate_weight_and_sort(metaslab_t *);
void metaslab_disable(metaslab_t *);
void metaslab_enable(metaslab_t *, boolean_t);
+extern int metaslab_debug_load;
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index ca1104c14..29bc8cd5e 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@@ -357,7 +357,7 @@ struct metaslab {
* write to metaslab data on-disk (i.e flushing entries to
* the metaslab's space map). It helps coordinate readers of
* the metaslab's space map [see spa_vdev_remove_thread()]
- * with writers [see metaslab_sync()].
+ * with writers [see metaslab_sync() or metaslab_flush()].
*
* Note that metaslab_load(), even though a reader, uses
* a completely different mechanism to deal with the reading
@@ -401,7 +401,6 @@ struct metaslab {
boolean_t ms_condensing; /* condensing? */
boolean_t ms_condense_wanted;
- uint64_t ms_condense_checked_txg;
/*
* The number of consumers which have disabled the metaslab.
@@ -414,6 +413,8 @@ struct metaslab {
*/
boolean_t ms_loaded;
boolean_t ms_loading;
+ kcondvar_t ms_flush_cv;
+ boolean_t ms_flushing;
/*
* The following histograms count entries that are in the
@@ -499,6 +500,22 @@ struct metaslab {
metaslab_group_t *ms_group; /* metaslab group */
avl_node_t ms_group_node; /* node in metaslab group tree */
txg_node_t ms_txg_node; /* per-txg dirty metaslab links */
+ avl_node_t ms_spa_txg_node; /* node in spa_metaslabs_by_txg */
+
+ /*
+ * Allocs and frees that are committed to the vdev log spacemap but
+ * not yet to this metaslab's spacemap.
+ */
+ range_tree_t *ms_unflushed_allocs;
+ range_tree_t *ms_unflushed_frees;
+
+ /*
+ * We have flushed entries up to but not including this TXG. In
+ * other words, all changes from this TXG and onward should not
+ * be in this metaslab's space map and must be read from the
+ * log space maps.
+ */
+ uint64_t ms_unflushed_txg;
/* updated every time we are done syncing the metaslab's space map */
uint64_t ms_synced_length;
@@ -506,6 +523,11 @@ struct metaslab {
boolean_t ms_new;
};
+typedef struct metaslab_unflushed_phys {
+ /* on-disk counterpart of ms_unflushed_txg */
+ uint64_t msp_unflushed_txg;
+} metaslab_unflushed_phys_t;
+
#ifdef __cplusplus
}
#endif
diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h
index ae1a0c323..fce1df68d 100644
--- a/include/sys/range_tree.h
+++ b/include/sys/range_tree.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
*/
#ifndef _SYS_RANGE_TREE_H
@@ -95,6 +95,7 @@ range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
uint64_t newstart, uint64_t newsize);
uint64_t range_tree_space(range_tree_t *rt);
+uint64_t range_tree_numsegs(range_tree_t *rt);
boolean_t range_tree_is_empty(range_tree_t *rt);
void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
void range_tree_stat_verify(range_tree_t *rt);
@@ -112,6 +113,11 @@ void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
range_seg_t *range_tree_first(range_tree_t *rt);
+void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
+ range_tree_t *removefrom, range_tree_t *addto);
+void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
+ range_tree_t *addto);
+
void rt_avl_create(range_tree_t *rt, void *arg);
void rt_avl_destroy(range_tree_t *rt, void *arg);
void rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg);
diff --git a/include/sys/spa.h b/include/sys/spa.h
index a7e4d154f..50ca15be5 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -42,6 +42,7 @@
#include <sys/fs/zfs.h>
#include <sys/spa_checksum.h>
#include <sys/dmu.h>
+#include <sys/space_map.h>
#ifdef __cplusplus
extern "C" {
@@ -1075,6 +1076,7 @@ extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
+extern space_map_t *spa_syncing_log_sm(spa_t *spa);
extern uint64_t spa_deadman_synctime(spa_t *spa);
extern uint64_t spa_deadman_ziotime(spa_t *spa);
extern uint64_t spa_dirty_data(spa_t *spa);
@@ -1125,6 +1127,7 @@ extern boolean_t spa_trust_config(spa_t *spa);
extern uint64_t spa_missing_tvds_allowed(spa_t *spa);
extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing);
extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa);
+extern uint64_t spa_total_metaslabs(spa_t *spa);
extern boolean_t spa_multihost(spa_t *spa);
extern unsigned long spa_get_hostid(void);
extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index d49b970c9..ff69286cc 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
@@ -34,6 +34,7 @@
#include <sys/spa.h>
#include <sys/spa_checkpoint.h>
+#include <sys/spa_log_spacemap.h>
#include <sys/vdev.h>
#include <sys/vdev_removal.h>
#include <sys/metaslab.h>
@@ -307,6 +308,14 @@ struct spa {
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
zthr_t *spa_checkpoint_discard_zthr;
+ space_map_t *spa_syncing_log_sm; /* current log space map */
+ avl_tree_t spa_sm_logs_by_txg;
+ kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */
+ avl_tree_t spa_metaslabs_by_flushed;
+ spa_unflushed_stats_t spa_unflushed_stats;
+ list_t spa_log_summary;
+ uint64_t spa_log_flushall_txg;
+
char *spa_root; /* alternate root directory */
uint64_t spa_ena; /* spa-wide ereport ENA */
int spa_last_open_failed; /* error if last open failed */
diff --git a/include/sys/spa_log_spacemap.h b/include/sys/spa_log_spacemap.h
new file mode 100644
index 000000000..b2ed77fac
--- /dev/null
+++ b/include/sys/spa_log_spacemap.h
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_SPA_LOG_SPACEMAP_H
+#define _SYS_SPA_LOG_SPACEMAP_H
+
+#include <sys/avl.h>
+
+typedef struct log_summary_entry {
+ uint64_t lse_start; /* start TXG */
+ uint64_t lse_mscount; /* # of metaslabs needed to be flushed */
+ uint64_t lse_blkcount; /* blocks held by this entry */
+ list_node_t lse_node;
+} log_summary_entry_t;
+
+typedef struct spa_unflushed_stats {
+ /* used for memory heuristic */
+ uint64_t sus_memused; /* current memory used for unflushed trees */
+
+ /* used for block heuristic */
+ uint64_t sus_blocklimit; /* max # of log blocks allowed */
+ uint64_t sus_nblocks; /* # of blocks in log space maps currently */
+} spa_unflushed_stats_t;
+
+typedef struct spa_log_sm {
+ uint64_t sls_sm_obj; /* space map object ID */
+ uint64_t sls_txg; /* txg logged on the space map */
+ uint64_t sls_nblocks; /* number of blocks in this log */
+ uint64_t sls_mscount; /* # of metaslabs flushed in the log's txg */
+ avl_node_t sls_node; /* node in spa_sm_logs_by_txg */
+} spa_log_sm_t;
+
+int spa_ld_log_spacemaps(spa_t *);
+
+void spa_generate_syncing_log_sm(spa_t *, dmu_tx_t *);
+void spa_flush_metaslabs(spa_t *, dmu_tx_t *);
+void spa_sync_close_syncing_log_sm(spa_t *);
+
+void spa_cleanup_old_sm_logs(spa_t *, dmu_tx_t *);
+
+uint64_t spa_log_sm_blocklimit(spa_t *);
+void spa_log_sm_set_blocklimit(spa_t *);
+uint64_t spa_log_sm_nblocks(spa_t *);
+uint64_t spa_log_sm_memused(spa_t *);
+
+void spa_log_sm_decrement_mscount(spa_t *, uint64_t);
+void spa_log_sm_increment_current_mscount(spa_t *);
+
+void spa_log_summary_add_flushed_metaslab(spa_t *);
+void spa_log_summary_decrement_mscount(spa_t *, uint64_t);
+void spa_log_summary_decrement_blkcount(spa_t *, uint64_t);
+
+boolean_t spa_flush_all_logs_requested(spa_t *);
+
+extern int zfs_keep_log_spacemaps_at_export;
+
+#endif /* _SYS_SPA_LOG_SPACEMAP_H */
diff --git a/include/sys/space_map.h b/include/sys/space_map.h
index 7731a352f..81f56076a 100644
--- a/include/sys/space_map.h
+++ b/include/sys/space_map.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
*/
#ifndef _SYS_SPACE_MAP_H
@@ -72,6 +72,11 @@ typedef struct space_map_phys {
* bucket, smp_histogram[i], contains the number of free regions
* whose size is:
* 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1)
+ *
+ * Note that, if log space map feature is enabled, histograms of
+ * space maps that belong to metaslabs will take into account any
+ * unflushed changes for their metaslabs, even though the actual
+ * space map doesn't have entries for these changes.
*/
uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
} space_map_phys_t;
@@ -209,6 +214,8 @@ void space_map_histogram_add(space_map_t *sm, range_tree_t *rt,
uint64_t space_map_object(space_map_t *sm);
int64_t space_map_allocated(space_map_t *sm);
uint64_t space_map_length(space_map_t *sm);
+uint64_t space_map_entries(space_map_t *sm, range_tree_t *rt);
+uint64_t space_map_nblocks(space_map_t *sm);
void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
uint64_t vdev_id, dmu_tx_t *tx);
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index f6f7bbb4b..c179191e3 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2019 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
@@ -535,7 +535,7 @@ extern void vdev_set_min_asize(vdev_t *vd);
/*
* Global variables
*/
-extern int vdev_standard_sm_blksz;
+extern int zfs_vdev_standard_sm_blksz;
/* zdb uses this tunable, so it must be declared here to make lint happy. */
extern int zfs_vdev_cache_size;
diff --git a/include/sys/zfs_debug.h b/include/sys/zfs_debug.h
index 7968a01cd..78d5efc8a 100644
--- a/include/sys/zfs_debug.h
+++ b/include/sys/zfs_debug.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_DEBUG_H
@@ -55,6 +55,7 @@ extern int zfs_dbgmsg_enable;
#define ZFS_DEBUG_SET_ERROR (1 << 9)
#define ZFS_DEBUG_INDIRECT_REMAP (1 << 10)
#define ZFS_DEBUG_TRIM (1 << 11)
+#define ZFS_DEBUG_LOG_SPACEMAP (1 << 12)
extern void __zfs_dbgmsg(char *buf);
extern void __dprintf(boolean_t dprint, const char *file, const char *func,