author     Kevin Jin <[email protected]>    2021-07-20 11:40:24 -0400
committer  GitHub <[email protected]>       2021-07-20 09:40:24 -0600
commit     a7bd20e309a4b45b18b1da8e379f5826debe4870 (patch)
tree       269f2d997e22982e7399249bfeea88ee90c01d2e /module
parent     8172df643b6cdc7fe233b18c8d8e9b29455ae9f1 (diff)
Add Module Parameter Regarding Log Size Limit
* Add Module Parameters Regarding Log Size Limit

zfs_wrlog_data_max
The upper limit of TX_WRITE log data. Once it is reached, write
operations are blocked until log data is cleared out after txg sync.
Only TX_WRITE logs with WR_COPIED or WR_NEED_COPY are counted.

Reviewed-by: Prakash Surya <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: jxdking <[email protected]>
Closes #12284
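
For readers skimming the diff below, the accounting has three moves: each
TX_WRITE's log bytes are charged to its txg and to a pool-wide total,
transaction assignment fails fast while the total exceeds zfs_wrlog_data_max,
and a txg's charge is released once it syncs. The following standalone sketch
models that lifecycle with plain counters standing in for the kernel's
aggsum_t; the model_* names and the 8 GiB / 9 GiB figures are invented purely
for illustration.

        /* Userspace model of the wrlog accounting; not kernel code. */
        #include <stdint.h>
        #include <stdio.h>

        #define TXG_SIZE 4
        #define TXG_MASK (TXG_SIZE - 1)

        static uint64_t wrlog_pertxg[TXG_SIZE];
        static uint64_t wrlog_total;
        static uint64_t wrlog_data_max = 8ULL << 30;    /* e.g. an 8 GiB cap */

        /* Models dsl_pool_wrlog_count(): charge a TX_WRITE's log bytes. */
        static void
        model_wrlog_count(uint64_t size, uint64_t txg)
        {
                wrlog_pertxg[txg & TXG_MASK] += size;
                wrlog_total += size;
        }

        /* Models dsl_pool_wrlog_over_max(): should tx assignment fail? */
        static int
        model_wrlog_over_max(void)
        {
                return (wrlog_total > wrlog_data_max);
        }

        /* Models dsl_pool_wrlog_clear(): a txg synced; release its charge. */
        static void
        model_wrlog_clear(uint64_t txg)
        {
                wrlog_total -= wrlog_pertxg[txg & TXG_MASK];
                wrlog_pertxg[txg & TXG_MASK] = 0;
        }

        int
        main(void)
        {
                model_wrlog_count(9ULL << 30, 42);      /* 9 GiB in txg 42 */
                printf("throttle? %d\n", model_wrlog_over_max());  /* 1 */
                model_wrlog_clear(42);          /* txg 42 finishes syncing */
                printf("throttle? %d\n", model_wrlog_over_max());  /* 0 */
                return (0);
        }
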
Diffstat (limited to 'module')
-rw-r--r--  module/zfs/arc.c       12
-rw-r--r--  module/zfs/dmu_tx.c     7
-rw-r--r--  module/zfs/dsl_pool.c  57
-rw-r--r--  module/zfs/zfs_log.c    5
-rw-r--r--  module/zfs/zvol.c       7
5 files changed, 86 insertions, 2 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index bf76c8523..02663e8e2 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -7980,6 +7980,18 @@ arc_init(void)
        zfs_dirty_data_max = MIN(zfs_dirty_data_max,
            zfs_dirty_data_max_max);
        }
+
+       if (zfs_wrlog_data_max == 0) {
+
+               /*
+                * dp_wrlog_total is reduced for each txg at the end of
+                * spa_sync(). However, dp_dirty_total is reduced every time
+                * a block is written out. Thus under normal operation,
+                * dp_wrlog_total could grow 2 times as big as
+                * zfs_dirty_data_max.
+                */
+               zfs_wrlog_data_max = zfs_dirty_data_max * 2;
+       }
}
void
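
The comment above is the whole sizing argument: per-txg wrlog charges are only
released at the end of spa_sync(), while dirty data drains continuously as
blocks are written, so the outstanding log can legitimately reach about twice
the dirty limit. As a worked instance (the 4 GiB figure is an assumed example,
not the shipped default):

        /* Illustrative arithmetic only; 4 GiB is an assumed example value. */
        static unsigned long
        example_default_wrlog_max(void)
        {
                unsigned long dirty_max = 4UL << 30;    /* zfs_dirty_data_max */
                return (dirty_max * 2);         /* derived default: 8 GiB */
        }
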
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index 0beb983f9..5fa516866 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -53,6 +53,7 @@ dmu_tx_stats_t dmu_tx_stats = {
{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_over_max", KSTAT_DATA_UINT64 },
+ { "dmu_tx_wrlog_over_max", KSTAT_DATA_UINT64 },
{ "dmu_tx_dirty_frees_delay", KSTAT_DATA_UINT64 },
{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
};
@@ -885,6 +886,12 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
        }

        if (!tx->tx_dirty_delayed &&
+           dsl_pool_wrlog_over_max(tx->tx_pool)) {
+               DMU_TX_STAT_BUMP(dmu_tx_wrlog_over_max);
+               return (SET_ERROR(ERESTART));
+       }
+
+       if (!tx->tx_dirty_delayed &&
            dsl_pool_need_dirty_delay(tx->tx_pool)) {
                tx->tx_wait_dirty = B_TRUE;
                DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
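
Note the ordering in this hunk: the wrlog check runs before the dirty-data
delay check and returns ERESTART without setting tx_wait_dirty, so an over-cap
pool sends the caller straight to its usual wait-and-retry path rather than
through the dirty throttle's sleep. A userspace caricature of that decision
order (every name here is invented for the sketch):

        #include <errno.h>
        #include <stdbool.h>

        /* Caricature of the new decision order in dmu_tx_try_assign(). */
        static int
        try_assign(bool dirty_delayed, bool wrlog_over,
            bool dirty_delay_needed, bool *wait_dirty)
        {
                if (!dirty_delayed && wrlog_over)
                        return (ERESTART);  /* hard stop: wrlog cap exceeded */

                if (!dirty_delayed && dirty_delay_needed) {
                        *wait_dirty = true;     /* dirty throttle path */
                        return (ERESTART);
                }

                return (0);             /* assignment may proceed */
        }
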
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index 72f4b86d7..1350f1329 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -105,6 +105,14 @@ int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;
+/*
+ * zfs_wrlog_data_max, the upper limit of TX_WRITE log data.
+ * Once it is reached, write operation is blocked,
+ * until log data is cleared out after txg sync.
+ * It only counts TX_WRITE log with WR_COPIED or WR_NEED_COPY.
+ */
+unsigned long zfs_wrlog_data_max = 0;
+
/*
 * If there's at least this much dirty data (as a percentage of
 * zfs_dirty_data_max), push out a txg. This should be less than
 * zfs_vdev_async_write_active_min_dirty_percent.
 */
@@ -220,6 +228,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
+ aggsum_init(&dp->dp_wrlog_total, 0);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
+ }
+
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
TASKQ_THREADS_CPU_PCT);
@@ -416,6 +429,14 @@ dsl_pool_close(dsl_pool_t *dp)
        rrw_destroy(&dp->dp_config_rwlock);
        mutex_destroy(&dp->dp_lock);
        cv_destroy(&dp->dp_spaceavail_cv);
+
+       ASSERT0(aggsum_value(&dp->dp_wrlog_total));
+       aggsum_fini(&dp->dp_wrlog_total);
+       for (int i = 0; i < TXG_SIZE; i++) {
+               ASSERT0(aggsum_value(&dp->dp_wrlog_pertxg[i]));
+               aggsum_fini(&dp->dp_wrlog_pertxg[i]);
+       }
+
        taskq_destroy(dp->dp_unlinked_drain_taskq);
        taskq_destroy(dp->dp_zrele_taskq);
        if (dp->dp_blkstats != NULL) {
@@ -592,6 +613,36 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
        cv_signal(&dp->dp_spaceavail_cv);
}

+void
+dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
+{
+       ASSERT3S(size, >=, 0);
+
+       aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], size);
+       aggsum_add(&dp->dp_wrlog_total, size);
+
+       /* Choose a value slightly bigger than min dirty sync bytes */
+       uint64_t sync_min =
+           zfs_dirty_data_max * (zfs_dirty_data_sync_percent + 10) / 100;
+       if (aggsum_compare(&dp->dp_wrlog_pertxg[txg & TXG_MASK], sync_min) > 0)
+               txg_kick(dp, txg);
+}
+
+boolean_t
+dsl_pool_wrlog_over_max(dsl_pool_t *dp)
+{
+       return (aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max) > 0);
+}
+
+static void
+dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
+{
+       int64_t delta;
+       delta = -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[txg & TXG_MASK]);
+       aggsum_add(&dp->dp_wrlog_pertxg[txg & TXG_MASK], delta);
+       aggsum_add(&dp->dp_wrlog_total, delta);
+}
+
#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
@@ -816,6 +867,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
                ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
                dmu_buf_rele(ds->ds_dbuf, zilog);
        }
+
+       dsl_pool_wrlog_clear(dp, txg);
+
        ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}
@@ -1405,6 +1459,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
        "Determines the dirty space limit");

+ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
+       "The size limit of write-transaction zil log data");
+
/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c
index 30d5c4821..0f330ec93 100644
--- a/module/zfs/zfs_log.c
+++ b/module/zfs/zfs_log.c
@@ -541,6 +541,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
        itx_wr_state_t write_state;
        uintptr_t fsync_cnt;
        uint64_t gen = 0;
+       ssize_t size = resid;

        if (zil_replaying(zilog, tx) || zp->z_unlinked ||
            zfs_xattr_owner_unlinked(zp)) {
@@ -626,6 +627,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
                off += len;
                resid -= len;
        }
+
+       if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+               dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
+       }
}
/*
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index c4ecf14df..b7bc587cf 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -84,10 +84,8 @@
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>
-
#include <sys/zvol_impl.h>
-
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
@@ -577,6 +575,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
        uint32_t blocksize = zv->zv_volblocksize;
        zilog_t *zilog = zv->zv_zilog;
        itx_wr_state_t write_state;
+       uint64_t sz = size;

        if (zil_replaying(zilog, tx))
                return;
@@ -628,6 +627,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
                offset += len;
                size -= len;
        }
+
+       if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
+               dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
+       }
}
/*
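
Both call sites above, zfs_log_write() and zvol_log_write(), charge only
WR_COPIED and WR_NEED_COPY records: in those states the write's payload is
held in, or will be copied into, the in-memory itx, while a WR_INDIRECT record
points at data the DMU writes out-of-band to its final location, so it adds no
comparable log memory pressure. A compact restatement of the predicate, using
a stand-in enum rather than the kernel's itx_wr_state_t:

        /* Stand-in for the kernel's itx_wr_state_t; illustration only. */
        typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } wr_state_t;

        /* Only states whose payload lives in the in-memory log are charged. */
        static int
        wrlog_charged(wr_state_t ws)
        {
                return (ws == WR_COPIED || ws == WR_NEED_COPY);
        }
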