path: root/module/zfs
author    Giuseppe Di Natale <[email protected]>  2017-06-09 09:15:37 -0700
committer Brian Behlendorf <[email protected]>  2017-06-09 09:15:37 -0700
commit    1b7c1e5ce90ae27d9bb1f6f3616bf079c168005c (patch)
tree      c3f9172ab7cd4039ec660f1e34700eae745e6d6a /module/zfs
parent    82644107c4e7f3e899ebde18f65cbac7c604583c (diff)
OpenZFS 7578 - Fix/improve some aspects of ZIL writing
- After ZIL changes made six years ago, zil_slog_limit became partially broken because zl_itx_list_sz was not updated when async itx'es were upgraded to sync. Due to other changes from around that time, zl_itx_list_sz is no longer needed to implement the functionality, so this patch removes the broken, unneeded code and variables.

- The original idea of zil_slog_limit was to reduce the chance of SLOG abuse by a single heavy logger, which increased latency for other (more latency-critical) loggers, by pushing the heavy log out into the main pool instead of the SLOG. Besides a large latency increase for heavy writers, this implementation caused all data to be written twice, since the log records were explicitly prepared for the SLOG. Now that we have an I/O scheduler, it is much more efficient to reduce the priority of a heavy logger's SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE while still leaving them on the SLOG.

- The existing ZIL implementation had a space-efficiency problem when writing large chunks of data into log blocks of limited size; in some cases efficiency dropped to nearly 50%. For a ZIL stored on spinning rust, that also cut log write speed in half, since the head had to uselessly fly over allocated but unwritten areas. This change improves the situation by offloading the problematic operations from z*_log_write() to zil_lwb_commit(), which knows the real state of log block allocation and can split large requests into pieces much more efficiently. As a side effect, it also removes one of the two data copies done by the ZIL code in the WR_COPIED case.

- While there, untangle and unify the code of the z*_log_write() functions. Like zvol_log_write(), zfs_log_write() can now handle writes crossing a block boundary, which may further improve efficiency if the ZPL is made to issue such writes.

Sponsored by: iXsystems, Inc.
Authored by: Alexander Motin <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Prakash Surya <[email protected]>
Reviewed by: Andriy Gapon <[email protected]>
Reviewed by: Steven Hartland <[email protected]>
Reviewed by: Brad Lewis <[email protected]>
Reviewed by: Richard Elling <[email protected]>
Approved by: Robert Mustacchi <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Richard Yao <[email protected]>
Ported-by: Giuseppe Di Natale <[email protected]>
OpenZFS-issue: https://www.illumos.org/issues/7578
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/aeb13ac
Closes #6191
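Before the diff itself, here is a minimal standalone sketch of the two behavioural changes described above. It is plain C with simplified stand-in names (SLOG_BULK, lwb_priority(), split_write()) rather than the real ZFS types; it only mirrors the decision shape of the logic added to zil_lwb_write_init() and the z*_log_write() functions, so treat it as an illustration under those assumptions, not as the patched code.

	/*
	 * Standalone illustration (not ZFS code) of two ideas from this patch:
	 *
	 *  1. SLOG writes stay on the log device, but drop from sync to async
	 *     priority once the bytes in the current commit burst exceed a
	 *     zil_slog_bulk-style threshold.
	 *  2. Large log writes are split at block boundaries, the way the new
	 *     zfs_log_write()/zvol_log_write() compute per-record lengths.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define	SLOG_BULK	(768 * 1024)	/* mirrors the new zil_slog_bulk default */
	#define	MIN2(a, b)	((a) < (b) ? (a) : (b))

	enum prio { PRIO_SYNC_WRITE, PRIO_ASYNC_WRITE };

	/* Pick the I/O priority for a log-block write (simplified). */
	static enum prio
	lwb_priority(int on_slog, uint64_t cur_used)
	{
		/* Main-pool writes and small bursts keep synchronous priority. */
		if (!on_slog || cur_used <= SLOG_BULK)
			return (PRIO_SYNC_WRITE);
		/* A heavy logger keeps using the SLOG, just at lower priority. */
		return (PRIO_ASYNC_WRITE);
	}

	/* Split [off, off + resid) into chunks that never cross a block boundary. */
	static void
	split_write(uint64_t off, uint64_t resid, uint32_t blocksize)
	{
		while (resid > 0) {
			uint64_t phase = off % blocksize;	/* stands in for P2PHASE() */
			uint64_t len = MIN2(blocksize - phase, resid);

			printf("  record: off=%llu len=%llu\n",
			    (unsigned long long)off, (unsigned long long)len);
			off += len;
			resid -= len;
		}
	}

	int
	main(void)
	{
		printf("light burst on SLOG -> %s\n",
		    lwb_priority(1, 256 * 1024) == PRIO_SYNC_WRITE ? "sync" : "async");
		printf("heavy burst on SLOG -> %s\n",
		    lwb_priority(1, 4 * 1024 * 1024) == PRIO_SYNC_WRITE ? "sync" : "async");

		/* A 300 KiB write starting 20 KiB into a 128 KiB block. */
		split_write(20 * 1024, 300 * 1024, 128 * 1024);
		return (0);
	}

In the actual patch the threshold check compares zilog->zl_cur_used against the zil_slog_bulk tunable, and the per-record length uses P2PHASE() on the dataset or zvol block size; the sketch only shows the shape of those decisions.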
Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/zfs_log.c   37
-rw-r--r--  module/zfs/zil.c      118
-rw-r--r--  module/zfs/zio.c       17
-rw-r--r--  module/zfs/zvol.c      49
4 files changed, 107 insertions, 114 deletions
diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c
index ca3699052..8887f037a 100644
--- a/module/zfs/zfs_log.c
+++ b/module/zfs/zfs_log.c
@@ -487,10 +487,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag,
zil_callback_t callback, void *callback_data)
{
+ uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
- boolean_t slogging;
uintptr_t fsync_cnt;
- ssize_t immediate_write_sz;
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
@@ -499,12 +498,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
return;
}
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : (ssize_t)zfs_immediate_write_sz;
-
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
- if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
@@ -518,30 +515,26 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
while (resid) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = resid;
- /*
- * If the write would overflow the largest block then split it.
- */
- if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
- len = SPA_OLD_MAXBLOCKSIZE >> 1;
- else
- len = resid;
+ if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(off, blocksize), resid);
itx = zil_itx_create(txtype, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
+ if (wr_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+ wr_state = WR_NEED_COPY;
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 12a034d5b..6a1f190f5 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -102,6 +102,13 @@ int zil_replay_disable = 0;
*/
int zfs_nocacheflush = 0;
+/*
+ * Limit SLOG write size per commit executed with synchronous priority.
+ * Any writes above that will be executed with lower (asynchronous) priority
+ * to limit potential SLOG device abuse by single active ZIL writer.
+ */
+unsigned long zil_slog_bulk = 768 * 1024;
+
static kmem_cache_t *zil_lwb_cache;
static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
@@ -449,7 +456,8 @@ zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
}
static lwb_t *
-zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite)
+zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
+ boolean_t fastwrite)
{
lwb_t *lwb;
@@ -457,6 +465,7 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite)
lwb->lwb_zilog = zilog;
lwb->lwb_blk = *bp;
lwb->lwb_fastwrite = fastwrite;
+ lwb->lwb_slog = slog;
lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
lwb->lwb_max_txg = txg;
lwb->lwb_zio = NULL;
@@ -542,6 +551,7 @@ zil_create(zilog_t *zilog)
blkptr_t blk;
int error = 0;
boolean_t fastwrite = FALSE;
+ boolean_t slog = FALSE;
/*
* Wait for any previous destroy to complete.
@@ -570,7 +580,7 @@ zil_create(zilog_t *zilog)
}
error = zio_alloc_zil(zilog->zl_spa, txg, &blk,
- ZIL_MIN_BLKSZ, B_TRUE);
+ ZIL_MIN_BLKSZ, &slog);
fastwrite = TRUE;
if (error == 0)
@@ -581,7 +591,7 @@ zil_create(zilog_t *zilog)
* Allocate a log write buffer (lwb) for the first log block.
*/
if (error == 0)
- lwb = zil_alloc_lwb(zilog, &blk, txg, fastwrite);
+ lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
/*
* If we just allocated the first log block, commit our transaction
@@ -915,6 +925,7 @@ static void
zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
{
zbookmark_phys_t zb;
+ zio_priority_t prio;
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
@@ -934,9 +945,13 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 1;
}
+ if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
+ prio = ZIO_PRIORITY_SYNC_WRITE;
+ else
+ prio = ZIO_PRIORITY_ASYNC_WRITE;
lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
- zil_lwb_write_done, lwb, ZIO_PRIORITY_SYNC_WRITE,
+ zil_lwb_write_done, lwb, prio,
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_FASTWRITE, &zb);
}
@@ -958,15 +973,6 @@ uint64_t zil_block_buckets[] = {
};
/*
- * Use the slog as long as the current commit size is less than the
- * limit or the total list size is less than 2X the limit. Limit
- * checking is disabled by setting zil_slog_limit to UINT64_MAX.
- */
-unsigned long zil_slog_limit = 1024 * 1024;
-#define USE_SLOG(zilog) (((zilog)->zl_cur_used < zil_slog_limit) || \
- ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1)))
-
-/*
* Start a log block write and advance to the next log block.
* Calls are serialized.
*/
@@ -981,7 +987,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
uint64_t txg;
uint64_t zil_blksz, wsz;
int i, error;
- boolean_t use_slog;
+ boolean_t slog;
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
zilc = (zil_chain_t *)lwb->lwb_buf;
@@ -1037,10 +1043,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
BP_ZERO(bp);
- use_slog = USE_SLOG(zilog);
- error = zio_alloc_zil(spa, txg, bp, zil_blksz,
- USE_SLOG(zilog));
- if (use_slog) {
+ error = zio_alloc_zil(spa, txg, bp, zil_blksz, &slog);
+ if (slog) {
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
} else {
@@ -1055,7 +1059,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
/*
* Allocate a new log write buffer (lwb).
*/
- nlwb = zil_alloc_lwb(zilog, bp, txg, TRUE);
+ nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
/* Record the block for later vdev flushing */
zil_add_block(zilog, &lwb->lwb_blk);
@@ -1092,45 +1096,53 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
static lwb_t *
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
{
- lr_t *lrc = &itx->itx_lr; /* common log record */
- lr_write_t *lrw = (lr_write_t *)lrc;
+ lr_t *lrcb, *lrc;
+ lr_write_t *lrwb, *lrw;
char *lr_buf;
- uint64_t txg = lrc->lrc_txg;
- uint64_t reclen = lrc->lrc_reclen;
- uint64_t dlen = 0;
+ uint64_t dlen, dnow, lwb_sp, reclen, txg;
if (lwb == NULL)
return (NULL);
ASSERT(lwb->lwb_buf != NULL);
- if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
+ lrc = &itx->itx_lr; /* Common log record inside itx. */
+ lrw = (lr_write_t *)lrc; /* Write log record inside itx. */
+ if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
-
+ } else {
+ dlen = 0;
+ }
+ reclen = lrc->lrc_reclen;
zilog->zl_cur_used += (reclen + dlen);
+ txg = lrc->lrc_txg;
zil_lwb_write_init(zilog, lwb);
+cont:
/*
* If this record won't fit in the current log block, start a new one.
+ * For WR_NEED_COPY optimize layout for minimal number of chunks.
*/
- if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) {
+ lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+ if (reclen > lwb_sp || (reclen + dlen > lwb_sp &&
+ lwb_sp < ZIL_MAX_WASTE_SPACE && (dlen % ZIL_MAX_LOG_DATA == 0 ||
+ lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) {
lwb = zil_lwb_write_start(zilog, lwb);
if (lwb == NULL)
return (NULL);
zil_lwb_write_init(zilog, lwb);
ASSERT(LWB_EMPTY(lwb));
- if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) {
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- return (lwb);
- }
+ lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+ ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);
}
+ dnow = MIN(dlen, lwb_sp - reclen);
lr_buf = lwb->lwb_buf + lwb->lwb_nused;
bcopy(lrc, lr_buf, reclen);
- lrc = (lr_t *)lr_buf;
- lrw = (lr_write_t *)lrc;
+ lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
+ lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
ZIL_STAT_BUMP(zil_itx_count);
@@ -1147,10 +1159,13 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
char *dbuf;
int error;
- if (dlen) {
- ASSERT(itx->itx_wr_state == WR_NEED_COPY);
+ if (itx->itx_wr_state == WR_NEED_COPY) {
dbuf = lr_buf + reclen;
- lrw->lr_common.lrc_reclen += dlen;
+ lrcb->lrc_reclen += dnow;
+ if (lrwb->lr_length > dnow)
+ lrwb->lr_length = dnow;
+ lrw->lr_offset += dnow;
+ lrw->lr_length -= dnow;
ZIL_STAT_BUMP(zil_itx_needcopy_count);
ZIL_STAT_INCR(zil_itx_needcopy_bytes,
lrw->lr_length);
@@ -1162,7 +1177,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
lrw->lr_length);
}
error = zilog->zl_get_data(
- itx->itx_private, lrw, dbuf, lwb->lwb_zio);
+ itx->itx_private, lrwb, dbuf, lwb->lwb_zio);
if (error == EIO) {
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
@@ -1181,12 +1196,18 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
* equal to the itx sequence number because not all transactions
* are synchronous, and sometimes spa_sync() gets there first.
*/
- lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
- lwb->lwb_nused += reclen + dlen;
+ lrcb->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
+ lwb->lwb_nused += reclen + dnow;
lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg);
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
+ dlen -= dnow;
+ if (dlen > 0) {
+ zilog->zl_cur_used += reclen;
+ goto cont;
+ }
+
return (lwb);
}
@@ -1200,7 +1221,6 @@ zil_itx_create(uint64_t txtype, size_t lrsize)
itx = zio_data_buf_alloc(offsetof(itx_t, itx_lr) + lrsize);
itx->itx_lr.lrc_txtype = txtype;
itx->itx_lr.lrc_reclen = lrsize;
- itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */
itx->itx_lr.lrc_seq = 0; /* defensive */
itx->itx_sync = B_TRUE; /* default is synchronous */
itx->itx_callback = NULL;
@@ -1351,11 +1371,8 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
*/
zfs_dbgmsg("zil_itx_assign: missed itx cleanup for "
"txg %llu", itxg->itxg_txg);
- atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
- itxg->itxg_sod = 0;
clean = itxg->itxg_itxs;
}
- ASSERT(itxg->itxg_sod == 0);
itxg->itxg_txg = txg;
itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t),
KM_SLEEP);
@@ -1368,8 +1385,6 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
}
if (itx->itx_sync) {
list_insert_tail(&itxs->i_sync_list, itx);
- atomic_add_64(&zilog->zl_itx_list_sz, itx->itx_sod);
- itxg->itxg_sod += itx->itx_sod;
} else {
avl_tree_t *t = &itxs->i_async_tree;
uint64_t foid =
@@ -1419,8 +1434,6 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
ASSERT3U(itxg->itxg_txg, <=, synced_txg);
ASSERT(itxg->itxg_txg != 0);
ASSERT(zilog->zl_clean_taskq != NULL);
- atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
- itxg->itxg_sod = 0;
clean_me = itxg->itxg_itxs;
itxg->itxg_itxs = NULL;
itxg->itxg_txg = 0;
@@ -1444,7 +1457,6 @@ zil_get_commit_list(zilog_t *zilog)
{
uint64_t otxg, txg;
list_t *commit_list = &zilog->zl_itx_commit_list;
- uint64_t push_sod = 0;
if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */
otxg = ZILTEST_TXG;
@@ -1476,12 +1488,9 @@ zil_get_commit_list(zilog_t *zilog)
ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
- push_sod += itxg->itxg_sod;
- itxg->itxg_sod = 0;
mutex_exit(&itxg->itxg_lock);
}
- atomic_add_64(&zilog->zl_itx_list_sz, -push_sod);
}
/*
@@ -2304,13 +2313,14 @@ EXPORT_SYMBOL(zil_bp_tree_add);
EXPORT_SYMBOL(zil_set_sync);
EXPORT_SYMBOL(zil_set_logbias);
+/* BEGIN CSTYLED */
module_param(zil_replay_disable, int, 0644);
MODULE_PARM_DESC(zil_replay_disable, "Disable intent logging replay");
module_param(zfs_nocacheflush, int, 0644);
MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes");
-/* CSTYLED */
-module_param(zil_slog_limit, ulong, 0644);
-MODULE_PARM_DESC(zil_slog_limit, "Max commit bytes to separate log device");
+module_param(zil_slog_bulk, ulong, 0644);
+MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit");
+/* END CSTYLED */
#endif
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 61eb575ef..acfc49eb5 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -3098,7 +3098,7 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
*/
int
zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
- boolean_t use_slog)
+ boolean_t *slog)
{
int error = 1;
zio_alloc_list_t io_alloc_list;
@@ -3106,17 +3106,16 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
ASSERT(txg > spa_syncing_txg(spa));
metaslab_trace_init(&io_alloc_list);
-
- if (use_slog) {
- error = metaslab_alloc(spa, spa_log_class(spa), size,
- new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
- &io_alloc_list, NULL);
- }
-
- if (error) {
+ error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
+ txg, NULL, METASLAB_FASTWRITE, &io_alloc_list, NULL);
+ if (error == 0) {
+ *slog = TRUE;
+ } else {
error = metaslab_alloc(spa, spa_normal_class(spa), size,
new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
&io_alloc_list, NULL);
+ if (error == 0)
+ *slog = FALSE;
}
metaslab_trace_fini(&io_alloc_list);
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 1a86cd3cd..bf9f48adb 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -611,53 +611,44 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
- boolean_t slogging;
- ssize_t immediate_write_sz;
+ itx_wr_state_t write_state;
if (zil_replaying(zilog, tx))
return;
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : zvol_immediate_write_sz;
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ size >= blocksize && blocksize > zvol_immediate_write_sz)
+ write_state = WR_INDIRECT;
+ else if (sync)
+ write_state = WR_COPIED;
+ else
+ write_state = WR_NEED_COPY;
while (size) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
- itx_wr_state_t write_state;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = size;
- /*
- * Unlike zfs_log_write() we can be called with
- * up to DMU_MAX_ACCESS/2 (5MB) writes.
- */
- if (blocksize > immediate_write_sz && !slogging &&
- size >= blocksize && offset % blocksize == 0) {
- write_state = WR_INDIRECT; /* uses dmu_sync */
- len = blocksize;
- } else if (sync) {
- write_state = WR_COPIED;
- len = MIN(ZIL_MAX_LOG_DATA, size);
- } else {
- write_state = WR_NEED_COPY;
- len = MIN(ZIL_MAX_LOG_DATA, size);
- }
+ if (wr_state == WR_COPIED && size > ZIL_MAX_COPIED_DATA)
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(offset, blocksize), size);
itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
+ if (wr_state == WR_COPIED && dmu_read(zv->zv_objset,
ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+ wr_state = WR_NEED_COPY;
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = ZVOL_OBJ;
lr->lr_offset = offset;
lr->lr_length = len;